? Makefile
? blib
? encode_mimewords.patch
? pm_to_blib
? testout
Index: ChangeLog
===================================================================
RCS file: /home/cvs/src/perl-MIME-tools/ChangeLog,v
retrieving revision 1.1.1.1
retrieving revision 1.2
diff -u -r1.1.1.1 -r1.2
--- ChangeLog 2004/10/28 17:12:16 1.1.1.1
+++ ChangeLog 2004/10/28 18:05:22 1.2
@@ -1,3 +1,7 @@
+2004-10-28 Alexey Mahotkin <alexm:eternal-eval.com>
+
+ * Made encode_mimewords fully compliant to RFC1522
+
2004-10-27 David F. Skoll <dfs@roaringpenguin.com>
* VERSION 5.415 RELEASED
Index: lib/MIME/Words.pm
===================================================================
RCS file: /home/cvs/src/perl-MIME-tools/lib/MIME/Words.pm,v
retrieving revision 1.1.1.1
retrieving revision 1.4
diff -u -r1.1.1.1 -r1.4
--- lib/MIME/Words.pm 2004/10/28 17:12:16 1.1.1.1
+++ lib/MIME/Words.pm 2004/10/28 18:05:22 1.4
@@ -267,7 +267,7 @@
I<Function.>
Given a RAW string, try to find and encode all "unsafe" sequences
-of characters:
+of characters, according to RFC1522:
### Encode a string with some unsafe "words":
$encoded = encode_mimewords("Me and \xABFran\xE7ois\xBB");
@@ -292,13 +292,6 @@
=back
-B<Warning:> this is a quick-and-dirty solution, intended for character
-sets which overlap ASCII. B<It does not comply with the RFC-1522
-rules regarding the use of encoded words in message headers>.
-You may want to roll your own variant,
-using C<encoded_mimeword()>, for your application.
-I<Thanks to Jan Kasprzak for reminding me about this problem.>
-
=cut
sub encode_mimewords {
@@ -306,17 +299,60 @@
my $charset = $params{Charset} || 'ISO-8859-1';
my $encoding = lc($params{Encoding} || 'q');
- ### Encode any "words" with unsafe characters.
- ### We limit such words to 18 characters, to guarantee that the
- ### worst-case encoding give us no more than 54 + ~10 < 75 characters
- my $word;
- $rawstr =~ s{([a-zA-Z0-9\x7F-\xFF]{1,18})}{ ### get next "word"
- $word = $1;
- (($word !~ /[$NONPRINT]/o)
- ? $word ### no unsafe chars
- : encode_mimeword($word, $encoding, $charset)); ### has unsafe chars
- }xeg;
- $rawstr;
+ my $safe_chars = "-+*/=_!A-Za-z0-9";
+ my $re = "[$safe_chars]";
+ my $nre = "[^$safe_chars]";
+
+ my $result = "";
+ my $current = $rawstr;
+
+ while ($current ne "") {
+ if ($current =~ s/^(([$safe_chars]|\s)+)//) {
+ # safe chars (w/spaces) are handled as-is
+ $result .= $1;
+ next;
+ } elsif ($current =~ s/^(([^$safe_chars]|\s)+)//) {
+ # unsafe chars (w/spaces) are encoded
+ my $unsafe_chars = $1;
+ CHUNK75:
+ while ($unsafe_chars ne "") {
+
+ my $full_len = length($unsafe_chars);
+ my $len = 1;
+ my $prev_encoded = "";
+
+ while ($len <= $full_len) {
+ # we try to encode next beginning of unsafe string
+ my $possible = substr $unsafe_chars, 0, $len;
+ my $encoded = encode_mimeword($possible, $encoding, $charset);
+
+ if (length($encoded) < 75) {
+ # if it could be encoded in specified maximum length, try
+ # bigger beginning...
+ $prev_encoded = $encoded;
+ } else {
+ #
+ # ...otherwise, add encoded chunk which still fits, and
+ # restart with rest of unsafe string
+ $result .= $prev_encoded;
+ $prev_encoded = "";
+ substr $unsafe_chars, 0, $len - 1, "";
+ next CHUNK75;
+ }
+
+ # if we have reached the end of the string, add final
+ # encoded chunk
+ if ($len == $full_len) {
+ $result .= $encoded;
+ last CHUNK75;
+ }
+
+ $len++;
+ }
+ }
+ }
+ }
+ return $result;
}
1;
@@ -331,10 +367,11 @@
MIME::Base64 and MIME::QuotedPrint.
-=head1 AUTHOR
+=head1 AUTHORS
Eryq (F<eryq@zeegee.com>), ZeeGee Software Inc (F<
http://www.zeegee.com>).
David F. Skoll (dfs@roaringpenguin.com)
http://www.roaringpenguin.com
+Alexey Mahotkin (alexm:eternal-eval.com)
http://eternal-eval.com/
All rights reserved. This program is free software; you can redistribute
it and/or modify it under the same terms as Perl itself.
Index: t/Words.t
===================================================================
RCS file: /home/cvs/src/perl-MIME-tools/t/Words.t,v
retrieving revision 1.1.1.1
retrieving revision 1.4
diff -u -r1.1.1.1 -r1.4
--- t/Words.t 2004/10/28 17:12:16 1.1.1.1
+++ t/Words.t 2004/10/28 18:08:52 1.4
@@ -4,7 +4,7 @@
use ExtUtils::TBone;
use MIME::QuotedPrint qw(decode_qp);
-use MIME::Words qw(decode_mimewords);
+use MIME::Words qw(decode_mimewords encode_mimewords);
#------------------------------------------------------------
# BEGIN
@@ -12,8 +12,22 @@
# Create checker:
my $T = typical ExtUtils::TBone;
-$T->begin(10);
+# we test each non-empty line in subjects.txt
+open WORDS, "<testin/subjects.txt" or die "open: $!";
+my $subjects_count = 0;
+while (my $line = <WORDS>) {
+ next if ($line =~ /^\s*$/);
+ $subjects_count++;
+}
+close WORDS;
+
+
+# for each line we do 4 tests:
+# whether each line correctly encodes/decodes, twice for each encoding
+# whethere encoded chunks are smaller than 75 bytes
+$T->begin(10 + $subjects_count * 4);
+
{
local($/) = '';
open WORDS, "<testin/words.txt" or die "open: $!";
@@ -36,6 +50,47 @@
}
close WORDS;
}
+
+{
+ open WORDS, "<testin/subjects.txt" or die "open: $!";
+ while (my $line = <WORDS>) {
+ chomp $line;
+ next if ($line =~ /^\s*$/);
+
+ foreach my $encoding (qw(q b)) {
+ my $encoded = encode_mimewords($line,
+ Encoding => $encoding,
+ );
+ my $decoded = decode_mimewords($encoded,
+ Encoding => $encoding,
+ );
+ if ($line eq $decoded) {
+ # warn "ok: $line\nencoded: $encoded\ndecoded: $decoded\n";
+ $T->ok( 1 );
+ } else {
+
+ warn "in: $line\nencoded: $encoded\ndecoded: $decoded\n";
+
+ $T->ok( 0 );
+ }
+
+ my $failed_token = "";
+ while ($encoded =~ /(=\?[^\?]+\?[bq]\?[^\?]+\?=)/ig) {
+ if (length($1) > 75) {
+ $failed_token = $1;
+ }
+ }
+ if ($failed_token ne "") {
+ warn "failed_token: '$failed_token'";
+ $T->ok(0);
+ } else {
+ $T->ok(1);
+ }
+ }
+ }
+
+ close WORDS;
+}
# Done!
$T->end;
Index: testin/subjects.txt
===================================================================
RCS file: subjects.txt
diff -N subjects.txt
--- /dev/null Wed May 6 00:32:27 1998
+++ /tmp/cvsfOgeLU Thu Oct 28 22:08:59 2004
@@ -0,0 +1,19 @@
+Á
+ÁÂ
+ÁÂ×
+ÁÂ×Ç
+ÁÂ×ÇÄ
+ÍÁÍÁ ÍÙÌÁ ÒÁÍÕ
+
+a
+ab
+abc
+abcd
+hello world
+
+hello ÒÕÓÓËÉÊ
+hello ÒÕÓÓËÉÊ hello
+
+ÒÕÓÓËÉÊ a ÒÕÓÓËÉÊ b ÒÕÓÓËÉÊ c ÒÕÓÓËÉÊ d ÒÕÓÓËÉÊ e ÒÕÓÓËÉÊ
+ËÁÖÄÙÊ ÏÈÏÔÎÉË ÖÅÌÁÅÔ ÚÎÁÔØ, ÇÄÅ ÓÉÄÉÔ ÆÁÚÁÎ ÓßÅÛØ ÅÝ£ ÜÔÉÈ ÍÑÇËÉÈ ÆÒÁÎÃÕÚÓËÉÊ ÂÕÌÏÞÅË, ÄÁ ×ÙÐÅÊ ÞÁÀ
+