Subject: | MIME::Words::encode_mimewords splits one character into separate MIME blocks |
MIME::Words::encode_mimewords can split a single character across separate MIME blocks, because it splits the string every 18 bytes.
CJKT codecs have multibyte characters, so this causes trouble (missing characters, unreadable subjects, etc.)
in MUAs that decode the encoded strings.
The attached patch takes effect when a multibyte (neither us-ascii nor iso-8859-*)
character string is given. It works as follows:
1. decode the string into UTF-8
2. separate it into chunks of 18 characters (not bytes)
for each chunk:
3. if all characters are in \x00-\xff (single-byte chars), output them as is.
4. otherwise, encode them back into the original charset and MIME-encode them.
--- lib/MIME/Words.pm 2003-06-07 08:41:55.000000000 +0900
+++ my/lib/MIME/Words.pm 2005-05-31 13:24:04.689096392 +0900
@@ -307,16 +306,32 @@
my $charset = $params{Charset} || 'ISO-8859-1';
my $encoding = lc($params{Encoding} || 'q');
- ### Encode any "words" with unsafe characters.
- ### We limit such words to 18 characters, to guarantee that the
- ### worst-case encoding give us no more than 54 + ~10 < 75 characters
- my $word;
- $rawstr =~ s{([a-zA-Z0-9\x7F-\xFF]{1,18})}{ ### get next "word"
- $word = $1;
- (($word !~ /[$NONPRINT]/o)
- ? $word ### no unsafe chars
- : encode_mimeword($word, $encoding, $charset)); ### has unsafe chars
- }xeg;
+ if ($charset =~ /^iso-8859-\d+$/i || lc($charset) == 'us-ascii') {
+ ### Encode any "words" with unsafe characters.
+ ### We limit such words to 18 characters, to guarantee that the
+ ### worst-case encoding give us no more than 54 + ~10 < 75 characters
+ my $word;
+ $rawstr =~ s{([a-zA-Z0-9\x7F-\xFF]{1,18})}{ ### get next "word"
+ $word = $1;
+ (($word !~ /[$NONPRINT]/o)
+ ? $word ### no unsafe chars
+ : encode_mimeword($word, $encoding, $charset)); ### has unsafe chars
+ }xeg;
+ } else {
+ ### Encode "words" which contains multibyte characters.
+ use Encode;
+ my $unistr = Encode::decode($charset, $rawstr);
+ my $word;
+ $unistr =~ s{(.{1,18})}{ ### get next "word"
+ $word = $1;
+ (($word =~ /^[a-zA-Z0-9\x7F-\xFF]+$/o) ### is printable?
+ ? Encode::encode('iso-8859-1', $word) ### decode single-byte chars
+ : encode_mimeword(Encode::encode($charset, $word),
+ $encoding, $charset)); ### has unsafe or multibyte char
+ }xeg;
+ $rawstr = $unistr;
+ }
+
$rawstr;
}