File Coverage

lib/MIME/AltWords.pm
Criterion Covered Total %
statement 220 278 79.1
branch 99 126 78.5
condition 45 65 69.2
subroutine 37 38 97.3
pod 1 15 6.6
total 402 522 77.0


line stmt bran cond sub pod time code
1             #
2             # MIME/AltWords.pm -- a lot of fixes on MIME::Words
3             # by pts@fazekas.hu at Fri Jan 20 11:35:08 UTC 2006
4             # -- Fri Mar 31 18:41:14 CEST 2006
5             #
6             # Dat: this assumes Perl v5.8 or later
7             # Dat: run the unit tests with: ./pts-test.pl AltMIMEWords.pm
8             # Dat: see `perldoc MIME::Words' for the original documentation
9             # Dat: a raw string has bytes in 0..255, and it is already encoded in some
10             # encoding
11             # SUXX: perldoc doesn't respect `=encoding utf-8'
12             # lib/MIME/AltWords.pm:30: Unknown command paragraph "=encoding utf8"
13             # !! why `,' in teszt a =?ISO-8859-2?Q?lev=E9lben=2C_t=F6r=F6lhet=F5?= ?? is it standard?
14             # !! document all
15             # !! document test cases
16             # !! MANIFEST etc.
17             #
18              
19             package MIME::AltWords;
20 1     1   13 use v5.8; # Dat: Unicode string support etc.
  1         3  
  1         43  
21 1     1   4 use integer;
  1         1  
  1         9  
22 1     1   18 use strict;
  1         1  
  1         47  
23 1     1   353 use MIME::AltWords0; #use MIME::Words;
  1         2  
  1         19  
24 1     1   4 use MIME::Base64;
  1         1  
  1         33  
25 1     1   825 use Encode;
  1         9948  
  1         77  
26 1     1   7 use warnings;
  1         2  
  1         17  
27 1     1   4 use Exporter;
  1         2  
  1         29  
28 1     1   5 no warnings qw(prototype redefine);
  1         2  
  1         48  
29              
30             # vvv Dat: exports symbols from MIME::AltWords0 by default, works only
31             # without these
32             #use vars qw(@EXPORT_OK %EXPORT_TAGS);
33             #BEGIN {
34             # @EXPORT_OK=@MIME::AltWords0::EXPORT_OK;
35             # %EXPORT_TAGS=%MIME::AltWords0::EXPORT_TAGS;
36             ## die "@EXPORT_OK";
37             #}
38             #Exporter::export_ok_tags('all');
39              
40             =pod
41              
42             =encoding utf8
43              
44             =head1 NAME
45              
46             MIME::AltWords - properly deal with RFC-1522 encoded words
47              
48             =head1 SYNOPSIS
49              
50             The Perl module L is recommended for encoding and
51             decoding MIME words (such as C<=?ISO-8859-2?Q?_=E1ll_e=E1r?=>) found in
52             e-mail message headers (mostly Subject, From and To).
53              
54             L is similar to L in
55             L, but it provides an alternate implementation that follows the
56             MIME specification more carefully, and it is actually compatible with
57             existing mail software (tested with Mutt, Pine, JavaMail and OpenWebmail).
58             L extends the functionality of L (version
59             5.420) by adding more functions and more options to existing functions. The
60             original interface is changed in an upward-compatible way.
61              
62             Before reading further, you should see L to make sure that
63             you understand where this module fits into the grand scheme of things.
64             Go on, do it now. I'll wait.
65              
66             Ready? Ok...
67              
68             use MIME::AltWords qw(:all);
69            
70             ### Decode the string into another string, forgetting the charsets:
71             $decoded = decode_mimewords(
72             'To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= ',
73             );
74            
75             ### Split string into array of decoded [DATA,CHARSET] pairs:
76             @decoded = decode_mimewords(
77             'To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= ',
78             );
79            
80             ### Encode a single unsafe word:
81             $encoded = encode_mimeword("\xABFran\xE7ois\xBB");
82            
83             ### Encode a string, trying to find the unsafe words inside it:
84             $encoded = encode_mimewords("Me and \xABFran\xE7ois\xBB in town");
85              
86              
87              
88             =head1 DESCRIPTION
89              
90             Fellow Americans, you probably won't know what the hell this module
91             is for. Europeans, Russians, et al, you probably do. C<:-)>.
92              
93             For example, here's a valid MIME header you might get:
94              
95             From: =?US-ASCII?Q?Keith_Moore?=
96             To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=
97             CC: =?ISO-8859-1?Q?Andr=E9_?= Pirard
98             Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=
99             =?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=
100             =?US-ASCII?Q?.._cool!?=
101              
102             The fields basically decode to (sorry, I can only approximate the
103             Latin characters with 7 bit sequences /o and 'e):
104              
105             From: Keith Moore
106             To: Keld J/orn Simonsen
107             CC: Andr'e Pirard
108             Subject: If you can read this you understand the example... cool!
109              
110              
111             =head1 PUBLIC INTERFACE
112              
113             =over 4
114              
115             =cut
116              
117              
118 1     1   4 use vars qw($NONPRINT $VERSION);
  1         4  
  1         44  
119              
120             ### The package version, both in 1.23 style *and* usable by MakeMaker:
121             BEGIN { # vvv Dat: MakeMaker needs $VERSION in a separate line
122 1     1   1628 $VERSION = "0.12"
123             }
124              
125             # Dat: MIME::Words has [\x00-\x1F\x7F-\xFF]. We prepare for Unicode.
126             $NONPRINT=qr{(?:[^\x20-\x7E]|=)};
127             #$NONPRINT=qr{(?:[^\x20-\x7E]|[=](?=[?]))}; # Imp: is a bare `=' legal?
128              
129             #** @param $_[0] charset name
130             #** @return MIME canonical charset name
131             sub canonical_charset($) {
132 142     142 0 186 my $S=$_[0];
133 142 100       735 if ($S=~/\A(?:iso-?(?:8859-?)?|8859-?)(\d+)\Z(?!\n)/i) { "ISO-8859-$1" }
  79 100       266  
    100          
134 45         108 elsif ($S=~/\AUTF-?8\Z(?!\n)/i) { "UTF-8" }
135 3         8 elsif ($S=~/\A(?:US-)ASCII\Z(?!\n)/i) { "US-ASCII" }
136 15         35 else { uc $S }
137             }
138              
139             #** @param $_[0] string.
140             #** @param $_[1] hashref. options
141             #** @param $_[2] string to append to
142             sub append_encoded_word($$$) {
143 89     89 0 197 my($word,$opts,$dst)=@_;
144 89 100       231 if ($opts->{Encoding} eq "B") {
    50          
145 25         83 $word=MIME::Base64::encode_base64($word, ''); # Dat: empty EOL, as requested by MIME
146 25         43 $word=~s@\s+@@g;
147 25         33 $$dst.=$word
148             } elsif ($opts->{Encoding} eq "Q") {
149             # Dat: improved MIME::Words::_encode_Q
150 64 100       761 $word =~ s{( )|([_\?\=]|$NONPRINT)}{defined $1 ? "_" # Dat: "_" is an improvement
  258         1284  
151             : sprintf("=%02X", ord($2))}eog;
152 64         122 $$dst.=$word
153 0         0 } else { die }
154             undef
155 89         150 }
156              
157             #use vars qw($old_encode_mimewords);
158             #BEGIN { $old_encode_mimewords=\&MIME::AltWords::encode_mimewords }
159              
160             #** @param $_[0] string. Unicode
161             #** @param $_[1] hashref. options preprocessed by out encode_mimewords()
162             #** @return 0..255 string, ends with $opts->{Space} if $opts->{Shorts}
163             sub encode_mimeword1($$) {
164 82     82 0 134 my($src,$opts)=@_;
165             # Imp: warning if Encode::encode cannot represent character
166 82         87 my $dst="";
167 82         194 my $open="=?$opts->{Charset}?$opts->{Encoding}?";
168 82         79 my $maxlen=64; # Dat: good for a subject line !! 63 or 62
169             # ^^^ Dat: $maxlen=75 works fine in postfix 2.1.5 + pine 4.64
170             # ^^^ Dat: one quoted word shouldn't be too long
171             # ^^^ Dat: Subject: =?B?C?3412341234123412341234123412341234123412341234123412341234?=
172 82         92 $maxlen-=length($open);
173 82 100       192 $maxlen=int(($maxlen+3)/4)*3 if $opts->{Encoding} eq "B"; # Dat: `use integer;' anyway
174             #print STDERR "($src) $maxlen\n";
175              
176 82         210 $src=Encode::encode($opts->{Charset},$src);
177 82 100       2386 if ($opts->{Shorts}) {
178 80         104 my $I=0;
179 80         159 while ($I
180             # Dat: split result for too long consecutive headers (i.e. long Subject: line)
181 87         109 my $J=$I+$maxlen;
182 87 100       175 $J=length($src) if $J>length($src);
183 87 100       210 if ($opts->{Charset} eq "UTF-8") { # Dat: UTF-8 is multibyte, it cannot split anywhere
184 25 50       76 if (substr($src,$J,1)=~y/\x80-\xbf//) { # Dat: a half-cut UTF-8 byte sequence
185 0   0     0 $J-- while $J>$I+1 and substr($src,$J-1,1)=~y/\x80-\xbf//;
186 0 0 0     0 $J-- if $J>$I+1 and substr($src,$J-1,1)=~y/\xc0-\xff//;
187             # ^^^Dat: `$I+1': avoid infinite loop in `$I<$maxlen'
188             }
189             }
190             # Imp: else: fix for other multibyte encodings
191 87         116 $dst.=$open;
192 87         111 my $addlen=-length($dst);
193 87         237 append_encoded_word(substr($src,$I,$J-$I),$opts,\$dst);
194 87         129 $addlen+=length($dst);
195 87 100 100     397 if ($opts->{Encoding} eq "Q" and $addlen>$maxlen and $addlen>3) {
      66        
196             # Dat: too many hex `=..' triplets, become too long
197 7         12 my $K=length($dst);
198 7         54 while ($addlen>$maxlen) {
199 48 100       84 if (substr($dst,$K-3,1)eq"=") { $addlen-=3; $K-=3; $J-- }
  3         4  
  3         4  
  3         7  
200 45         43 else { $addlen--; $K--; $J-- }
  45         36  
  45         80  
201             }
202 7         12 substr($dst,$K)="";
203             # Imp: more efficient, don't process the same data many times
204             }
205 87         117 $dst.="?="; $dst.=$opts->{Space};
  87         100  
206 87         228 $I=$J;
207             }
208 2         3 } else { $dst.=$open; append_encoded_word($src,$opts,\$dst); $dst.="?=" }
  2         5  
  2         2  
209 82         184 $dst
210             }
211              
212             #** @returns the specified string quoted in double quotes. All characters
213             #** are printable ASCII.
214             sub dumpstr($) {
215 0     0 0 0 my $S=$_[0];
216 0         0 $S=~s@(["\\])|([^ -~])@
217 0 0       0 defined $2 ? sprintf("\\x{%X}",ord($2)) # Imp: Unicode chars
218             : "\\$1" # Imp: Unicode chars
219             @ge;
220 0         0 "\"$S\"" #"
221             }
222              
223             #** Splits a string on spaces into lines so no line is longer than the maximum
224             #** (except if there is no space nearby).
225             #** Only the 1st space is converted to $_[2] at each break.
226             #** @param $_[0] string. to split
227             #** @param $_[1] integer. maximum # chars in a line (not counting the terminating newline)
228             #** @param $_[2] chars to replace a space with -- not being added to the
229             #** maximum line length
230             sub split_words($$$) {
231 70     70 0 124 my($S,$maxlen,$nl)=@_;
232 70         70 my $lastpos=0; my $I=0; my $J;
  70         76  
  70         67  
233             #** Position after last space to split at, or $I
234             my $K;
235 70         68 my $ret="";
236 70         69 while (1) { # Imp: faster implementation
237 84         92 $K=$J=$I;
238 84   66     429 $J++ while $J
239 84 100 100     222 while ($K==$I ? ($J
240 2698         3079 my $C=substr($S,$J,1);
241 2698 50       5424 if ($C eq"\n") { $K=$I=++$J }
  0 100       0  
242 65         274 elsif ($C eq " ") { $K=++$J }
243 2633         6915 else { $J++ }
244             }
245 84 100 100     248 if ($K>$I and $J>$I+$maxlen) {
246 18         47 $ret.=substr($S,$I,$K-1-$I);
247 18         30 $ret.=$nl;
248 18         22 $I=$K; # Imp: skip more
249             }
250 84 100       169 if ($J+$maxlen>=length($S)) { $ret.=substr($S,$I); last } # Dat: found last line, no way to split
  70         172  
  70         93  
251             }
252             $ret
253 70         182 }
254              
255             =item encode_mimewords RAW, [OPTS]
256              
257             I
258             Given a RAW string, try to find and encode all "unsafe" sequences
259             of characters:
260              
261             ### Encode a string with some unsafe "words":
262             $encoded = encode_mimewords("Me and \xABFran\xE7ois\xBB");
263              
264             Returns the encoded string.
265             Any arguments past the RAW string are taken to define a hash of options:
266              
267             =over 4
268              
269             =item Charset
270              
271             Encode all unsafe stuff with this charset. Default is 'ISO-8859-1',
272             a.k.a. "Latin-1".
273              
274             =item Encoding
275              
276             The encoding to use, C<"q"> or C<"b">. The default is C<"q">.
277              
278             =item Field
279              
280             Name of the mail field this string will be used in. I
281              
282             =back
283              
284             B this is a stable, tested, widely compatible solution. Strict
285             compliance with RFC-1522 (regarding the use of encoded words in message
286             headers), however, was not proven, but strings returned by this function
287             work properly and identically with Mutt, Pine, JavaMail and OpenWebmail. The
288             recommended way is to use this function instead of C or
289             L.
290              
291             =cut
292              
293              
294             #** Dat: no prototype, because original encode_mimewords() doesn't have it
295             #** a prototype either
296             #** @param $_[0] string|raw string. raw
297             #** @param $_[1].. list of key-value pairs. options
298             #** Keys documented in `perldoc MIME::Words'': Charset, Encoding, Field.
299             #** Charset is now autodetected (not always ISO-8859-1)
300             #** New key: Raw:
301             #** -- 1 (default): true: $_[0] is already a raw, encoded string
302             #** -- 0: false: $_[0] is a Perl unicode string, it needs to be filtered
303             # with Encode::encode(Charset)
304             #** New key: Shorts:
305             #** -- 1 (default for encode_mimewords)
306             #** -- 0 (default for encode_mimeword)
307             #** New key: Whole: (is respected iff Shorts==0)
308             #** -- 1 (default): quote the string as a whole word (not default in the original!)
309             #** -- 0: encode subwords
310             #** New key: Keeptrailnl
311             #** -- 0: treat trailing newline as unprintable
312             #** -- 1 (default, as in original MIME::Words, as expected by Sympla 4):
313             # keep trailing newline at the end
314             #** !! doc more, including new keys
315             sub encode_mimewords_low {
316 72     72 0 858 my($S,%opts)=@_;
317 72 50       155 return undef if !defined($S);
318             # die "no word for encode_mimewords" if !defined $S; # Dat: Sympa calls us with undef
319             #die unless open LOG, ">> /tmp/b.log";
320             #die unless print LOG "=> ".dumpstr($S)."\n";
321             #die unless close LOG;
322             #$opts{Charset}="ISO-8859-1" if !defined $opts{Charset};
323 72 100       200 $opts{Raw}=1 if !defined $opts{Raw};
324 72 100       186 $opts{Charset}=get_best_encode_charset($S) if !defined $opts{Charset};
325 72 50       149 die if !defined $opts{Charset};
326 72         138 $opts{Charset}=canonical_charset($opts{Charset});
327 72 100       165 if ($opts{Raw}) { # Dat: improvement
328 38         45 $opts{Raw}=0;
329             #die if !defined $S;
330 38         105 $S=Encode::decode($opts{Charset}, $S);
331             # ^^^ Dat: better do a Unicode regexp match
332             }
333 71 50       1031 $opts{Encoding}="Q" if !defined $opts{Encoding};
334 71 50       169 $opts{Encoding}="Q" if $opts{Encoding} ne "B"; # Dat: improvement
335 71 100 66     307 $opts{Encoding}="B" if $opts{Encoding} eq "Q" and $opts{Charset} eq "UTF-8";
336             # ^^^ Dat: UTF-8 encoded MimeWords must be in base64 -- quoted-printable is
337             # bad, Pine doesn't display quoted-printable properly
338             # (it assumes ISO-8859-1 for quoted-printable chars), and Mutt does it
339             # the other way;
340             # We need Base64 "=?UTF-8?B?".MIME::Base64::encode_base64("Unicode string")."?=
341 71 100       181 $opts{Shorts}=1 if !defined $opts{Shorts};
342 71 100       157 $opts{Whole}=1 if !defined $opts{Whole};
343 71 50       150 $opts{Space}=" " if !defined $opts{Space}; # Dat: empty =?...?==?...?= in the original MIME::Words
344 71 50       157 $opts{Split}=66 if !defined $opts{Split}; # Dat: split at this many chars
345 71 100       153 $opts{Keeptrailnl}=1 if !defined $opts{Keeptrailnl};
346 71         72 my $toend="";
347 71 100 100     435 $toend=$1 if $opts{Keeptrailnl} and $S=~s@(\n)\Z(?!\n)@@;
348 71 100       206 if (!$opts{Shorts}) {
    100          
349 2         4 $S=encode_mimeword1($S,\%opts)
350             } elsif ($opts{Whole}) {
351 42 100       204 if ($S=~/$NONPRINT/o) {
352 39         90 $S=encode_mimeword1($S,\%opts);
353 39         70 substr($S,-1)=""; # Dat: remove last space
354             }
355 42 50       133 $S=split_words($S, $opts{Split}, "\n ") if $opts{Split};
356             } else {
357 27         38 my $lastpos=0;
358 27         192 while ($S=~/($NONPRINT[^ ]* *)/go) {
359             # ^^^ Dat: having ` *' is a must here, other clients just forget about it
360 41         122 my $I=pos($S)-length($1);
361 41   100     499 $I-- while $I>$lastpos and substr($S,$I-1,1)ne' ';
362 41         64 my $pos=pos($S); my $D;
  41         48  
363 41   33     544 1 while ($pos=pos($S)) and $S=~/ |\Z(?!\n)|($NONPRINT)/goc and defined($D=$1)
      66        
      66        
364             and $S=~/\G[^ ]* */goc;
365 41 50       126 pos($S)=$pos if !defined $D;
366 41         73 my $srclen=pos($S)-$I;
367 41         82 my $src=substr($S,$I,$srclen);
368             ##print STDERR "D($src)(".substr($S,$I+$srclen).")\n";
369 41 50 66     441 if ($I+$srclen!=length($S) and substr($src,-1)eq' ' and $S=~/ |\Z(?!\n)|($NONPRINT)/goc and !defined($1)) {
      66        
      33        
370             ##print STDERR "Strip ending space\n";
371 29         67 substr($src,-1)=""; # Dat: see test case 'ignore_space'
372             }
373             # Dat: now pos($S) is invalid
374 41 50       97 die if 1>length($src);
375             ##print STDERR "E($src)(".substr($S,$I+$srclen).")\n";
376 41         87 my $dst=encode_mimeword1($src,\%opts); # Dat: with trailing space
377             ##print STDERR substr($S,$I,$srclen),";;\n";
378 41         144 substr($S,$I,$srclen)=$dst; # Imp: do with less copying
379 41         305 $lastpos=pos($S)=$I+length($dst);
380             }
381 27 100 66     191 substr($S,-length($opts{Space}))="" if
382             0
383 27 50       82 $S=split_words($S, $opts{Split}, "\n ") if $opts{Split};
384             }
385 71         109 $S.=$toend;
386             #$S=~s@ @\n @g; # !! debug
387             #die unless open LOG, ">> /tmp/b.log";
388             #die unless print LOG "T> ".dumpstr($S)."\n";
389             #die unless close LOG;
390 71         418 $S
391             }
392              
393             #use vars qw($old_encode_mimeword);
394             #BEGIN { $old_encode_mimeword=\&MIME::AltWords::encode_mimeword }
395              
396             =item encode_mimeword RAW, [ENCODING], [CHARSET]
397              
398             I
399             Encode a single RAW "word" that has unsafe characters.
400             The "word" will be encoded in its entirety.
401              
402             ### Encode "<>":
403             $encoded = encode_mimeword("\xABFran\xE7ois\xBB");
404              
405             You may specify the ENCODING (C<"Q"> or C<"B">), which defaults to C<"Q">.
406             You may specify the CHARSET, which defaults to C.
407              
408             =cut
409              
410              
411             #** Dat: no prototype, because original encode_mimeword() doesn't have it
412             #** a prototype either
413             #** @param $_[0] raw string. raw
414             #** @param $_[1] string. Encoding: "Q" or "B", defaults to "Q"
415             #** @param $_[2] string. Charset: defaults to "ISO-8859-1" (as in MIME::Words code,
416             #** not as in its documentation
417             sub encode_mimeword {
418             #sub encode_mimeword($;$$) {
419 2     2 1 8 encode_mimewords($_[0],Encoding=>$_[1],Charset=>$_[2],Shorts=>0);
420             }
421              
422 1     1   6 use vars qw($old_decode_mimewords);
  1         2  
  1         52  
423 1     1   317 BEGIN { $old_decode_mimewords=\&MIME::AltWords::decode_mimewords_wantarray } #### pts #### AltWords.pm adds `_wantarray'
424              
425             #** Dat: function added by #### pts ####
426             #** @param $_[0] a mimewords-encoded string
427             #** @return a canonical encoding name with which the string can be re-encoded
428             sub get_best_decode_charset($) {
429 23     23 0 37 my $encodedstr=$_[0];
430 23         27 my @L;
431 23         50 for my $token (decode_mimewords($encodedstr)) {
432 45   100     184 my $charset=canonical_charset($token->[1] or "");
433 45 100 100     231 push @L, $charset if $charset and (!@L or $L[-1] ne $charset);
      66        
434             }
435 23 100       83 @L=canonical_charset('UTF-8') if @L!=1; # Dat: default, can accomodate any charset
436 23         73 $L[0]
437             }
438              
439             =item decode_mimewords ENCODED, [OPTS...]
440              
441             I
442             Go through the string looking for RFC-1522-style "Q"
443             (quoted-printable, sort of) or "B" (base64) encoding, and decode them.
444              
445             B splits the ENCODED string into a list of decoded
446             C<[DATA, CHARSET]> pairs, and returns that list. Unencoded
447             data are returned in a 1-element array C<[DATA]>, giving an effective
448             CHARSET of C.
449              
450             $enc = '=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= ';
451             foreach (decode_mimewords($enc)) {
452             print "", ($_[1] || 'US-ASCII'), ": ", $_[0], "\n";
453             }
454              
455             B joins the "data" elements of the above list
456             together, and returns that. I it
457             sanitizes the returned string to use a specific, single charset, either
458             specified using the C option, or autodetecting one (ISO-8859-1,
459             ISO-8859-2 or UTF-8) which can accomodate all characters. In case of charset
460             autodetection, C can be used to query the charset
461             autodetected.
462              
463             You might want to see L as an alternate of
464             L.
465              
466             In the event of a syntax error, $@ will be set to a description
467             of the error, but parsing will continue as best as possible (so as to
468             get I back when decoding headers).
469             $@ will be false if no error was detected.
470              
471             Any arguments past the ENCODED string are taken to define a hash of options:
472              
473             =over 4
474              
475             =item Field
476              
477             Name of the mail field this string came from. I
478              
479             =back
480              
481             =cut
482              
483             #** Dat: no prototype, because original decode_mimewords() doesn't have it
484             #** a prototype either
485             #** Dat: it is unsafe to use this without the Raw=>1 option. Search for
486             #** MIME::WordDecoder in `perldoc MIME::Words'.
487             #** @param $_[0] a mimewords-encoded string
488             #** @param $_[1]... list of options (key=>value pairs).
489             #** Keys documented in `perldoc MIME::Words'': Charset, Encoding, Field.
490             #** New key: Raw:
491             #** -- 1 (default): true: return a raw, encoded string. The encoding will
492             #** be Charset, or the one returned by get_best_decode_charset().
493             #** This is an improvement
494             #** by #### pts #### -- the original decode_mimewords() didn't return
495             #** the string in a consistent encoding.
496             #** -- 0: false: $_[0] is a Perl unicode string, it needs to be filtered
497             #** with Encode::encode(Charset)
498             #** New key: Charset: specific charset name for Raw=1 (ignored for Raw=0)
499             sub decode_mimewords_low {
500 50 100   50 0 165 return $old_decode_mimewords->(@_) if wantarray;
501 27         62 my($encodedstr,%opts)=@_;
502 27 100       75 $opts{Raw}=1 if !defined $opts{Raw}; # Dat: default
503 27         34 my $ret='';
504             # vvv Dat: not mutually recursive, because get_best_decode_charset() calls
505             # decode_mimewords() in list context, so this line won't be
506             # reached.
507 27 100 100     126 $opts{Charset}=get_best_decode_charset($encodedstr) if
508             $opts{Raw} and !defined $opts{Charset};
509 27         38 my $S;
510 27         100 for my $token ($old_decode_mimewords->($encodedstr,%opts)) { # Dat: $charset in $token->[1]
511 55 100       185 $S=$token->[1] ? Encode::decode($token->[1], $token->[0]) : $token->[0];
512 55 100       1107 $S=Encode::encode($opts{Charset}, $S) if $opts{Raw};
513 55         5359 $ret.=$S
514             }
515             $ret
516 27         182 }
517              
518 1     1   5 use vars qw(@encode_subject_opts);
  1         1  
  1         38  
519 1     1   83 BEGIN { @encode_subject_opts=(Keeptrailnl=>1, Whole=>1); }
520              
521             #** Dat: function added by #### pts ####
522             #** @param $_[0] String. A mimewords-encoded e-mail subject.
523             #** @return String. better mimewords-encoded
524             sub fix_subject($) {
525 1     1 0 2 my $encodedstr=$_[0];
526 1         4 my $best_charset=get_best_decode_charset($encodedstr);
527 1         4 my $decoded=decode_mimewords($encodedstr, Raw=>0);
528 1         6 encode_mimewords($decoded, Charset=>$best_charset, Raw=>0, @encode_subject_opts);
529             }
530              
531 1     1   4 use vars qw(@encode_addresses_opts);
  1         2  
  1         34  
532 1     1   168 BEGIN { @encode_addresses_opts=(Keeptrailnl=>1, Whole=>0); }
533              
534             #** Dat: function added by #### pts ####
535             #** @param $_[0] String. A mimewords-encoded e-mail address (or address list),
536             #** e.g. "=?ISO-8859-1?Q?foo?= bar , foo2 bar2 "
537             #** @return String. better mimewords-encoded
538             sub fix_addresses($) {
539 3     3 0 8 my $encodedstr=$_[0];
540 3         7 my $best_charset=get_best_decode_charset($encodedstr);
541 3         8 my $decoded=decode_mimewords($encodedstr, Raw=>0);
542             #print STDERR "DE($decoded)\n";
543             #chomp $decoded;
544             #$decoded.=" alma ";
545 3         11 encode_mimewords($decoded, Charset=>$best_charset, Raw=>0, @encode_addresses_opts);
546             }
547              
548             #** Dat: function added by #### pts ####
549             #** @param $_[0] a Unicode string
550             #** @param $_[1] a charset
551             #** @return Boolean: is it encodable?
552             sub is_encodable($$) {
553 48     48 0 94 my $charset=uc$_[1];
554 48         64 my $S=$_[0]; # Dat: must be copied for Encode::encode
555 48 100 66     217 return 1 if $charset eq 'UTF-8' or $charset eq 'UTF8';
556 45         58 eval { Encode::encode($charset, $S, Encode::FB_CROAK) };
  45         154  
557 45 100       1330 $@ ? 0 : 1
558             }
559              
560             #** Please don't include US-ASCII.
561             #** Specify UTF-8 last (because get_best_encode_charset() ignores everything
562             #** after the first UTF-8).
563 1     1   4 use vars qw(@encode_charsets);
  1         8  
  1         35  
564 1     1   295 BEGIN { @encode_charsets=qw(ISO-8859-1 ISO-8859-2 UTF-8) }
565              
566             #** Dat: function added by #### pts ####
567             #** @param $_[0] string to try
568             sub get_best_encode_charset($) {
569 39     39 0 68 for my $charset (@encode_charsets) {
570 48 100       110 return $charset if is_encodable($_[0], $charset);
571             }
572 0         0 return 'UTF-8'
573             }
574              
575             #** Dat: function added by #### pts ####
576             #** @param $_[0] String. Perl Unicode-string
577             #** @param $_[1] encode mode: 'subject' or 'addresses'
578             #** @return String. better mimewords-encoded with the best charset
579             sub encode_unicode($$) {
580 6     6 0 9 my $str=$_[0];
581 6 100       17 my $modeary=($_[1] eq 'addresses' ? \@encode_addresses_opts : \@encode_subject_opts);
582 6         13 my $best_charset=get_best_encode_charset($str);
583 6         18 encode_mimewords($str, Charset=>$best_charset, Raw=>0, @$modeary);
584             }
585              
586             #** Dat: function added by #### pts ####
587             #** @param $_[0] String. Perl 8-bit string, encoded in $charset
588             #** @param $_[1] encode mode: 'subject' or 'addresses'
589             #** @param $_[2] $charset;
590             #** @return String. better mimewords-encoded with the best charset
591             sub encode_8bit($$$) {
592 6     6 0 10 my $str=$_[0];
593 6         9 my $charset=$_[2];
594 6 100       16 my $modeary=($_[1] eq 'addresses' ? \@encode_addresses_opts : \@encode_subject_opts);
595 6         12 $charset=canonical_charset($charset);
596 6         10 my $best_charset;
597 6         9 for my $charset2 (@encode_charsets) {
598 13 100       21 if (canonical_charset($charset2) eq $charset) {
599 5         6 $best_charset=$charset; last
600 5         7 }
601             }
602 6 100       18 $best_charset=canonical_charset(get_best_encode_charset($str)) if
603             !defined $best_charset;
604 6 100       22 ($charset eq $best_charset) ? # Imp: find badly encoded string...
605             encode_mimewords($str, Charset=>$best_charset, Raw=>1, @$modeary) :
606             encode_mimewords(Encode::decode($charset, $str), Charset=>$best_charset, Raw=>0, @$modeary)
607             }
608              
609             # --- Logging...
610              
611 1     1   25 BEGIN { *encode_mimewords=\&encode_mimewords_low }
612 1     1   29 BEGIN { *decode_mimewords=\&decode_mimewords_low }
613              
614 1 50   1   111 BEGIN { if ($main::DEBUG) {
615              
616             #use vars qw($orig_encode_mimewords $orig_decode_mimewords);
617 1     1   5 no warnings qw(prototype redefine);
  1         2  
  1         483  
618              
619             #BEGIN { $orig_encode_mimewords=\&encode_mimewords }
620             *encode_mimewords=sub {
621 0         0 require Carp;
622 0         0 my $dump="\n\n[".scalar(localtime)."] encode_mimewords(@_) = ";
623 0         0 my $ret=&encode_mimewords_low(@_); # Dat: we need `&' to ignore prototype
624 0         0 $dump.=$ret."\n";
625 0 0       0 if (open(my($log), ">> /tmp/em.log")) {
626 0         0 local *STDERR; open STDERR, ">&".fileno($log);
  0         0  
627 0         0 select(STDERR); $|=1; select($log); $|=1;
  0         0  
  0         0  
  0         0  
628 0         0 binmode($log, ':utf8');
629 0         0 print $log $dump;
630 0         0 Carp::cluck("^^^ encode_mimewords() ");
631 0         0 close $log;
632             }
633             $ret
634 0         0 };
  0         0  
635              
636             #BEGIN { $orig_decode_mimewords=\&decode_mimewords }
637             # Imp: copy prototype of original...
638             *decode_mimewords=sub {
639 0         0 require Carp;
640 0         0 my $dump="\n\n[".scalar(localtime)."] decode_mimewords(@_) = ";
641 0 0       0 if (wantarray) {
642 0         0 my @L=(&decode_mimewords_low(@_));
643 0         0 $dump.="@L (ary)\n";
644 0 0       0 if (open(my($log), ">> /tmp/em.log")) {
645 0         0 local *STDERR; open STDERR, ">&".fileno($log);
  0         0  
646 0         0 select(STDERR); $|=1; select($log); $|=1;
  0         0  
  0         0  
  0         0  
647 0         0 binmode($log, ':utf8');
648 0         0 print $log $dump;
649 0         0 Carp::cluck("^^^ decode_mimewords() ");
650 0         0 close $log;
651             }
652             @L
653 0         0 } else {
654 0         0 my $ret=decode_mimewords_low(@_);
655 0         0 $dump.=$ret."\n";
656 0 0       0 if (open(my($log), ">> /tmp/em.log")) {
657 0         0 local *STDERR; open STDERR, ">&".fileno($log);
  0         0  
658 0         0 select(STDERR); $|=1; select($log); $|=1;
  0         0  
  0         0  
  0         0  
659 0         0 binmode($log, ':utf8');
660 0         0 print $log $dump;
661 0         0 Carp::cluck("^^^ decode_mimewords() ");
662 0         0 close $log;
663             }
664             $ret
665 0         0 }
666 0         0 };
667              
668             } }
669              
670             # ---
671              
672             =back
673              
674             =head1 NOTES
675              
676             Exports its principle functions by default, in keeping with
677             L and L.
678              
679             Doesn't depend on L or L. All the shared code is
680             copied to L, which is bundled.
681              
682             See also L for the previous version
683             of L integrated into the Sympa 4 mailing list software.
684              
685             =head1 AUTHOR
686              
687             L was written by
688             PĂ©ter SzabĂł (F) in 2006, and it has been uploaded to CPAN on
689             2006-09-27.
690              
691             L uses code from L (in the file
692             C) and it uses documentation from L
693             (in the files C and C).
694              
695             Here is the original author and copyright information for L.
696              
697             Eryq (F), ZeeGee Software Inc (F).
698             David F. Skoll (dfs@roaringpenguin.com) http://www.roaringpenguin.com
699              
700             All rights reserved. This program is free software; you can redistribute
701             it and/or modify it under the same terms as Perl itself.
702              
703             Thanks also to...
704              
705             Kent Boortz For providing the idea, and the baseline
706             RFC-1522-decoding code!
707             KJJ at PrimeNet For requesting that this be split into
708             its own module.
709             Stephane Barizien For reporting a nasty bug.
710              
711              
712             =head1 VERSION
713              
714             See $VERSION in C .
715              
716             =cut
717              
718             =begin testing
719              
720             is(MIME::AltWords::encode_mimewords("foo bar"), "foo bar");
721             is(MIME::AltWords::encode_mimewords("foo-b\x{E9}r"), "=?ISO-8859-1?Q?foo-b=E9r?=");
722             is(MIME::AltWords::encode_mimeword("foo bar"), "=?ISO-8859-1?Q?foo__bar?="); # Dat: improvement over MIME::AltWords
723             is(MIME::AltWords::encode_mimeword("foo__bar "), "=?ISO-8859-1?Q?foo=5F=5Fbar_?="); # Dat: improvement over MIME::AltWords
724             is(MIME::AltWords::encode_mimewords("az űrkikötő földi adatai",Whole=>0), "az =?ISO-8859-1?Q?=FBrkik=F6t=F5_f=F6ldi?= adatai");
725             is(MIME::AltWords::decode_mimewords("az =?ISO-8859-1?Q?=FBrkik=F6t=F5_f=F6ldi_?= adatai", Charset=>'ISO-8859-2'),
726             "az ?rkiköt? földi adatai");
727             is(MIME::AltWords::decode_mimewords("az =?ISO-8859-2?Q?=FBrkik=F6t=F5_f=F6ldi_?= adatai", Charset=>'ISO-8859-1'),
728             "az ?rkiköt? földi adatai");
729             is(MIME::AltWords::decode_mimewords("az =?ISO-8859-1?Q?=FBrkik=F6t=F5_f=F6ldi_?= adatai", Charset=>'ISO-8859-1'),
730             "az űrkikötő földi adatai");
731             is(MIME::AltWords::decode_mimewords("az =?ISO-8859-2?Q?=FBrkik=F6t=F5_f=F6ldi_?= adatai", Charset=>'ISO-8859-2'),
732             "az űrkikötő földi adatai");
733             is(MIME::AltWords::decode_mimewords("az =?ISO-8859-2?Q?=FBrkik=F6t=F5_f=F6ldi_?= adatai"),
734             "az űrkikötő földi adatai"); # Dat: guess Charset to ISO-8859-2
735             is(MIME::AltWords::encode_mimewords("az űrkikötő földi adatai"), "=?ISO-8859-1?Q?az_=FBrkik=F6t=F5_f=F6ldi_adatai?=");
736             is(MIME::AltWords::encode_mimewords("az űrkikötő földi",Whole=>0), "az =?ISO-8859-1?Q?=FBrkik=F6t=F5_f=F6ldi?=");
737             is(MIME::AltWords::encode_mimewords("az űrkikötő földi"), "=?ISO-8859-1?Q?az_=FBrkik=F6t=F5_f=F6ldi?=");
738             is(MIME::AltWords::encode_mimewords("foo b\x{E1}r",Whole=>0), "foo =?ISO-8859-1?Q?b=E1r?=");
739             is(MIME::AltWords::encode_mimewords("foo b\x{E1}r"), "=?ISO-8859-1?Q?foo__b=E1r?=");
740             is(MIME::AltWords::encode_mimewords( "b\x{F5}r foo",Charset=>"ISO-8859-1",Whole=>0), "=?ISO-8859-1?Q?b=F5r_?= foo");
741             is(MIME::AltWords::encode_mimewords( "b\x{F5}r foo",Charset=>"ISO-8859-1"), "=?ISO-8859-1?Q?b=F5r__foo?=");
742             { my $S; eval { $S=MIME::AltWords::encode_mimewords("b\x{151}r foo",Charset=>"ISO-8859-2"); };
743             ok($@=~/^Wide character /); # Dat: Encode::decode fails
744             }
745             is(MIME::AltWords::encode_mimewords("b\x{151}r foo",Charset=>"ISO-8859-2",Raw=>0,Whole=>0), "=?ISO-8859-2?Q?b=F5r_?= foo");
746             is(MIME::AltWords::encode_mimewords("b\x{151}r foo",Charset=>"ISO-8859-2",Raw=>0), "=?ISO-8859-2?Q?b=F5r__foo?=");
747             is(MIME::AltWords::encode_mimewords("ha a sz\x{F3}t ~ jel",Charset=>"ISO-8859-2",Whole=>0),
748             "ha a =?ISO-8859-2?Q?sz=F3t?= ~ jel",'ignore_space');
749             is(MIME::AltWords::encode_mimewords("ha a sz\x{F3}t ~ jel",Charset=>"ISO-8859-2"),
750             "=?ISO-8859-2?Q?ha_a_sz=F3t_~_jel?=",'ignore_space2');
751             is(MIME::AltWords::encode_mimewords("ha a sz\x{F3}t ",Whole=>0,Charset=>"ISO-8859-1"),
752             "ha a =?ISO-8859-1?Q?sz=F3t_?=", "ends with one space");
753             is(MIME::AltWords::encode_mimewords("ha a sz\x{F3}t ",Whole=>0,Charset=>"ISO-8859-1"),
754             "ha a =?ISO-8859-1?Q?sz=F3t__?=", "ends with two spaces");
755             is(MIME::AltWords::encode_mimewords("ha a sz\x{F3}t ",Charset=>"ISO-8859-1"),
756             "=?ISO-8859-1?Q?ha_a_sz=F3t_?=");
757             is(MIME::AltWords::encode_mimewords("dokumentumok kezel\x{E9}se",Whole=>0), "dokumentumok =?ISO-8859-1?Q?kezel=E9se?=");
758             is(MIME::AltWords::encode_mimewords("dokumentumok kezel\x{E9}se"), "=?ISO-8859-1?Q?dokumentumok_kezel=E9se?=");
759             is(MIME::AltWords::encode_mimewords("tartalmaz\x{F3} dokumentumok kezel\x{E9}se",Whole=>0), "=?ISO-8859-1?Q?tartalmaz=F3?= dokumentumok =?ISO-8859-1?Q?kezel=E9se?=");
760             is(MIME::AltWords::encode_mimewords("tartalmaz\x{F3} dokumentumok kezel\x{E9}se"), "=?ISO-8859-1?Q?tartalmaz=F3_dokumentumok_kezel=E9se?="); # Imp: unify printable and nonprintable to save space
761              
762             is(MIME::AltWords::encode_mimewords("A keresési eredményekb\x{151}l bizonyos ".
763             "szavakat tartalmazó dokumentumok kizárhatók, ha a szót ~ jel el\x{151}zi ".
764             "meg. Figyelem! A kizárás csak akkor eredményez találatot, ha és (& vagy ".
765             "szóköz) kapcsolatban áll egy nem kizárással.",
766             Charset=>"ISO-8859-2",Raw=>0,Whole=>0),
767             "A =?ISO-8859-2?Q?keres=E9si_eredm=E9nyekb=F5l?= bizonyos szavakat\n =?ISO-8859-2?Q?tartalmaz=F3?= dokumentumok\n =?ISO-8859-2?Q?kiz=E1rhat=F3k,?= ha a =?ISO-8859-2?Q?sz=F3t?= ~\n jel =?ISO-8859-2?Q?el=F5zi?= meg. Figyelem! A\n =?ISO-8859-2?Q?kiz=E1r=E1s?= csak akkor\n =?ISO-8859-2?Q?eredm=E9nyez_tal=E1latot,?= ha\n =?ISO-8859-2?Q?=E9s?= (& vagy =?ISO-8859-2?Q?sz=F3k=F6z)?=\n kapcsolatban =?ISO-8859-2?Q?=E1ll?= egy nem =?ISO-8859-2?Q?kiz=E1r=E1ssal.?=");
768              
769             is(MIME::AltWords::encode_mimewords("A keresési eredményekb\x{151}l bizonyos ".
770             "szavakat tartalmazó dokumentumok kizárhatók, ha a szót ~ jel el\x{151}zi ".
771             "meg. Figyelem! A kizárás csak akkor eredményez találatot, ha és (& vagy ".
772             "szóköz) kapcsolatban áll egy nem kizárással.",
773             Charset=>"ISO-8859-2",Raw=>0),
774             "=?ISO-8859-2?Q?A_keres=E9si_eredm=E9nyekb=F5l_bizonyos_szavakat_?=\n =?ISO-8859-2?Q?tartalmaz=F3_dokumentumok_kiz=E1rhat=F3k,_ha_a_sz?=\n =?ISO-8859-2?Q?=F3t_~_jel_el=F5zi_meg._Figyelem!_A_kiz=E1r=E1s_c?=\n =?ISO-8859-2?Q?sak_akkor_eredm=E9nyez_tal=E1latot,_ha_=E9s_(&_va?=\n =?ISO-8859-2?Q?gy_sz=F3k=F6z)_kapcsolatban_=E1ll_egy_nem_kiz=E1r?=\n =?ISO-8859-2?Q?=E1ssal.?=");
775             # vvv Dat: composing with Pine emits:
776             #is(MIME::AltWords::encode_mimewords("A keresési eredményekből bizonyos szavakat tartalmazó dokumentumok kizárhatók, ha a szót ~ jel előzi meg. Figyelem! A kizárás csak akkor eredményez találatot, ha és (& vagy szóköz) kapcsolatban áll egy nem kizárással.",
777             #"=?ISO-8859-2?Q?A_keres=E9si_eredm=E9nyekb=F5l_bizonyos_szavakat?=
778             # =?ISO-8859-2?Q?_tartalmaz=F3_dokumentumok_kiz=E1rhat=F3k=2C_ha_?=
779             # =?ISO-8859-2?Q?a_sz=F3t_~_jel_el=F5zi_meg=2E_Figyelem!_A_?=
780             # =?ISO-8859-2?Q?kiz=E1r=E1s_csak_akkor_eredm=E9nyez_tal=E1latot=2C?=
781             # =?ISO-8859-2?Q?_ha_=E9s_=28&_vagy_sz=F3k=F6z=29_kapcsolatban?=
782             # =?ISO-8859-2?Q?_=E1ll_egy_nem_kiz=E1r=E1ssal=2E?=
783              
784             is(MIME::AltWords::encode_mimewords("Árvízt\x{171}r\x{151} egy tükörfúrógép",
785             Charset=>"UTF-8",Raw=>0,Whole=>0),"=?UTF-8?B?w4FydsOtenTFsXLFkQ==?= egy =?UTF-8?B?dMO8a8O2cmbDunLDs2fDqXA=?=");
786             is(MIME::AltWords::encode_mimewords("Árvízt\x{171}r\x{151} egy tükörfúrógép",
787             Charset=>"UTF-8",Raw=>0),"=?UTF-8?B?w4FydsOtenTFsXLFkSBlZ3kgdMO8a8O2cmbDunLDs2fDqXA=?=");
788              
789             is(MIME::AltWords::split_words("fo ot bar aaaaaaaaab cccccccccdddddd e f g ",8,"xy"),"fo otxybarxyaaaaaaaaabxy cccccccccddddddxye f g ",'split_words()');
790              
791             is(MIME::AltWords::encode_mimewords("Szab\x{F3} P\x{E9}ter ",Charset=>"UTF-8",Raw=>0),
792             "=?UTF-8?B?U3phYsOzIFDDqXRlciA8cHRzQG91ci51bT4=?=");
793             is(MIME::AltWords::encode_mimewords("Szab\x{F3} P\x{E9}ter",Charset=>"UTF-8",Raw=>0),
794             "=?UTF-8?B?U3phYsOzIFDDqXRlcjxwdHNAb3VyLnVtPg==?="); # Dat: this is what compose_mail returns
795             is(MIME::AltWords::encode_mimewords("Szab\x{F3} P\x{E9}ter ",Charset=>"UTF-8",Raw=>0,Whole=>0),
796             "=?UTF-8?B?U3phYsOzIFDDqXRlcg==?= ");
797             is(MIME::AltWords::encode_mimewords("Szab\x{F3} P\x{E9}ter ",Charset=>"UTF-8",Raw=>0,Whole=>0),
798             "=?UTF-8?B?U3phYsOzIFDDqXRlciA=?= ");
799              
800             SKIP: {
801             eval { require Mail::Address };
802             skip "Mail::Address not installed", 1 if $@;
803             my @sender_hdr = Mail::Address->parse("=?UTF-8?B?U3phYsOzIFDDqXRlciA=?= ");
804             my $address=@sender_hdr ? $sender_hdr[0]->address : undef;
805             $address="undef" if !defined $address;
806             is($address, "pts\@our.um");
807             }
808              
809             is(scalar MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("\x{171}",Charset=>"UTF-8",Raw=>0)),
810             "\x{C5}\x{B1}", 'decode_mimewords()');
811             is(scalar MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("\x{171}",Charset=>"UTF-8",Raw=>0),Raw=>0),
812             "\x{171}", 'decode_mimewords()');
813              
814             is(MIME::AltWords::get_best_decode_charset(MIME::AltWords::encode_mimewords("f\x{171} fa t\x{F6}lgy",Charset=>"ISO-8859-2",Raw=>0,Whole=>0)),
815             'ISO-8859-2', 'get_best_decode_charset()');
816             is(MIME::AltWords::get_best_decode_charset("=?ISO-8859-2?Q?f=FB?= fa =?ISO-8859-1?Q?t=F6lgy?="),
817             'UTF-8', 'get_best_decode_charset()');
818             is(MIME::AltWords::get_best_decode_charset("fa"), 'UTF-8', 'get_best_decode_charset()');
819              
820             is(MIME::AltWords::fix_addresses("=?ISO-8859-2?Q?f=FB?= fa =?ISO-8859-1?Q?t=F6lgy?="),
821             "=?UTF-8?B?ZsWx?= fa =?UTF-8?B?dMO2bGd5?=", 'fix_addresses()');
822             is(MIME::AltWords::fix_addresses("=?UTF-8?B?U3phYsOzIFDDqXRlciA8cHRzQG91ci51bT4=?="),
823             "=?UTF-8?B?U3phYsOzIFDDqXRlcg==?= ", 'fix_addresses()');
824             is(MIME::AltWords::fix_addresses("=?UTF-8?B?U3phYsOzIFDDqXRlciA8cHRzQG91ci51bT4K?="),
825             "=?UTF-8?B?U3phYsOzIFDDqXRlcg==?= \n", 'fix_addresses() Keeptrailnl');
826             is(MIME::AltWords::fix_subject("=?ISO-8859-2?Q?f=FB?= fa =?ISO-8859-1?Q?t=F6lgy?="),
827             "=?UTF-8?B?ZsWxIGZhIHTDtmxneQ==?=", 'fix_subject()');
828              
829             is(MIME::AltWords::decode_mimewords("=?UTF-8?B?ZsWx?= fa =?UTF-8?B?dMO2bGd5?=",Raw=>1),
830             "f\x{C5}\x{B1} fa t\x{C3}\x{B6}lgy", 'decode_mimewords()');
831             is(MIME::AltWords::decode_mimewords("=?ISO-8859-2?Q?f=FB?= fa =?ISO-8859-1?Q?t=F6lgy?=",Raw=>0),
832             "f\x{171} fa t\x{F6}lgy", 'decode_mimewords()');
833             #die "".MIME::AltWords::decode_mimewords("=?UTF-8?B?U3phYsOzIFDDqXRlciA8cHRzQG91ci51bT4=?=");
834             is(MIME::AltWords::decode_mimewords("=?UTF-8?B?U3phYsOzIFDDqXRlcg==?=",Raw=>0),
835             "Szabó Péter", 'decode_mimewords()');
836              
837             is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("f\x{E9}l",Charset=>"UTF-8",Raw=>0)),
838             "f\x{C3}\x{A9}l", 'encode+decode mimewords');
839             is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("f\x{E9}l ",Charset=>"UTF-8",Raw=>0)),
840             "f\x{C3}\x{A9}l ", 'encode+decode mimewords');
841             is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("f\x{E9}l ",Charset=>"UTF-8",Raw=>0)),
842             "f\x{C3}\x{A9}l ", 'encode+decode mimewords');
843             is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("f\x{E9}l <",Charset=>"UTF-8",Raw=>0)),
844             "f\x{C3}\x{A9}l <", 'encode+decode mimewords');
845             is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("f\x{E9}l <",Charset=>"UTF-8",Raw=>0,Whole=>0)),
846             "f\x{C3}\x{A9}l <", 'encode+decode mimewords');
847             is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("d\x{E9}l < ",Charset=>"UTF-8",Raw=>0,Whole=>0)),
848             "d\x{C3}\x{A9}l < ", 'encode+decode mimewords');
849             is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("d\x{E9}l < ",Charset=>"UTF-8",Raw=>0,Whole=>0)),
850             "d\x{C3}\x{A9}l < ", 'encode+decode mimewords');
851              
852             is(MIME::AltWords::encode_mimewords("[nekem] pr\x{F3}ba h\x{E1}romra\n",Keeptrailnl=>1),
853             "=?ISO-8859-1?Q?[nekem]_pr=F3ba_h=E1romra?=\n", "encode_mimewords() Keeptrailnl=1");
854             is(MIME::AltWords::encode_mimewords("[nekem] pr\x{F3}ba h\x{E1}romra\n",Keeptrailnl=>0),
855             "=?ISO-8859-1?Q?[nekem]_pr=F3ba_h=E1romra=0A?=", "encode_mimewords() Keeptrailnl=0");
856             is(MIME::AltWords::encode_mimewords("[nekem] pr\x{F3}ba h\x{E1}romra\n"),
857             "=?ISO-8859-1?Q?[nekem]_pr=F3ba_h=E1romra?=\n", "encode_mimewords() Keeptrailnl=default");
858              
859             is(MIME::AltWords::decode_mimewords("=?ISO-8859-2?Q?m=E1sik_pr=F3b=E1cska?=\n"),
860             "m\x{E1}sik pr\x{F3}b\x{E1}cska\n", "decode_mimewords ISO-8859-2");
861              
862             is(MIME::AltWords::get_best_encode_charset("hello\t\n"), "ISO-8859-1", "get_best_encode_charset() ASCII");
863             is(MIME::AltWords::get_best_encode_charset("hell\x{F3}, w\x{F6}rld\t\n"), "ISO-8859-1", "get_best_encode_charset() ISO-8859-1");
864             is(MIME::AltWords::get_best_encode_charset("hell\x{151}, w\x{F6}rld\t\n"), "ISO-8859-2", "get_best_encode_charset() ISO-8859-2");
865             is(MIME::AltWords::get_best_encode_charset("hell\x{151}, w\x{F5}rld\t\n"), "UTF-8", "get_best_encode_charset() UTF-8");
866              
867             is(MIME::AltWords::encode_unicode("[foo] hell\x{151}, w\x{F5}rld\t\n", 'addresses'), "[foo] =?UTF-8?B?aGVsbMWRLCB3w7VybGQJ?=\n", "encode_addresses() UTF-8");
868             is(MIME::AltWords::encode_unicode("[foo] hell\x{151}, w\x{F5}rld\t\n", 'subject'), "=?UTF-8?B?W2Zvb10gaGVsbMWRLCB3w7VybGQJ?=\n", "encode_subject() UTF-8");
869             is(MIME::AltWords::encode_unicode("[foo] hell\x{151}, w\x{F6}rld\t\n", 'subject'), "=?ISO-8859-2?Q?[foo]_hell=F5,_w=F6rld=09?=\n", "encode_subject() ISO-8859-2");
870             is(MIME::AltWords::encode_unicode("[foo] hell\x{F3}, w\x{F6}rld\t\n", 'subject'), "=?ISO-8859-1?Q?[foo]_hell=F3,_w=F6rld=09?=\n", "encode_subject() ISO-8859-1");
871              
872             is(MIME::AltWords::encode_unicode("toast =?FOO-42?Q?bar=35?= me?\n", 'addresses'), "toast =?ISO-8859-1?Q?=3D=3FFOO-42=3FQ=3Fbar=3D35=3F=3D?= me?\n", "encode_addresses() with mimewords");
873             is(MIME::AltWords::encode_unicode("toast =?FOO-42?Q?b\x{E1}r=35?= me?\n", 'addresses'), "toast =?ISO-8859-1?Q?=3D=3FFOO-42=3FQ=3Fb=E1r=3D35=3F=3D?= me?\n", "encode_addresses() with mimewords");
874              
875             is(MIME::AltWords::encode_8bit("[foo] hell\x{C5}\x{91}, w\x{C3}\x{B5}rld\t\n", 'addresses', 'uTf8'), "[foo] =?UTF-8?B?aGVsbMWRLCB3w7VybGQJ?=\n", "encode_8bit() addresses UTF-8");
876             is(MIME::AltWords::encode_8bit("[foo] hell\x{C5}\x{91}, w\x{C3}\x{B5}rld\t\n", 'subject', 'uTf8'), "=?UTF-8?B?W2Zvb10gaGVsbMWRLCB3w7VybGQJ?=\n", "encode_8bit() subject UTF-8");
877             is(MIME::AltWords::encode_8bit("[foo] hell\x{F5}, w\x{F6}rld\t\n", 'subject', '88592'), "=?ISO-8859-2?Q?[foo]_hell=F5,_w=F6rld=09?=\n", "encode_8bit() subject ISO-8859-2");
878             is(MIME::AltWords::encode_8bit("[foo] hell\x{F3}, w\x{F6}rld\t\n", 'subject', '88591'), "=?ISO-8859-1?Q?[foo]_hell=F3,_w=F6rld=09?=\n", "encode_8bit() subject ISO-8859-1");
879             is(MIME::AltWords::encode_8bit("toast =?FOO-42?Q?bar=35?= me?\n", 'addresses', 'us-ascii'), "toast =?ISO-8859-1?Q?=3D=3FFOO-42=3FQ=3Fbar=3D35=3F=3D?= me?\n", "encode_8bit() addresses with mimewords");
880             is(MIME::AltWords::encode_8bit("toast =?FOO-42?Q?b\x{E1}r=35?= me?\n", 'addresses', '88591'), "toast =?ISO-8859-1?Q?=3D=3FFOO-42=3FQ=3Fb=E1r=3D35=3F=3D?= me?\n", "encode_8bit() addresses with mimewords");
881              
882             is(join(' ',map{MIME::AltWords::encode_mimewords($_)}split/ +/,"[nekem] m\x{E1}sik pr\x{F3}b\x{E1}cska\n"),
883             "[nekem] =?ISO-8859-1?Q?m=E1sik?= =?ISO-8859-1?Q?pr=F3b=E1cska?=\n", "encode_mimewords() default ISO-8859-1 a");
884              
885             is(join(' ',map{MIME::AltWords::encode_mimewords($_,Raw=>0)}split/ +/,"[nekem] m\x{E1}sik pr\x{F3}b\x{151}cska\n"),
886             "[nekem] =?ISO-8859-1?Q?m=E1sik?= =?ISO-8859-2?Q?pr=F3b=F5cska?=\n", "encode_mimewords() default ISO-8859-1,2 b");
887              
888             is(MIME::AltWords::encode_mimewords("[nekem] m\x{E1}sik pr\x{F3}b\x{E1}cska\n"),
889             "=?ISO-8859-1?Q?[nekem]_m=E1sik_pr=F3b=E1cska?=\n", "encode_mimewords() default ISO-8859-1 c");
890              
891             is(MIME::AltWords::decode_mimewords('=?US-ASCII?Q?Keith_Moore?= '), 'Keith Moore ', "MIME::Words test case 1");
892             is(MIME::AltWords::decode_mimewords('=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= '), 'Keld Jřrn Simonsen ', "MIME::Words test case 2");
893             is(MIME::AltWords::decode_mimewords('=?ISO-8859-1?Q?Andr=E9_?= Pirard '), 'André Pirard ', "MIME::Words test case 3");
894             is(MIME::AltWords::decode_mimewords('=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?==?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?==?US-ASCII?Q?.._cool!?='), 'If you can read this you understand the example... cool!', "MIME::Words test case 4");
895             is(MIME::AltWords::encode_mimewords("\xABFran\xE7ois\xBB"), '=?ISO-8859-1?Q?=ABFran=E7ois=BB?=', "MIME::Words test case 5");
896             is(MIME::AltWords::encode_mimewords("Me and \xABFran\xE7ois\xBB at the beach"), '=?ISO-8859-1?Q?Me_and_=ABFran=E7ois=BB_at_the_beach?=', "MIME::Words test case 6");
897             # vvv !! is this correct (space after \n)?
898             is(MIME::AltWords::encode_mimewords("Me and \xABFran\xE7ois\xBB, down at the beach\nwith Dave "), "=?ISO-8859-1?Q?Me_and_=ABFran=E7ois=BB,_down_at_the_beach=0Awith?=\n =?ISO-8859-1?Q?_Dave_?=", "MIME::Words test case 7");
899             is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("Me and \xABFran\xE7ois\xBB, down at the beach\nwith Dave ")), "Me and \xABFran\xE7ois\xBB, down at the beach\nwith Dave ", "MIME::Words test case 8");
900              
901             =cut
902              
903             1