File Coverage

lib/MIME/AltWords.pm

Criterion	Covered	Total	%
statement	219	277	79.0
branch	101	126	80.1
condition	47	65	72.3
subroutine	37	38	97.3
pod	1	15	6.6
total	405	521	77.7

line	stmt	bran	cond	sub	pod	time	code
1							#
2							# MIME/AltWords.pm -- a lot of fixes on MIME::Words
3							# by pts@fazekas.hu at Fri Jan 20 11:35:08 UTC 2006
4							# -- Fri Mar 31 18:41:14 CEST 2006
5							#
6							# Dat: this assumes Perl v5.8 or later
7							# Dat: run the unit tests with: ./pts-test.pl AltMIMEWords.pm
8							# Dat: see `perldoc MIME::Words' for the original documentation
9							# Dat: a raw string has bytes in 0..255, and it is already encoded in some
10							# encoding
11							# SUXX: perldoc doesn't respect `=encoding utf-8'
12							# lib/MIME/AltWords.pm:30: Unknown command paragraph "=encoding utf8"
13							# !! why `,' in teszt a =?ISO-8859-2?Q?lev=E9lben=2C_t=F6r=F6lhet=F5?= ?? is it standard?
14							# !! document all
15							# !! document test cases
16							# !! MANIFEST etc.
17							#
18
19							package MIME::AltWords;
20	1			1		11	use v5.8; # Dat: Unicode string support etc.
	1					3
21	1			1		4	use integer;
	1					1
	1					5
22	1			1		17	use strict;
	1					1
	1					19
23	1			1		247	use MIME::AltWords0; #use MIME::Words;
	1					2
	1					23
24	1			1		4	use MIME::Base64;
	1					1
	1					31
25	1			1		325	use Encode;
	1					7086
	1					56
26	1			1		6	use warnings;
	1					1
	1					17
27	1			1		4	use Exporter;
	1					1
	1					26
28	1			1		3	no warnings qw(prototype redefine);
	1					2
	1					34
29
30							# vvv Dat: exports symbols from MIME::AltWords0 by default, works only
31							# without these
32							#use vars qw(@EXPORT_OK %EXPORT_TAGS);
33							#BEGIN {
34							# @EXPORT_OK=@MIME::AltWords0::EXPORT_OK;
35							# %EXPORT_TAGS=%MIME::AltWords0::EXPORT_TAGS;
36							## die "@EXPORT_OK";
37							#}
38							#Exporter::export_ok_tags('all');
39
40							=pod
41
42							=encoding utf8
43
44							=head1 NAME
45
46							MIME::AltWords - properly deal with RFC-1522 encoded words
47
48							=head1 SYNOPSIS
49
50							The Perl module L is recommended for encoding and
51							decoding MIME words (such as C<=?ISO-8859-2?Q?_=E1ll_e=E1r?=>) found in
52							e-mail message headers (mostly Subject, From and To).
53
54							L is similar to L in
55							L, but it provides an alternate implementation that follows the
56							MIME specification more carefully, and it is actually compatible with
57							existing mail software (tested with Mutt, Pine, JavaMail and OpenWebmail).
58							L extends the functionality of L (version
59							5.420) by adding more functions and more options to existing functions. The
60							original interface is changed in an upward-compatible way.
61
62							Before reading further, you should see L to make sure that
63							you understand where this module fits into the grand scheme of things.
64							Go on, do it now. I'll wait.
65
66							Ready? Ok...
67
68							use MIME::AltWords qw(:all);
69
70							### Decode the string into another string, forgetting the charsets:
71							$decoded = decode_mimewords(
72							'To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= ',
73							);
74
75							### Split string into array of decoded [DATA,CHARSET] pairs:
76							@decoded = decode_mimewords(
77							'To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= ',
78							);
79
80							### Encode a single unsafe word:
81							$encoded = encode_mimeword("\xABFran\xE7ois\xBB");
82
83							### Encode a string, trying to find the unsafe words inside it:
84							$encoded = encode_mimewords("Me and \xABFran\xE7ois\xBB in town");
85
86
87
88							=head1 DESCRIPTION
89
90							Fellow Americans, you probably won't know what the hell this module
91							is for. Europeans, Russians, et al, you probably do. C<:-)>.
92
93							For example, here's a valid MIME header you might get:
94
95							From: =?US-ASCII?Q?Keith_Moore?=
96							To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=
97							CC: =?ISO-8859-1?Q?Andr=E9_?= Pirard
98							Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=
99							=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=
100							=?US-ASCII?Q?.._cool!?=
101
102							The fields basically decode to (sorry, I can only approximate the
103							Latin characters with 7 bit sequences /o and 'e):
104
105							From: Keith Moore
106							To: Keld J/orn Simonsen
107							CC: Andr'e Pirard
108							Subject: If you can read this you understand the example... cool!
109
110
111							=head1 PUBLIC INTERFACE
112
113							=over 4
114
115							=cut
116
117
118	1			1		4	use vars qw($NONPRINT $VERSION);
	1					1
	1					37
119
120							### The package version, both in 1.23 style and usable by MakeMaker:
121							BEGIN { # vvv Dat: MakeMaker needs $VERSION in a separate line
122	1			1		1311	$VERSION = "0.13"
123							}
124
125							# Dat: MIME::Words has [\x00-\x1F\x7F-\xFF]. We prepare for Unicode.
126							$NONPRINT=qr{(?:[^\x20-\x7E]\|=)};
127							#$NONPRINT=qr{(?:[^\x20-\x7E]\|[=](?=[?]))}; # Imp: is a bare `=' legal?
128
129							#** @param $_[0] charset name
130							#** @return MIME canonical charset name
131							sub canonical_charset($) {
132	150			150	0	148	my $S=$_[0];
133	150	100				652	if ($S=~/\A(?:iso-?(?:8859-?)?\|8859-?)(\d+)\Z(?!\n)/i) { "ISO-8859-$1" }
	79	100				217
		100
134	45					79	elsif ($S=~/\AUTF-?8\Z(?!\n)/i) { "UTF-8" }
135	3					4	elsif ($S=~/\A(?:US-)ASCII\Z(?!\n)/i) { "US-ASCII" }
136	23					43	else { uc $S }
137							}
138
139							#** @param $_[0] string.
140							#** @param $_[1] hashref. options
141							#** @param $_[2] string to append to
142							sub append_encoded_word($$$) {
143	97			97	0	193	my($word,$opts,$dst)=@_;
144	97	100				170	if ($opts->{Encoding} eq "B") {
		50
145	29					64	$word=MIME::Base64::encode_base64($word, ''); # Dat: empty EOL, as requested by MIME
146	29					46	$word=~s@\s+@@g;
147	29					39	$$dst.=$word
148							} elsif ($opts->{Encoding} eq "Q") {
149							# Dat: improved MIME::Words::_encode_Q
150	68	100				317	$word =~ s{( )\|([_\?\=]\|$NONPRINT)}{defined $1 ? "_" # Dat: "_" is an improvement
	306					1109
151							: sprintf("=%02X", ord($2))}eog;
152	68					129	$$dst.=$word
153	0					0	} else { die }
154							undef
155	97					123	}
156
157							#use vars qw($old_encode_mimewords);
158							#BEGIN { $old_encode_mimewords=\&MIME::AltWords::encode_mimewords }
159
160							#** @param $_[0] string. Unicode
161							#** @param $_[1] hashref. options preprocessed by out encode_mimewords()
162							#** @return 0..255 string, ends with $opts->{Space} if $opts->{Shorts}
163							sub encode_mimeword1($$) {
164	90			90	0	144	my($src,$opts)=@_;
165							# Imp: warning if Encode::encode cannot represent character
166	90					93	my $dst="";
167	90					165	my $open="=?$opts->{Charset}?$opts->{Encoding}?";
168	90					85	my $maxlen=64; # Dat: good for a subject line !! 63 or 62
169							# ^^^ Dat: $maxlen=75 works fine in postfix 2.1.5 + pine 4.64
170							# ^^^ Dat: one quoted word shouldn't be too long
171							# ^^^ Dat: Subject: =?B?C?3412341234123412341234123412341234123412341234123412341234?=
172	90					93	$maxlen-=length($open);
173	90	100				140	$maxlen=int(($maxlen+3)/4)*3 if $opts->{Encoding} eq "B"; # Dat: `use integer;' anyway
174							#print STDERR "($src) $maxlen\n";
175
176	90					167	$src=Encode::encode($opts->{Charset},$src);
177	90	100				2860	if ($opts->{Shorts}) {
178	84					85	my $I=0;
179	84					133	while ($I
180							# Dat: split result for too long consecutive headers (i.e. long Subject: line)
181	91					92	my $J=$I+$maxlen;
182	91	100				124	$J=length($src) if $J>length($src);
183	91	100				135	if ($opts->{Charset} eq "UTF-8") { # Dat: UTF-8 is multibyte, it cannot split anywhere
184	25	50				52	if (substr($src,$J,1)=~y/\x80-\xbf//) { # Dat: a half-cut UTF-8 byte sequence
185	0		0			0	$J-- while $J>$I+1 and substr($src,$J-1,1)=~y/\x80-\xbf//;
186	0	0	0			0	$J-- if $J>$I+1 and substr($src,$J-1,1)=~y/\xc0-\xff//;
187							# ^^^Dat: `$I+1': avoid infinite loop in `$I<$maxlen'
188							}
189							}
190							# Imp: else: fix for other multibyte encodings
191	91					131	$dst.=$open;
192	91					91	my $addlen=-length($dst);
193	91					213	append_encoded_word(substr($src,$I,$J-$I),$opts,\$dst);
194	91					126	$addlen+=length($dst);
195	91	100	100			247	if ($opts->{Encoding} eq "Q" and $addlen>$maxlen and $addlen>3) {
			66
196							# Dat: too many hex `=..' triplets, become too long
197	7					9	my $K=length($dst);
198	7					15	while ($addlen>$maxlen) {
199	48	100				69	if (substr($dst,$K-3,1)eq"=") { $addlen-=3; $K-=3; $J-- }
	3					5
	3					4
	3					5
200	45					40	else { $addlen--; $K--; $J-- }
	45					40
	45					57
201							}
202	7					12	substr($dst,$K)="";
203							# Imp: more efficient, don't process the same data many times
204							}
205	91					104	$dst.="?="; $dst.=$opts->{Space};
	91					89
206	91					170	$I=$J;
207							}
208	6					10	} else { $dst.=$open; append_encoded_word($src,$opts,\$dst); $dst.="?=" }
	6					11
	6					8
209	90					132	$dst
210							}
211
212							#** @returns the specified string quoted in double quotes. All characters
213							#** are printable ASCII.
214							sub dumpstr($) {
215	0			0	0	0	my $S=$_[0];
216	0					0	$S=~s@(["\\])\|([^ -~])@
217	0	0				0	defined $2 ? sprintf("\\x{%X}",ord($2)) # Imp: Unicode chars
218							: "\\$1" # Imp: Unicode chars
219							@ge;
220	0					0	"\"$S\"" #"
221							}
222
223							#** Splits a string on spaces into lines so no line is longer than the maximum
224							#** (except if there is no space nearby).
225							#** Only the 1st space is converted to $_[2] at each break.
226							#** @param $_[0] string. to split
227							#** @param $_[1] integer. maximum # chars in a line (not counting the terminating newline)
228							#** @param $_[2] chars to replace a space with -- not being added to the
229							#** maximum line length
230							sub split_words($$$) {
231	74			74	0	119	my($S,$maxlen,$nl)=@_;
232	74					65	my $lastpos=0; my $I=0; my $J;
	74					71
	74					70
233							#** Position after last space to split at, or $I
234							my $K;
235	74					75	my $ret="";
236	74					95	while (1) { # Imp: faster implementation
237	88					74	$K=$J=$I;
238	88		66			334	$J++ while $J
239	88	100	100			168	while ($K==$I ? ($J
240	2878					2708	my $C=substr($S,$J,1);
241	2878	50				3528	if ($C eq"\n") { $K=$I=++$J }
	0	100				0
242	65					140	elsif ($C eq " ") { $K=++$J }
243	2813					3941	else { $J++ }
244							}
245	88	100	100			171	if ($K>$I and $J>$I+$maxlen) {
246	18					40	$ret.=substr($S,$I,$K-1-$I);
247	18					18	$ret.=$nl;
248	18					21	$I=$K; # Imp: skip more
249							}
250	88	100				121	if ($J+$maxlen>=length($S)) { $ret.=substr($S,$I); last } # Dat: found last line, no way to split
	74					124
	74					84
251							}
252							$ret
253	74					131	}
254
255							=item encode_mimewords RAW, [OPTS]
256
257							I
258							Given a RAW string, try to find and encode all "unsafe" sequences
259							of characters:
260
261							### Encode a string with some unsafe "words":
262							$encoded = encode_mimewords("Me and \xABFran\xE7ois\xBB");
263
264							Returns the encoded string.
265							Any arguments past the RAW string are taken to define a hash of options:
266
267							=over 4
268
269							=item Charset
270
271							Encode all unsafe stuff with this charset. Default is 'ISO-8859-1',
272							a.k.a. "Latin-1".
273
274							=item Encoding
275
276							The encoding to use, C<"q"> or C<"b">. The default is C<"q">.
277
278							=item Field
279
280							Name of the mail field this string will be used in. I
281
282							=back
283
284							B this is a stable, tested, widely compatible solution. Strict
285							compliance with RFC-1522 (regarding the use of encoded words in message
286							headers), however, was not proven, but strings returned by this function
287							work properly and identically with Mutt, Pine, JavaMail and OpenWebmail. The
288							recommended way is to use this function instead of C or
289							L.
290
291							=cut
292
293
294							#** Dat: no prototype, because original encode_mimewords() doesn't have it
295							#** a prototype either
296							#** @param $_[0] string\|raw string. raw
297							#** @param $_[1].. list of key-value pairs. options
298							#** Keys documented in `perldoc MIME::Words'': Charset, Encoding, Field.
299							#** Charset is now autodetected (not always ISO-8859-1)
300							#** New key: Raw:
301							#** -- 1 (default): true: $_[0] is already a raw, encoded string
302							#** -- 0: false: $_[0] is a Perl unicode string, it needs to be filtered
303							# with Encode::encode(Charset)
304							#** New key: Shorts:
305							#** -- 1 (default for encode_mimewords)
306							#** -- 0 (default for encode_mimeword)
307							#** New key: Whole: (is respected iff Shorts==0)
308							#** -- 1 (default): quote the string as a whole word (not default in the original!)
309							#** -- 0: encode subwords
310							#** New key: Keeptrailnl
311							#** -- 0: treat trailing newline as unprintable
312							#** -- 1 (default, as in original MIME::Words, as expected by Sympla 4):
313							# keep trailing newline at the end
314							#** !! doc more, including new keys
315							sub encode_mimewords_low {
316	80			80	0	726	my($S,%opts)=@_;
317	80	50				157	return undef if !defined($S);
318							# die "no word for encode_mimewords" if !defined $S; # Dat: Sympa calls us with undef
319							#die unless open LOG, ">> /tmp/b.log";
320							#die unless print LOG "=> ".dumpstr($S)."\n";
321							#die unless close LOG;
322							#$opts{Charset}="ISO-8859-1" if !defined $opts{Charset};
323	80	100				144	$opts{Raw}=1 if !defined $opts{Raw};
324	80	100				137	$opts{Charset}=get_best_encode_charset($S) if !defined $opts{Charset};
325	80	50				114	die if !defined $opts{Charset};
326	80					124	$opts{Charset}=canonical_charset($opts{Charset});
327	80	100				134	if ($opts{Raw}) { # Dat: improvement
328	46					46	$opts{Raw}=0;
329							#die if !defined $S;
330	46					90	$S=Encode::decode($opts{Charset}, $S);
331							# ^^^ Dat: better do a Unicode regexp match
332							}
333	79	100				1996	$opts{Encoding}=defined($opts{Encoding}) ? uc($opts{Encoding}) : "Q";
334	79	100				149	$opts{Encoding}="Q" if $opts{Encoding} ne "B"; # Dat: improvement
335	79	100	100			239	$opts{Encoding}="B" if $opts{Encoding} eq "Q" and $opts{Charset} eq "UTF-8";
336							# ^^^ Dat: UTF-8 encoded MimeWords must be in base64 -- quoted-printable is
337							# bad, Pine doesn't display quoted-printable properly
338							# (it assumes ISO-8859-1 for quoted-printable chars), and Mutt does it
339							# the other way;
340							# We need Base64 "=?UTF-8?B?".MIME::Base64::encode_base64("Unicode string")."?=
341	79	100				114	$opts{Shorts}=1 if !defined $opts{Shorts};
342	79	100				109	$opts{Whole}=1 if !defined $opts{Whole};
343	79	50				118	$opts{Space}=" " if !defined $opts{Space}; # Dat: empty =?...?==?...?= in the original MIME::Words
344	79	50				120	$opts{Split}=66 if !defined $opts{Split}; # Dat: split at this many chars
345	79	100				102	$opts{Keeptrailnl}=1 if !defined $opts{Keeptrailnl};
346	79					79	my $toend="";
347	79	100	100			303	$toend=$1 if $opts{Keeptrailnl} and $S=~s@(\n)\Z(?!\n)@@;
348	79	100				167	if (!$opts{Shorts}) {
		100
349	6					10	$S=encode_mimeword1($S,\%opts)
350							} elsif ($opts{Whole}) {
351	46	100				157	if ($S=~/$NONPRINT/o) {
352	43					85	$S=encode_mimeword1($S,\%opts);
353	43					62	substr($S,-1)=""; # Dat: remove last space
354							}
355	46	50				96	$S=split_words($S, $opts{Split}, "\n ") if $opts{Split};
356							} else {
357	27					28	my $lastpos=0;
358	27					145	while ($S=~/($NONPRINT[^ ]* *)/go) {
359							# ^^^ Dat: having ` *' is a must here, other clients just forget about it
360	41					99	my $I=pos($S)-length($1);
361	41		100			259	$I-- while $I>$lastpos and substr($S,$I-1,1)ne' ';
362	41					51	my $pos=pos($S); my $D;
	41					34
363	41		33			318	1 while ($pos=pos($S)) and $S=~/ \|\Z(?!\n)\|($NONPRINT)/goc and defined($D=$1)
			66
			66
364							and $S=~/\G[^ ]* */goc;
365	41	50				97	pos($S)=$pos if !defined $D;
366	41					64	my $srclen=pos($S)-$I;
367	41					67	my $src=substr($S,$I,$srclen);
368							##print STDERR "D($src)(".substr($S,$I+$srclen).")\n";
369	41	50	66			287	if ($I+$srclen!=length($S) and substr($src,-1)eq' ' and $S=~/ \|\Z(?!\n)\|($NONPRINT)/goc and !defined($1)) {
			66
			33
370							##print STDERR "Strip ending space\n";
371	29					58	substr($src,-1)=""; # Dat: see test case 'ignore_space'
372							}
373							# Dat: now pos($S) is invalid
374	41	50				68	die if 1>length($src);
375							##print STDERR "E($src)(".substr($S,$I+$srclen).")\n";
376	41					73	my $dst=encode_mimeword1($src,\%opts); # Dat: with trailing space
377							##print STDERR substr($S,$I,$srclen),";;\n";
378	41					117	substr($S,$I,$srclen)=$dst; # Imp: do with less copying
379	41					227	$lastpos=pos($S)=$I+length($dst);
380							}
381							substr($S,-length($opts{Space}))="" if
382	27	100	66			95	0
383	27	50				62	$S=split_words($S, $opts{Split}, "\n ") if $opts{Split};
384							}
385	79					101	$S.=$toend;
386							#$S=~s@ @\n @g; # !! debug
387							#die unless open LOG, ">> /tmp/b.log";
388							#die unless print LOG "T> ".dumpstr($S)."\n";
389							#die unless close LOG;
390	79					335	$S
391							}
392
393							#use vars qw($old_encode_mimeword);
394							#BEGIN { $old_encode_mimeword=\&MIME::AltWords::encode_mimeword }
395
396							=item encode_mimeword RAW, [ENCODING], [CHARSET]
397
398							I
399							Encode a single RAW "word" that has unsafe characters.
400							The "word" will be encoded in its entirety.
401
402							### Encode "<>":
403							$encoded = encode_mimeword("\xABFran\xE7ois\xBB");
404
405							You may specify the ENCODING (C<"Q"> or C<"B">), which defaults to C<"Q">.
406							You may specify the CHARSET, which defaults to C.
407
408							=cut
409
410
411							#** Dat: no prototype, because original encode_mimeword() doesn't have it
412							#** a prototype either
413							#** @param $_[0] raw string. raw
414							#** @param $_[1] string. Encoding: "Q" or "B", defaults to "Q"
415							#** @param $_[2] string. Charset: defaults to "ISO-8859-1" (as in MIME::Words code,
416							#** not as in its documentation
417							sub encode_mimeword {
418							#sub encode_mimeword($;$$) {
419	6			6	1	17	encode_mimewords($_[0],Encoding=>$_[1],Charset=>$_[2],Shorts=>0);
420							}
421
422	1			1		6	use vars qw($old_decode_mimewords);
	1					2
	1					38
423	1			1		226	BEGIN { $old_decode_mimewords=\&MIME::AltWords::decode_mimewords_wantarray } #### pts #### AltWords.pm adds `_wantarray'
424
425							#** Dat: function added by #### pts ####
426							#** @param $_[0] a mimewords-encoded string
427							#** @return a canonical encoding name with which the string can be re-encoded
428							sub get_best_decode_charset($) {
429	23			23	0	27	my $encodedstr=$_[0];
430	23					21	my @L;
431	23					32	for my $token (decode_mimewords($encodedstr)) {
432	45		100			100	my $charset=canonical_charset($token->[1] or "");
433	45	100	100			134	push @L, $charset if $charset and (!@L or $L[-1] ne $charset);
			100
434							}
435	23	100				55	@L=canonical_charset('UTF-8') if @L!=1; # Dat: default, can accomodate any charset
436	23					46	$L[0]
437							}
438
439							=item decode_mimewords ENCODED, [OPTS...]
440
441							I
442							Go through the string looking for RFC-1522-style "Q"
443							(quoted-printable, sort of) or "B" (base64) encoding, and decode them.
444
445							B splits the ENCODED string into a list of decoded
446							C<[DATA, CHARSET]> pairs, and returns that list. Unencoded
447							data are returned in a 1-element array C<[DATA]>, giving an effective
448							CHARSET of C.
449
450							$enc = '=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= ';
451							foreach (decode_mimewords($enc)) {
452							print "", ($_[1] \|\| 'US-ASCII'), ": ", $_[0], "\n";
453							}
454
455							B joins the "data" elements of the above list
456							together, and returns that. I it
457							sanitizes the returned string to use a specific, single charset, either
458							specified using the C option, or autodetecting one (ISO-8859-1,
459							ISO-8859-2 or UTF-8) which can accomodate all characters. In case of charset
460							autodetection, C can be used to query the charset
461							autodetected.
462
463							You might want to see L as an alternate of
464							L.
465
466							In the event of a syntax error, $@ will be set to a description
467							of the error, but parsing will continue as best as possible (so as to
468							get I back when decoding headers).
469							$@ will be false if no error was detected.
470
471							Any arguments past the ENCODED string are taken to define a hash of options:
472
473							=over 4
474
475							=item Field
476
477							Name of the mail field this string came from. I
478
479							=back
480
481							=cut
482
483							#** Dat: no prototype, because original decode_mimewords() doesn't have it
484							#** a prototype either
485							#** Dat: it is unsafe to use this without the Raw=>1 option. Search for
486							#** MIME::WordDecoder in `perldoc MIME::Words'.
487							#** @param $_[0] a mimewords-encoded string
488							#** @param $_[1]... list of options (key=>value pairs).
489							#** Keys documented in `perldoc MIME::Words'': Charset, Encoding, Field.
490							#** New key: Raw:
491							#** -- 1 (default): true: return a raw, encoded string. The encoding will
492							#** be Charset, or the one returned by get_best_decode_charset().
493							#** This is an improvement
494							#** by #### pts #### -- the original decode_mimewords() didn't return
495							#** the string in a consistent encoding.
496							#** -- 0: false: $_[0] is a Perl unicode string, it needs to be filtered
497							#** with Encode::encode(Charset)
498							#** New key: Charset: specific charset name for Raw=1 (ignored for Raw=0)
499							sub decode_mimewords_low {
500	50	100		50	0	108	return $old_decode_mimewords->(@_) if wantarray;
501	27					48	my($encodedstr,%opts)=@_;
502	27	100				48	$opts{Raw}=1 if !defined $opts{Raw}; # Dat: default
503	27					24	my $ret='';
504							# vvv Dat: not mutually recursive, because get_best_decode_charset() calls
505							# decode_mimewords() in list context, so this line won't be
506							# reached.
507							$opts{Charset}=get_best_decode_charset($encodedstr) if
508	27	100	100			81	$opts{Raw} and !defined $opts{Charset};
509	27					29	my $S;
510	27					60	for my $token ($old_decode_mimewords->($encodedstr,%opts)) { # Dat: $charset in $token->[1]
511	55	100				121	$S=$token->[1] ? Encode::decode($token->[1], $token->[0]) : $token->[0];
512	55	100				1110	$S=Encode::encode($opts{Charset}, $S) if $opts{Raw};
513	55					3753	$ret.=$S
514							}
515							$ret
516	27					108	}
517
518	1			1		5	use vars qw(@encode_subject_opts);
	1					1
	1					34
519	1			1		67	BEGIN { @encode_subject_opts=(Keeptrailnl=>1, Whole=>1); }
520
521							#** Dat: function added by #### pts ####
522							#** @param $_[0] String. A mimewords-encoded e-mail subject.
523							#** @return String. better mimewords-encoded
524							sub fix_subject($) {
525	1			1	0	2	my $encodedstr=$_[0];
526	1					2	my $best_charset=get_best_decode_charset($encodedstr);
527	1					2	my $decoded=decode_mimewords($encodedstr, Raw=>0);
528	1					3	encode_mimewords($decoded, Charset=>$best_charset, Raw=>0, @encode_subject_opts);
529							}
530
531	1			1		5	use vars qw(@encode_addresses_opts);
	1					1
	1					39
532	1			1		125	BEGIN { @encode_addresses_opts=(Keeptrailnl=>1, Whole=>0); }
533
534							#** Dat: function added by #### pts ####
535							#** @param $_[0] String. A mimewords-encoded e-mail address (or address list),
536							#** e.g. "=?ISO-8859-1?Q?foo?= bar , foo2 bar2 "
537							#** @return String. better mimewords-encoded
538							sub fix_addresses($) {
539	3			3	0	25	my $encodedstr=$_[0];
540	3					7	my $best_charset=get_best_decode_charset($encodedstr);
541	3					6	my $decoded=decode_mimewords($encodedstr, Raw=>0);
542							#print STDERR "DE($decoded)\n";
543							#chomp $decoded;
544							#$decoded.=" alma ";
545	3					8	encode_mimewords($decoded, Charset=>$best_charset, Raw=>0, @encode_addresses_opts);
546							}
547
548							#** Dat: function added by #### pts ####
549							#** @param $_[0] a Unicode string
550							#** @param $_[1] a charset
551							#** @return Boolean: is it encodable?
552							sub is_encodable($$) {
553	48			48	0	63	my $charset=uc$_[1];
554	48					49	my $S=$_[0]; # Dat: must be copied for Encode::encode
555	48	100	66			143	return 1 if $charset eq 'UTF-8' or $charset eq 'UTF8';
556	45					47	eval { Encode::encode($charset, $S, Encode::FB_CROAK) };
	45					80
557	45	100				1415	$@ ? 0 : 1
558							}
559
560							#** Please don't include US-ASCII.
561							#** Specify UTF-8 last (because get_best_encode_charset() ignores everything
562							#** after the first UTF-8).
563	1			1		6	use vars qw(@encode_charsets);
	1					1
	1					35
564	1			1		216	BEGIN { @encode_charsets=qw(ISO-8859-1 ISO-8859-2 UTF-8) }
565
566							#** Dat: function added by #### pts ####
567							#** @param $_[0] string to try
568							sub get_best_encode_charset($) {
569	39			39	0	60	for my $charset (@encode_charsets) {
570	48	100				70	return $charset if is_encodable($_[0], $charset);
571							}
572	0					0	return 'UTF-8'
573							}
574
575							#** Dat: function added by #### pts ####
576							#** @param $_[0] String. Perl Unicode-string
577							#** @param $_[1] encode mode: 'subject' or 'addresses'
578							#** @return String. better mimewords-encoded with the best charset
579							sub encode_unicode($$) {
580	6			6	0	9	my $str=$_[0];
581	6	100				14	my $modeary=($_[1] eq 'addresses' ? \@encode_addresses_opts : \@encode_subject_opts);
582	6					9	my $best_charset=get_best_encode_charset($str);
583	6					13	encode_mimewords($str, Charset=>$best_charset, Raw=>0, @$modeary);
584							}
585
586							#** Dat: function added by #### pts ####
587							#** @param $_[0] String. Perl 8-bit string, encoded in $charset
588							#** @param $_[1] encode mode: 'subject' or 'addresses'
589							#** @param $_[2] $charset;
590							#** @return String. better mimewords-encoded with the best charset
591							sub encode_8bit($$$) {
592	6			6	0	9	my $str=$_[0];
593	6					7	my $charset=$_[2];
594	6	100				13	my $modeary=($_[1] eq 'addresses' ? \@encode_addresses_opts : \@encode_subject_opts);
595	6					10	$charset=canonical_charset($charset);
596	6					7	my $best_charset;
597	6					9	for my $charset2 (@encode_charsets) {
598	13	100				15	if (canonical_charset($charset2) eq $charset) {
599	5					6	$best_charset=$charset; last
600	5					7	}
601							}
602	6	100				11	$best_charset=canonical_charset(get_best_encode_charset($str)) if
603							!defined $best_charset;
604	6	100				15	($charset eq $best_charset) ? # Imp: find badly encoded string...
605							encode_mimewords($str, Charset=>$best_charset, Raw=>1, @$modeary) :
606							encode_mimewords(Encode::decode($charset, $str), Charset=>$best_charset, Raw=>0, @$modeary)
607							}
608
609							# --- Logging...
610
611	1			1		16	BEGIN { *encode_mimewords=\&encode_mimewords_low }
612	1			1		27	BEGIN { *decode_mimewords=\&decode_mimewords_low }
613
614	1	50		1		62	BEGIN { if ($main::DEBUG) {
615
616							#use vars qw($orig_encode_mimewords $orig_decode_mimewords);
617	1			1		4	no warnings qw(prototype redefine);
	1					1
	1					357
618
619							#BEGIN { $orig_encode_mimewords=\&encode_mimewords }
620							*encode_mimewords=sub {
621	0					0	require Carp;
622	0					0	my $dump="\n\n[".scalar(localtime)."] encode_mimewords(@_) = ";
623	0					0	my $ret=&encode_mimewords_low(@_); # Dat: we need `&' to ignore prototype
624	0					0	$dump.=$ret."\n";
625	0	0				0	if (open(my($log), ">> /tmp/em.log")) {
626	0					0	local *STDERR; open STDERR, ">&".fileno($log);
	0					0
627	0					0	select(STDERR); $\|=1; select($log); $\|=1;
	0					0
	0					0
	0					0
628	0					0	binmode($log, ':utf8');
629	0					0	print $log $dump;
630	0					0	Carp::cluck("^^^ encode_mimewords() ");
631	0					0	close $log;
632							}
633							$ret
634	0					0	};
	0					0
635
636							#BEGIN { $orig_decode_mimewords=\&decode_mimewords }
637							# Imp: copy prototype of original...
638							*decode_mimewords=sub {
639	0					0	require Carp;
640	0					0	my $dump="\n\n[".scalar(localtime)."] decode_mimewords(@_) = ";
641	0	0				0	if (wantarray) {
642	0					0	my @L=(&decode_mimewords_low(@_));
643	0					0	$dump.="@L (ary)\n";
644	0	0				0	if (open(my($log), ">> /tmp/em.log")) {
645	0					0	local *STDERR; open STDERR, ">&".fileno($log);
	0					0
646	0					0	select(STDERR); $\|=1; select($log); $\|=1;
	0					0
	0					0
	0					0
647	0					0	binmode($log, ':utf8');
648	0					0	print $log $dump;
649	0					0	Carp::cluck("^^^ decode_mimewords() ");
650	0					0	close $log;
651							}
652							@L
653	0					0	} else {
654	0					0	my $ret=decode_mimewords_low(@_);
655	0					0	$dump.=$ret."\n";
656	0	0				0	if (open(my($log), ">> /tmp/em.log")) {
657	0					0	local *STDERR; open STDERR, ">&".fileno($log);
	0					0
658	0					0	select(STDERR); $\|=1; select($log); $\|=1;
	0					0
	0					0
	0					0
659	0					0	binmode($log, ':utf8');
660	0					0	print $log $dump;
661	0					0	Carp::cluck("^^^ decode_mimewords() ");
662	0					0	close $log;
663							}
664							$ret
665	0					0	}
666	0					0	};
667
668							} }
669
670							# ---
671
672							=back
673
674							=head1 NOTES
675
676							Exports its principle functions by default, in keeping with
677							L and L.
678
679							Doesn't depend on L or L. All the shared code is
680							copied to L, which is bundled.
681
682							See also L for the previous version
683							of L integrated into the Sympa 4 mailing list software.
684
685							=head1 AUTHOR
686
687							L was written by
688							Péter Szabó (F) in 2006, and it has been uploaded to CPAN on
689							2006-09-27.
690
691							L uses code from L (in the file
692							C) and it uses documentation from L
693							(in the files C and C).
694
695							Here is the original author and copyright information for L.
696
697							Eryq (F), ZeeGee Software Inc (F).
698							David F. Skoll (dfs@roaringpenguin.com) http://www.roaringpenguin.com
699
700							All rights reserved. This program is free software; you can redistribute
701							it and/or modify it under the same terms as Perl itself.
702
703							Thanks also to...
704
705							Kent Boortz For providing the idea, and the baseline
706							RFC-1522-decoding code!
707							KJJ at PrimeNet For requesting that this be split into
708							its own module.
709							Stephane Barizien For reporting a nasty bug.
710
711
712							=head1 VERSION
713
714							See $VERSION in C .
715
716							=cut
717
718							=begin testing
719
720							is(MIME::AltWords::encode_mimewords("foo bar"), "foo bar");
721							is(MIME::AltWords::encode_mimewords("foo-b\x{E9}r"), "=?ISO-8859-1?Q?foo-b=E9r?=");
722							is(MIME::AltWords::encode_mimeword("foo bar"), "=?ISO-8859-1?Q?foo__bar?="); # Dat: improvement over MIME::AltWords
723							is(MIME::AltWords::encode_mimeword("foo__bar "), "=?ISO-8859-1?Q?foo=5F=5Fbar_?="); # Dat: improvement over MIME::AltWords
724							is(MIME::AltWords::encode_mimewords("az �rkik�t� f�ldi adatai",Whole=>0), "az =?ISO-8859-1?Q?=FBrkik=F6t=F5_f=F6ldi?= adatai");
725							is(MIME::AltWords::decode_mimewords("az =?ISO-8859-1?Q?=FBrkik=F6t=F5_f=F6ldi_?= adatai", Charset=>'ISO-8859-2'),
726							"az ?rkik�t? f�ldi adatai");
727							is(MIME::AltWords::decode_mimewords("az =?ISO-8859-2?Q?=FBrkik=F6t=F5_f=F6ldi_?= adatai", Charset=>'ISO-8859-1'),
728							"az ?rkik�t? f�ldi adatai");
729							is(MIME::AltWords::decode_mimewords("az =?ISO-8859-1?Q?=FBrkik=F6t=F5_f=F6ldi_?= adatai", Charset=>'ISO-8859-1'),
730							"az �rkik�t� f�ldi adatai");
731							is(MIME::AltWords::decode_mimewords("az =?ISO-8859-2?Q?=FBrkik=F6t=F5_f=F6ldi_?= adatai", Charset=>'ISO-8859-2'),
732							"az �rkik�t� f�ldi adatai");
733							is(MIME::AltWords::decode_mimewords("az =?ISO-8859-2?Q?=FBrkik=F6t=F5_f=F6ldi_?= adatai"),
734							"az �rkik�t� f�ldi adatai"); # Dat: guess Charset to ISO-8859-2
735							is(MIME::AltWords::encode_mimewords("az �rkik�t� f�ldi adatai"), "=?ISO-8859-1?Q?az_=FBrkik=F6t=F5_f=F6ldi_adatai?=");
736							is(MIME::AltWords::encode_mimewords("az �rkik�t� f�ldi",Whole=>0), "az =?ISO-8859-1?Q?=FBrkik=F6t=F5_f=F6ldi?=");
737							is(MIME::AltWords::encode_mimewords("az �rkik�t� f�ldi"), "=?ISO-8859-1?Q?az_=FBrkik=F6t=F5_f=F6ldi?=");
738							is(MIME::AltWords::encode_mimewords("foo b\x{E1}r",Whole=>0), "foo =?ISO-8859-1?Q?b=E1r?=");
739							is(MIME::AltWords::encode_mimewords("foo b\x{E1}r"), "=?ISO-8859-1?Q?foo__b=E1r?=");
740							is(MIME::AltWords::encode_mimewords( "b\x{F5}r foo",Charset=>"ISO-8859-1",Whole=>0), "=?ISO-8859-1?Q?b=F5r_?= foo");
741							is(MIME::AltWords::encode_mimewords( "b\x{F5}r foo",Charset=>"ISO-8859-1"), "=?ISO-8859-1?Q?b=F5r__foo?=");
742							{ my $S; eval { $S=MIME::AltWords::encode_mimewords("b\x{151}r foo",Charset=>"ISO-8859-2"); };
743							ok($@=~/^Wide character /); # Dat: Encode::decode fails
744							}
745							is(MIME::AltWords::encode_mimewords("b\x{151}r foo",Charset=>"ISO-8859-2",Raw=>0,Whole=>0), "=?ISO-8859-2?Q?b=F5r_?= foo");
746							is(MIME::AltWords::encode_mimewords("b\x{151}r foo",Charset=>"ISO-8859-2",Raw=>0), "=?ISO-8859-2?Q?b=F5r__foo?=");
747							is(MIME::AltWords::encode_mimewords("ha a sz\x{F3}t ~ jel",Charset=>"ISO-8859-2",Whole=>0),
748							"ha a =?ISO-8859-2?Q?sz=F3t?= ~ jel",'ignore_space');
749							is(MIME::AltWords::encode_mimewords("ha a sz\x{F3}t ~ jel",Charset=>"ISO-8859-2"),
750							"=?ISO-8859-2?Q?ha_a_sz=F3t_~_jel?=",'ignore_space2');
751							is(MIME::AltWords::encode_mimewords("ha a sz\x{F3}t ",Whole=>0,Charset=>"ISO-8859-1"),
752							"ha a =?ISO-8859-1?Q?sz=F3t_?=", "ends with one space");
753							is(MIME::AltWords::encode_mimewords("ha a sz\x{F3}t ",Whole=>0,Charset=>"ISO-8859-1"),
754							"ha a =?ISO-8859-1?Q?sz=F3t__?=", "ends with two spaces");
755							is(MIME::AltWords::encode_mimewords("ha a sz\x{F3}t ",Charset=>"ISO-8859-1"),
756							"=?ISO-8859-1?Q?ha_a_sz=F3t_?=");
757							is(MIME::AltWords::encode_mimewords("dokumentumok kezel\x{E9}se",Whole=>0), "dokumentumok =?ISO-8859-1?Q?kezel=E9se?=");
758							is(MIME::AltWords::encode_mimewords("dokumentumok kezel\x{E9}se"), "=?ISO-8859-1?Q?dokumentumok_kezel=E9se?=");
759							is(MIME::AltWords::encode_mimewords("tartalmaz\x{F3} dokumentumok kezel\x{E9}se",Whole=>0), "=?ISO-8859-1?Q?tartalmaz=F3?= dokumentumok =?ISO-8859-1?Q?kezel=E9se?=");
760							is(MIME::AltWords::encode_mimewords("tartalmaz\x{F3} dokumentumok kezel\x{E9}se"), "=?ISO-8859-1?Q?tartalmaz=F3_dokumentumok_kezel=E9se?="); # Imp: unify printable and nonprintable to save space
761
762							is(MIME::AltWords::encode_mimewords("A keres�si eredm�nyekb\x{151}l bizonyos ".
763							"szavakat tartalmaz� dokumentumok kiz�rhat�k, ha a sz�t ~ jel el\x{151}zi ".
764							"meg. Figyelem! A kiz�r�s csak akkor eredm�nyez tal�latot, ha �s (& vagy ".
765							"sz�k�z) kapcsolatban �ll egy nem kiz�r�ssal.",
766							Charset=>"ISO-8859-2",Raw=>0,Whole=>0),
767							"A =?ISO-8859-2?Q?keres=E9si_eredm=E9nyekb=F5l?= bizonyos szavakat\n =?ISO-8859-2?Q?tartalmaz=F3?= dokumentumok\n =?ISO-8859-2?Q?kiz=E1rhat=F3k,?= ha a =?ISO-8859-2?Q?sz=F3t?= ~\n jel =?ISO-8859-2?Q?el=F5zi?= meg. Figyelem! A\n =?ISO-8859-2?Q?kiz=E1r=E1s?= csak akkor\n =?ISO-8859-2?Q?eredm=E9nyez_tal=E1latot,?= ha\n =?ISO-8859-2?Q?=E9s?= (& vagy =?ISO-8859-2?Q?sz=F3k=F6z)?=\n kapcsolatban =?ISO-8859-2?Q?=E1ll?= egy nem =?ISO-8859-2?Q?kiz=E1r=E1ssal.?=");
768
769							is(MIME::AltWords::encode_mimewords("A keres�si eredm�nyekb\x{151}l bizonyos ".
770							"szavakat tartalmaz� dokumentumok kiz�rhat�k, ha a sz�t ~ jel el\x{151}zi ".
771							"meg. Figyelem! A kiz�r�s csak akkor eredm�nyez tal�latot, ha �s (& vagy ".
772							"sz�k�z) kapcsolatban �ll egy nem kiz�r�ssal.",
773							Charset=>"ISO-8859-2",Raw=>0),
774							"=?ISO-8859-2?Q?A_keres=E9si_eredm=E9nyekb=F5l_bizonyos_szavakat_?=\n =?ISO-8859-2?Q?tartalmaz=F3_dokumentumok_kiz=E1rhat=F3k,_ha_a_sz?=\n =?ISO-8859-2?Q?=F3t_~_jel_el=F5zi_meg._Figyelem!_A_kiz=E1r=E1s_c?=\n =?ISO-8859-2?Q?sak_akkor_eredm=E9nyez_tal=E1latot,_ha_=E9s_(&_va?=\n =?ISO-8859-2?Q?gy_sz=F3k=F6z)_kapcsolatban_=E1ll_egy_nem_kiz=E1r?=\n =?ISO-8859-2?Q?=E1ssal.?=");
775							# vvv Dat: composing with Pine emits:
776							#is(MIME::AltWords::encode_mimewords("A keres�si eredm�nyekb�l bizonyos szavakat tartalmaz� dokumentumok kiz�rhat�k, ha a sz�t ~ jel el�zi meg. Figyelem! A kiz�r�s csak akkor eredm�nyez tal�latot, ha �s (& vagy sz�k�z) kapcsolatban �ll egy nem kiz�r�ssal.",
777							#"=?ISO-8859-2?Q?A_keres=E9si_eredm=E9nyekb=F5l_bizonyos_szavakat?=
778							# =?ISO-8859-2?Q?_tartalmaz=F3_dokumentumok_kiz=E1rhat=F3k=2C_ha_?=
779							# =?ISO-8859-2?Q?a_sz=F3t_~_jel_el=F5zi_meg=2E_Figyelem!_A_?=
780							# =?ISO-8859-2?Q?kiz=E1r=E1s_csak_akkor_eredm=E9nyez_tal=E1latot=2C?=
781							# =?ISO-8859-2?Q?_ha_=E9s_=28&_vagy_sz=F3k=F6z=29_kapcsolatban?=
782							# =?ISO-8859-2?Q?_=E1ll_egy_nem_kiz=E1r=E1ssal=2E?=
783
784							is(MIME::AltWords::encode_mimewords("�rv�zt\x{171}r\x{151} egy t�k�rf�r�g�p",
785							Charset=>"UTF-8",Raw=>0,Whole=>0),"=?UTF-8?B?w4FydsOtenTFsXLFkQ==?= egy =?UTF-8?B?dMO8a8O2cmbDunLDs2fDqXA=?=");
786							is(MIME::AltWords::encode_mimewords("�rv�zt\x{171}r\x{151} egy t�k�rf�r�g�p",
787							Charset=>"UTF-8",Raw=>0),"=?UTF-8?B?w4FydsOtenTFsXLFkSBlZ3kgdMO8a8O2cmbDunLDs2fDqXA=?=");
788
789							is(MIME::AltWords::split_words("fo ot bar aaaaaaaaab cccccccccdddddd e f g ",8,"xy"),"fo otxybarxyaaaaaaaaabxy cccccccccddddddxye f g ",'split_words()');
790
791							is(MIME::AltWords::encode_mimewords("Szab\x{F3} P\x{E9}ter ",Charset=>"UTF-8",Raw=>0),
792							"=?UTF-8?B?U3phYsOzIFDDqXRlciA8cHRzQG91ci51bT4=?=");
793							is(MIME::AltWords::encode_mimewords("Szab\x{F3} P\x{E9}ter",Charset=>"UTF-8",Raw=>0),
794							"=?UTF-8?B?U3phYsOzIFDDqXRlcjxwdHNAb3VyLnVtPg==?="); # Dat: this is what compose_mail returns
795							is(MIME::AltWords::encode_mimewords("Szab\x{F3} P\x{E9}ter ",Charset=>"UTF-8",Raw=>0,Whole=>0),
796							"=?UTF-8?B?U3phYsOzIFDDqXRlcg==?= ");
797							is(MIME::AltWords::encode_mimewords("Szab\x{F3} P\x{E9}ter ",Charset=>"UTF-8",Raw=>0,Whole=>0),
798							"=?UTF-8?B?U3phYsOzIFDDqXRlciA=?= ");
799
800							SKIP: {
801							eval { require Mail::Address };
802							skip "Mail::Address not installed", 1 if $@;
803							my @sender_hdr = Mail::Address->parse("=?UTF-8?B?U3phYsOzIFDDqXRlciA=?= ");
804							my $address=@sender_hdr ? $sender_hdr[0]->address : undef;
805							$address="undef" if !defined $address;
806							is($address, "pts\@our.um");
807							}
808
809							is(scalar MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("\x{171}",Charset=>"UTF-8",Raw=>0)),
810							"\x{C5}\x{B1}", 'decode_mimewords()');
811							is(scalar MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("\x{171}",Charset=>"UTF-8",Raw=>0),Raw=>0),
812							"\x{171}", 'decode_mimewords()');
813
814							is(MIME::AltWords::get_best_decode_charset(MIME::AltWords::encode_mimewords("f\x{171} fa t\x{F6}lgy",Charset=>"ISO-8859-2",Raw=>0,Whole=>0)),
815							'ISO-8859-2', 'get_best_decode_charset()');
816							is(MIME::AltWords::get_best_decode_charset("=?ISO-8859-2?Q?f=FB?= fa =?ISO-8859-1?Q?t=F6lgy?="),
817							'UTF-8', 'get_best_decode_charset()');
818							is(MIME::AltWords::get_best_decode_charset("fa"), 'UTF-8', 'get_best_decode_charset()');
819
820							is(MIME::AltWords::fix_addresses("=?ISO-8859-2?Q?f=FB?= fa =?ISO-8859-1?Q?t=F6lgy?="),
821							"=?UTF-8?B?ZsWx?= fa =?UTF-8?B?dMO2bGd5?=", 'fix_addresses()');
822							is(MIME::AltWords::fix_addresses("=?UTF-8?B?U3phYsOzIFDDqXRlciA8cHRzQG91ci51bT4=?="),
823							"=?UTF-8?B?U3phYsOzIFDDqXRlcg==?= ", 'fix_addresses()');
824							is(MIME::AltWords::fix_addresses("=?UTF-8?B?U3phYsOzIFDDqXRlciA8cHRzQG91ci51bT4K?="),
825							"=?UTF-8?B?U3phYsOzIFDDqXRlcg==?= \n", 'fix_addresses() Keeptrailnl');
826							is(MIME::AltWords::fix_subject("=?ISO-8859-2?Q?f=FB?= fa =?ISO-8859-1?Q?t=F6lgy?="),
827							"=?UTF-8?B?ZsWxIGZhIHTDtmxneQ==?=", 'fix_subject()');
828
829							is(MIME::AltWords::decode_mimewords("=?UTF-8?B?ZsWx?= fa =?UTF-8?B?dMO2bGd5?=",Raw=>1),
830							"f\x{C5}\x{B1} fa t\x{C3}\x{B6}lgy", 'decode_mimewords()');
831							is(MIME::AltWords::decode_mimewords("=?ISO-8859-2?Q?f=FB?= fa =?ISO-8859-1?Q?t=F6lgy?=",Raw=>0),
832							"f\x{171} fa t\x{F6}lgy", 'decode_mimewords()');
833							#die "".MIME::AltWords::decode_mimewords("=?UTF-8?B?U3phYsOzIFDDqXRlciA8cHRzQG91ci51bT4=?=");
834							is(MIME::AltWords::decode_mimewords("=?UTF-8?B?U3phYsOzIFDDqXRlcg==?=",Raw=>0),
835							"Szab� P�ter", 'decode_mimewords()');
836
837							is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("f\x{E9}l",Charset=>"UTF-8",Raw=>0)),
838							"f\x{C3}\x{A9}l", 'encode+decode mimewords');
839							is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("f\x{E9}l ",Charset=>"UTF-8",Raw=>0)),
840							"f\x{C3}\x{A9}l ", 'encode+decode mimewords');
841							is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("f\x{E9}l ",Charset=>"UTF-8",Raw=>0)),
842							"f\x{C3}\x{A9}l ", 'encode+decode mimewords');
843							is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("f\x{E9}l <",Charset=>"UTF-8",Raw=>0)),
844							"f\x{C3}\x{A9}l <", 'encode+decode mimewords');
845							is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("f\x{E9}l <",Charset=>"UTF-8",Raw=>0,Whole=>0)),
846							"f\x{C3}\x{A9}l <", 'encode+decode mimewords');
847							is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("d\x{E9}l < ",Charset=>"UTF-8",Raw=>0,Whole=>0)),
848							"d\x{C3}\x{A9}l < ", 'encode+decode mimewords');
849							is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("d\x{E9}l < ",Charset=>"UTF-8",Raw=>0,Whole=>0)),
850							"d\x{C3}\x{A9}l < ", 'encode+decode mimewords');
851
852							is(MIME::AltWords::encode_mimewords("[nekem] pr\x{F3}ba h\x{E1}romra\n",Keeptrailnl=>1),
853							"=?ISO-8859-1?Q?[nekem]_pr=F3ba_h=E1romra?=\n", "encode_mimewords() Keeptrailnl=1");
854							is(MIME::AltWords::encode_mimewords("[nekem] pr\x{F3}ba h\x{E1}romra\n",Keeptrailnl=>0),
855							"=?ISO-8859-1?Q?[nekem]_pr=F3ba_h=E1romra=0A?=", "encode_mimewords() Keeptrailnl=0");
856							is(MIME::AltWords::encode_mimewords("[nekem] pr\x{F3}ba h\x{E1}romra\n"),
857							"=?ISO-8859-1?Q?[nekem]_pr=F3ba_h=E1romra?=\n", "encode_mimewords() Keeptrailnl=default");
858
859							is(MIME::AltWords::decode_mimewords("=?ISO-8859-2?Q?m=E1sik_pr=F3b=E1cska?=\n"),
860							"m\x{E1}sik pr\x{F3}b\x{E1}cska\n", "decode_mimewords ISO-8859-2");
861
862							is(MIME::AltWords::get_best_encode_charset("hello\t\n"), "ISO-8859-1", "get_best_encode_charset() ASCII");
863							is(MIME::AltWords::get_best_encode_charset("hell\x{F3}, w\x{F6}rld\t\n"), "ISO-8859-1", "get_best_encode_charset() ISO-8859-1");
864							is(MIME::AltWords::get_best_encode_charset("hell\x{151}, w\x{F6}rld\t\n"), "ISO-8859-2", "get_best_encode_charset() ISO-8859-2");
865							is(MIME::AltWords::get_best_encode_charset("hell\x{151}, w\x{F5}rld\t\n"), "UTF-8", "get_best_encode_charset() UTF-8");
866
867							is(MIME::AltWords::encode_unicode("[foo] hell\x{151}, w\x{F5}rld\t\n", 'addresses'), "[foo] =?UTF-8?B?aGVsbMWRLCB3w7VybGQJ?=\n", "encode_addresses() UTF-8");
868							is(MIME::AltWords::encode_unicode("[foo] hell\x{151}, w\x{F5}rld\t\n", 'subject'), "=?UTF-8?B?W2Zvb10gaGVsbMWRLCB3w7VybGQJ?=\n", "encode_subject() UTF-8");
869							is(MIME::AltWords::encode_unicode("[foo] hell\x{151}, w\x{F6}rld\t\n", 'subject'), "=?ISO-8859-2?Q?[foo]_hell=F5,_w=F6rld=09?=\n", "encode_subject() ISO-8859-2");
870							is(MIME::AltWords::encode_unicode("[foo] hell\x{F3}, w\x{F6}rld\t\n", 'subject'), "=?ISO-8859-1?Q?[foo]_hell=F3,_w=F6rld=09?=\n", "encode_subject() ISO-8859-1");
871
872							is(MIME::AltWords::encode_unicode("toast =?FOO-42?Q?bar=35?= me?\n", 'addresses'), "toast =?ISO-8859-1?Q?=3D=3FFOO-42=3FQ=3Fbar=3D35=3F=3D?= me?\n", "encode_addresses() with mimewords");
873							is(MIME::AltWords::encode_unicode("toast =?FOO-42?Q?b\x{E1}r=35?= me?\n", 'addresses'), "toast =?ISO-8859-1?Q?=3D=3FFOO-42=3FQ=3Fb=E1r=3D35=3F=3D?= me?\n", "encode_addresses() with mimewords");
874
875							is(MIME::AltWords::encode_8bit("[foo] hell\x{C5}\x{91}, w\x{C3}\x{B5}rld\t\n", 'addresses', 'uTf8'), "[foo] =?UTF-8?B?aGVsbMWRLCB3w7VybGQJ?=\n", "encode_8bit() addresses UTF-8");
876							is(MIME::AltWords::encode_8bit("[foo] hell\x{C5}\x{91}, w\x{C3}\x{B5}rld\t\n", 'subject', 'uTf8'), "=?UTF-8?B?W2Zvb10gaGVsbMWRLCB3w7VybGQJ?=\n", "encode_8bit() subject UTF-8");
877							is(MIME::AltWords::encode_8bit("[foo] hell\x{F5}, w\x{F6}rld\t\n", 'subject', '88592'), "=?ISO-8859-2?Q?[foo]_hell=F5,_w=F6rld=09?=\n", "encode_8bit() subject ISO-8859-2");
878							is(MIME::AltWords::encode_8bit("[foo] hell\x{F3}, w\x{F6}rld\t\n", 'subject', '88591'), "=?ISO-8859-1?Q?[foo]_hell=F3,_w=F6rld=09?=\n", "encode_8bit() subject ISO-8859-1");
879							is(MIME::AltWords::encode_8bit("toast =?FOO-42?Q?bar=35?= me?\n", 'addresses', 'us-ascii'), "toast =?ISO-8859-1?Q?=3D=3FFOO-42=3FQ=3Fbar=3D35=3F=3D?= me?\n", "encode_8bit() addresses with mimewords");
880							is(MIME::AltWords::encode_8bit("toast =?FOO-42?Q?b\x{E1}r=35?= me?\n", 'addresses', '88591'), "toast =?ISO-8859-1?Q?=3D=3FFOO-42=3FQ=3Fb=E1r=3D35=3F=3D?= me?\n", "encode_8bit() addresses with mimewords");
881
882							is(join(' ',map{MIME::AltWords::encode_mimewords($_)}split/ +/,"[nekem] m\x{E1}sik pr\x{F3}b\x{E1}cska\n"),
883							"[nekem] =?ISO-8859-1?Q?m=E1sik?= =?ISO-8859-1?Q?pr=F3b=E1cska?=\n", "encode_mimewords() default ISO-8859-1 a");
884
885							is(join(' ',map{MIME::AltWords::encode_mimewords($_,Raw=>0)}split/ +/,"[nekem] m\x{E1}sik pr\x{F3}b\x{151}cska\n"),
886							"[nekem] =?ISO-8859-1?Q?m=E1sik?= =?ISO-8859-2?Q?pr=F3b=F5cska?=\n", "encode_mimewords() default ISO-8859-1,2 b");
887
888							is(MIME::AltWords::encode_mimewords("[nekem] m\x{E1}sik pr\x{F3}b\x{E1}cska\n"),
889							"=?ISO-8859-1?Q?[nekem]_m=E1sik_pr=F3b=E1cska?=\n", "encode_mimewords() default ISO-8859-1 c");
890
891							is(MIME::AltWords::decode_mimewords('=?US-ASCII?Q?Keith_Moore?= '), 'Keith Moore ', "MIME::Words test case 1");
892							is(MIME::AltWords::decode_mimewords('=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= '), 'Keld J�rn Simonsen ', "MIME::Words test case 2");
893							is(MIME::AltWords::decode_mimewords('=?ISO-8859-1?Q?Andr=E9_?= Pirard '), 'Andr� Pirard ', "MIME::Words test case 3");
894							is(MIME::AltWords::decode_mimewords('=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?==?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?==?US-ASCII?Q?.._cool!?='), 'If you can read this you understand the example... cool!', "MIME::Words test case 4");
895							is(MIME::AltWords::encode_mimewords("\xABFran\xE7ois\xBB"), '=?ISO-8859-1?Q?=ABFran=E7ois=BB?=', "MIME::Words test case 5");
896							is(MIME::AltWords::encode_mimewords("Me and \xABFran\xE7ois\xBB at the beach"), '=?ISO-8859-1?Q?Me_and_=ABFran=E7ois=BB_at_the_beach?=', "MIME::Words test case 6");
897							# vvv !! is this correct (space after \n)?
898							is(MIME::AltWords::encode_mimewords("Me and \xABFran\xE7ois\xBB, down at the beach\nwith Dave "), "=?ISO-8859-1?Q?Me_and_=ABFran=E7ois=BB,_down_at_the_beach=0Awith?=\n =?ISO-8859-1?Q?_Dave_?=", "MIME::Words test case 7");
899							is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("Me and \xABFran\xE7ois\xBB, down at the beach\nwith Dave ")), "Me and \xABFran\xE7ois\xBB, down at the beach\nwith Dave ", "MIME::Words test case 8");
900
901							my $in0 = Encode::encode("windows-1251", "\x{422}\x{435}\x{441}\x{442}\x{438}\x{440}\x{43e}\x{432}\x{430}\x{43d}\x{438}\x{435}");
902							my $out0b = "=?WINDOWS-1251?B?0uXx8ujw7uLg7ejl?=";
903							my $out0q = "=?WINDOWS-1251?Q?=D2=E5=F1=F2=E8=F0=EE=E2=E0=ED=E8=E5?=";
904							is(MIME::AltWords::encode_mimewords($in0, Charset=>"windows-1251", Encoding=>"B"), $out0b);
905							is(MIME::AltWords::encode_mimewords($in0, Charset=>"windows-1251", Encoding=>"Q"), $out0q);
906							is(MIME::AltWords::encode_mimewords($in0, Charset=>"windows-1251", Encoding=>"q"), $out0q);
907							is(MIME::AltWords::encode_mimeword($in0, "B", "windows-1251"), $out0b);
908							is(MIME::AltWords::encode_mimeword($in0, "Q", "windows-1251"), $out0q);
909							is(MIME::AltWords::encode_mimeword($in0, "q", "windows-1251"), $out0q);
910							is(MIME::AltWords::encode_mimewords($in0, Charset=>"windows-1251", Encoding=>"b"), $out0b);
911							is(MIME::AltWords::encode_mimeword($in0, "b", "windows-1251"), $out0b);
912
913							=cut
914
915							1