File Coverage

lib/MIME/AltWords.pm

Criterion	Covered	Total	%
statement	220	278	79.1
branch	99	126	78.5
condition	45	65	69.2
subroutine	37	38	97.3
pod	1	15	6.6
total	402	522	77.0

line	stmt	bran	cond	sub	pod	time	code
1							#
2							# MIME/AltWords.pm -- a lot of fixes on MIME::Words
3							# by pts@fazekas.hu at Fri Jan 20 11:35:08 UTC 2006
4							# -- Fri Mar 31 18:41:14 CEST 2006
5							#
6							# Dat: this assumes Perl v5.8 or later
7							# Dat: run the unit tests with: ./pts-test.pl AltMIMEWords.pm
8							# Dat: see `perldoc MIME::Words' for the original documentation
9							# Dat: a raw string has bytes in 0..255, and it is already encoded in some
10							# encoding
11							# SUXX: perldoc doesn't respect `=encoding utf-8'
12							# lib/MIME/AltWords.pm:30: Unknown command paragraph "=encoding utf8"
13							# !! why `,' in teszt a =?ISO-8859-2?Q?lev=E9lben=2C_t=F6r=F6lhet=F5?= ?? is it standard?
14							# !! document all
15							# !! document test cases
16							# !! MANIFEST etc.
17							#
18
19							package MIME::AltWords;
20	1			1		13	use v5.8; # Dat: Unicode string support etc.
	1					3
	1					43
21	1			1		4	use integer;
	1					1
	1					9
22	1			1		18	use strict;
	1					1
	1					47
23	1			1		353	use MIME::AltWords0; #use MIME::Words;
	1					2
	1					19
24	1			1		4	use MIME::Base64;
	1					1
	1					33
25	1			1		825	use Encode;
	1					9948
	1					77
26	1			1		7	use warnings;
	1					2
	1					17
27	1			1		4	use Exporter;
	1					2
	1					29
28	1			1		5	no warnings qw(prototype redefine);
	1					2
	1					48
29
30							# vvv Dat: exports symbols from MIME::AltWords0 by default, works only
31							# without these
32							#use vars qw(@EXPORT_OK %EXPORT_TAGS);
33							#BEGIN {
34							# @EXPORT_OK=@MIME::AltWords0::EXPORT_OK;
35							# %EXPORT_TAGS=%MIME::AltWords0::EXPORT_TAGS;
36							## die "@EXPORT_OK";
37							#}
38							#Exporter::export_ok_tags('all');
39
40							=pod
41
42							=encoding utf8
43
44							=head1 NAME
45
46							MIME::AltWords - properly deal with RFC-1522 encoded words
47
48							=head1 SYNOPSIS
49
50							The Perl module L is recommended for encoding and
51							decoding MIME words (such as C<=?ISO-8859-2?Q?_=E1ll_e=E1r?=>) found in
52							e-mail message headers (mostly Subject, From and To).
53
54							L is similar to L in
55							L, but it provides an alternate implementation that follows the
56							MIME specification more carefully, and it is actually compatible with
57							existing mail software (tested with Mutt, Pine, JavaMail and OpenWebmail).
58							L extends the functionality of L (version
59							5.420) by adding more functions and more options to existing functions. The
60							original interface is changed in an upward-compatible way.
61
62							Before reading further, you should see L to make sure that
63							you understand where this module fits into the grand scheme of things.
64							Go on, do it now. I'll wait.
65
66							Ready? Ok...
67
68							use MIME::AltWords qw(:all);
69
70							### Decode the string into another string, forgetting the charsets:
71							$decoded = decode_mimewords(
72							'To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= ',
73							);
74
75							### Split string into array of decoded [DATA,CHARSET] pairs:
76							@decoded = decode_mimewords(
77							'To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= ',
78							);
79
80							### Encode a single unsafe word:
81							$encoded = encode_mimeword("\xABFran\xE7ois\xBB");
82
83							### Encode a string, trying to find the unsafe words inside it:
84							$encoded = encode_mimewords("Me and \xABFran\xE7ois\xBB in town");
85
86
87
88							=head1 DESCRIPTION
89
90							Fellow Americans, you probably won't know what the hell this module
91							is for. Europeans, Russians, et al, you probably do. C<:-)>.
92
93							For example, here's a valid MIME header you might get:
94
95							From: =?US-ASCII?Q?Keith_Moore?=
96							To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=
97							CC: =?ISO-8859-1?Q?Andr=E9_?= Pirard
98							Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=
99							=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=
100							=?US-ASCII?Q?.._cool!?=
101
102							The fields basically decode to (sorry, I can only approximate the
103							Latin characters with 7 bit sequences /o and 'e):
104
105							From: Keith Moore
106							To: Keld J/orn Simonsen
107							CC: Andr'e Pirard
108							Subject: If you can read this you understand the example... cool!
109
110
111							=head1 PUBLIC INTERFACE
112
113							=over 4
114
115							=cut
116
117
118	1			1		4	use vars qw($NONPRINT $VERSION);
	1					4
	1					44
119
120							### The package version, both in 1.23 style and usable by MakeMaker:
121							BEGIN { # vvv Dat: MakeMaker needs $VERSION in a separate line
122	1			1		1628	$VERSION = "0.12"
123							}
124
125							# Dat: MIME::Words has [\x00-\x1F\x7F-\xFF]. We prepare for Unicode.
126							$NONPRINT=qr{(?:[^\x20-\x7E]\|=)};
127							#$NONPRINT=qr{(?:[^\x20-\x7E]\|[=](?=[?]))}; # Imp: is a bare `=' legal?
128
129							#** @param $_[0] charset name
130							#** @return MIME canonical charset name
131							sub canonical_charset($) {
132	142			142	0	186	my $S=$_[0];
133	142	100				735	if ($S=~/\A(?:iso-?(?:8859-?)?\|8859-?)(\d+)\Z(?!\n)/i) { "ISO-8859-$1" }
	79	100				266
		100
134	45					108	elsif ($S=~/\AUTF-?8\Z(?!\n)/i) { "UTF-8" }
135	3					8	elsif ($S=~/\A(?:US-)ASCII\Z(?!\n)/i) { "US-ASCII" }
136	15					35	else { uc $S }
137							}
138
139							#** @param $_[0] string.
140							#** @param $_[1] hashref. options
141							#** @param $_[2] string to append to
142							sub append_encoded_word($$$) {
143	89			89	0	197	my($word,$opts,$dst)=@_;
144	89	100				231	if ($opts->{Encoding} eq "B") {
		50
145	25					83	$word=MIME::Base64::encode_base64($word, ''); # Dat: empty EOL, as requested by MIME
146	25					43	$word=~s@\s+@@g;
147	25					33	$$dst.=$word
148							} elsif ($opts->{Encoding} eq "Q") {
149							# Dat: improved MIME::Words::_encode_Q
150	64	100				761	$word =~ s{( )\|([_\?\=]\|$NONPRINT)}{defined $1 ? "_" # Dat: "_" is an improvement
	258					1284
151							: sprintf("=%02X", ord($2))}eog;
152	64					122	$$dst.=$word
153	0					0	} else { die }
154							undef
155	89					150	}
156
157							#use vars qw($old_encode_mimewords);
158							#BEGIN { $old_encode_mimewords=\&MIME::AltWords::encode_mimewords }
159
160							#** @param $_[0] string. Unicode
161							#** @param $_[1] hashref. options preprocessed by out encode_mimewords()
162							#** @return 0..255 string, ends with $opts->{Space} if $opts->{Shorts}
163							sub encode_mimeword1($$) {
164	82			82	0	134	my($src,$opts)=@_;
165							# Imp: warning if Encode::encode cannot represent character
166	82					87	my $dst="";
167	82					194	my $open="=?$opts->{Charset}?$opts->{Encoding}?";
168	82					79	my $maxlen=64; # Dat: good for a subject line !! 63 or 62
169							# ^^^ Dat: $maxlen=75 works fine in postfix 2.1.5 + pine 4.64
170							# ^^^ Dat: one quoted word shouldn't be too long
171							# ^^^ Dat: Subject: =?B?C?3412341234123412341234123412341234123412341234123412341234?=
172	82					92	$maxlen-=length($open);
173	82	100				192	$maxlen=int(($maxlen+3)/4)*3 if $opts->{Encoding} eq "B"; # Dat: `use integer;' anyway
174							#print STDERR "($src) $maxlen\n";
175
176	82					210	$src=Encode::encode($opts->{Charset},$src);
177	82	100				2386	if ($opts->{Shorts}) {
178	80					104	my $I=0;
179	80					159	while ($I
180							# Dat: split result for too long consecutive headers (i.e. long Subject: line)
181	87					109	my $J=$I+$maxlen;
182	87	100				175	$J=length($src) if $J>length($src);
183	87	100				210	if ($opts->{Charset} eq "UTF-8") { # Dat: UTF-8 is multibyte, it cannot split anywhere
184	25	50				76	if (substr($src,$J,1)=~y/\x80-\xbf//) { # Dat: a half-cut UTF-8 byte sequence
185	0		0			0	$J-- while $J>$I+1 and substr($src,$J-1,1)=~y/\x80-\xbf//;
186	0	0	0			0	$J-- if $J>$I+1 and substr($src,$J-1,1)=~y/\xc0-\xff//;
187							# ^^^Dat: `$I+1': avoid infinite loop in `$I<$maxlen'
188							}
189							}
190							# Imp: else: fix for other multibyte encodings
191	87					116	$dst.=$open;
192	87					111	my $addlen=-length($dst);
193	87					237	append_encoded_word(substr($src,$I,$J-$I),$opts,\$dst);
194	87					129	$addlen+=length($dst);
195	87	100	100			397	if ($opts->{Encoding} eq "Q" and $addlen>$maxlen and $addlen>3) {
			66
196							# Dat: too many hex `=..' triplets, become too long
197	7					12	my $K=length($dst);
198	7					54	while ($addlen>$maxlen) {
199	48	100				84	if (substr($dst,$K-3,1)eq"=") { $addlen-=3; $K-=3; $J-- }
	3					4
	3					4
	3					7
200	45					43	else { $addlen--; $K--; $J-- }
	45					36
	45					80
201							}
202	7					12	substr($dst,$K)="";
203							# Imp: more efficient, don't process the same data many times
204							}
205	87					117	$dst.="?="; $dst.=$opts->{Space};
	87					100
206	87					228	$I=$J;
207							}
208	2					3	} else { $dst.=$open; append_encoded_word($src,$opts,\$dst); $dst.="?=" }
	2					5
	2					2
209	82					184	$dst
210							}
211
212							#** @returns the specified string quoted in double quotes. All characters
213							#** are printable ASCII.
214							sub dumpstr($) {
215	0			0	0	0	my $S=$_[0];
216	0					0	$S=~s@(["\\])\|([^ -~])@
217	0	0				0	defined $2 ? sprintf("\\x{%X}",ord($2)) # Imp: Unicode chars
218							: "\\$1" # Imp: Unicode chars
219							@ge;
220	0					0	"\"$S\"" #"
221							}
222
223							#** Splits a string on spaces into lines so no line is longer than the maximum
224							#** (except if there is no space nearby).
225							#** Only the 1st space is converted to $_[2] at each break.
226							#** @param $_[0] string. to split
227							#** @param $_[1] integer. maximum # chars in a line (not counting the terminating newline)
228							#** @param $_[2] chars to replace a space with -- not being added to the
229							#** maximum line length
230							sub split_words($$$) {
231	70			70	0	124	my($S,$maxlen,$nl)=@_;
232	70					70	my $lastpos=0; my $I=0; my $J;
	70					76
	70					67
233							#** Position after last space to split at, or $I
234							my $K;
235	70					68	my $ret="";
236	70					69	while (1) { # Imp: faster implementation
237	84					92	$K=$J=$I;
238	84		66			429	$J++ while $J
239	84	100	100			222	while ($K==$I ? ($J
240	2698					3079	my $C=substr($S,$J,1);
241	2698	50				5424	if ($C eq"\n") { $K=$I=++$J }
	0	100				0
242	65					274	elsif ($C eq " ") { $K=++$J }
243	2633					6915	else { $J++ }
244							}
245	84	100	100			248	if ($K>$I and $J>$I+$maxlen) {
246	18					47	$ret.=substr($S,$I,$K-1-$I);
247	18					30	$ret.=$nl;
248	18					22	$I=$K; # Imp: skip more
249							}
250	84	100				169	if ($J+$maxlen>=length($S)) { $ret.=substr($S,$I); last } # Dat: found last line, no way to split
	70					172
	70					93
251							}
252							$ret
253	70					182	}
254
255							=item encode_mimewords RAW, [OPTS]
256
257							I
258							Given a RAW string, try to find and encode all "unsafe" sequences
259							of characters:
260
261							### Encode a string with some unsafe "words":
262							$encoded = encode_mimewords("Me and \xABFran\xE7ois\xBB");
263
264							Returns the encoded string.
265							Any arguments past the RAW string are taken to define a hash of options:
266
267							=over 4
268
269							=item Charset
270
271							Encode all unsafe stuff with this charset. Default is 'ISO-8859-1',
272							a.k.a. "Latin-1".
273
274							=item Encoding
275
276							The encoding to use, C<"q"> or C<"b">. The default is C<"q">.
277
278							=item Field
279
280							Name of the mail field this string will be used in. I
281
282							=back
283
284							B this is a stable, tested, widely compatible solution. Strict
285							compliance with RFC-1522 (regarding the use of encoded words in message
286							headers), however, was not proven, but strings returned by this function
287							work properly and identically with Mutt, Pine, JavaMail and OpenWebmail. The
288							recommended way is to use this function instead of C or
289							L.
290
291							=cut
292
293
294							#** Dat: no prototype, because original encode_mimewords() doesn't have it
295							#** a prototype either
296							#** @param $_[0] string\|raw string. raw
297							#** @param $_[1].. list of key-value pairs. options
298							#** Keys documented in `perldoc MIME::Words'': Charset, Encoding, Field.
299							#** Charset is now autodetected (not always ISO-8859-1)
300							#** New key: Raw:
301							#** -- 1 (default): true: $_[0] is already a raw, encoded string
302							#** -- 0: false: $_[0] is a Perl unicode string, it needs to be filtered
303							# with Encode::encode(Charset)
304							#** New key: Shorts:
305							#** -- 1 (default for encode_mimewords)
306							#** -- 0 (default for encode_mimeword)
307							#** New key: Whole: (is respected iff Shorts==0)
308							#** -- 1 (default): quote the string as a whole word (not default in the original!)
309							#** -- 0: encode subwords
310							#** New key: Keeptrailnl
311							#** -- 0: treat trailing newline as unprintable
312							#** -- 1 (default, as in original MIME::Words, as expected by Sympla 4):
313							# keep trailing newline at the end
314							#** !! doc more, including new keys
315							sub encode_mimewords_low {
316	72			72	0	858	my($S,%opts)=@_;
317	72	50				155	return undef if !defined($S);
318							# die "no word for encode_mimewords" if !defined $S; # Dat: Sympa calls us with undef
319							#die unless open LOG, ">> /tmp/b.log";
320							#die unless print LOG "=> ".dumpstr($S)."\n";
321							#die unless close LOG;
322							#$opts{Charset}="ISO-8859-1" if !defined $opts{Charset};
323	72	100				200	$opts{Raw}=1 if !defined $opts{Raw};
324	72	100				186	$opts{Charset}=get_best_encode_charset($S) if !defined $opts{Charset};
325	72	50				149	die if !defined $opts{Charset};
326	72					138	$opts{Charset}=canonical_charset($opts{Charset});
327	72	100				165	if ($opts{Raw}) { # Dat: improvement
328	38					45	$opts{Raw}=0;
329							#die if !defined $S;
330	38					105	$S=Encode::decode($opts{Charset}, $S);
331							# ^^^ Dat: better do a Unicode regexp match
332							}
333	71	50				1031	$opts{Encoding}="Q" if !defined $opts{Encoding};
334	71	50				169	$opts{Encoding}="Q" if $opts{Encoding} ne "B"; # Dat: improvement
335	71	100	66			307	$opts{Encoding}="B" if $opts{Encoding} eq "Q" and $opts{Charset} eq "UTF-8";
336							# ^^^ Dat: UTF-8 encoded MimeWords must be in base64 -- quoted-printable is
337							# bad, Pine doesn't display quoted-printable properly
338							# (it assumes ISO-8859-1 for quoted-printable chars), and Mutt does it
339							# the other way;
340							# We need Base64 "=?UTF-8?B?".MIME::Base64::encode_base64("Unicode string")."?=
341	71	100				181	$opts{Shorts}=1 if !defined $opts{Shorts};
342	71	100				157	$opts{Whole}=1 if !defined $opts{Whole};
343	71	50				150	$opts{Space}=" " if !defined $opts{Space}; # Dat: empty =?...?==?...?= in the original MIME::Words
344	71	50				157	$opts{Split}=66 if !defined $opts{Split}; # Dat: split at this many chars
345	71	100				153	$opts{Keeptrailnl}=1 if !defined $opts{Keeptrailnl};
346	71					72	my $toend="";
347	71	100	100			435	$toend=$1 if $opts{Keeptrailnl} and $S=~s@(\n)\Z(?!\n)@@;
348	71	100				206	if (!$opts{Shorts}) {
		100
349	2					4	$S=encode_mimeword1($S,\%opts)
350							} elsif ($opts{Whole}) {
351	42	100				204	if ($S=~/$NONPRINT/o) {
352	39					90	$S=encode_mimeword1($S,\%opts);
353	39					70	substr($S,-1)=""; # Dat: remove last space
354							}
355	42	50				133	$S=split_words($S, $opts{Split}, "\n ") if $opts{Split};
356							} else {
357	27					38	my $lastpos=0;
358	27					192	while ($S=~/($NONPRINT[^ ]* *)/go) {
359							# ^^^ Dat: having ` *' is a must here, other clients just forget about it
360	41					122	my $I=pos($S)-length($1);
361	41		100			499	$I-- while $I>$lastpos and substr($S,$I-1,1)ne' ';
362	41					64	my $pos=pos($S); my $D;
	41					48
363	41		33			544	1 while ($pos=pos($S)) and $S=~/ \|\Z(?!\n)\|($NONPRINT)/goc and defined($D=$1)
			66
			66
364							and $S=~/\G[^ ]* */goc;
365	41	50				126	pos($S)=$pos if !defined $D;
366	41					73	my $srclen=pos($S)-$I;
367	41					82	my $src=substr($S,$I,$srclen);
368							##print STDERR "D($src)(".substr($S,$I+$srclen).")\n";
369	41	50	66			441	if ($I+$srclen!=length($S) and substr($src,-1)eq' ' and $S=~/ \|\Z(?!\n)\|($NONPRINT)/goc and !defined($1)) {
			66
			33
370							##print STDERR "Strip ending space\n";
371	29					67	substr($src,-1)=""; # Dat: see test case 'ignore_space'
372							}
373							# Dat: now pos($S) is invalid
374	41	50				97	die if 1>length($src);
375							##print STDERR "E($src)(".substr($S,$I+$srclen).")\n";
376	41					87	my $dst=encode_mimeword1($src,\%opts); # Dat: with trailing space
377							##print STDERR substr($S,$I,$srclen),";;\n";
378	41					144	substr($S,$I,$srclen)=$dst; # Imp: do with less copying
379	41					305	$lastpos=pos($S)=$I+length($dst);
380							}
381	27	100	66			191	substr($S,-length($opts{Space}))="" if
382							0
383	27	50				82	$S=split_words($S, $opts{Split}, "\n ") if $opts{Split};
384							}
385	71					109	$S.=$toend;
386							#$S=~s@ @\n @g; # !! debug
387							#die unless open LOG, ">> /tmp/b.log";
388							#die unless print LOG "T> ".dumpstr($S)."\n";
389							#die unless close LOG;
390	71					418	$S
391							}
392
393							#use vars qw($old_encode_mimeword);
394							#BEGIN { $old_encode_mimeword=\&MIME::AltWords::encode_mimeword }
395
396							=item encode_mimeword RAW, [ENCODING], [CHARSET]
397
398							I
399							Encode a single RAW "word" that has unsafe characters.
400							The "word" will be encoded in its entirety.
401
402							### Encode "<>":
403							$encoded = encode_mimeword("\xABFran\xE7ois\xBB");
404
405							You may specify the ENCODING (C<"Q"> or C<"B">), which defaults to C<"Q">.
406							You may specify the CHARSET, which defaults to C.
407
408							=cut
409
410
411							#** Dat: no prototype, because original encode_mimeword() doesn't have it
412							#** a prototype either
413							#** @param $_[0] raw string. raw
414							#** @param $_[1] string. Encoding: "Q" or "B", defaults to "Q"
415							#** @param $_[2] string. Charset: defaults to "ISO-8859-1" (as in MIME::Words code,
416							#** not as in its documentation
417							sub encode_mimeword {
418							#sub encode_mimeword($;$$) {
419	2			2	1	8	encode_mimewords($_[0],Encoding=>$_[1],Charset=>$_[2],Shorts=>0);
420							}
421
422	1			1		6	use vars qw($old_decode_mimewords);
	1					2
	1					52
423	1			1		317	BEGIN { $old_decode_mimewords=\&MIME::AltWords::decode_mimewords_wantarray } #### pts #### AltWords.pm adds `_wantarray'
424
425							#** Dat: function added by #### pts ####
426							#** @param $_[0] a mimewords-encoded string
427							#** @return a canonical encoding name with which the string can be re-encoded
428							sub get_best_decode_charset($) {
429	23			23	0	37	my $encodedstr=$_[0];
430	23					27	my @L;
431	23					50	for my $token (decode_mimewords($encodedstr)) {
432	45		100			184	my $charset=canonical_charset($token->[1] or "");
433	45	100	100			231	push @L, $charset if $charset and (!@L or $L[-1] ne $charset);
			66
434							}
435	23	100				83	@L=canonical_charset('UTF-8') if @L!=1; # Dat: default, can accomodate any charset
436	23					73	$L[0]
437							}
438
439							=item decode_mimewords ENCODED, [OPTS...]
440
441							I
442							Go through the string looking for RFC-1522-style "Q"
443							(quoted-printable, sort of) or "B" (base64) encoding, and decode them.
444
445							B splits the ENCODED string into a list of decoded
446							C<[DATA, CHARSET]> pairs, and returns that list. Unencoded
447							data are returned in a 1-element array C<[DATA]>, giving an effective
448							CHARSET of C.
449
450							$enc = '=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= ';
451							foreach (decode_mimewords($enc)) {
452							print "", ($_[1] \|\| 'US-ASCII'), ": ", $_[0], "\n";
453							}
454
455							B joins the "data" elements of the above list
456							together, and returns that. I it
457							sanitizes the returned string to use a specific, single charset, either
458							specified using the C option, or autodetecting one (ISO-8859-1,
459							ISO-8859-2 or UTF-8) which can accomodate all characters. In case of charset
460							autodetection, C can be used to query the charset
461							autodetected.
462
463							You might want to see L as an alternate of
464							L.
465
466							In the event of a syntax error, $@ will be set to a description
467							of the error, but parsing will continue as best as possible (so as to
468							get I back when decoding headers).
469							$@ will be false if no error was detected.
470
471							Any arguments past the ENCODED string are taken to define a hash of options:
472
473							=over 4
474
475							=item Field
476
477							Name of the mail field this string came from. I
478
479							=back
480
481							=cut
482
483							#** Dat: no prototype, because original decode_mimewords() doesn't have it
484							#** a prototype either
485							#** Dat: it is unsafe to use this without the Raw=>1 option. Search for
486							#** MIME::WordDecoder in `perldoc MIME::Words'.
487							#** @param $_[0] a mimewords-encoded string
488							#** @param $_[1]... list of options (key=>value pairs).
489							#** Keys documented in `perldoc MIME::Words'': Charset, Encoding, Field.
490							#** New key: Raw:
491							#** -- 1 (default): true: return a raw, encoded string. The encoding will
492							#** be Charset, or the one returned by get_best_decode_charset().
493							#** This is an improvement
494							#** by #### pts #### -- the original decode_mimewords() didn't return
495							#** the string in a consistent encoding.
496							#** -- 0: false: $_[0] is a Perl unicode string, it needs to be filtered
497							#** with Encode::encode(Charset)
498							#** New key: Charset: specific charset name for Raw=1 (ignored for Raw=0)
499							sub decode_mimewords_low {
500	50	100		50	0	165	return $old_decode_mimewords->(@_) if wantarray;
501	27					62	my($encodedstr,%opts)=@_;
502	27	100				75	$opts{Raw}=1 if !defined $opts{Raw}; # Dat: default
503	27					34	my $ret='';
504							# vvv Dat: not mutually recursive, because get_best_decode_charset() calls
505							# decode_mimewords() in list context, so this line won't be
506							# reached.
507	27	100	100			126	$opts{Charset}=get_best_decode_charset($encodedstr) if
508							$opts{Raw} and !defined $opts{Charset};
509	27					38	my $S;
510	27					100	for my $token ($old_decode_mimewords->($encodedstr,%opts)) { # Dat: $charset in $token->[1]
511	55	100				185	$S=$token->[1] ? Encode::decode($token->[1], $token->[0]) : $token->[0];
512	55	100				1107	$S=Encode::encode($opts{Charset}, $S) if $opts{Raw};
513	55					5359	$ret.=$S
514							}
515							$ret
516	27					182	}
517
518	1			1		5	use vars qw(@encode_subject_opts);
	1					1
	1					38
519	1			1		83	BEGIN { @encode_subject_opts=(Keeptrailnl=>1, Whole=>1); }
520
521							#** Dat: function added by #### pts ####
522							#** @param $_[0] String. A mimewords-encoded e-mail subject.
523							#** @return String. better mimewords-encoded
524							sub fix_subject($) {
525	1			1	0	2	my $encodedstr=$_[0];
526	1					4	my $best_charset=get_best_decode_charset($encodedstr);
527	1					4	my $decoded=decode_mimewords($encodedstr, Raw=>0);
528	1					6	encode_mimewords($decoded, Charset=>$best_charset, Raw=>0, @encode_subject_opts);
529							}
530
531	1			1		4	use vars qw(@encode_addresses_opts);
	1					2
	1					34
532	1			1		168	BEGIN { @encode_addresses_opts=(Keeptrailnl=>1, Whole=>0); }
533
534							#** Dat: function added by #### pts ####
535							#** @param $_[0] String. A mimewords-encoded e-mail address (or address list),
536							#** e.g. "=?ISO-8859-1?Q?foo?= bar , foo2 bar2 "
537							#** @return String. better mimewords-encoded
538							sub fix_addresses($) {
539	3			3	0	8	my $encodedstr=$_[0];
540	3					7	my $best_charset=get_best_decode_charset($encodedstr);
541	3					8	my $decoded=decode_mimewords($encodedstr, Raw=>0);
542							#print STDERR "DE($decoded)\n";
543							#chomp $decoded;
544							#$decoded.=" alma ";
545	3					11	encode_mimewords($decoded, Charset=>$best_charset, Raw=>0, @encode_addresses_opts);
546							}
547
548							#** Dat: function added by #### pts ####
549							#** @param $_[0] a Unicode string
550							#** @param $_[1] a charset
551							#** @return Boolean: is it encodable?
552							sub is_encodable($$) {
553	48			48	0	94	my $charset=uc$_[1];
554	48					64	my $S=$_[0]; # Dat: must be copied for Encode::encode
555	48	100	66			217	return 1 if $charset eq 'UTF-8' or $charset eq 'UTF8';
556	45					58	eval { Encode::encode($charset, $S, Encode::FB_CROAK) };
	45					154
557	45	100				1330	$@ ? 0 : 1
558							}
559
560							#** Please don't include US-ASCII.
561							#** Specify UTF-8 last (because get_best_encode_charset() ignores everything
562							#** after the first UTF-8).
563	1			1		4	use vars qw(@encode_charsets);
	1					8
	1					35
564	1			1		295	BEGIN { @encode_charsets=qw(ISO-8859-1 ISO-8859-2 UTF-8) }
565
566							#** Dat: function added by #### pts ####
567							#** @param $_[0] string to try
568							sub get_best_encode_charset($) {
569	39			39	0	68	for my $charset (@encode_charsets) {
570	48	100				110	return $charset if is_encodable($_[0], $charset);
571							}
572	0					0	return 'UTF-8'
573							}
574
575							#** Dat: function added by #### pts ####
576							#** @param $_[0] String. Perl Unicode-string
577							#** @param $_[1] encode mode: 'subject' or 'addresses'
578							#** @return String. better mimewords-encoded with the best charset
579							sub encode_unicode($$) {
580	6			6	0	9	my $str=$_[0];
581	6	100				17	my $modeary=($_[1] eq 'addresses' ? \@encode_addresses_opts : \@encode_subject_opts);
582	6					13	my $best_charset=get_best_encode_charset($str);
583	6					18	encode_mimewords($str, Charset=>$best_charset, Raw=>0, @$modeary);
584							}
585
586							#** Dat: function added by #### pts ####
587							#** @param $_[0] String. Perl 8-bit string, encoded in $charset
588							#** @param $_[1] encode mode: 'subject' or 'addresses'
589							#** @param $_[2] $charset;
590							#** @return String. better mimewords-encoded with the best charset
591							sub encode_8bit($$$) {
592	6			6	0	10	my $str=$_[0];
593	6					9	my $charset=$_[2];
594	6	100				16	my $modeary=($_[1] eq 'addresses' ? \@encode_addresses_opts : \@encode_subject_opts);
595	6					12	$charset=canonical_charset($charset);
596	6					10	my $best_charset;
597	6					9	for my $charset2 (@encode_charsets) {
598	13	100				21	if (canonical_charset($charset2) eq $charset) {
599	5					6	$best_charset=$charset; last
600	5					7	}
601							}
602	6	100				18	$best_charset=canonical_charset(get_best_encode_charset($str)) if
603							!defined $best_charset;
604	6	100				22	($charset eq $best_charset) ? # Imp: find badly encoded string...
605							encode_mimewords($str, Charset=>$best_charset, Raw=>1, @$modeary) :
606							encode_mimewords(Encode::decode($charset, $str), Charset=>$best_charset, Raw=>0, @$modeary)
607							}
608
609							# --- Logging...
610
611	1			1		25	BEGIN { *encode_mimewords=\&encode_mimewords_low }
612	1			1		29	BEGIN { *decode_mimewords=\&decode_mimewords_low }
613
614	1	50		1		111	BEGIN { if ($main::DEBUG) {
615
616							#use vars qw($orig_encode_mimewords $orig_decode_mimewords);
617	1			1		5	no warnings qw(prototype redefine);
	1					2
	1					483
618
619							#BEGIN { $orig_encode_mimewords=\&encode_mimewords }
620							*encode_mimewords=sub {
621	0					0	require Carp;
622	0					0	my $dump="\n\n[".scalar(localtime)."] encode_mimewords(@_) = ";
623	0					0	my $ret=&encode_mimewords_low(@_); # Dat: we need `&' to ignore prototype
624	0					0	$dump.=$ret."\n";
625	0	0				0	if (open(my($log), ">> /tmp/em.log")) {
626	0					0	local *STDERR; open STDERR, ">&".fileno($log);
	0					0
627	0					0	select(STDERR); $\|=1; select($log); $\|=1;
	0					0
	0					0
	0					0
628	0					0	binmode($log, ':utf8');
629	0					0	print $log $dump;
630	0					0	Carp::cluck("^^^ encode_mimewords() ");
631	0					0	close $log;
632							}
633							$ret
634	0					0	};
	0					0
635
636							#BEGIN { $orig_decode_mimewords=\&decode_mimewords }
637							# Imp: copy prototype of original...
638							*decode_mimewords=sub {
639	0					0	require Carp;
640	0					0	my $dump="\n\n[".scalar(localtime)."] decode_mimewords(@_) = ";
641	0	0				0	if (wantarray) {
642	0					0	my @L=(&decode_mimewords_low(@_));
643	0					0	$dump.="@L (ary)\n";
644	0	0				0	if (open(my($log), ">> /tmp/em.log")) {
645	0					0	local *STDERR; open STDERR, ">&".fileno($log);
	0					0
646	0					0	select(STDERR); $\|=1; select($log); $\|=1;
	0					0
	0					0
	0					0
647	0					0	binmode($log, ':utf8');
648	0					0	print $log $dump;
649	0					0	Carp::cluck("^^^ decode_mimewords() ");
650	0					0	close $log;
651							}
652							@L
653	0					0	} else {
654	0					0	my $ret=decode_mimewords_low(@_);
655	0					0	$dump.=$ret."\n";
656	0	0				0	if (open(my($log), ">> /tmp/em.log")) {
657	0					0	local *STDERR; open STDERR, ">&".fileno($log);
	0					0
658	0					0	select(STDERR); $\|=1; select($log); $\|=1;
	0					0
	0					0
	0					0
659	0					0	binmode($log, ':utf8');
660	0					0	print $log $dump;
661	0					0	Carp::cluck("^^^ decode_mimewords() ");
662	0					0	close $log;
663							}
664							$ret
665	0					0	}
666	0					0	};
667
668							} }
669
670							# ---
671
672							=back
673
674							=head1 NOTES
675
676							Exports its principle functions by default, in keeping with
677							L and L.
678
679							Doesn't depend on L or L. All the shared code is
680							copied to L, which is bundled.
681
682							See also L for the previous version
683							of L integrated into the Sympa 4 mailing list software.
684
685							=head1 AUTHOR
686
687							L was written by
688							Péter Szabó (F) in 2006, and it has been uploaded to CPAN on
689							2006-09-27.
690
691							L uses code from L (in the file
692							C) and it uses documentation from L
693							(in the files C and C).
694
695							Here is the original author and copyright information for L.
696
697							Eryq (F), ZeeGee Software Inc (F).
698							David F. Skoll (dfs@roaringpenguin.com) http://www.roaringpenguin.com
699
700							All rights reserved. This program is free software; you can redistribute
701							it and/or modify it under the same terms as Perl itself.
702
703							Thanks also to...
704
705							Kent Boortz For providing the idea, and the baseline
706							RFC-1522-decoding code!
707							KJJ at PrimeNet For requesting that this be split into
708							its own module.
709							Stephane Barizien For reporting a nasty bug.
710
711
712							=head1 VERSION
713
714							See $VERSION in C .
715
716							=cut
717
718							=begin testing
719
720							is(MIME::AltWords::encode_mimewords("foo bar"), "foo bar");
721							is(MIME::AltWords::encode_mimewords("foo-b\x{E9}r"), "=?ISO-8859-1?Q?foo-b=E9r?=");
722							is(MIME::AltWords::encode_mimeword("foo bar"), "=?ISO-8859-1?Q?foo__bar?="); # Dat: improvement over MIME::AltWords
723							is(MIME::AltWords::encode_mimeword("foo__bar "), "=?ISO-8859-1?Q?foo=5F=5Fbar_?="); # Dat: improvement over MIME::AltWords
724							is(MIME::AltWords::encode_mimewords("az �rkik�t� f�ldi adatai",Whole=>0), "az =?ISO-8859-1?Q?=FBrkik=F6t=F5_f=F6ldi?= adatai");
725							is(MIME::AltWords::decode_mimewords("az =?ISO-8859-1?Q?=FBrkik=F6t=F5_f=F6ldi_?= adatai", Charset=>'ISO-8859-2'),
726							"az ?rkik�t? f�ldi adatai");
727							is(MIME::AltWords::decode_mimewords("az =?ISO-8859-2?Q?=FBrkik=F6t=F5_f=F6ldi_?= adatai", Charset=>'ISO-8859-1'),
728							"az ?rkik�t? f�ldi adatai");
729							is(MIME::AltWords::decode_mimewords("az =?ISO-8859-1?Q?=FBrkik=F6t=F5_f=F6ldi_?= adatai", Charset=>'ISO-8859-1'),
730							"az �rkik�t� f�ldi adatai");
731							is(MIME::AltWords::decode_mimewords("az =?ISO-8859-2?Q?=FBrkik=F6t=F5_f=F6ldi_?= adatai", Charset=>'ISO-8859-2'),
732							"az �rkik�t� f�ldi adatai");
733							is(MIME::AltWords::decode_mimewords("az =?ISO-8859-2?Q?=FBrkik=F6t=F5_f=F6ldi_?= adatai"),
734							"az �rkik�t� f�ldi adatai"); # Dat: guess Charset to ISO-8859-2
735							is(MIME::AltWords::encode_mimewords("az �rkik�t� f�ldi adatai"), "=?ISO-8859-1?Q?az_=FBrkik=F6t=F5_f=F6ldi_adatai?=");
736							is(MIME::AltWords::encode_mimewords("az �rkik�t� f�ldi",Whole=>0), "az =?ISO-8859-1?Q?=FBrkik=F6t=F5_f=F6ldi?=");
737							is(MIME::AltWords::encode_mimewords("az �rkik�t� f�ldi"), "=?ISO-8859-1?Q?az_=FBrkik=F6t=F5_f=F6ldi?=");
738							is(MIME::AltWords::encode_mimewords("foo b\x{E1}r",Whole=>0), "foo =?ISO-8859-1?Q?b=E1r?=");
739							is(MIME::AltWords::encode_mimewords("foo b\x{E1}r"), "=?ISO-8859-1?Q?foo__b=E1r?=");
740							is(MIME::AltWords::encode_mimewords( "b\x{F5}r foo",Charset=>"ISO-8859-1",Whole=>0), "=?ISO-8859-1?Q?b=F5r_?= foo");
741							is(MIME::AltWords::encode_mimewords( "b\x{F5}r foo",Charset=>"ISO-8859-1"), "=?ISO-8859-1?Q?b=F5r__foo?=");
742							{ my $S; eval { $S=MIME::AltWords::encode_mimewords("b\x{151}r foo",Charset=>"ISO-8859-2"); };
743							ok($@=~/^Wide character /); # Dat: Encode::decode fails
744							}
745							is(MIME::AltWords::encode_mimewords("b\x{151}r foo",Charset=>"ISO-8859-2",Raw=>0,Whole=>0), "=?ISO-8859-2?Q?b=F5r_?= foo");
746							is(MIME::AltWords::encode_mimewords("b\x{151}r foo",Charset=>"ISO-8859-2",Raw=>0), "=?ISO-8859-2?Q?b=F5r__foo?=");
747							is(MIME::AltWords::encode_mimewords("ha a sz\x{F3}t ~ jel",Charset=>"ISO-8859-2",Whole=>0),
748							"ha a =?ISO-8859-2?Q?sz=F3t?= ~ jel",'ignore_space');
749							is(MIME::AltWords::encode_mimewords("ha a sz\x{F3}t ~ jel",Charset=>"ISO-8859-2"),
750							"=?ISO-8859-2?Q?ha_a_sz=F3t_~_jel?=",'ignore_space2');
751							is(MIME::AltWords::encode_mimewords("ha a sz\x{F3}t ",Whole=>0,Charset=>"ISO-8859-1"),
752							"ha a =?ISO-8859-1?Q?sz=F3t_?=", "ends with one space");
753							is(MIME::AltWords::encode_mimewords("ha a sz\x{F3}t ",Whole=>0,Charset=>"ISO-8859-1"),
754							"ha a =?ISO-8859-1?Q?sz=F3t__?=", "ends with two spaces");
755							is(MIME::AltWords::encode_mimewords("ha a sz\x{F3}t ",Charset=>"ISO-8859-1"),
756							"=?ISO-8859-1?Q?ha_a_sz=F3t_?=");
757							is(MIME::AltWords::encode_mimewords("dokumentumok kezel\x{E9}se",Whole=>0), "dokumentumok =?ISO-8859-1?Q?kezel=E9se?=");
758							is(MIME::AltWords::encode_mimewords("dokumentumok kezel\x{E9}se"), "=?ISO-8859-1?Q?dokumentumok_kezel=E9se?=");
759							is(MIME::AltWords::encode_mimewords("tartalmaz\x{F3} dokumentumok kezel\x{E9}se",Whole=>0), "=?ISO-8859-1?Q?tartalmaz=F3?= dokumentumok =?ISO-8859-1?Q?kezel=E9se?=");
760							is(MIME::AltWords::encode_mimewords("tartalmaz\x{F3} dokumentumok kezel\x{E9}se"), "=?ISO-8859-1?Q?tartalmaz=F3_dokumentumok_kezel=E9se?="); # Imp: unify printable and nonprintable to save space
761
762							is(MIME::AltWords::encode_mimewords("A keres�si eredm�nyekb\x{151}l bizonyos ".
763							"szavakat tartalmaz� dokumentumok kiz�rhat�k, ha a sz�t ~ jel el\x{151}zi ".
764							"meg. Figyelem! A kiz�r�s csak akkor eredm�nyez tal�latot, ha �s (& vagy ".
765							"sz�k�z) kapcsolatban �ll egy nem kiz�r�ssal.",
766							Charset=>"ISO-8859-2",Raw=>0,Whole=>0),
767							"A =?ISO-8859-2?Q?keres=E9si_eredm=E9nyekb=F5l?= bizonyos szavakat\n =?ISO-8859-2?Q?tartalmaz=F3?= dokumentumok\n =?ISO-8859-2?Q?kiz=E1rhat=F3k,?= ha a =?ISO-8859-2?Q?sz=F3t?= ~\n jel =?ISO-8859-2?Q?el=F5zi?= meg. Figyelem! A\n =?ISO-8859-2?Q?kiz=E1r=E1s?= csak akkor\n =?ISO-8859-2?Q?eredm=E9nyez_tal=E1latot,?= ha\n =?ISO-8859-2?Q?=E9s?= (& vagy =?ISO-8859-2?Q?sz=F3k=F6z)?=\n kapcsolatban =?ISO-8859-2?Q?=E1ll?= egy nem =?ISO-8859-2?Q?kiz=E1r=E1ssal.?=");
768
769							is(MIME::AltWords::encode_mimewords("A keres�si eredm�nyekb\x{151}l bizonyos ".
770							"szavakat tartalmaz� dokumentumok kiz�rhat�k, ha a sz�t ~ jel el\x{151}zi ".
771							"meg. Figyelem! A kiz�r�s csak akkor eredm�nyez tal�latot, ha �s (& vagy ".
772							"sz�k�z) kapcsolatban �ll egy nem kiz�r�ssal.",
773							Charset=>"ISO-8859-2",Raw=>0),
774							"=?ISO-8859-2?Q?A_keres=E9si_eredm=E9nyekb=F5l_bizonyos_szavakat_?=\n =?ISO-8859-2?Q?tartalmaz=F3_dokumentumok_kiz=E1rhat=F3k,_ha_a_sz?=\n =?ISO-8859-2?Q?=F3t_~_jel_el=F5zi_meg._Figyelem!_A_kiz=E1r=E1s_c?=\n =?ISO-8859-2?Q?sak_akkor_eredm=E9nyez_tal=E1latot,_ha_=E9s_(&_va?=\n =?ISO-8859-2?Q?gy_sz=F3k=F6z)_kapcsolatban_=E1ll_egy_nem_kiz=E1r?=\n =?ISO-8859-2?Q?=E1ssal.?=");
775							# vvv Dat: composing with Pine emits:
776							#is(MIME::AltWords::encode_mimewords("A keres�si eredm�nyekb�l bizonyos szavakat tartalmaz� dokumentumok kiz�rhat�k, ha a sz�t ~ jel el�zi meg. Figyelem! A kiz�r�s csak akkor eredm�nyez tal�latot, ha �s (& vagy sz�k�z) kapcsolatban �ll egy nem kiz�r�ssal.",
777							#"=?ISO-8859-2?Q?A_keres=E9si_eredm=E9nyekb=F5l_bizonyos_szavakat?=
778							# =?ISO-8859-2?Q?_tartalmaz=F3_dokumentumok_kiz=E1rhat=F3k=2C_ha_?=
779							# =?ISO-8859-2?Q?a_sz=F3t_~_jel_el=F5zi_meg=2E_Figyelem!_A_?=
780							# =?ISO-8859-2?Q?kiz=E1r=E1s_csak_akkor_eredm=E9nyez_tal=E1latot=2C?=
781							# =?ISO-8859-2?Q?_ha_=E9s_=28&_vagy_sz=F3k=F6z=29_kapcsolatban?=
782							# =?ISO-8859-2?Q?_=E1ll_egy_nem_kiz=E1r=E1ssal=2E?=
783
784							is(MIME::AltWords::encode_mimewords("�rv�zt\x{171}r\x{151} egy t�k�rf�r�g�p",
785							Charset=>"UTF-8",Raw=>0,Whole=>0),"=?UTF-8?B?w4FydsOtenTFsXLFkQ==?= egy =?UTF-8?B?dMO8a8O2cmbDunLDs2fDqXA=?=");
786							is(MIME::AltWords::encode_mimewords("�rv�zt\x{171}r\x{151} egy t�k�rf�r�g�p",
787							Charset=>"UTF-8",Raw=>0),"=?UTF-8?B?w4FydsOtenTFsXLFkSBlZ3kgdMO8a8O2cmbDunLDs2fDqXA=?=");
788
789							is(MIME::AltWords::split_words("fo ot bar aaaaaaaaab cccccccccdddddd e f g ",8,"xy"),"fo otxybarxyaaaaaaaaabxy cccccccccddddddxye f g ",'split_words()');
790
791							is(MIME::AltWords::encode_mimewords("Szab\x{F3} P\x{E9}ter ",Charset=>"UTF-8",Raw=>0),
792							"=?UTF-8?B?U3phYsOzIFDDqXRlciA8cHRzQG91ci51bT4=?=");
793							is(MIME::AltWords::encode_mimewords("Szab\x{F3} P\x{E9}ter",Charset=>"UTF-8",Raw=>0),
794							"=?UTF-8?B?U3phYsOzIFDDqXRlcjxwdHNAb3VyLnVtPg==?="); # Dat: this is what compose_mail returns
795							is(MIME::AltWords::encode_mimewords("Szab\x{F3} P\x{E9}ter ",Charset=>"UTF-8",Raw=>0,Whole=>0),
796							"=?UTF-8?B?U3phYsOzIFDDqXRlcg==?= ");
797							is(MIME::AltWords::encode_mimewords("Szab\x{F3} P\x{E9}ter ",Charset=>"UTF-8",Raw=>0,Whole=>0),
798							"=?UTF-8?B?U3phYsOzIFDDqXRlciA=?= ");
799
800							SKIP: {
801							eval { require Mail::Address };
802							skip "Mail::Address not installed", 1 if $@;
803							my @sender_hdr = Mail::Address->parse("=?UTF-8?B?U3phYsOzIFDDqXRlciA=?= ");
804							my $address=@sender_hdr ? $sender_hdr[0]->address : undef;
805							$address="undef" if !defined $address;
806							is($address, "pts\@our.um");
807							}
808
809							is(scalar MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("\x{171}",Charset=>"UTF-8",Raw=>0)),
810							"\x{C5}\x{B1}", 'decode_mimewords()');
811							is(scalar MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("\x{171}",Charset=>"UTF-8",Raw=>0),Raw=>0),
812							"\x{171}", 'decode_mimewords()');
813
814							is(MIME::AltWords::get_best_decode_charset(MIME::AltWords::encode_mimewords("f\x{171} fa t\x{F6}lgy",Charset=>"ISO-8859-2",Raw=>0,Whole=>0)),
815							'ISO-8859-2', 'get_best_decode_charset()');
816							is(MIME::AltWords::get_best_decode_charset("=?ISO-8859-2?Q?f=FB?= fa =?ISO-8859-1?Q?t=F6lgy?="),
817							'UTF-8', 'get_best_decode_charset()');
818							is(MIME::AltWords::get_best_decode_charset("fa"), 'UTF-8', 'get_best_decode_charset()');
819
820							is(MIME::AltWords::fix_addresses("=?ISO-8859-2?Q?f=FB?= fa =?ISO-8859-1?Q?t=F6lgy?="),
821							"=?UTF-8?B?ZsWx?= fa =?UTF-8?B?dMO2bGd5?=", 'fix_addresses()');
822							is(MIME::AltWords::fix_addresses("=?UTF-8?B?U3phYsOzIFDDqXRlciA8cHRzQG91ci51bT4=?="),
823							"=?UTF-8?B?U3phYsOzIFDDqXRlcg==?= ", 'fix_addresses()');
824							is(MIME::AltWords::fix_addresses("=?UTF-8?B?U3phYsOzIFDDqXRlciA8cHRzQG91ci51bT4K?="),
825							"=?UTF-8?B?U3phYsOzIFDDqXRlcg==?= \n", 'fix_addresses() Keeptrailnl');
826							is(MIME::AltWords::fix_subject("=?ISO-8859-2?Q?f=FB?= fa =?ISO-8859-1?Q?t=F6lgy?="),
827							"=?UTF-8?B?ZsWxIGZhIHTDtmxneQ==?=", 'fix_subject()');
828
829							is(MIME::AltWords::decode_mimewords("=?UTF-8?B?ZsWx?= fa =?UTF-8?B?dMO2bGd5?=",Raw=>1),
830							"f\x{C5}\x{B1} fa t\x{C3}\x{B6}lgy", 'decode_mimewords()');
831							is(MIME::AltWords::decode_mimewords("=?ISO-8859-2?Q?f=FB?= fa =?ISO-8859-1?Q?t=F6lgy?=",Raw=>0),
832							"f\x{171} fa t\x{F6}lgy", 'decode_mimewords()');
833							#die "".MIME::AltWords::decode_mimewords("=?UTF-8?B?U3phYsOzIFDDqXRlciA8cHRzQG91ci51bT4=?=");
834							is(MIME::AltWords::decode_mimewords("=?UTF-8?B?U3phYsOzIFDDqXRlcg==?=",Raw=>0),
835							"Szab� P�ter", 'decode_mimewords()');
836
837							is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("f\x{E9}l",Charset=>"UTF-8",Raw=>0)),
838							"f\x{C3}\x{A9}l", 'encode+decode mimewords');
839							is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("f\x{E9}l ",Charset=>"UTF-8",Raw=>0)),
840							"f\x{C3}\x{A9}l ", 'encode+decode mimewords');
841							is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("f\x{E9}l ",Charset=>"UTF-8",Raw=>0)),
842							"f\x{C3}\x{A9}l ", 'encode+decode mimewords');
843							is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("f\x{E9}l <",Charset=>"UTF-8",Raw=>0)),
844							"f\x{C3}\x{A9}l <", 'encode+decode mimewords');
845							is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("f\x{E9}l <",Charset=>"UTF-8",Raw=>0,Whole=>0)),
846							"f\x{C3}\x{A9}l <", 'encode+decode mimewords');
847							is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("d\x{E9}l < ",Charset=>"UTF-8",Raw=>0,Whole=>0)),
848							"d\x{C3}\x{A9}l < ", 'encode+decode mimewords');
849							is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("d\x{E9}l < ",Charset=>"UTF-8",Raw=>0,Whole=>0)),
850							"d\x{C3}\x{A9}l < ", 'encode+decode mimewords');
851
852							is(MIME::AltWords::encode_mimewords("[nekem] pr\x{F3}ba h\x{E1}romra\n",Keeptrailnl=>1),
853							"=?ISO-8859-1?Q?[nekem]_pr=F3ba_h=E1romra?=\n", "encode_mimewords() Keeptrailnl=1");
854							is(MIME::AltWords::encode_mimewords("[nekem] pr\x{F3}ba h\x{E1}romra\n",Keeptrailnl=>0),
855							"=?ISO-8859-1?Q?[nekem]_pr=F3ba_h=E1romra=0A?=", "encode_mimewords() Keeptrailnl=0");
856							is(MIME::AltWords::encode_mimewords("[nekem] pr\x{F3}ba h\x{E1}romra\n"),
857							"=?ISO-8859-1?Q?[nekem]_pr=F3ba_h=E1romra?=\n", "encode_mimewords() Keeptrailnl=default");
858
859							is(MIME::AltWords::decode_mimewords("=?ISO-8859-2?Q?m=E1sik_pr=F3b=E1cska?=\n"),
860							"m\x{E1}sik pr\x{F3}b\x{E1}cska\n", "decode_mimewords ISO-8859-2");
861
862							is(MIME::AltWords::get_best_encode_charset("hello\t\n"), "ISO-8859-1", "get_best_encode_charset() ASCII");
863							is(MIME::AltWords::get_best_encode_charset("hell\x{F3}, w\x{F6}rld\t\n"), "ISO-8859-1", "get_best_encode_charset() ISO-8859-1");
864							is(MIME::AltWords::get_best_encode_charset("hell\x{151}, w\x{F6}rld\t\n"), "ISO-8859-2", "get_best_encode_charset() ISO-8859-2");
865							is(MIME::AltWords::get_best_encode_charset("hell\x{151}, w\x{F5}rld\t\n"), "UTF-8", "get_best_encode_charset() UTF-8");
866
867							is(MIME::AltWords::encode_unicode("[foo] hell\x{151}, w\x{F5}rld\t\n", 'addresses'), "[foo] =?UTF-8?B?aGVsbMWRLCB3w7VybGQJ?=\n", "encode_addresses() UTF-8");
868							is(MIME::AltWords::encode_unicode("[foo] hell\x{151}, w\x{F5}rld\t\n", 'subject'), "=?UTF-8?B?W2Zvb10gaGVsbMWRLCB3w7VybGQJ?=\n", "encode_subject() UTF-8");
869							is(MIME::AltWords::encode_unicode("[foo] hell\x{151}, w\x{F6}rld\t\n", 'subject'), "=?ISO-8859-2?Q?[foo]_hell=F5,_w=F6rld=09?=\n", "encode_subject() ISO-8859-2");
870							is(MIME::AltWords::encode_unicode("[foo] hell\x{F3}, w\x{F6}rld\t\n", 'subject'), "=?ISO-8859-1?Q?[foo]_hell=F3,_w=F6rld=09?=\n", "encode_subject() ISO-8859-1");
871
872							is(MIME::AltWords::encode_unicode("toast =?FOO-42?Q?bar=35?= me?\n", 'addresses'), "toast =?ISO-8859-1?Q?=3D=3FFOO-42=3FQ=3Fbar=3D35=3F=3D?= me?\n", "encode_addresses() with mimewords");
873							is(MIME::AltWords::encode_unicode("toast =?FOO-42?Q?b\x{E1}r=35?= me?\n", 'addresses'), "toast =?ISO-8859-1?Q?=3D=3FFOO-42=3FQ=3Fb=E1r=3D35=3F=3D?= me?\n", "encode_addresses() with mimewords");
874
875							is(MIME::AltWords::encode_8bit("[foo] hell\x{C5}\x{91}, w\x{C3}\x{B5}rld\t\n", 'addresses', 'uTf8'), "[foo] =?UTF-8?B?aGVsbMWRLCB3w7VybGQJ?=\n", "encode_8bit() addresses UTF-8");
876							is(MIME::AltWords::encode_8bit("[foo] hell\x{C5}\x{91}, w\x{C3}\x{B5}rld\t\n", 'subject', 'uTf8'), "=?UTF-8?B?W2Zvb10gaGVsbMWRLCB3w7VybGQJ?=\n", "encode_8bit() subject UTF-8");
877							is(MIME::AltWords::encode_8bit("[foo] hell\x{F5}, w\x{F6}rld\t\n", 'subject', '88592'), "=?ISO-8859-2?Q?[foo]_hell=F5,_w=F6rld=09?=\n", "encode_8bit() subject ISO-8859-2");
878							is(MIME::AltWords::encode_8bit("[foo] hell\x{F3}, w\x{F6}rld\t\n", 'subject', '88591'), "=?ISO-8859-1?Q?[foo]_hell=F3,_w=F6rld=09?=\n", "encode_8bit() subject ISO-8859-1");
879							is(MIME::AltWords::encode_8bit("toast =?FOO-42?Q?bar=35?= me?\n", 'addresses', 'us-ascii'), "toast =?ISO-8859-1?Q?=3D=3FFOO-42=3FQ=3Fbar=3D35=3F=3D?= me?\n", "encode_8bit() addresses with mimewords");
880							is(MIME::AltWords::encode_8bit("toast =?FOO-42?Q?b\x{E1}r=35?= me?\n", 'addresses', '88591'), "toast =?ISO-8859-1?Q?=3D=3FFOO-42=3FQ=3Fb=E1r=3D35=3F=3D?= me?\n", "encode_8bit() addresses with mimewords");
881
882							is(join(' ',map{MIME::AltWords::encode_mimewords($_)}split/ +/,"[nekem] m\x{E1}sik pr\x{F3}b\x{E1}cska\n"),
883							"[nekem] =?ISO-8859-1?Q?m=E1sik?= =?ISO-8859-1?Q?pr=F3b=E1cska?=\n", "encode_mimewords() default ISO-8859-1 a");
884
885							is(join(' ',map{MIME::AltWords::encode_mimewords($_,Raw=>0)}split/ +/,"[nekem] m\x{E1}sik pr\x{F3}b\x{151}cska\n"),
886							"[nekem] =?ISO-8859-1?Q?m=E1sik?= =?ISO-8859-2?Q?pr=F3b=F5cska?=\n", "encode_mimewords() default ISO-8859-1,2 b");
887
888							is(MIME::AltWords::encode_mimewords("[nekem] m\x{E1}sik pr\x{F3}b\x{E1}cska\n"),
889							"=?ISO-8859-1?Q?[nekem]_m=E1sik_pr=F3b=E1cska?=\n", "encode_mimewords() default ISO-8859-1 c");
890
891							is(MIME::AltWords::decode_mimewords('=?US-ASCII?Q?Keith_Moore?= '), 'Keith Moore ', "MIME::Words test case 1");
892							is(MIME::AltWords::decode_mimewords('=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= '), 'Keld J�rn Simonsen ', "MIME::Words test case 2");
893							is(MIME::AltWords::decode_mimewords('=?ISO-8859-1?Q?Andr=E9_?= Pirard '), 'Andr� Pirard ', "MIME::Words test case 3");
894							is(MIME::AltWords::decode_mimewords('=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?==?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?==?US-ASCII?Q?.._cool!?='), 'If you can read this you understand the example... cool!', "MIME::Words test case 4");
895							is(MIME::AltWords::encode_mimewords("\xABFran\xE7ois\xBB"), '=?ISO-8859-1?Q?=ABFran=E7ois=BB?=', "MIME::Words test case 5");
896							is(MIME::AltWords::encode_mimewords("Me and \xABFran\xE7ois\xBB at the beach"), '=?ISO-8859-1?Q?Me_and_=ABFran=E7ois=BB_at_the_beach?=', "MIME::Words test case 6");
897							# vvv !! is this correct (space after \n)?
898							is(MIME::AltWords::encode_mimewords("Me and \xABFran\xE7ois\xBB, down at the beach\nwith Dave "), "=?ISO-8859-1?Q?Me_and_=ABFran=E7ois=BB,_down_at_the_beach=0Awith?=\n =?ISO-8859-1?Q?_Dave_?=", "MIME::Words test case 7");
899							is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("Me and \xABFran\xE7ois\xBB, down at the beach\nwith Dave ")), "Me and \xABFran\xE7ois\xBB, down at the beach\nwith Dave ", "MIME::Words test case 8");
900
901							=cut
902
903							1