line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# MIME/AltWords.pm -- a lot of fixes on MIME::Words |
3
|
|
|
|
|
|
|
# by pts@fazekas.hu at Fri Jan 20 11:35:08 UTC 2006 |
4
|
|
|
|
|
|
|
# -- Fri Mar 31 18:41:14 CEST 2006 |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Dat: this assumes Perl v5.8 or later |
7
|
|
|
|
|
|
|
# Dat: run the unit tests with: ./pts-test.pl AltMIMEWords.pm |
8
|
|
|
|
|
|
|
# Dat: see `perldoc MIME::Words' for the original documentation |
9
|
|
|
|
|
|
|
# Dat: a raw string has bytes in 0..255, and it is already encoded in some |
10
|
|
|
|
|
|
|
# encoding |
11
|
|
|
|
|
|
|
# SUXX: perldoc doesn't respect `=encoding utf-8' |
12
|
|
|
|
|
|
|
# lib/MIME/AltWords.pm:30: Unknown command paragraph "=encoding utf8" |
13
|
|
|
|
|
|
|
# !! why `,' in teszt a =?ISO-8859-2?Q?lev=E9lben=2C_t=F6r=F6lhet=F5?= ?? is it standard? |
14
|
|
|
|
|
|
|
# !! document all |
15
|
|
|
|
|
|
|
# !! document test cases |
16
|
|
|
|
|
|
|
# !! MANIFEST etc. |
17
|
|
|
|
|
|
|
# |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
package MIME::AltWords; |
20
|
1
|
|
|
1
|
|
13
|
use v5.8; # Dat: Unicode string support etc. |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
43
|
|
21
|
1
|
|
|
1
|
|
4
|
use integer; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
9
|
|
22
|
1
|
|
|
1
|
|
18
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
47
|
|
23
|
1
|
|
|
1
|
|
353
|
use MIME::AltWords0; #use MIME::Words; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
19
|
|
24
|
1
|
|
|
1
|
|
4
|
use MIME::Base64; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
33
|
|
25
|
1
|
|
|
1
|
|
825
|
use Encode; |
|
1
|
|
|
|
|
9948
|
|
|
1
|
|
|
|
|
77
|
|
26
|
1
|
|
|
1
|
|
7
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
17
|
|
27
|
1
|
|
|
1
|
|
4
|
use Exporter; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
29
|
|
28
|
1
|
|
|
1
|
|
5
|
no warnings qw(prototype redefine); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
48
|
|
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
# vvv Dat: exports symbols from MIME::AltWords0 by default, works only |
31
|
|
|
|
|
|
|
# without these |
32
|
|
|
|
|
|
|
#use vars qw(@EXPORT_OK %EXPORT_TAGS); |
33
|
|
|
|
|
|
|
#BEGIN { |
34
|
|
|
|
|
|
|
# @EXPORT_OK=@MIME::AltWords0::EXPORT_OK; |
35
|
|
|
|
|
|
|
# %EXPORT_TAGS=%MIME::AltWords0::EXPORT_TAGS; |
36
|
|
|
|
|
|
|
## die "@EXPORT_OK"; |
37
|
|
|
|
|
|
|
#} |
38
|
|
|
|
|
|
|
#Exporter::export_ok_tags('all'); |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=pod |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
=encoding utf8 |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
=head1 NAME |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
MIME::AltWords - properly deal with RFC-1522 encoded words |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
=head1 SYNOPSIS |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
The Perl module L is recommended for encoding and |
51
|
|
|
|
|
|
|
decoding MIME words (such as C<=?ISO-8859-2?Q?_=E1ll_e=E1r?=>) found in |
52
|
|
|
|
|
|
|
e-mail message headers (mostly Subject, From and To). |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
L is similar to L in |
55
|
|
|
|
|
|
|
L, but it provides an alternate implementation that follows the |
56
|
|
|
|
|
|
|
MIME specification more carefully, and it is actually compatible with |
57
|
|
|
|
|
|
|
existing mail software (tested with Mutt, Pine, JavaMail and OpenWebmail). |
58
|
|
|
|
|
|
|
L extends the functionality of L (version |
59
|
|
|
|
|
|
|
5.420) by adding more functions and more options to existing functions. The |
60
|
|
|
|
|
|
|
original interface is changed in an upward-compatible way. |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
Before reading further, you should see L to make sure that |
63
|
|
|
|
|
|
|
you understand where this module fits into the grand scheme of things. |
64
|
|
|
|
|
|
|
Go on, do it now. I'll wait. |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
Ready? Ok... |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
use MIME::AltWords qw(:all); |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
### Decode the string into another string, forgetting the charsets: |
71
|
|
|
|
|
|
|
$decoded = decode_mimewords( |
72
|
|
|
|
|
|
|
'To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= ', |
73
|
|
|
|
|
|
|
); |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
### Split string into array of decoded [DATA,CHARSET] pairs: |
76
|
|
|
|
|
|
|
@decoded = decode_mimewords( |
77
|
|
|
|
|
|
|
'To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= ', |
78
|
|
|
|
|
|
|
); |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
### Encode a single unsafe word: |
81
|
|
|
|
|
|
|
$encoded = encode_mimeword("\xABFran\xE7ois\xBB"); |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
### Encode a string, trying to find the unsafe words inside it: |
84
|
|
|
|
|
|
|
$encoded = encode_mimewords("Me and \xABFran\xE7ois\xBB in town"); |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
=head1 DESCRIPTION |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
Fellow Americans, you probably won't know what the hell this module |
91
|
|
|
|
|
|
|
is for. Europeans, Russians, et al, you probably do. C<:-)>. |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
For example, here's a valid MIME header you might get: |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
From: =?US-ASCII?Q?Keith_Moore?= |
96
|
|
|
|
|
|
|
To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= |
97
|
|
|
|
|
|
|
CC: =?ISO-8859-1?Q?Andr=E9_?= Pirard |
98
|
|
|
|
|
|
|
Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?= |
99
|
|
|
|
|
|
|
=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?= |
100
|
|
|
|
|
|
|
=?US-ASCII?Q?.._cool!?= |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
The fields basically decode to (sorry, I can only approximate the |
103
|
|
|
|
|
|
|
Latin characters with 7 bit sequences /o and 'e): |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
From: Keith Moore |
106
|
|
|
|
|
|
|
To: Keld J/orn Simonsen |
107
|
|
|
|
|
|
|
CC: Andr'e Pirard |
108
|
|
|
|
|
|
|
Subject: If you can read this you understand the example... cool! |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
=head1 PUBLIC INTERFACE |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
=over 4 |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=cut |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
|
118
|
1
|
|
|
1
|
|
4
|
use vars qw($NONPRINT $VERSION); |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
44
|
|
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
### The package version, both in 1.23 style *and* usable by MakeMaker: |
121
|
|
|
|
|
|
|
BEGIN { # vvv Dat: MakeMaker needs $VERSION in a separate line |
122
|
1
|
|
|
1
|
|
1628
|
$VERSION = "0.12" |
123
|
|
|
|
|
|
|
} |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
# Dat: MIME::Words has [\x00-\x1F\x7F-\xFF]. We prepare for Unicode. |
126
|
|
|
|
|
|
|
$NONPRINT=qr{(?:[^\x20-\x7E]|=)}; |
127
|
|
|
|
|
|
|
#$NONPRINT=qr{(?:[^\x20-\x7E]|[=](?=[?]))}; # Imp: is a bare `=' legal? |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
#** @param $_[0] charset name |
130
|
|
|
|
|
|
|
#** @return MIME canonical charset name |
131
|
|
|
|
|
|
|
sub canonical_charset($) { |
132
|
142
|
|
|
142
|
0
|
186
|
my $S=$_[0]; |
133
|
142
|
100
|
|
|
|
735
|
if ($S=~/\A(?:iso-?(?:8859-?)?|8859-?)(\d+)\Z(?!\n)/i) { "ISO-8859-$1" } |
|
79
|
100
|
|
|
|
266
|
|
|
|
100
|
|
|
|
|
|
134
|
45
|
|
|
|
|
108
|
elsif ($S=~/\AUTF-?8\Z(?!\n)/i) { "UTF-8" } |
135
|
3
|
|
|
|
|
8
|
elsif ($S=~/\A(?:US-)ASCII\Z(?!\n)/i) { "US-ASCII" } |
136
|
15
|
|
|
|
|
35
|
else { uc $S } |
137
|
|
|
|
|
|
|
} |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
#** @param $_[0] string. |
140
|
|
|
|
|
|
|
#** @param $_[1] hashref. options |
141
|
|
|
|
|
|
|
#** @param $_[2] string to append to |
142
|
|
|
|
|
|
|
sub append_encoded_word($$$) { |
143
|
89
|
|
|
89
|
0
|
197
|
my($word,$opts,$dst)=@_; |
144
|
89
|
100
|
|
|
|
231
|
if ($opts->{Encoding} eq "B") { |
|
|
50
|
|
|
|
|
|
145
|
25
|
|
|
|
|
83
|
$word=MIME::Base64::encode_base64($word, ''); # Dat: empty EOL, as requested by MIME |
146
|
25
|
|
|
|
|
43
|
$word=~s@\s+@@g; |
147
|
25
|
|
|
|
|
33
|
$$dst.=$word |
148
|
|
|
|
|
|
|
} elsif ($opts->{Encoding} eq "Q") { |
149
|
|
|
|
|
|
|
# Dat: improved MIME::Words::_encode_Q |
150
|
64
|
100
|
|
|
|
761
|
$word =~ s{( )|([_\?\=]|$NONPRINT)}{defined $1 ? "_" # Dat: "_" is an improvement |
|
258
|
|
|
|
|
1284
|
|
151
|
|
|
|
|
|
|
: sprintf("=%02X", ord($2))}eog; |
152
|
64
|
|
|
|
|
122
|
$$dst.=$word |
153
|
0
|
|
|
|
|
0
|
} else { die } |
154
|
|
|
|
|
|
|
undef |
155
|
89
|
|
|
|
|
150
|
} |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
#use vars qw($old_encode_mimewords); |
158
|
|
|
|
|
|
|
#BEGIN { $old_encode_mimewords=\&MIME::AltWords::encode_mimewords } |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
#** @param $_[0] string. Unicode |
161
|
|
|
|
|
|
|
#** @param $_[1] hashref. options preprocessed by out encode_mimewords() |
162
|
|
|
|
|
|
|
#** @return 0..255 string, ends with $opts->{Space} if $opts->{Shorts} |
163
|
|
|
|
|
|
|
sub encode_mimeword1($$) { |
164
|
82
|
|
|
82
|
0
|
134
|
my($src,$opts)=@_; |
165
|
|
|
|
|
|
|
# Imp: warning if Encode::encode cannot represent character |
166
|
82
|
|
|
|
|
87
|
my $dst=""; |
167
|
82
|
|
|
|
|
194
|
my $open="=?$opts->{Charset}?$opts->{Encoding}?"; |
168
|
82
|
|
|
|
|
79
|
my $maxlen=64; # Dat: good for a subject line !! 63 or 62 |
169
|
|
|
|
|
|
|
# ^^^ Dat: $maxlen=75 works fine in postfix 2.1.5 + pine 4.64 |
170
|
|
|
|
|
|
|
# ^^^ Dat: one quoted word shouldn't be too long |
171
|
|
|
|
|
|
|
# ^^^ Dat: Subject: =?B?C?3412341234123412341234123412341234123412341234123412341234?= |
172
|
82
|
|
|
|
|
92
|
$maxlen-=length($open); |
173
|
82
|
100
|
|
|
|
192
|
$maxlen=int(($maxlen+3)/4)*3 if $opts->{Encoding} eq "B"; # Dat: `use integer;' anyway |
174
|
|
|
|
|
|
|
#print STDERR "($src) $maxlen\n"; |
175
|
|
|
|
|
|
|
|
176
|
82
|
|
|
|
|
210
|
$src=Encode::encode($opts->{Charset},$src); |
177
|
82
|
100
|
|
|
|
2386
|
if ($opts->{Shorts}) { |
178
|
80
|
|
|
|
|
104
|
my $I=0; |
179
|
80
|
|
|
|
|
159
|
while ($I
|
180
|
|
|
|
|
|
|
# Dat: split result for too long consecutive headers (i.e. long Subject: line) |
181
|
87
|
|
|
|
|
109
|
my $J=$I+$maxlen; |
182
|
87
|
100
|
|
|
|
175
|
$J=length($src) if $J>length($src); |
183
|
87
|
100
|
|
|
|
210
|
if ($opts->{Charset} eq "UTF-8") { # Dat: UTF-8 is multibyte, it cannot split anywhere |
184
|
25
|
50
|
|
|
|
76
|
if (substr($src,$J,1)=~y/\x80-\xbf//) { # Dat: a half-cut UTF-8 byte sequence |
185
|
0
|
|
0
|
|
|
0
|
$J-- while $J>$I+1 and substr($src,$J-1,1)=~y/\x80-\xbf//; |
186
|
0
|
0
|
0
|
|
|
0
|
$J-- if $J>$I+1 and substr($src,$J-1,1)=~y/\xc0-\xff//; |
187
|
|
|
|
|
|
|
# ^^^Dat: `$I+1': avoid infinite loop in `$I<$maxlen' |
188
|
|
|
|
|
|
|
} |
189
|
|
|
|
|
|
|
} |
190
|
|
|
|
|
|
|
# Imp: else: fix for other multibyte encodings |
191
|
87
|
|
|
|
|
116
|
$dst.=$open; |
192
|
87
|
|
|
|
|
111
|
my $addlen=-length($dst); |
193
|
87
|
|
|
|
|
237
|
append_encoded_word(substr($src,$I,$J-$I),$opts,\$dst); |
194
|
87
|
|
|
|
|
129
|
$addlen+=length($dst); |
195
|
87
|
100
|
100
|
|
|
397
|
if ($opts->{Encoding} eq "Q" and $addlen>$maxlen and $addlen>3) { |
|
|
|
66
|
|
|
|
|
196
|
|
|
|
|
|
|
# Dat: too many hex `=..' triplets, become too long |
197
|
7
|
|
|
|
|
12
|
my $K=length($dst); |
198
|
7
|
|
|
|
|
54
|
while ($addlen>$maxlen) { |
199
|
48
|
100
|
|
|
|
84
|
if (substr($dst,$K-3,1)eq"=") { $addlen-=3; $K-=3; $J-- } |
|
3
|
|
|
|
|
4
|
|
|
3
|
|
|
|
|
4
|
|
|
3
|
|
|
|
|
7
|
|
200
|
45
|
|
|
|
|
43
|
else { $addlen--; $K--; $J-- } |
|
45
|
|
|
|
|
36
|
|
|
45
|
|
|
|
|
80
|
|
201
|
|
|
|
|
|
|
} |
202
|
7
|
|
|
|
|
12
|
substr($dst,$K)=""; |
203
|
|
|
|
|
|
|
# Imp: more efficient, don't process the same data many times |
204
|
|
|
|
|
|
|
} |
205
|
87
|
|
|
|
|
117
|
$dst.="?="; $dst.=$opts->{Space}; |
|
87
|
|
|
|
|
100
|
|
206
|
87
|
|
|
|
|
228
|
$I=$J; |
207
|
|
|
|
|
|
|
} |
208
|
2
|
|
|
|
|
3
|
} else { $dst.=$open; append_encoded_word($src,$opts,\$dst); $dst.="?=" } |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
2
|
|
209
|
82
|
|
|
|
|
184
|
$dst |
210
|
|
|
|
|
|
|
} |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
#** @returns the specified string quoted in double quotes. All characters |
213
|
|
|
|
|
|
|
#** are printable ASCII. |
214
|
|
|
|
|
|
|
sub dumpstr($) { |
215
|
0
|
|
|
0
|
0
|
0
|
my $S=$_[0]; |
216
|
0
|
|
|
|
|
0
|
$S=~s@(["\\])|([^ -~])@ |
217
|
0
|
0
|
|
|
|
0
|
defined $2 ? sprintf("\\x{%X}",ord($2)) # Imp: Unicode chars |
218
|
|
|
|
|
|
|
: "\\$1" # Imp: Unicode chars |
219
|
|
|
|
|
|
|
@ge; |
220
|
0
|
|
|
|
|
0
|
"\"$S\"" #" |
221
|
|
|
|
|
|
|
} |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
#** Splits a string on spaces into lines so no line is longer than the maximum |
224
|
|
|
|
|
|
|
#** (except if there is no space nearby). |
225
|
|
|
|
|
|
|
#** Only the 1st space is converted to $_[2] at each break. |
226
|
|
|
|
|
|
|
#** @param $_[0] string. to split |
227
|
|
|
|
|
|
|
#** @param $_[1] integer. maximum # chars in a line (not counting the terminating newline) |
228
|
|
|
|
|
|
|
#** @param $_[2] chars to replace a space with -- not being added to the |
229
|
|
|
|
|
|
|
#** maximum line length |
230
|
|
|
|
|
|
|
sub split_words($$$) { |
231
|
70
|
|
|
70
|
0
|
124
|
my($S,$maxlen,$nl)=@_; |
232
|
70
|
|
|
|
|
70
|
my $lastpos=0; my $I=0; my $J; |
|
70
|
|
|
|
|
76
|
|
|
70
|
|
|
|
|
67
|
|
233
|
|
|
|
|
|
|
#** Position after last space to split at, or $I |
234
|
|
|
|
|
|
|
my $K; |
235
|
70
|
|
|
|
|
68
|
my $ret=""; |
236
|
70
|
|
|
|
|
69
|
while (1) { # Imp: faster implementation |
237
|
84
|
|
|
|
|
92
|
$K=$J=$I; |
238
|
84
|
|
66
|
|
|
429
|
$J++ while $J
|
239
|
84
|
100
|
100
|
|
|
222
|
while ($K==$I ? ($J
|
240
|
2698
|
|
|
|
|
3079
|
my $C=substr($S,$J,1); |
241
|
2698
|
50
|
|
|
|
5424
|
if ($C eq"\n") { $K=$I=++$J } |
|
0
|
100
|
|
|
|
0
|
|
242
|
65
|
|
|
|
|
274
|
elsif ($C eq " ") { $K=++$J } |
243
|
2633
|
|
|
|
|
6915
|
else { $J++ } |
244
|
|
|
|
|
|
|
} |
245
|
84
|
100
|
100
|
|
|
248
|
if ($K>$I and $J>$I+$maxlen) { |
246
|
18
|
|
|
|
|
47
|
$ret.=substr($S,$I,$K-1-$I); |
247
|
18
|
|
|
|
|
30
|
$ret.=$nl; |
248
|
18
|
|
|
|
|
22
|
$I=$K; # Imp: skip more |
249
|
|
|
|
|
|
|
} |
250
|
84
|
100
|
|
|
|
169
|
if ($J+$maxlen>=length($S)) { $ret.=substr($S,$I); last } # Dat: found last line, no way to split |
|
70
|
|
|
|
|
172
|
|
|
70
|
|
|
|
|
93
|
|
251
|
|
|
|
|
|
|
} |
252
|
|
|
|
|
|
|
$ret |
253
|
70
|
|
|
|
|
182
|
} |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
=item encode_mimewords RAW, [OPTS] |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
I |
258
|
|
|
|
|
|
|
Given a RAW string, try to find and encode all "unsafe" sequences |
259
|
|
|
|
|
|
|
of characters: |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
### Encode a string with some unsafe "words": |
262
|
|
|
|
|
|
|
$encoded = encode_mimewords("Me and \xABFran\xE7ois\xBB"); |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
Returns the encoded string. |
265
|
|
|
|
|
|
|
Any arguments past the RAW string are taken to define a hash of options: |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
=over 4 |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
=item Charset |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
Encode all unsafe stuff with this charset. Default is 'ISO-8859-1', |
272
|
|
|
|
|
|
|
a.k.a. "Latin-1". |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
=item Encoding |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
The encoding to use, C<"q"> or C<"b">. The default is C<"q">. |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
=item Field |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
Name of the mail field this string will be used in. I |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
=back |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
B this is a stable, tested, widely compatible solution. Strict |
285
|
|
|
|
|
|
|
compliance with RFC-1522 (regarding the use of encoded words in message |
286
|
|
|
|
|
|
|
headers), however, was not proven, but strings returned by this function |
287
|
|
|
|
|
|
|
work properly and identically with Mutt, Pine, JavaMail and OpenWebmail. The |
288
|
|
|
|
|
|
|
recommended way is to use this function instead of C or |
289
|
|
|
|
|
|
|
L. |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
=cut |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
#** Dat: no prototype, because original encode_mimewords() doesn't have it |
295
|
|
|
|
|
|
|
#** a prototype either |
296
|
|
|
|
|
|
|
#** @param $_[0] string|raw string. raw |
297
|
|
|
|
|
|
|
#** @param $_[1].. list of key-value pairs. options |
298
|
|
|
|
|
|
|
#** Keys documented in `perldoc MIME::Words'': Charset, Encoding, Field. |
299
|
|
|
|
|
|
|
#** Charset is now autodetected (not always ISO-8859-1) |
300
|
|
|
|
|
|
|
#** New key: Raw: |
301
|
|
|
|
|
|
|
#** -- 1 (default): true: $_[0] is already a raw, encoded string |
302
|
|
|
|
|
|
|
#** -- 0: false: $_[0] is a Perl unicode string, it needs to be filtered |
303
|
|
|
|
|
|
|
# with Encode::encode(Charset) |
304
|
|
|
|
|
|
|
#** New key: Shorts: |
305
|
|
|
|
|
|
|
#** -- 1 (default for encode_mimewords) |
306
|
|
|
|
|
|
|
#** -- 0 (default for encode_mimeword) |
307
|
|
|
|
|
|
|
#** New key: Whole: (is respected iff Shorts==0) |
308
|
|
|
|
|
|
|
#** -- 1 (default): quote the string as a whole word (not default in the original!) |
309
|
|
|
|
|
|
|
#** -- 0: encode subwords |
310
|
|
|
|
|
|
|
#** New key: Keeptrailnl |
311
|
|
|
|
|
|
|
#** -- 0: treat trailing newline as unprintable |
312
|
|
|
|
|
|
|
#** -- 1 (default, as in original MIME::Words, as expected by Sympla 4): |
313
|
|
|
|
|
|
|
# keep trailing newline at the end |
314
|
|
|
|
|
|
|
#** !! doc more, including new keys |
315
|
|
|
|
|
|
|
sub encode_mimewords_low { |
316
|
72
|
|
|
72
|
0
|
858
|
my($S,%opts)=@_; |
317
|
72
|
50
|
|
|
|
155
|
return undef if !defined($S); |
318
|
|
|
|
|
|
|
# die "no word for encode_mimewords" if !defined $S; # Dat: Sympa calls us with undef |
319
|
|
|
|
|
|
|
#die unless open LOG, ">> /tmp/b.log"; |
320
|
|
|
|
|
|
|
#die unless print LOG "=> ".dumpstr($S)."\n"; |
321
|
|
|
|
|
|
|
#die unless close LOG; |
322
|
|
|
|
|
|
|
#$opts{Charset}="ISO-8859-1" if !defined $opts{Charset}; |
323
|
72
|
100
|
|
|
|
200
|
$opts{Raw}=1 if !defined $opts{Raw}; |
324
|
72
|
100
|
|
|
|
186
|
$opts{Charset}=get_best_encode_charset($S) if !defined $opts{Charset}; |
325
|
72
|
50
|
|
|
|
149
|
die if !defined $opts{Charset}; |
326
|
72
|
|
|
|
|
138
|
$opts{Charset}=canonical_charset($opts{Charset}); |
327
|
72
|
100
|
|
|
|
165
|
if ($opts{Raw}) { # Dat: improvement |
328
|
38
|
|
|
|
|
45
|
$opts{Raw}=0; |
329
|
|
|
|
|
|
|
#die if !defined $S; |
330
|
38
|
|
|
|
|
105
|
$S=Encode::decode($opts{Charset}, $S); |
331
|
|
|
|
|
|
|
# ^^^ Dat: better do a Unicode regexp match |
332
|
|
|
|
|
|
|
} |
333
|
71
|
50
|
|
|
|
1031
|
$opts{Encoding}="Q" if !defined $opts{Encoding}; |
334
|
71
|
50
|
|
|
|
169
|
$opts{Encoding}="Q" if $opts{Encoding} ne "B"; # Dat: improvement |
335
|
71
|
100
|
66
|
|
|
307
|
$opts{Encoding}="B" if $opts{Encoding} eq "Q" and $opts{Charset} eq "UTF-8"; |
336
|
|
|
|
|
|
|
# ^^^ Dat: UTF-8 encoded MimeWords must be in base64 -- quoted-printable is |
337
|
|
|
|
|
|
|
# bad, Pine doesn't display quoted-printable properly |
338
|
|
|
|
|
|
|
# (it assumes ISO-8859-1 for quoted-printable chars), and Mutt does it |
339
|
|
|
|
|
|
|
# the other way; |
340
|
|
|
|
|
|
|
# We need Base64 "=?UTF-8?B?".MIME::Base64::encode_base64("Unicode string")."?= |
341
|
71
|
100
|
|
|
|
181
|
$opts{Shorts}=1 if !defined $opts{Shorts}; |
342
|
71
|
100
|
|
|
|
157
|
$opts{Whole}=1 if !defined $opts{Whole}; |
343
|
71
|
50
|
|
|
|
150
|
$opts{Space}=" " if !defined $opts{Space}; # Dat: empty =?...?==?...?= in the original MIME::Words |
344
|
71
|
50
|
|
|
|
157
|
$opts{Split}=66 if !defined $opts{Split}; # Dat: split at this many chars |
345
|
71
|
100
|
|
|
|
153
|
$opts{Keeptrailnl}=1 if !defined $opts{Keeptrailnl}; |
346
|
71
|
|
|
|
|
72
|
my $toend=""; |
347
|
71
|
100
|
100
|
|
|
435
|
$toend=$1 if $opts{Keeptrailnl} and $S=~s@(\n)\Z(?!\n)@@; |
348
|
71
|
100
|
|
|
|
206
|
if (!$opts{Shorts}) { |
|
|
100
|
|
|
|
|
|
349
|
2
|
|
|
|
|
4
|
$S=encode_mimeword1($S,\%opts) |
350
|
|
|
|
|
|
|
} elsif ($opts{Whole}) { |
351
|
42
|
100
|
|
|
|
204
|
if ($S=~/$NONPRINT/o) { |
352
|
39
|
|
|
|
|
90
|
$S=encode_mimeword1($S,\%opts); |
353
|
39
|
|
|
|
|
70
|
substr($S,-1)=""; # Dat: remove last space |
354
|
|
|
|
|
|
|
} |
355
|
42
|
50
|
|
|
|
133
|
$S=split_words($S, $opts{Split}, "\n ") if $opts{Split}; |
356
|
|
|
|
|
|
|
} else { |
357
|
27
|
|
|
|
|
38
|
my $lastpos=0; |
358
|
27
|
|
|
|
|
192
|
while ($S=~/($NONPRINT[^ ]* *)/go) { |
359
|
|
|
|
|
|
|
# ^^^ Dat: having ` *' is a must here, other clients just forget about it |
360
|
41
|
|
|
|
|
122
|
my $I=pos($S)-length($1); |
361
|
41
|
|
100
|
|
|
499
|
$I-- while $I>$lastpos and substr($S,$I-1,1)ne' '; |
362
|
41
|
|
|
|
|
64
|
my $pos=pos($S); my $D; |
|
41
|
|
|
|
|
48
|
|
363
|
41
|
|
33
|
|
|
544
|
1 while ($pos=pos($S)) and $S=~/ |\Z(?!\n)|($NONPRINT)/goc and defined($D=$1) |
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
364
|
|
|
|
|
|
|
and $S=~/\G[^ ]* */goc; |
365
|
41
|
50
|
|
|
|
126
|
pos($S)=$pos if !defined $D; |
366
|
41
|
|
|
|
|
73
|
my $srclen=pos($S)-$I; |
367
|
41
|
|
|
|
|
82
|
my $src=substr($S,$I,$srclen); |
368
|
|
|
|
|
|
|
##print STDERR "D($src)(".substr($S,$I+$srclen).")\n"; |
369
|
41
|
50
|
66
|
|
|
441
|
if ($I+$srclen!=length($S) and substr($src,-1)eq' ' and $S=~/ |\Z(?!\n)|($NONPRINT)/goc and !defined($1)) { |
|
|
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
370
|
|
|
|
|
|
|
##print STDERR "Strip ending space\n"; |
371
|
29
|
|
|
|
|
67
|
substr($src,-1)=""; # Dat: see test case 'ignore_space' |
372
|
|
|
|
|
|
|
} |
373
|
|
|
|
|
|
|
# Dat: now pos($S) is invalid |
374
|
41
|
50
|
|
|
|
97
|
die if 1>length($src); |
375
|
|
|
|
|
|
|
##print STDERR "E($src)(".substr($S,$I+$srclen).")\n"; |
376
|
41
|
|
|
|
|
87
|
my $dst=encode_mimeword1($src,\%opts); # Dat: with trailing space |
377
|
|
|
|
|
|
|
##print STDERR substr($S,$I,$srclen),";;\n"; |
378
|
41
|
|
|
|
|
144
|
substr($S,$I,$srclen)=$dst; # Imp: do with less copying |
379
|
41
|
|
|
|
|
305
|
$lastpos=pos($S)=$I+length($dst); |
380
|
|
|
|
|
|
|
} |
381
|
27
|
100
|
66
|
|
|
191
|
substr($S,-length($opts{Space}))="" if |
382
|
|
|
|
|
|
|
0
|
383
|
27
|
50
|
|
|
|
82
|
$S=split_words($S, $opts{Split}, "\n ") if $opts{Split}; |
384
|
|
|
|
|
|
|
} |
385
|
71
|
|
|
|
|
109
|
$S.=$toend; |
386
|
|
|
|
|
|
|
#$S=~s@ @\n @g; # !! debug |
387
|
|
|
|
|
|
|
#die unless open LOG, ">> /tmp/b.log"; |
388
|
|
|
|
|
|
|
#die unless print LOG "T> ".dumpstr($S)."\n"; |
389
|
|
|
|
|
|
|
#die unless close LOG; |
390
|
71
|
|
|
|
|
418
|
$S |
391
|
|
|
|
|
|
|
} |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
#use vars qw($old_encode_mimeword); |
394
|
|
|
|
|
|
|
#BEGIN { $old_encode_mimeword=\&MIME::AltWords::encode_mimeword } |
395
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
=item encode_mimeword RAW, [ENCODING], [CHARSET] |
397
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
I |
399
|
|
|
|
|
|
|
Encode a single RAW "word" that has unsafe characters. |
400
|
|
|
|
|
|
|
The "word" will be encoded in its entirety. |
401
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
### Encode "<>": |
403
|
|
|
|
|
|
|
$encoded = encode_mimeword("\xABFran\xE7ois\xBB"); |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
You may specify the ENCODING (C<"Q"> or C<"B">), which defaults to C<"Q">. |
406
|
|
|
|
|
|
|
You may specify the CHARSET, which defaults to C. |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
=cut |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
#** Dat: no prototype, because original encode_mimeword() doesn't have it |
412
|
|
|
|
|
|
|
#** a prototype either |
413
|
|
|
|
|
|
|
#** @param $_[0] raw string. raw |
414
|
|
|
|
|
|
|
#** @param $_[1] string. Encoding: "Q" or "B", defaults to "Q" |
415
|
|
|
|
|
|
|
#** @param $_[2] string. Charset: defaults to "ISO-8859-1" (as in MIME::Words code, |
416
|
|
|
|
|
|
|
#** not as in its documentation |
417
|
|
|
|
|
|
|
sub encode_mimeword { |
418
|
|
|
|
|
|
|
#sub encode_mimeword($;$$) { |
419
|
2
|
|
|
2
|
1
|
8
|
encode_mimewords($_[0],Encoding=>$_[1],Charset=>$_[2],Shorts=>0); |
420
|
|
|
|
|
|
|
} |
421
|
|
|
|
|
|
|
|
422
|
1
|
|
|
1
|
|
6
|
use vars qw($old_decode_mimewords); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
52
|
|
423
|
1
|
|
|
1
|
|
317
|
BEGIN { $old_decode_mimewords=\&MIME::AltWords::decode_mimewords_wantarray } #### pts #### AltWords.pm adds `_wantarray' |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
#** Dat: function added by #### pts #### |
426
|
|
|
|
|
|
|
#** @param $_[0] a mimewords-encoded string |
427
|
|
|
|
|
|
|
#** @return a canonical encoding name with which the string can be re-encoded |
428
|
|
|
|
|
|
|
sub get_best_decode_charset($) { |
429
|
23
|
|
|
23
|
0
|
37
|
my $encodedstr=$_[0]; |
430
|
23
|
|
|
|
|
27
|
my @L; |
431
|
23
|
|
|
|
|
50
|
for my $token (decode_mimewords($encodedstr)) { |
432
|
45
|
|
100
|
|
|
184
|
my $charset=canonical_charset($token->[1] or ""); |
433
|
45
|
100
|
100
|
|
|
231
|
push @L, $charset if $charset and (!@L or $L[-1] ne $charset); |
|
|
|
66
|
|
|
|
|
434
|
|
|
|
|
|
|
} |
435
|
23
|
100
|
|
|
|
83
|
@L=canonical_charset('UTF-8') if @L!=1; # Dat: default, can accomodate any charset |
436
|
23
|
|
|
|
|
73
|
$L[0] |
437
|
|
|
|
|
|
|
} |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
=item decode_mimewords ENCODED, [OPTS...] |
440
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
I |
442
|
|
|
|
|
|
|
Go through the string looking for RFC-1522-style "Q" |
443
|
|
|
|
|
|
|
(quoted-printable, sort of) or "B" (base64) encoding, and decode them. |
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
B splits the ENCODED string into a list of decoded |
446
|
|
|
|
|
|
|
C<[DATA, CHARSET]> pairs, and returns that list. Unencoded |
447
|
|
|
|
|
|
|
data are returned in a 1-element array C<[DATA]>, giving an effective |
448
|
|
|
|
|
|
|
CHARSET of C. |
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
$enc = '=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= '; |
451
|
|
|
|
|
|
|
foreach (decode_mimewords($enc)) { |
452
|
|
|
|
|
|
|
print "", ($_[1] || 'US-ASCII'), ": ", $_[0], "\n"; |
453
|
|
|
|
|
|
|
} |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
B joins the "data" elements of the above list |
456
|
|
|
|
|
|
|
together, and returns that. I it |
457
|
|
|
|
|
|
|
sanitizes the returned string to use a specific, single charset, either |
458
|
|
|
|
|
|
|
specified using the C option, or autodetecting one (ISO-8859-1, |
459
|
|
|
|
|
|
|
ISO-8859-2 or UTF-8) which can accomodate all characters. In case of charset |
460
|
|
|
|
|
|
|
autodetection, C can be used to query the charset |
461
|
|
|
|
|
|
|
autodetected. |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
You might want to see L as an alternate of |
464
|
|
|
|
|
|
|
L. |
465
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
In the event of a syntax error, $@ will be set to a description |
467
|
|
|
|
|
|
|
of the error, but parsing will continue as best as possible (so as to |
468
|
|
|
|
|
|
|
get I back when decoding headers). |
469
|
|
|
|
|
|
|
$@ will be false if no error was detected. |
470
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
Any arguments past the ENCODED string are taken to define a hash of options: |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
=over 4 |
474
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
=item Field |
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
Name of the mail field this string came from. I |
478
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
=back |
480
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
=cut |
482
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
#** Dat: no prototype, because original decode_mimewords() doesn't have it |
484
|
|
|
|
|
|
|
#** a prototype either |
485
|
|
|
|
|
|
|
#** Dat: it is unsafe to use this without the Raw=>1 option. Search for |
486
|
|
|
|
|
|
|
#** MIME::WordDecoder in `perldoc MIME::Words'. |
487
|
|
|
|
|
|
|
#** @param $_[0] a mimewords-encoded string |
488
|
|
|
|
|
|
|
#** @param $_[1]... list of options (key=>value pairs). |
489
|
|
|
|
|
|
|
#** Keys documented in `perldoc MIME::Words'': Charset, Encoding, Field. |
490
|
|
|
|
|
|
|
#** New key: Raw: |
491
|
|
|
|
|
|
|
#** -- 1 (default): true: return a raw, encoded string. The encoding will |
492
|
|
|
|
|
|
|
#** be Charset, or the one returned by get_best_decode_charset(). |
493
|
|
|
|
|
|
|
#** This is an improvement |
494
|
|
|
|
|
|
|
#** by #### pts #### -- the original decode_mimewords() didn't return |
495
|
|
|
|
|
|
|
#** the string in a consistent encoding. |
496
|
|
|
|
|
|
|
#** -- 0: false: $_[0] is a Perl unicode string, it needs to be filtered |
497
|
|
|
|
|
|
|
#** with Encode::encode(Charset) |
498
|
|
|
|
|
|
|
#** New key: Charset: specific charset name for Raw=1 (ignored for Raw=0) |
499
|
|
|
|
|
|
|
sub decode_mimewords_low { |
500
|
50
|
100
|
|
50
|
0
|
165
|
return $old_decode_mimewords->(@_) if wantarray; |
501
|
27
|
|
|
|
|
62
|
my($encodedstr,%opts)=@_; |
502
|
27
|
100
|
|
|
|
75
|
$opts{Raw}=1 if !defined $opts{Raw}; # Dat: default |
503
|
27
|
|
|
|
|
34
|
my $ret=''; |
504
|
|
|
|
|
|
|
# vvv Dat: not mutually recursive, because get_best_decode_charset() calls |
505
|
|
|
|
|
|
|
# decode_mimewords() in list context, so this line won't be |
506
|
|
|
|
|
|
|
# reached. |
507
|
27
|
100
|
100
|
|
|
126
|
$opts{Charset}=get_best_decode_charset($encodedstr) if |
508
|
|
|
|
|
|
|
$opts{Raw} and !defined $opts{Charset}; |
509
|
27
|
|
|
|
|
38
|
my $S; |
510
|
27
|
|
|
|
|
100
|
for my $token ($old_decode_mimewords->($encodedstr,%opts)) { # Dat: $charset in $token->[1] |
511
|
55
|
100
|
|
|
|
185
|
$S=$token->[1] ? Encode::decode($token->[1], $token->[0]) : $token->[0]; |
512
|
55
|
100
|
|
|
|
1107
|
$S=Encode::encode($opts{Charset}, $S) if $opts{Raw}; |
513
|
55
|
|
|
|
|
5359
|
$ret.=$S |
514
|
|
|
|
|
|
|
} |
515
|
|
|
|
|
|
|
$ret |
516
|
27
|
|
|
|
|
182
|
} |
517
|
|
|
|
|
|
|
|
518
|
1
|
|
|
1
|
|
5
|
use vars qw(@encode_subject_opts); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
38
|
|
519
|
1
|
|
|
1
|
|
83
|
BEGIN { @encode_subject_opts=(Keeptrailnl=>1, Whole=>1); } |
520
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
#** Dat: function added by #### pts #### |
522
|
|
|
|
|
|
|
#** @param $_[0] String. A mimewords-encoded e-mail subject. |
523
|
|
|
|
|
|
|
#** @return String. better mimewords-encoded |
524
|
|
|
|
|
|
|
sub fix_subject($) { |
525
|
1
|
|
|
1
|
0
|
2
|
my $encodedstr=$_[0]; |
526
|
1
|
|
|
|
|
4
|
my $best_charset=get_best_decode_charset($encodedstr); |
527
|
1
|
|
|
|
|
4
|
my $decoded=decode_mimewords($encodedstr, Raw=>0); |
528
|
1
|
|
|
|
|
6
|
encode_mimewords($decoded, Charset=>$best_charset, Raw=>0, @encode_subject_opts); |
529
|
|
|
|
|
|
|
} |
530
|
|
|
|
|
|
|
|
531
|
1
|
|
|
1
|
|
4
|
use vars qw(@encode_addresses_opts); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
34
|
|
532
|
1
|
|
|
1
|
|
168
|
BEGIN { @encode_addresses_opts=(Keeptrailnl=>1, Whole=>0); } |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
#** Dat: function added by #### pts #### |
535
|
|
|
|
|
|
|
#** @param $_[0] String. A mimewords-encoded e-mail address (or address list), |
536
|
|
|
|
|
|
|
#** e.g. "=?ISO-8859-1?Q?foo?= bar , foo2 bar2 " |
537
|
|
|
|
|
|
|
#** @return String. better mimewords-encoded |
538
|
|
|
|
|
|
|
sub fix_addresses($) { |
539
|
3
|
|
|
3
|
0
|
8
|
my $encodedstr=$_[0]; |
540
|
3
|
|
|
|
|
7
|
my $best_charset=get_best_decode_charset($encodedstr); |
541
|
3
|
|
|
|
|
8
|
my $decoded=decode_mimewords($encodedstr, Raw=>0); |
542
|
|
|
|
|
|
|
#print STDERR "DE($decoded)\n"; |
543
|
|
|
|
|
|
|
#chomp $decoded; |
544
|
|
|
|
|
|
|
#$decoded.=" alma "; |
545
|
3
|
|
|
|
|
11
|
encode_mimewords($decoded, Charset=>$best_charset, Raw=>0, @encode_addresses_opts); |
546
|
|
|
|
|
|
|
} |
547
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
#** Dat: function added by #### pts #### |
549
|
|
|
|
|
|
|
#** @param $_[0] a Unicode string |
550
|
|
|
|
|
|
|
#** @param $_[1] a charset |
551
|
|
|
|
|
|
|
#** @return Boolean: is it encodable? |
552
|
|
|
|
|
|
|
sub is_encodable($$) { |
553
|
48
|
|
|
48
|
0
|
94
|
my $charset=uc$_[1]; |
554
|
48
|
|
|
|
|
64
|
my $S=$_[0]; # Dat: must be copied for Encode::encode |
555
|
48
|
100
|
66
|
|
|
217
|
return 1 if $charset eq 'UTF-8' or $charset eq 'UTF8'; |
556
|
45
|
|
|
|
|
58
|
eval { Encode::encode($charset, $S, Encode::FB_CROAK) }; |
|
45
|
|
|
|
|
154
|
|
557
|
45
|
100
|
|
|
|
1330
|
$@ ? 0 : 1 |
558
|
|
|
|
|
|
|
} |
559
|
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
#** Please don't include US-ASCII. |
561
|
|
|
|
|
|
|
#** Specify UTF-8 last (because get_best_encode_charset() ignores everything |
562
|
|
|
|
|
|
|
#** after the first UTF-8). |
563
|
1
|
|
|
1
|
|
4
|
use vars qw(@encode_charsets); |
|
1
|
|
|
|
|
8
|
|
|
1
|
|
|
|
|
35
|
|
564
|
1
|
|
|
1
|
|
295
|
BEGIN { @encode_charsets=qw(ISO-8859-1 ISO-8859-2 UTF-8) } |
565
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
#** Dat: function added by #### pts #### |
567
|
|
|
|
|
|
|
#** @param $_[0] string to try |
568
|
|
|
|
|
|
|
sub get_best_encode_charset($) { |
569
|
39
|
|
|
39
|
0
|
68
|
for my $charset (@encode_charsets) { |
570
|
48
|
100
|
|
|
|
110
|
return $charset if is_encodable($_[0], $charset); |
571
|
|
|
|
|
|
|
} |
572
|
0
|
|
|
|
|
0
|
return 'UTF-8' |
573
|
|
|
|
|
|
|
} |
574
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
#** Dat: function added by #### pts #### |
576
|
|
|
|
|
|
|
#** @param $_[0] String. Perl Unicode-string |
577
|
|
|
|
|
|
|
#** @param $_[1] encode mode: 'subject' or 'addresses' |
578
|
|
|
|
|
|
|
#** @return String. better mimewords-encoded with the best charset |
579
|
|
|
|
|
|
|
sub encode_unicode($$) { |
580
|
6
|
|
|
6
|
0
|
9
|
my $str=$_[0]; |
581
|
6
|
100
|
|
|
|
17
|
my $modeary=($_[1] eq 'addresses' ? \@encode_addresses_opts : \@encode_subject_opts); |
582
|
6
|
|
|
|
|
13
|
my $best_charset=get_best_encode_charset($str); |
583
|
6
|
|
|
|
|
18
|
encode_mimewords($str, Charset=>$best_charset, Raw=>0, @$modeary); |
584
|
|
|
|
|
|
|
} |
585
|
|
|
|
|
|
|
|
586
|
|
|
|
|
|
|
#** Dat: function added by #### pts #### |
587
|
|
|
|
|
|
|
#** @param $_[0] String. Perl 8-bit string, encoded in $charset |
588
|
|
|
|
|
|
|
#** @param $_[1] encode mode: 'subject' or 'addresses' |
589
|
|
|
|
|
|
|
#** @param $_[2] $charset; |
590
|
|
|
|
|
|
|
#** @return String. better mimewords-encoded with the best charset |
591
|
|
|
|
|
|
|
sub encode_8bit($$$) { |
592
|
6
|
|
|
6
|
0
|
10
|
my $str=$_[0]; |
593
|
6
|
|
|
|
|
9
|
my $charset=$_[2]; |
594
|
6
|
100
|
|
|
|
16
|
my $modeary=($_[1] eq 'addresses' ? \@encode_addresses_opts : \@encode_subject_opts); |
595
|
6
|
|
|
|
|
12
|
$charset=canonical_charset($charset); |
596
|
6
|
|
|
|
|
10
|
my $best_charset; |
597
|
6
|
|
|
|
|
9
|
for my $charset2 (@encode_charsets) { |
598
|
13
|
100
|
|
|
|
21
|
if (canonical_charset($charset2) eq $charset) { |
599
|
5
|
|
|
|
|
6
|
$best_charset=$charset; last |
600
|
5
|
|
|
|
|
7
|
} |
601
|
|
|
|
|
|
|
} |
602
|
6
|
100
|
|
|
|
18
|
$best_charset=canonical_charset(get_best_encode_charset($str)) if |
603
|
|
|
|
|
|
|
!defined $best_charset; |
604
|
6
|
100
|
|
|
|
22
|
($charset eq $best_charset) ? # Imp: find badly encoded string... |
605
|
|
|
|
|
|
|
encode_mimewords($str, Charset=>$best_charset, Raw=>1, @$modeary) : |
606
|
|
|
|
|
|
|
encode_mimewords(Encode::decode($charset, $str), Charset=>$best_charset, Raw=>0, @$modeary) |
607
|
|
|
|
|
|
|
} |
608
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
# --- Logging... |
610
|
|
|
|
|
|
|
|
611
|
1
|
|
|
1
|
|
25
|
BEGIN { *encode_mimewords=\&encode_mimewords_low } |
612
|
1
|
|
|
1
|
|
29
|
BEGIN { *decode_mimewords=\&decode_mimewords_low } |
613
|
|
|
|
|
|
|
|
614
|
1
|
50
|
|
1
|
|
111
|
BEGIN { if ($main::DEBUG) { |
615
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
#use vars qw($orig_encode_mimewords $orig_decode_mimewords); |
617
|
1
|
|
|
1
|
|
5
|
no warnings qw(prototype redefine); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
483
|
|
618
|
|
|
|
|
|
|
|
619
|
|
|
|
|
|
|
#BEGIN { $orig_encode_mimewords=\&encode_mimewords } |
620
|
|
|
|
|
|
|
*encode_mimewords=sub { |
621
|
0
|
|
|
|
|
0
|
require Carp; |
622
|
0
|
|
|
|
|
0
|
my $dump="\n\n[".scalar(localtime)."] encode_mimewords(@_) = "; |
623
|
0
|
|
|
|
|
0
|
my $ret=&encode_mimewords_low(@_); # Dat: we need `&' to ignore prototype |
624
|
0
|
|
|
|
|
0
|
$dump.=$ret."\n"; |
625
|
0
|
0
|
|
|
|
0
|
if (open(my($log), ">> /tmp/em.log")) { |
626
|
0
|
|
|
|
|
0
|
local *STDERR; open STDERR, ">&".fileno($log); |
|
0
|
|
|
|
|
0
|
|
627
|
0
|
|
|
|
|
0
|
select(STDERR); $|=1; select($log); $|=1; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
628
|
0
|
|
|
|
|
0
|
binmode($log, ':utf8'); |
629
|
0
|
|
|
|
|
0
|
print $log $dump; |
630
|
0
|
|
|
|
|
0
|
Carp::cluck("^^^ encode_mimewords() "); |
631
|
0
|
|
|
|
|
0
|
close $log; |
632
|
|
|
|
|
|
|
} |
633
|
|
|
|
|
|
|
$ret |
634
|
0
|
|
|
|
|
0
|
}; |
|
0
|
|
|
|
|
0
|
|
635
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
#BEGIN { $orig_decode_mimewords=\&decode_mimewords } |
637
|
|
|
|
|
|
|
# Imp: copy prototype of original... |
638
|
|
|
|
|
|
|
*decode_mimewords=sub { |
639
|
0
|
|
|
|
|
0
|
require Carp; |
640
|
0
|
|
|
|
|
0
|
my $dump="\n\n[".scalar(localtime)."] decode_mimewords(@_) = "; |
641
|
0
|
0
|
|
|
|
0
|
if (wantarray) { |
642
|
0
|
|
|
|
|
0
|
my @L=(&decode_mimewords_low(@_)); |
643
|
0
|
|
|
|
|
0
|
$dump.="@L (ary)\n"; |
644
|
0
|
0
|
|
|
|
0
|
if (open(my($log), ">> /tmp/em.log")) { |
645
|
0
|
|
|
|
|
0
|
local *STDERR; open STDERR, ">&".fileno($log); |
|
0
|
|
|
|
|
0
|
|
646
|
0
|
|
|
|
|
0
|
select(STDERR); $|=1; select($log); $|=1; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
647
|
0
|
|
|
|
|
0
|
binmode($log, ':utf8'); |
648
|
0
|
|
|
|
|
0
|
print $log $dump; |
649
|
0
|
|
|
|
|
0
|
Carp::cluck("^^^ decode_mimewords() "); |
650
|
0
|
|
|
|
|
0
|
close $log; |
651
|
|
|
|
|
|
|
} |
652
|
|
|
|
|
|
|
@L |
653
|
0
|
|
|
|
|
0
|
} else { |
654
|
0
|
|
|
|
|
0
|
my $ret=decode_mimewords_low(@_); |
655
|
0
|
|
|
|
|
0
|
$dump.=$ret."\n"; |
656
|
0
|
0
|
|
|
|
0
|
if (open(my($log), ">> /tmp/em.log")) { |
657
|
0
|
|
|
|
|
0
|
local *STDERR; open STDERR, ">&".fileno($log); |
|
0
|
|
|
|
|
0
|
|
658
|
0
|
|
|
|
|
0
|
select(STDERR); $|=1; select($log); $|=1; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
659
|
0
|
|
|
|
|
0
|
binmode($log, ':utf8'); |
660
|
0
|
|
|
|
|
0
|
print $log $dump; |
661
|
0
|
|
|
|
|
0
|
Carp::cluck("^^^ decode_mimewords() "); |
662
|
0
|
|
|
|
|
0
|
close $log; |
663
|
|
|
|
|
|
|
} |
664
|
|
|
|
|
|
|
$ret |
665
|
0
|
|
|
|
|
0
|
} |
666
|
0
|
|
|
|
|
0
|
}; |
667
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
} } |
669
|
|
|
|
|
|
|
|
670
|
|
|
|
|
|
|
# --- |
671
|
|
|
|
|
|
|
|
672
|
|
|
|
|
|
|
=back |
673
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
=head1 NOTES |
675
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
Exports its principle functions by default, in keeping with |
677
|
|
|
|
|
|
|
L and L. |
678
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
Doesn't depend on L or L. All the shared code is |
680
|
|
|
|
|
|
|
copied to L, which is bundled. |
681
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
See also L for the previous version |
683
|
|
|
|
|
|
|
of L integrated into the Sympa 4 mailing list software. |
684
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
=head1 AUTHOR |
686
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
L was written by |
688
|
|
|
|
|
|
|
PĂ©ter SzabĂł (F) in 2006, and it has been uploaded to CPAN on |
689
|
|
|
|
|
|
|
2006-09-27. |
690
|
|
|
|
|
|
|
|
691
|
|
|
|
|
|
|
L uses code from L (in the file |
692
|
|
|
|
|
|
|
C) and it uses documentation from L |
693
|
|
|
|
|
|
|
(in the files C and C). |
694
|
|
|
|
|
|
|
|
695
|
|
|
|
|
|
|
Here is the original author and copyright information for L. |
696
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
Eryq (F), ZeeGee Software Inc (F). |
698
|
|
|
|
|
|
|
David F. Skoll (dfs@roaringpenguin.com) http://www.roaringpenguin.com |
699
|
|
|
|
|
|
|
|
700
|
|
|
|
|
|
|
All rights reserved. This program is free software; you can redistribute |
701
|
|
|
|
|
|
|
it and/or modify it under the same terms as Perl itself. |
702
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
Thanks also to... |
704
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
Kent Boortz For providing the idea, and the baseline |
706
|
|
|
|
|
|
|
RFC-1522-decoding code! |
707
|
|
|
|
|
|
|
KJJ at PrimeNet For requesting that this be split into |
708
|
|
|
|
|
|
|
its own module. |
709
|
|
|
|
|
|
|
Stephane Barizien For reporting a nasty bug. |
710
|
|
|
|
|
|
|
|
711
|
|
|
|
|
|
|
|
712
|
|
|
|
|
|
|
=head1 VERSION |
713
|
|
|
|
|
|
|
|
714
|
|
|
|
|
|
|
See $VERSION in C . |
715
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
=cut |
717
|
|
|
|
|
|
|
|
718
|
|
|
|
|
|
|
=begin testing |
719
|
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
is(MIME::AltWords::encode_mimewords("foo bar"), "foo bar"); |
721
|
|
|
|
|
|
|
is(MIME::AltWords::encode_mimewords("foo-b\x{E9}r"), "=?ISO-8859-1?Q?foo-b=E9r?="); |
722
|
|
|
|
|
|
|
is(MIME::AltWords::encode_mimeword("foo bar"), "=?ISO-8859-1?Q?foo__bar?="); # Dat: improvement over MIME::AltWords |
723
|
|
|
|
|
|
|
is(MIME::AltWords::encode_mimeword("foo__bar "), "=?ISO-8859-1?Q?foo=5F=5Fbar_?="); # Dat: improvement over MIME::AltWords |
724
|
|
|
|
|
|
|
is(MIME::AltWords::encode_mimewords("az űrkikötő földi adatai",Whole=>0), "az =?ISO-8859-1?Q?=FBrkik=F6t=F5_f=F6ldi?= adatai"); |
725
|
|
|
|
|
|
|
is(MIME::AltWords::decode_mimewords("az =?ISO-8859-1?Q?=FBrkik=F6t=F5_f=F6ldi_?= adatai", Charset=>'ISO-8859-2'), |
726
|
|
|
|
|
|
|
"az ?rkiköt? földi adatai"); |
727
|
|
|
|
|
|
|
is(MIME::AltWords::decode_mimewords("az =?ISO-8859-2?Q?=FBrkik=F6t=F5_f=F6ldi_?= adatai", Charset=>'ISO-8859-1'), |
728
|
|
|
|
|
|
|
"az ?rkiköt? földi adatai"); |
729
|
|
|
|
|
|
|
is(MIME::AltWords::decode_mimewords("az =?ISO-8859-1?Q?=FBrkik=F6t=F5_f=F6ldi_?= adatai", Charset=>'ISO-8859-1'), |
730
|
|
|
|
|
|
|
"az űrkikötő földi adatai"); |
731
|
|
|
|
|
|
|
is(MIME::AltWords::decode_mimewords("az =?ISO-8859-2?Q?=FBrkik=F6t=F5_f=F6ldi_?= adatai", Charset=>'ISO-8859-2'), |
732
|
|
|
|
|
|
|
"az űrkikötő földi adatai"); |
733
|
|
|
|
|
|
|
is(MIME::AltWords::decode_mimewords("az =?ISO-8859-2?Q?=FBrkik=F6t=F5_f=F6ldi_?= adatai"), |
734
|
|
|
|
|
|
|
"az űrkikötő földi adatai"); # Dat: guess Charset to ISO-8859-2 |
735
|
|
|
|
|
|
|
is(MIME::AltWords::encode_mimewords("az űrkikötő földi adatai"), "=?ISO-8859-1?Q?az_=FBrkik=F6t=F5_f=F6ldi_adatai?="); |
736
|
|
|
|
|
|
|
is(MIME::AltWords::encode_mimewords("az űrkikötő földi",Whole=>0), "az =?ISO-8859-1?Q?=FBrkik=F6t=F5_f=F6ldi?="); |
737
|
|
|
|
|
|
|
is(MIME::AltWords::encode_mimewords("az űrkikötő földi"), "=?ISO-8859-1?Q?az_=FBrkik=F6t=F5_f=F6ldi?="); |
738
|
|
|
|
|
|
|
is(MIME::AltWords::encode_mimewords("foo b\x{E1}r",Whole=>0), "foo =?ISO-8859-1?Q?b=E1r?="); |
739
|
|
|
|
|
|
|
is(MIME::AltWords::encode_mimewords("foo b\x{E1}r"), "=?ISO-8859-1?Q?foo__b=E1r?="); |
740
|
|
|
|
|
|
|
is(MIME::AltWords::encode_mimewords( "b\x{F5}r foo",Charset=>"ISO-8859-1",Whole=>0), "=?ISO-8859-1?Q?b=F5r_?= foo"); |
741
|
|
|
|
|
|
|
is(MIME::AltWords::encode_mimewords( "b\x{F5}r foo",Charset=>"ISO-8859-1"), "=?ISO-8859-1?Q?b=F5r__foo?="); |
742
|
|
|
|
|
|
|
{ my $S; eval { $S=MIME::AltWords::encode_mimewords("b\x{151}r foo",Charset=>"ISO-8859-2"); }; |
743
|
|
|
|
|
|
|
ok($@=~/^Wide character /); # Dat: Encode::decode fails |
744
|
|
|
|
|
|
|
} |
745
|
|
|
|
|
|
|
is(MIME::AltWords::encode_mimewords("b\x{151}r foo",Charset=>"ISO-8859-2",Raw=>0,Whole=>0), "=?ISO-8859-2?Q?b=F5r_?= foo"); |
746
|
|
|
|
|
|
|
is(MIME::AltWords::encode_mimewords("b\x{151}r foo",Charset=>"ISO-8859-2",Raw=>0), "=?ISO-8859-2?Q?b=F5r__foo?="); |
747
|
|
|
|
|
|
|
is(MIME::AltWords::encode_mimewords("ha a sz\x{F3}t ~ jel",Charset=>"ISO-8859-2",Whole=>0), |
748
|
|
|
|
|
|
|
"ha a =?ISO-8859-2?Q?sz=F3t?= ~ jel",'ignore_space'); |
749
|
|
|
|
|
|
|
is(MIME::AltWords::encode_mimewords("ha a sz\x{F3}t ~ jel",Charset=>"ISO-8859-2"), |
750
|
|
|
|
|
|
|
"=?ISO-8859-2?Q?ha_a_sz=F3t_~_jel?=",'ignore_space2'); |
751
|
|
|
|
|
|
|
is(MIME::AltWords::encode_mimewords("ha a sz\x{F3}t ",Whole=>0,Charset=>"ISO-8859-1"), |
752
|
|
|
|
|
|
|
"ha a =?ISO-8859-1?Q?sz=F3t_?=", "ends with one space"); |
753
|
|
|
|
|
|
|
is(MIME::AltWords::encode_mimewords("ha a sz\x{F3}t ",Whole=>0,Charset=>"ISO-8859-1"), |
754
|
|
|
|
|
|
|
"ha a =?ISO-8859-1?Q?sz=F3t__?=", "ends with two spaces"); |
755
|
|
|
|
|
|
|
is(MIME::AltWords::encode_mimewords("ha a sz\x{F3}t ",Charset=>"ISO-8859-1"), |
756
|
|
|
|
|
|
|
"=?ISO-8859-1?Q?ha_a_sz=F3t_?="); |
757
|
|
|
|
|
|
|
is(MIME::AltWords::encode_mimewords("dokumentumok kezel\x{E9}se",Whole=>0), "dokumentumok =?ISO-8859-1?Q?kezel=E9se?="); |
758
|
|
|
|
|
|
|
is(MIME::AltWords::encode_mimewords("dokumentumok kezel\x{E9}se"), "=?ISO-8859-1?Q?dokumentumok_kezel=E9se?="); |
759
|
|
|
|
|
|
|
is(MIME::AltWords::encode_mimewords("tartalmaz\x{F3} dokumentumok kezel\x{E9}se",Whole=>0), "=?ISO-8859-1?Q?tartalmaz=F3?= dokumentumok =?ISO-8859-1?Q?kezel=E9se?="); |
760
|
|
|
|
|
|
|
is(MIME::AltWords::encode_mimewords("tartalmaz\x{F3} dokumentumok kezel\x{E9}se"), "=?ISO-8859-1?Q?tartalmaz=F3_dokumentumok_kezel=E9se?="); # Imp: unify printable and nonprintable to save space |
761
|
|
|
|
|
|
|
|
762
|
|
|
|
|
|
|
is(MIME::AltWords::encode_mimewords("A keresési eredményekb\x{151}l bizonyos ". |
763
|
|
|
|
|
|
|
"szavakat tartalmazó dokumentumok kizárhatók, ha a szót ~ jel el\x{151}zi ". |
764
|
|
|
|
|
|
|
"meg. Figyelem! A kizárás csak akkor eredményez találatot, ha és (& vagy ". |
765
|
|
|
|
|
|
|
"szóköz) kapcsolatban áll egy nem kizárással.", |
766
|
|
|
|
|
|
|
Charset=>"ISO-8859-2",Raw=>0,Whole=>0), |
767
|
|
|
|
|
|
|
"A =?ISO-8859-2?Q?keres=E9si_eredm=E9nyekb=F5l?= bizonyos szavakat\n =?ISO-8859-2?Q?tartalmaz=F3?= dokumentumok\n =?ISO-8859-2?Q?kiz=E1rhat=F3k,?= ha a =?ISO-8859-2?Q?sz=F3t?= ~\n jel =?ISO-8859-2?Q?el=F5zi?= meg. Figyelem! A\n =?ISO-8859-2?Q?kiz=E1r=E1s?= csak akkor\n =?ISO-8859-2?Q?eredm=E9nyez_tal=E1latot,?= ha\n =?ISO-8859-2?Q?=E9s?= (& vagy =?ISO-8859-2?Q?sz=F3k=F6z)?=\n kapcsolatban =?ISO-8859-2?Q?=E1ll?= egy nem =?ISO-8859-2?Q?kiz=E1r=E1ssal.?="); |
768
|
|
|
|
|
|
|
|
769
|
|
|
|
|
|
|
is(MIME::AltWords::encode_mimewords("A keresési eredményekb\x{151}l bizonyos ". |
770
|
|
|
|
|
|
|
"szavakat tartalmazó dokumentumok kizárhatók, ha a szót ~ jel el\x{151}zi ". |
771
|
|
|
|
|
|
|
"meg. Figyelem! A kizárás csak akkor eredményez találatot, ha és (& vagy ". |
772
|
|
|
|
|
|
|
"szóköz) kapcsolatban áll egy nem kizárással.", |
773
|
|
|
|
|
|
|
Charset=>"ISO-8859-2",Raw=>0), |
774
|
|
|
|
|
|
|
"=?ISO-8859-2?Q?A_keres=E9si_eredm=E9nyekb=F5l_bizonyos_szavakat_?=\n =?ISO-8859-2?Q?tartalmaz=F3_dokumentumok_kiz=E1rhat=F3k,_ha_a_sz?=\n =?ISO-8859-2?Q?=F3t_~_jel_el=F5zi_meg._Figyelem!_A_kiz=E1r=E1s_c?=\n =?ISO-8859-2?Q?sak_akkor_eredm=E9nyez_tal=E1latot,_ha_=E9s_(&_va?=\n =?ISO-8859-2?Q?gy_sz=F3k=F6z)_kapcsolatban_=E1ll_egy_nem_kiz=E1r?=\n =?ISO-8859-2?Q?=E1ssal.?="); |
775
|
|
|
|
|
|
|
# vvv Dat: composing with Pine emits: |
776
|
|
|
|
|
|
|
#is(MIME::AltWords::encode_mimewords("A keresési eredményekből bizonyos szavakat tartalmazó dokumentumok kizárhatók, ha a szót ~ jel előzi meg. Figyelem! A kizárás csak akkor eredményez találatot, ha és (& vagy szóköz) kapcsolatban áll egy nem kizárással.", |
777
|
|
|
|
|
|
|
#"=?ISO-8859-2?Q?A_keres=E9si_eredm=E9nyekb=F5l_bizonyos_szavakat?= |
778
|
|
|
|
|
|
|
# =?ISO-8859-2?Q?_tartalmaz=F3_dokumentumok_kiz=E1rhat=F3k=2C_ha_?= |
779
|
|
|
|
|
|
|
# =?ISO-8859-2?Q?a_sz=F3t_~_jel_el=F5zi_meg=2E_Figyelem!_A_?= |
780
|
|
|
|
|
|
|
# =?ISO-8859-2?Q?kiz=E1r=E1s_csak_akkor_eredm=E9nyez_tal=E1latot=2C?= |
781
|
|
|
|
|
|
|
# =?ISO-8859-2?Q?_ha_=E9s_=28&_vagy_sz=F3k=F6z=29_kapcsolatban?= |
782
|
|
|
|
|
|
|
# =?ISO-8859-2?Q?_=E1ll_egy_nem_kiz=E1r=E1ssal=2E?= |
783
|
|
|
|
|
|
|
|
784
|
|
|
|
|
|
|
is(MIME::AltWords::encode_mimewords("Árvízt\x{171}r\x{151} egy tükörfúrógép", |
785
|
|
|
|
|
|
|
Charset=>"UTF-8",Raw=>0,Whole=>0),"=?UTF-8?B?w4FydsOtenTFsXLFkQ==?= egy =?UTF-8?B?dMO8a8O2cmbDunLDs2fDqXA=?="); |
786
|
|
|
|
|
|
|
is(MIME::AltWords::encode_mimewords("Árvízt\x{171}r\x{151} egy tükörfúrógép", |
787
|
|
|
|
|
|
|
Charset=>"UTF-8",Raw=>0),"=?UTF-8?B?w4FydsOtenTFsXLFkSBlZ3kgdMO8a8O2cmbDunLDs2fDqXA=?="); |
788
|
|
|
|
|
|
|
|
789
|
|
|
|
|
|
|
is(MIME::AltWords::split_words("fo ot bar aaaaaaaaab cccccccccdddddd e f g ",8,"xy"),"fo otxybarxyaaaaaaaaabxy cccccccccddddddxye f g ",'split_words()'); |
790
|
|
|
|
|
|
|
|
791
|
|
|
|
|
|
|
is(MIME::AltWords::encode_mimewords("Szab\x{F3} P\x{E9}ter ",Charset=>"UTF-8",Raw=>0), |
792
|
|
|
|
|
|
|
"=?UTF-8?B?U3phYsOzIFDDqXRlciA8cHRzQG91ci51bT4=?="); |
793
|
|
|
|
|
|
|
is(MIME::AltWords::encode_mimewords("Szab\x{F3} P\x{E9}ter",Charset=>"UTF-8",Raw=>0), |
794
|
|
|
|
|
|
|
"=?UTF-8?B?U3phYsOzIFDDqXRlcjxwdHNAb3VyLnVtPg==?="); # Dat: this is what compose_mail returns |
795
|
|
|
|
|
|
|
is(MIME::AltWords::encode_mimewords("Szab\x{F3} P\x{E9}ter ",Charset=>"UTF-8",Raw=>0,Whole=>0), |
796
|
|
|
|
|
|
|
"=?UTF-8?B?U3phYsOzIFDDqXRlcg==?= "); |
797
|
|
|
|
|
|
|
is(MIME::AltWords::encode_mimewords("Szab\x{F3} P\x{E9}ter ",Charset=>"UTF-8",Raw=>0,Whole=>0), |
798
|
|
|
|
|
|
|
"=?UTF-8?B?U3phYsOzIFDDqXRlciA=?= "); |
799
|
|
|
|
|
|
|
|
800
|
|
|
|
|
|
|
SKIP: { |
801
|
|
|
|
|
|
|
eval { require Mail::Address }; |
802
|
|
|
|
|
|
|
skip "Mail::Address not installed", 1 if $@; |
803
|
|
|
|
|
|
|
my @sender_hdr = Mail::Address->parse("=?UTF-8?B?U3phYsOzIFDDqXRlciA=?= "); |
804
|
|
|
|
|
|
|
my $address=@sender_hdr ? $sender_hdr[0]->address : undef; |
805
|
|
|
|
|
|
|
$address="undef" if !defined $address; |
806
|
|
|
|
|
|
|
is($address, "pts\@our.um"); |
807
|
|
|
|
|
|
|
} |
808
|
|
|
|
|
|
|
|
809
|
|
|
|
|
|
|
is(scalar MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("\x{171}",Charset=>"UTF-8",Raw=>0)), |
810
|
|
|
|
|
|
|
"\x{C5}\x{B1}", 'decode_mimewords()'); |
811
|
|
|
|
|
|
|
is(scalar MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("\x{171}",Charset=>"UTF-8",Raw=>0),Raw=>0), |
812
|
|
|
|
|
|
|
"\x{171}", 'decode_mimewords()'); |
813
|
|
|
|
|
|
|
|
814
|
|
|
|
|
|
|
is(MIME::AltWords::get_best_decode_charset(MIME::AltWords::encode_mimewords("f\x{171} fa t\x{F6}lgy",Charset=>"ISO-8859-2",Raw=>0,Whole=>0)), |
815
|
|
|
|
|
|
|
'ISO-8859-2', 'get_best_decode_charset()'); |
816
|
|
|
|
|
|
|
is(MIME::AltWords::get_best_decode_charset("=?ISO-8859-2?Q?f=FB?= fa =?ISO-8859-1?Q?t=F6lgy?="), |
817
|
|
|
|
|
|
|
'UTF-8', 'get_best_decode_charset()'); |
818
|
|
|
|
|
|
|
is(MIME::AltWords::get_best_decode_charset("fa"), 'UTF-8', 'get_best_decode_charset()'); |
819
|
|
|
|
|
|
|
|
820
|
|
|
|
|
|
|
is(MIME::AltWords::fix_addresses("=?ISO-8859-2?Q?f=FB?= fa =?ISO-8859-1?Q?t=F6lgy?="), |
821
|
|
|
|
|
|
|
"=?UTF-8?B?ZsWx?= fa =?UTF-8?B?dMO2bGd5?=", 'fix_addresses()'); |
822
|
|
|
|
|
|
|
is(MIME::AltWords::fix_addresses("=?UTF-8?B?U3phYsOzIFDDqXRlciA8cHRzQG91ci51bT4=?="), |
823
|
|
|
|
|
|
|
"=?UTF-8?B?U3phYsOzIFDDqXRlcg==?= ", 'fix_addresses()'); |
824
|
|
|
|
|
|
|
is(MIME::AltWords::fix_addresses("=?UTF-8?B?U3phYsOzIFDDqXRlciA8cHRzQG91ci51bT4K?="), |
825
|
|
|
|
|
|
|
"=?UTF-8?B?U3phYsOzIFDDqXRlcg==?= \n", 'fix_addresses() Keeptrailnl'); |
826
|
|
|
|
|
|
|
is(MIME::AltWords::fix_subject("=?ISO-8859-2?Q?f=FB?= fa =?ISO-8859-1?Q?t=F6lgy?="), |
827
|
|
|
|
|
|
|
"=?UTF-8?B?ZsWxIGZhIHTDtmxneQ==?=", 'fix_subject()'); |
828
|
|
|
|
|
|
|
|
829
|
|
|
|
|
|
|
is(MIME::AltWords::decode_mimewords("=?UTF-8?B?ZsWx?= fa =?UTF-8?B?dMO2bGd5?=",Raw=>1), |
830
|
|
|
|
|
|
|
"f\x{C5}\x{B1} fa t\x{C3}\x{B6}lgy", 'decode_mimewords()'); |
831
|
|
|
|
|
|
|
is(MIME::AltWords::decode_mimewords("=?ISO-8859-2?Q?f=FB?= fa =?ISO-8859-1?Q?t=F6lgy?=",Raw=>0), |
832
|
|
|
|
|
|
|
"f\x{171} fa t\x{F6}lgy", 'decode_mimewords()'); |
833
|
|
|
|
|
|
|
#die "".MIME::AltWords::decode_mimewords("=?UTF-8?B?U3phYsOzIFDDqXRlciA8cHRzQG91ci51bT4=?="); |
834
|
|
|
|
|
|
|
is(MIME::AltWords::decode_mimewords("=?UTF-8?B?U3phYsOzIFDDqXRlcg==?=",Raw=>0), |
835
|
|
|
|
|
|
|
"Szabó Péter", 'decode_mimewords()'); |
836
|
|
|
|
|
|
|
|
837
|
|
|
|
|
|
|
is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("f\x{E9}l",Charset=>"UTF-8",Raw=>0)), |
838
|
|
|
|
|
|
|
"f\x{C3}\x{A9}l", 'encode+decode mimewords'); |
839
|
|
|
|
|
|
|
is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("f\x{E9}l ",Charset=>"UTF-8",Raw=>0)), |
840
|
|
|
|
|
|
|
"f\x{C3}\x{A9}l ", 'encode+decode mimewords'); |
841
|
|
|
|
|
|
|
is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("f\x{E9}l ",Charset=>"UTF-8",Raw=>0)), |
842
|
|
|
|
|
|
|
"f\x{C3}\x{A9}l ", 'encode+decode mimewords'); |
843
|
|
|
|
|
|
|
is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("f\x{E9}l <",Charset=>"UTF-8",Raw=>0)), |
844
|
|
|
|
|
|
|
"f\x{C3}\x{A9}l <", 'encode+decode mimewords'); |
845
|
|
|
|
|
|
|
is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("f\x{E9}l <",Charset=>"UTF-8",Raw=>0,Whole=>0)), |
846
|
|
|
|
|
|
|
"f\x{C3}\x{A9}l <", 'encode+decode mimewords'); |
847
|
|
|
|
|
|
|
is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("d\x{E9}l < ",Charset=>"UTF-8",Raw=>0,Whole=>0)), |
848
|
|
|
|
|
|
|
"d\x{C3}\x{A9}l < ", 'encode+decode mimewords'); |
849
|
|
|
|
|
|
|
is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("d\x{E9}l < ",Charset=>"UTF-8",Raw=>0,Whole=>0)), |
850
|
|
|
|
|
|
|
"d\x{C3}\x{A9}l < ", 'encode+decode mimewords'); |
851
|
|
|
|
|
|
|
|
852
|
|
|
|
|
|
|
is(MIME::AltWords::encode_mimewords("[nekem] pr\x{F3}ba h\x{E1}romra\n",Keeptrailnl=>1), |
853
|
|
|
|
|
|
|
"=?ISO-8859-1?Q?[nekem]_pr=F3ba_h=E1romra?=\n", "encode_mimewords() Keeptrailnl=1"); |
854
|
|
|
|
|
|
|
is(MIME::AltWords::encode_mimewords("[nekem] pr\x{F3}ba h\x{E1}romra\n",Keeptrailnl=>0), |
855
|
|
|
|
|
|
|
"=?ISO-8859-1?Q?[nekem]_pr=F3ba_h=E1romra=0A?=", "encode_mimewords() Keeptrailnl=0"); |
856
|
|
|
|
|
|
|
is(MIME::AltWords::encode_mimewords("[nekem] pr\x{F3}ba h\x{E1}romra\n"), |
857
|
|
|
|
|
|
|
"=?ISO-8859-1?Q?[nekem]_pr=F3ba_h=E1romra?=\n", "encode_mimewords() Keeptrailnl=default"); |
858
|
|
|
|
|
|
|
|
859
|
|
|
|
|
|
|
is(MIME::AltWords::decode_mimewords("=?ISO-8859-2?Q?m=E1sik_pr=F3b=E1cska?=\n"), |
860
|
|
|
|
|
|
|
"m\x{E1}sik pr\x{F3}b\x{E1}cska\n", "decode_mimewords ISO-8859-2"); |
861
|
|
|
|
|
|
|
|
862
|
|
|
|
|
|
|
is(MIME::AltWords::get_best_encode_charset("hello\t\n"), "ISO-8859-1", "get_best_encode_charset() ASCII"); |
863
|
|
|
|
|
|
|
is(MIME::AltWords::get_best_encode_charset("hell\x{F3}, w\x{F6}rld\t\n"), "ISO-8859-1", "get_best_encode_charset() ISO-8859-1"); |
864
|
|
|
|
|
|
|
is(MIME::AltWords::get_best_encode_charset("hell\x{151}, w\x{F6}rld\t\n"), "ISO-8859-2", "get_best_encode_charset() ISO-8859-2"); |
865
|
|
|
|
|
|
|
is(MIME::AltWords::get_best_encode_charset("hell\x{151}, w\x{F5}rld\t\n"), "UTF-8", "get_best_encode_charset() UTF-8"); |
866
|
|
|
|
|
|
|
|
867
|
|
|
|
|
|
|
is(MIME::AltWords::encode_unicode("[foo] hell\x{151}, w\x{F5}rld\t\n", 'addresses'), "[foo] =?UTF-8?B?aGVsbMWRLCB3w7VybGQJ?=\n", "encode_addresses() UTF-8"); |
868
|
|
|
|
|
|
|
is(MIME::AltWords::encode_unicode("[foo] hell\x{151}, w\x{F5}rld\t\n", 'subject'), "=?UTF-8?B?W2Zvb10gaGVsbMWRLCB3w7VybGQJ?=\n", "encode_subject() UTF-8"); |
869
|
|
|
|
|
|
|
is(MIME::AltWords::encode_unicode("[foo] hell\x{151}, w\x{F6}rld\t\n", 'subject'), "=?ISO-8859-2?Q?[foo]_hell=F5,_w=F6rld=09?=\n", "encode_subject() ISO-8859-2"); |
870
|
|
|
|
|
|
|
is(MIME::AltWords::encode_unicode("[foo] hell\x{F3}, w\x{F6}rld\t\n", 'subject'), "=?ISO-8859-1?Q?[foo]_hell=F3,_w=F6rld=09?=\n", "encode_subject() ISO-8859-1"); |
871
|
|
|
|
|
|
|
|
872
|
|
|
|
|
|
|
is(MIME::AltWords::encode_unicode("toast =?FOO-42?Q?bar=35?= me?\n", 'addresses'), "toast =?ISO-8859-1?Q?=3D=3FFOO-42=3FQ=3Fbar=3D35=3F=3D?= me?\n", "encode_addresses() with mimewords"); |
873
|
|
|
|
|
|
|
is(MIME::AltWords::encode_unicode("toast =?FOO-42?Q?b\x{E1}r=35?= me?\n", 'addresses'), "toast =?ISO-8859-1?Q?=3D=3FFOO-42=3FQ=3Fb=E1r=3D35=3F=3D?= me?\n", "encode_addresses() with mimewords"); |
874
|
|
|
|
|
|
|
|
875
|
|
|
|
|
|
|
is(MIME::AltWords::encode_8bit("[foo] hell\x{C5}\x{91}, w\x{C3}\x{B5}rld\t\n", 'addresses', 'uTf8'), "[foo] =?UTF-8?B?aGVsbMWRLCB3w7VybGQJ?=\n", "encode_8bit() addresses UTF-8"); |
876
|
|
|
|
|
|
|
is(MIME::AltWords::encode_8bit("[foo] hell\x{C5}\x{91}, w\x{C3}\x{B5}rld\t\n", 'subject', 'uTf8'), "=?UTF-8?B?W2Zvb10gaGVsbMWRLCB3w7VybGQJ?=\n", "encode_8bit() subject UTF-8"); |
877
|
|
|
|
|
|
|
is(MIME::AltWords::encode_8bit("[foo] hell\x{F5}, w\x{F6}rld\t\n", 'subject', '88592'), "=?ISO-8859-2?Q?[foo]_hell=F5,_w=F6rld=09?=\n", "encode_8bit() subject ISO-8859-2"); |
878
|
|
|
|
|
|
|
is(MIME::AltWords::encode_8bit("[foo] hell\x{F3}, w\x{F6}rld\t\n", 'subject', '88591'), "=?ISO-8859-1?Q?[foo]_hell=F3,_w=F6rld=09?=\n", "encode_8bit() subject ISO-8859-1"); |
879
|
|
|
|
|
|
|
is(MIME::AltWords::encode_8bit("toast =?FOO-42?Q?bar=35?= me?\n", 'addresses', 'us-ascii'), "toast =?ISO-8859-1?Q?=3D=3FFOO-42=3FQ=3Fbar=3D35=3F=3D?= me?\n", "encode_8bit() addresses with mimewords"); |
880
|
|
|
|
|
|
|
is(MIME::AltWords::encode_8bit("toast =?FOO-42?Q?b\x{E1}r=35?= me?\n", 'addresses', '88591'), "toast =?ISO-8859-1?Q?=3D=3FFOO-42=3FQ=3Fb=E1r=3D35=3F=3D?= me?\n", "encode_8bit() addresses with mimewords"); |
881
|
|
|
|
|
|
|
|
882
|
|
|
|
|
|
|
is(join(' ',map{MIME::AltWords::encode_mimewords($_)}split/ +/,"[nekem] m\x{E1}sik pr\x{F3}b\x{E1}cska\n"), |
883
|
|
|
|
|
|
|
"[nekem] =?ISO-8859-1?Q?m=E1sik?= =?ISO-8859-1?Q?pr=F3b=E1cska?=\n", "encode_mimewords() default ISO-8859-1 a"); |
884
|
|
|
|
|
|
|
|
885
|
|
|
|
|
|
|
is(join(' ',map{MIME::AltWords::encode_mimewords($_,Raw=>0)}split/ +/,"[nekem] m\x{E1}sik pr\x{F3}b\x{151}cska\n"), |
886
|
|
|
|
|
|
|
"[nekem] =?ISO-8859-1?Q?m=E1sik?= =?ISO-8859-2?Q?pr=F3b=F5cska?=\n", "encode_mimewords() default ISO-8859-1,2 b"); |
887
|
|
|
|
|
|
|
|
888
|
|
|
|
|
|
|
is(MIME::AltWords::encode_mimewords("[nekem] m\x{E1}sik pr\x{F3}b\x{E1}cska\n"), |
889
|
|
|
|
|
|
|
"=?ISO-8859-1?Q?[nekem]_m=E1sik_pr=F3b=E1cska?=\n", "encode_mimewords() default ISO-8859-1 c"); |
890
|
|
|
|
|
|
|
|
891
|
|
|
|
|
|
|
is(MIME::AltWords::decode_mimewords('=?US-ASCII?Q?Keith_Moore?= '), 'Keith Moore ', "MIME::Words test case 1"); |
892
|
|
|
|
|
|
|
is(MIME::AltWords::decode_mimewords('=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= '), 'Keld Jřrn Simonsen ', "MIME::Words test case 2"); |
893
|
|
|
|
|
|
|
is(MIME::AltWords::decode_mimewords('=?ISO-8859-1?Q?Andr=E9_?= Pirard '), 'André Pirard ', "MIME::Words test case 3"); |
894
|
|
|
|
|
|
|
is(MIME::AltWords::decode_mimewords('=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?==?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?==?US-ASCII?Q?.._cool!?='), 'If you can read this you understand the example... cool!', "MIME::Words test case 4"); |
895
|
|
|
|
|
|
|
is(MIME::AltWords::encode_mimewords("\xABFran\xE7ois\xBB"), '=?ISO-8859-1?Q?=ABFran=E7ois=BB?=', "MIME::Words test case 5"); |
896
|
|
|
|
|
|
|
is(MIME::AltWords::encode_mimewords("Me and \xABFran\xE7ois\xBB at the beach"), '=?ISO-8859-1?Q?Me_and_=ABFran=E7ois=BB_at_the_beach?=', "MIME::Words test case 6"); |
897
|
|
|
|
|
|
|
# vvv !! is this correct (space after \n)? |
898
|
|
|
|
|
|
|
is(MIME::AltWords::encode_mimewords("Me and \xABFran\xE7ois\xBB, down at the beach\nwith Dave "), "=?ISO-8859-1?Q?Me_and_=ABFran=E7ois=BB,_down_at_the_beach=0Awith?=\n =?ISO-8859-1?Q?_Dave_?=", "MIME::Words test case 7"); |
899
|
|
|
|
|
|
|
is(MIME::AltWords::decode_mimewords(MIME::AltWords::encode_mimewords("Me and \xABFran\xE7ois\xBB, down at the beach\nwith Dave ")), "Me and \xABFran\xE7ois\xBB, down at the beach\nwith Dave ", "MIME::Words test case 8"); |
900
|
|
|
|
|
|
|
|
901
|
|
|
|
|
|
|
=cut |
902
|
|
|
|
|
|
|
|
903
|
|
|
|
|
|
|
1 |