line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package MIME::Words; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
=head1 NAME |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
MIME::Words - deal with RFC 2047 encoded words |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
=head1 SYNOPSIS |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
Before reading further, you should see L<MIME::Tools> to make sure that |
11
|
|
|
|
|
|
|
you understand where this module fits into the grand scheme of things. |
12
|
|
|
|
|
|
|
Go on, do it now. I'll wait. |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
Ready? Ok... |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
use MIME::Words qw(:all); |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
### Decode the string into another string, forgetting the charsets: |
20
|
|
|
|
|
|
|
$decoded = decode_mimewords( |
21
|
|
|
|
|
|
|
'To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= ', |
22
|
|
|
|
|
|
|
); |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
### Split string into array of decoded [DATA,CHARSET] pairs: |
25
|
|
|
|
|
|
|
@decoded = decode_mimewords( |
26
|
|
|
|
|
|
|
'To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= ', |
27
|
|
|
|
|
|
|
); |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
### Encode a single unsafe word: |
30
|
|
|
|
|
|
|
$encoded = encode_mimeword("\xABFran\xE7ois\xBB"); |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
### Encode a string, trying to find the unsafe words inside it: |
33
|
|
|
|
|
|
|
$encoded = encode_mimewords("Me and \xABFran\xE7ois\xBB in town"); |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
=head1 DESCRIPTION |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
Fellow Americans, you probably won't know what the hell this module |
40
|
|
|
|
|
|
|
is for. Europeans, Russians, et al, you probably do. C<:-)>. |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
For example, here's a valid MIME header you might get: |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
From: =?US-ASCII?Q?Keith_Moore?= |
45
|
|
|
|
|
|
|
To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= |
46
|
|
|
|
|
|
|
CC: =?ISO-8859-1?Q?Andr=E9_?= Pirard |
47
|
|
|
|
|
|
|
Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?= |
48
|
|
|
|
|
|
|
=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?= |
49
|
|
|
|
|
|
|
=?US-ASCII?Q?.._cool!?= |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
The fields basically decode to (sorry, I can only approximate the |
52
|
|
|
|
|
|
|
Latin characters with 7 bit sequences /o and 'e): |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
From: Keith Moore |
55
|
|
|
|
|
|
|
To: Keld J/orn Simonsen |
56
|
|
|
|
|
|
|
CC: Andr'e Pirard |
57
|
|
|
|
|
|
|
Subject: If you can read this you understand the example... cool! |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
=head1 PUBLIC INTERFACE |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
=over 4 |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
=cut |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
require 5.001; |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
### Pragmas: |
69
|
29
|
|
|
29
|
|
82125
|
use strict; |
|
29
|
|
|
|
|
57
|
|
|
29
|
|
|
|
|
773
|
|
70
|
29
|
|
|
29
|
|
135
|
use re 'taint'; |
|
29
|
|
|
|
|
51
|
|
|
29
|
|
|
|
|
1531
|
|
71
|
29
|
|
|
29
|
|
135
|
use vars qw($VERSION @EXPORT_OK %EXPORT_TAGS @ISA); |
|
29
|
|
|
|
|
44
|
|
|
29
|
|
|
|
|
1967
|
|
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
### Exporting: |
74
|
29
|
|
|
29
|
|
142
|
use Exporter; |
|
29
|
|
|
|
|
54
|
|
|
29
|
|
|
|
|
1978
|
|
75
|
|
|
|
|
|
|
### Nothing is exported by default; each public function may be requested
### by name, or all three at once via the ":all" tag.
%EXPORT_TAGS = (
    all => [ qw(decode_mimewords encode_mimeword encode_mimewords) ],
);
Exporter::export_ok_tags('all');    ### copy the ":all" names into @EXPORT_OK

### Inheritance:
@ISA = qw(Exporter);
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
### Other modules: |
85
|
29
|
|
|
29
|
|
20662
|
use MIME::Base64; |
|
29
|
|
|
|
|
18345
|
|
|
29
|
|
|
|
|
1602
|
|
86
|
29
|
|
|
29
|
|
40871
|
use MIME::QuotedPrint; |
|
29
|
|
|
|
|
5841
|
|
|
29
|
|
|
|
|
27916
|
|
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
#------------------------------ |
91
|
|
|
|
|
|
|
# |
92
|
|
|
|
|
|
|
# Globals... |
93
|
|
|
|
|
|
|
# |
94
|
|
|
|
|
|
|
#------------------------------ |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
### The package version, both in 1.23 style *and* usable by MakeMaker: |
97
|
|
|
|
|
|
|
$VERSION = "5.507"; |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
### Nonprintables (controls + x7F + 8bit): |
100
|
|
|
|
|
|
|
my $NONPRINT = "\\x00-\\x1F\\x7F-\\xFF"; |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
#------------------------------ |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
# _decode_Q STRING
#     Private: used by decode_mimewords() to undo the "Q" encoding, which
#     is almost, but not exactly, quoted-printable.  :-P
#     Returns the decoded copy; the argument itself is not modified.
#     "=" sequences that are not followed by two hex digits are left as-is.
sub _decode_Q {
    my ($text) = @_;
    local $1;
    $text =~ tr/_/ /;                                 # RFC-1522, Q rule 2
    $text =~ s/=([\da-fA-F]{2})/chr(hex($1))/ge;      # RFC-1522, Q rule 1
    return $text;
}
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
# _encode_Q STRING
#     Private: used by encode_mimeword() to produce the "Q" encoding, which
#     is almost, but not exactly, quoted-printable.  :-P
#     Each space, "_", "?", "=" and each character in the $NONPRINT class
#     is replaced by "=XX", where XX is its ordinal in uppercase hex.
#     Returns the encoded copy; the argument itself is not modified.
sub _encode_Q {
    my ($text) = @_;
    local $1;
    $text =~ s{([ _\?\=$NONPRINT])}{'=' . sprintf("%02X", ord($1))}eog;
    return $text;
}
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
# _decode_B STRING
#     Private: used by decode_mimewords() to undo the "B" encoding,
#     by handing the string to MIME::Base64's decode_base64().
sub _decode_B {
    my ($b64) = @_;
    return decode_base64($b64);
}
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
# _encode_B STRING
#     Private: used by encode_mimeword() to produce the "B" encoding.
#     The empty second argument to encode_base64() means the result is
#     one unbroken string with no line endings added.
sub _encode_B {
    my ($raw) = @_;
    return encode_base64($raw, '');
}
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
#------------------------------ |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
=item decode_mimewords ENCODED |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
I<Function.> |
147
|
|
|
|
|
|
|
Go through the string looking for RFC 2047-style "Q" |
148
|
|
|
|
|
|
|
(quoted-printable, sort of) or "B" (base64) encoding, and decode them. |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
B<In an array context,> splits the ENCODED string into a list of decoded |
151
|
|
|
|
|
|
|
C<[DATA, CHARSET]> pairs, and returns that list. Unencoded |
152
|
|
|
|
|
|
|
data are returned in a 1-element array C<[DATA]>, giving an effective |
153
|
|
|
|
|
|
|
CHARSET of C<undef>. |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
$enc = '=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= '; |
156
|
|
|
|
|
|
|
foreach (decode_mimewords($enc)) { |
157
|
|
|
|
|
|
|
print "", ($_->[1] || 'US-ASCII'), ": ", $_->[0], "\n"; |
158
|
|
|
|
|
|
|
} |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
B<In a scalar context,> joins the "data" elements of the above |
161
|
|
|
|
|
|
|
list together, and returns that. I<Warning: this is information-lossy,> |
162
|
|
|
|
|
|
|
and probably I<not> what you want, but if you know that all charsets |
163
|
|
|
|
|
|
|
in the ENCODED string are identical, it might be useful to you. |
164
|
|
|
|
|
|
|
(Before you use this, please see L<MIME::WordDecoder/unmime>, |
165
|
|
|
|
|
|
|
which is probably what you want.) |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
In the event of a syntax error, $@ will be set to a description |
168
|
|
|
|
|
|
|
of the error, but parsing will continue as best as possible (so as to |
169
|
|
|
|
|
|
|
get I<something> back when decoding headers). |
170
|
|
|
|
|
|
|
$@ will be false if no error was detected. |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
Any arguments past the ENCODED string are taken to define a hash of options: |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
=cut |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
sub decode_mimewords {
    my $encstr = shift;
    my @tokens;
    local($1,$2,$3);
    $@ = '';    ### error-return: cleared here, appended to on malformed input

    ### Remove any whitespace sitting between the "?=" that closes one
    ### encoded word and the "=?" that opens the next:
    $encstr =~ s{(\?\=)\s*(\=\?)}{$1$2}gs;
    pos($encstr) = 0;

    ### Tokenize from left to right.  Every match below is anchored with \G
    ### and uses /c, so a failed attempt leaves pos() where it was and the
    ### next case is tried from the same offset.
    my $total = length($encstr);
  TOKEN:
    while (pos($encstr) < $total) {
        my $start = pos($encstr);    ### offset of this token, for messages

        ### Case 1: a well-formed encoded word, "=?charset?b-or-q?data?=".
        if ($encstr =~ m{\G =\? ([^?]*)    # "=?" then charset, may be empty
                            \?  ([bq])     # "?"  then encoding letter
                            \?  ([^?]+)    # "?"  then non-empty data
                            \?=            # closing "?="
                        }xgci) {
            ### Copy the captures out first: the helpers run their own regexes.
            my ($charset, $encoding, $payload) = ($1, lc($2), $3);
            my $decoded = ($encoding eq 'b') ? _decode_B($payload)
                                             : _decode_Q($payload);
            push @tokens, [$decoded, $charset];
            next TOKEN;
        }

        ### Case 2: a "=?" that did not pan out as an encoded word.  Record
        ### the problem in $@, emit the two characters literally, carry on.
        ### Case 3 stops in front of every "=?", so this is what gets past it.
        if ($encstr =~ m{\G=\?}gc) {
            $@ .= qq|unterminated "=?..?..?=" in "$encstr" (pos $start)\n|;
            push @tokens, ['=?'];
            next TOKEN;
        }

        ### Case 3: ordinary text -- the shortest run (plus any newlines that
        ### follow it) reaching either the next "=?" or the end of the string.
        if ($encstr =~ m{\G (.*? \n*) (?= \Z | =\? ) }sxgc) {
            my $text = $1;
            length($text) or die "MIME::Words: internal logic err: empty token\n";
            push @tokens, [$text];
            next TOKEN;
        }

        ### Case 4: nothing matched at all -- bug!
        die "MIME::Words: unexpected case:\n($encstr) pos $start\n\t".
            "Please alert developer.\n";
    }

    ### List context: the [DATA, CHARSET] pairs ([DATA] alone for plain text).
    ### Scalar context: just the DATA parts concatenated.
    return (wantarray ? @tokens : join('', map { $_->[0] } @tokens));
}
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
#------------------------------ |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
=item encode_mimeword RAW, [ENCODING], [CHARSET] |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
I<Function.> |
239
|
|
|
|
|
|
|
Encode a single RAW "word" that has unsafe characters. |
240
|
|
|
|
|
|
|
The "word" will be encoded in its entirety. |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
### Encode "Francois" wrapped in Latin-1 guillemets (\xAB, \xBB), with c-cedilla (\xE7): |
243
|
|
|
|
|
|
|
$encoded = encode_mimeword("\xABFran\xE7ois\xBB"); |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
You may specify the ENCODING (C<"Q"> or C<"B">), which defaults to C<"Q">. |
246
|
|
|
|
|
|
|
You may specify the CHARSET, which defaults to C<iso-8859-1>. |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
=cut |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
sub encode_mimeword {
    my ($word, $encoding, $charset) = @_;

    ### Both optional arguments are uppercased; a false value selects the default.
    $encoding = uc($encoding || 'Q');
    $charset  = uc($charset  || 'ISO-8859-1');

    ### Anything other than "Q" is encoded as base64:
    my $payload = ($encoding eq 'Q') ? _encode_Q($word) : _encode_B($word);

    ### Assemble "=?CHARSET?ENCODING?payload?=":
    return join('?', '=', $charset, $encoding, $payload, '=');
}
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
#------------------------------ |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
=item encode_mimewords RAW, [OPTS] |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
I<Function.> |
263
|
|
|
|
|
|
|
Given a RAW string, try to find and encode all "unsafe" sequences |
264
|
|
|
|
|
|
|
of characters: |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
### Encode a string with some unsafe "words": |
267
|
|
|
|
|
|
|
$encoded = encode_mimewords("Me and \xABFran\xE7ois\xBB"); |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
Returns the encoded string. |
270
|
|
|
|
|
|
|
Any arguments past the RAW string are taken to define a hash of options: |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
=over 4 |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
=item Charset |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
Encode all unsafe stuff with this charset. Default is 'ISO-8859-1', |
277
|
|
|
|
|
|
|
a.k.a. "Latin-1". |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
=item Encoding |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
The encoding to use, C<"q"> or C<"b">. The default is C<"q">. |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
=back |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
B<Warning:> this is a quick-and-dirty solution, intended for character |
286
|
|
|
|
|
|
|
sets which overlap ASCII. B<It does not comply with the RFC 2047 |
287
|
|
|
|
|
|
|
rules regarding the use of encoded words in message headers>. |
288
|
|
|
|
|
|
|
You may want to roll your own variant, |
289
|
|
|
|
|
|
|
using C<encode_mimeword()>, for your application. |
290
|
|
|
|
|
|
|
I |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
=cut |
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
sub encode_mimewords {
    my ($rawstr, %params) = @_;
    my $charset  = $params{Charset} || 'ISO-8859-1';
    my $encoding = lc($params{Encoding} || 'q');

    ### Walk the string one "word" at a time: a run of ASCII alphanumerics
    ### and/or characters \x7F-\xFF, together with any whitespace right
    ### after it.  A word containing a $NONPRINT character is replaced by
    ### its encoded form; every other word is put back unchanged.
    ### Note that no upper bound is placed on the length of a word here.
    local $1;
    $rawstr =~ s{([a-zA-Z0-9\x7F-\xFF]+\s*)}{
        my $word = $1;
        ($word =~ /(?:[$NONPRINT])|(?:^\s+$)/o)
            ? encode_mimeword($word, $encoding, $charset)    ### has unsafe chars
            : $word;                                         ### no unsafe chars
    }xeg;

    ### Put a single space between encoded words that ended up back to back:
    $rawstr =~ s/\?==\?/?= =?/g;
    return $rawstr;
}
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
1; |
315
|
|
|
|
|
|
|
__END__ |