line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
5
|
|
|
5
|
|
61348
|
use strict; |
|
5
|
|
|
|
|
10
|
|
|
5
|
|
|
|
|
186
|
|
2
|
5
|
|
|
5
|
|
23
|
use warnings; |
|
5
|
|
|
|
|
12
|
|
|
5
|
|
|
|
|
140
|
|
3
|
5
|
|
|
5
|
|
110
|
use 5.010; |
|
5
|
|
|
|
|
30
|
|
|
5
|
|
|
|
|
375
|
|
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
package Email::Address::List; |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
our $VERSION = '0.05'; |
8
|
5
|
|
|
5
|
|
5223
|
use Email::Address; |
|
5
|
|
|
|
|
174408
|
|
|
5
|
|
|
|
|
12755
|
|
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
=head1 NAME |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
Email::Address::List - RFC close address list parsing |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 SYNOPSIS |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
use Email::Address::List; |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
my $header = <<'END'; |
19
|
|
|
|
|
|
|
Foo Bar , (an obsolete comment),,, |
20
|
|
|
|
|
|
|
a group: |
21
|
|
|
|
|
|
|
a . weird . address @ |
22
|
|
|
|
|
|
|
for-real .biz |
23
|
|
|
|
|
|
|
; invalid thingy, < |
24
|
|
|
|
|
|
|
more@example.com |
25
|
|
|
|
|
|
|
> |
26
|
|
|
|
|
|
|
END |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
my @list = Email::Address::List->parse($header); |
29
|
|
|
|
|
|
|
foreach my $e ( @list ) { |
30
|
|
|
|
|
|
|
if ($e->{'type'} eq 'mailbox') { |
31
|
|
|
|
|
|
|
print "an address: ", $e->{'value'}->format ,"\n"; |
32
|
|
|
|
|
|
|
} |
33
|
|
|
|
|
|
|
else { |
34
|
|
|
|
|
|
|
print $e->{'type'}, "\n" |
35
|
|
|
|
|
|
|
} |
36
|
|
|
|
|
|
|
} |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
# prints: |
39
|
|
|
|
|
|
|
# an address: "Foo Bar" |
40
|
|
|
|
|
|
|
# comment |
41
|
|
|
|
|
|
|
# group start |
42
|
|
|
|
|
|
|
# an address: a.weird.address@forreal.biz |
43
|
|
|
|
|
|
|
# group end |
44
|
|
|
|
|
|
|
# unknown |
45
|
|
|
|
|
|
|
# an address: more@example.com |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
=head1 DESCRIPTION |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
Parser for From, To, Cc, Bcc, Reply-To, Sender and |
50
|
|
|
|
|
|
|
previous prefixed with Resent- (eg Resent-From) headers. |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
=head1 REASONING |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
L is good at parsing addresses out of any text |
55
|
|
|
|
|
|
|
even mentioned headers and this module is derived work |
56
|
|
|
|
|
|
|
from Email::Address. |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
However, mentioned headers are structured and contain lists |
59
|
|
|
|
|
|
|
of addresses. Most of the time you want to parse such field |
60
|
|
|
|
|
|
|
from start to end keeping everything even if it's an invalid |
61
|
|
|
|
|
|
|
input. |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
=head1 METHODS |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
=head2 parse |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
A class method that takes a header value (w/o name and :) and |
68
|
|
|
|
|
|
|
a set of named options, for example: |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
my @list = Email::Address::List->parse( $line, option => 1 ); |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
Returns list of hashes. Each hash at least has 'type' key that |
73
|
|
|
|
|
|
|
describes the entry. Types: |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=over 4 |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=item mailbox |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
A mailbox entry with L object under value key. |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
If mailbox has obsolete parts then 'obsolete' is true. |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
If address (not display-name/phrase or comments, but |
84
|
|
|
|
|
|
|
local-part@domain) contains not ASCII chars then 'not_ascii' is |
85
|
|
|
|
|
|
|
set to true. According to RFC 5322 not ASCII chars are not |
86
|
|
|
|
|
|
|
allowed within mailbox. However, there are no big problems if |
87
|
|
|
|
|
|
|
those are used and actually RFC 6532 extends a few rules |
88
|
|
|
|
|
|
|
from 5322 with UTF8-non-ascii. Either use the feature or just |
89
|
|
|
|
|
|
|
skip such addresses with skip_not_ascii option. |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
=item group start |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
Some headers with mailboxes may contain groupped addresses. This |
94
|
|
|
|
|
|
|
element is returned for position where group starts. Under value |
95
|
|
|
|
|
|
|
key you find name of the group. B that value is not post |
96
|
|
|
|
|
|
|
processed at the moment, so it may contain spaces, comments, |
97
|
|
|
|
|
|
|
quoted strings and other noise. Author willing to take patches |
98
|
|
|
|
|
|
|
and warns that this will be changed at some point without additional |
99
|
|
|
|
|
|
|
notifications, so if you need groups info then you better send a |
100
|
|
|
|
|
|
|
patch :) |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
Groups can not be nested, but one field may have multiple groups or |
103
|
|
|
|
|
|
|
mix of addresses that are in a group and not in any. |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
See skip_groups option. |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
=item group end |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
Returned when a group ends. |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
=item comment |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
Obsolete syntax allows one to use standalone comments between mailboxes |
114
|
|
|
|
|
|
|
that can not be addressed to any mailbox. In such situations a comment |
115
|
|
|
|
|
|
|
returned as an entry of this type. Comment itself is under value. |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=item unknown |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
Returned if parser met something that shouldn't be there. Parser |
120
|
|
|
|
|
|
|
tries to recover by jumping over to next comma (or semicolon if inside |
121
|
|
|
|
|
|
|
group) that is out quoted string or comment, so "foo, bar, baz" string |
122
|
|
|
|
|
|
|
results in three unknown entries. Jumping over comments and quoted strings |
123
|
|
|
|
|
|
|
means that parser is very sensitive to unbalanced quotes and parens, |
124
|
|
|
|
|
|
|
but it's on purpose. |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=back |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
It can be controlled which elements are skipped, for example: |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
Email::Address::List->parse($line, skip_unknown => 1, ...); |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
=over 4 |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=item skip_comments |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
Skips comments between mailboxes. Comments inside and next to a mailbox |
137
|
|
|
|
|
|
|
are not skipped, but returned as part of mailbox entry. |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
=item skip_not_ascii |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
Skips mailboxes where address part has not ASCII characters. |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
=item skip_groups |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
Skips group starts and end elements, however emails within groups are |
146
|
|
|
|
|
|
|
still returned. |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
=item skip_unknown |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
Skip anything that is not recognizable. It still tries to recover as |
151
|
|
|
|
|
|
|
described earlier. |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=back |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
=cut |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
# mailbox = name-addr / addr-spec |
158
|
|
|
|
|
|
|
# display-name = phrase |
159
|
|
|
|
|
|
|
# |
160
|
|
|
|
|
|
|
# from = "From:" mailbox-list CRLF |
161
|
|
|
|
|
|
|
# sender = "Sender:" mailbox CRLF |
162
|
|
|
|
|
|
|
# reply-to = "Reply-To:" address-list CRLF |
163
|
|
|
|
|
|
|
# |
164
|
|
|
|
|
|
|
# to = "To:" address-list CRLF |
165
|
|
|
|
|
|
|
# cc = "Cc:" address-list CRLF |
166
|
|
|
|
|
|
|
# bcc = "Bcc:" [address-list / CFWS] CRLF |
167
|
|
|
|
|
|
|
# |
168
|
|
|
|
|
|
|
# resent-from = "Resent-From:" mailbox-list CRLF |
169
|
|
|
|
|
|
|
# resent-sender = "Resent-Sender:" mailbox CRLF |
170
|
|
|
|
|
|
|
# resent-to = "Resent-To:" address-list CRLF |
171
|
|
|
|
|
|
|
# resent-cc = "Resent-Cc:" address-list CRLF |
172
|
|
|
|
|
|
|
# resent-bcc = "Resent-Bcc:" [address-list / CFWS] CRLF |
173
|
|
|
|
|
|
|
# |
174
|
|
|
|
|
|
|
# obs-from = "From" *WSP ":" mailbox-list CRLF |
175
|
|
|
|
|
|
|
# obs-sender = "Sender" *WSP ":" mailbox CRLF |
176
|
|
|
|
|
|
|
# obs-reply-to = "Reply-To" *WSP ":" address-list CRLF |
177
|
|
|
|
|
|
|
# |
178
|
|
|
|
|
|
|
# obs-to = "To" *WSP ":" address-list CRLF |
179
|
|
|
|
|
|
|
# obs-cc = "Cc" *WSP ":" address-list CRLF |
180
|
|
|
|
|
|
|
# obs-bcc = "Bcc" *WSP ":" (address-list / (*([CFWS] ",") [CFWS])) CRLF |
181
|
|
|
|
|
|
|
# |
182
|
|
|
|
|
|
|
# obs-resent-from = "Resent-From" *WSP ":" mailbox-list CRLF |
183
|
|
|
|
|
|
|
# obs-resent-send = "Resent-Sender" *WSP ":" mailbox CRLF |
184
|
|
|
|
|
|
|
# obs-resent-date = "Resent-Date" *WSP ":" date-time CRLF |
185
|
|
|
|
|
|
|
# obs-resent-to = "Resent-To" *WSP ":" address-list CRLF |
186
|
|
|
|
|
|
|
# obs-resent-cc = "Resent-Cc" *WSP ":" address-list CRLF |
187
|
|
|
|
|
|
|
# obs-resent-bcc = "Resent-Bcc" *WSP ":" (address-list / (*([CFWS] ",") [CFWS])) CRLF |
188
|
|
|
|
|
|
|
# obs-resent-mid = "Resent-Message-ID" *WSP ":" msg-id CRLF |
189
|
|
|
|
|
|
|
# obs-resent-rply = "Resent-Reply-To" *WSP ":" address-list CRLF |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
our $COMMENT_NEST_LEVEL ||= 2; |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
our %RE; |
194
|
|
|
|
|
|
|
our %CRE; |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
$RE{'CTL'} = q{\x00-\x1F\x7F}; |
197
|
|
|
|
|
|
|
$RE{'special'} = q{()<>\\[\\]:;@\\\\,."}; |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
$RE{'text'} = qr/[^\x0A\x0D]/; |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
$RE{'quoted_pair'} = qr/\\$RE{'text'}/; |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
$RE{'atext'} = qr/[^$RE{'CTL'}$RE{'special'}\s]/; |
204
|
|
|
|
|
|
|
$RE{'ctext'} = qr/(?>[^()\\]+)/; |
205
|
|
|
|
|
|
|
$RE{'qtext'} = qr/[^\\"]/; |
206
|
|
|
|
|
|
|
$RE{'dtext'} = qr/[^\[\]\\]/; |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
($RE{'ccontent'}, $RE{'comment'}) = (q{})x2; |
209
|
|
|
|
|
|
|
for (1 .. $COMMENT_NEST_LEVEL) { |
210
|
|
|
|
|
|
|
$RE{'ccontent'} = qr/$RE{'ctext'}|$RE{'quoted_pair'}|$RE{'comment'}/; |
211
|
|
|
|
|
|
|
$RE{'comment'} = qr/\s*\((?:\s*$RE{'ccontent'})*\s*\)\s*/; |
212
|
|
|
|
|
|
|
} |
213
|
|
|
|
|
|
|
$RE{'cfws'} = qr/$RE{'comment'}|\s+/; |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
$RE{'qcontent'} = qr/$RE{'qtext'}|$RE{'quoted_pair'}/; |
216
|
|
|
|
|
|
|
$RE{'quoted-string'} = qr/$RE{'cfws'}*"$RE{'qcontent'}+"$RE{'cfws'}*/; |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
$RE{'atom'} = qr/$RE{'cfws'}*$RE{'atext'}++$RE{'cfws'}*/; |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
$RE{'word'} = qr/$RE{'cfws'}* (?: $RE{'atom'} | "$RE{'qcontent'}+" ) $RE{'cfws'}*/x; |
221
|
|
|
|
|
|
|
$RE{'phrase'} = qr/$RE{'word'}+/x; |
222
|
|
|
|
|
|
|
$RE{'display-name'} = $RE{'phrase'}; |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
$RE{'dot_atom_text'} = qr/$RE{'atext'}+(?:\.$RE{'atext'}+)*/; |
225
|
|
|
|
|
|
|
$RE{'dot_atom'} = qr/$RE{'cfws'}*$RE{'dot_atom_text'}$RE{'cfws'}*/; |
226
|
|
|
|
|
|
|
$RE{'local-part'} = qr/$RE{'dot_atom'}|$RE{'quoted-string'}/; |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
$RE{'dcontent'} = qr/$RE{'dtext'}|$RE{'quoted_pair'}/; |
229
|
|
|
|
|
|
|
$RE{'domain_literal'} = qr/$RE{'cfws'}*\[(?:\s*$RE{'dcontent'})*\s*\]$RE{'cfws'}*/; |
230
|
|
|
|
|
|
|
$RE{'domain'} = qr/$RE{'dot_atom'}|$RE{'domain_literal'}/; |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
$RE{'addr-spec'} = qr/$RE{'local-part'}\@$RE{'domain'}/; |
233
|
|
|
|
|
|
|
$RE{'angle-addr'} = qr/$RE{'cfws'}* < $RE{'addr-spec'} > $RE{'cfws'}*/x; |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
$RE{'name-addr'} = qr/$RE{'display-name'}?$RE{'angle-addr'}/; |
236
|
|
|
|
|
|
|
$RE{'mailbox'} = qr/(?:$RE{'name-addr'}|$RE{'addr-spec'})$RE{'comment'}*/; |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
$CRE{'addr-spec'} = qr/($RE{'local-part'})\@($RE{'domain'})/; |
239
|
|
|
|
|
|
|
$CRE{'mailbox'} = qr/ |
240
|
|
|
|
|
|
|
(?: |
241
|
|
|
|
|
|
|
($RE{'display-name'})?($RE{'cfws'}*)<$CRE{'addr-spec'}>($RE{'cfws'}*) |
242
|
|
|
|
|
|
|
|$CRE{'addr-spec'} |
243
|
|
|
|
|
|
|
)($RE{'comment'}*) |
244
|
|
|
|
|
|
|
/x; |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
$RE{'dword'} = qr/$RE{'cfws'}* (?: $RE{'atom'} | \. | "$RE{'qcontent'}+" ) $RE{'cfws'}*/x; |
247
|
|
|
|
|
|
|
$RE{'obs-phrase'} = qr/$RE{'word'} $RE{'dword'}*/x; |
248
|
|
|
|
|
|
|
$RE{'obs-display-name'} = $RE{'obs-phrase'}; |
249
|
|
|
|
|
|
|
$RE{'obs-route'} = qr/ |
250
|
|
|
|
|
|
|
(?:$RE{'cfws'}|,)* |
251
|
|
|
|
|
|
|
\@$RE{'domain'} |
252
|
|
|
|
|
|
|
(?:,$RE{'cfws'}?(?:\@$RE{'domain'})?)* |
253
|
|
|
|
|
|
|
: |
254
|
|
|
|
|
|
|
/x; |
255
|
|
|
|
|
|
|
$RE{'obs-domain'} = qr/$RE{'atom'}(?:\.$RE{'atom'})*|$RE{'domain_literal'}/; |
256
|
|
|
|
|
|
|
$RE{'obs-local-part'} = qr/$RE{'word'}(?:\.$RE{'word'})*/; |
257
|
|
|
|
|
|
|
$RE{'obs-addr-spec'} = qr/$RE{'obs-local-part'}\@$RE{'obs-domain'}/; |
258
|
|
|
|
|
|
|
$CRE{'obs-addr-spec'} = qr/($RE{'obs-local-part'})\@($RE{'obs-domain'})/; |
259
|
|
|
|
|
|
|
$CRE{'obs-mailbox'} = qr/ |
260
|
|
|
|
|
|
|
(?: |
261
|
|
|
|
|
|
|
($RE{'obs-display-name'})? |
262
|
|
|
|
|
|
|
($RE{'cfws'}*)< $RE{'obs-route'}? $CRE{'obs-addr-spec'} >($RE{'cfws'}*) |
263
|
|
|
|
|
|
|
|$CRE{'obs-addr-spec'} |
264
|
|
|
|
|
|
|
)($RE{'comment'}*) |
265
|
|
|
|
|
|
|
/x; |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
sub parse { |
268
|
226
|
|
|
226
|
1
|
1095183
|
my $self = shift; |
269
|
226
|
50
|
|
|
|
1747
|
my %args = @_%2? (line => @_) : @_; |
270
|
226
|
|
|
|
|
610
|
my $line = delete $args{'line'}; |
271
|
|
|
|
|
|
|
|
272
|
226
|
|
|
|
|
453
|
my $in_group = 0; |
273
|
|
|
|
|
|
|
|
274
|
226
|
|
|
|
|
312
|
my @res; |
275
|
226
|
|
|
|
|
1200
|
while ($line =~ /\S/) { |
276
|
|
|
|
|
|
|
# in obs- case we have number of optional comments/spaces/ |
277
|
|
|
|
|
|
|
# address-list = (address *("," address)) / obs-addr-list |
278
|
|
|
|
|
|
|
# obs-addr-list = *([CFWS] ",") address *("," [address / CFWS])) |
279
|
1632
|
100
|
|
|
|
10018
|
if ( $line =~ s/^(?:($RE{'cfws'})?,)//o ) { |
280
|
1003
|
50
|
66
|
|
|
7350
|
push @res, {type => 'comment', value => $1 } |
|
|
|
66
|
|
|
|
|
281
|
|
|
|
|
|
|
if $1 && !$args{'skip_comments'} && $1 =~ /($RE{'comment'})/; |
282
|
1003
|
|
|
|
|
3757
|
next; |
283
|
|
|
|
|
|
|
} |
284
|
629
|
|
|
|
|
1666
|
$line =~ s/^\s+//o; |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
# now it's only comma separated address where address is: |
287
|
|
|
|
|
|
|
# address = mailbox / group |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
# deal with groups |
290
|
|
|
|
|
|
|
# group = display-name ":" [group-list] ";" [CFWS] |
291
|
|
|
|
|
|
|
# group-list = mailbox-list / CFWS / obs-group-list |
292
|
|
|
|
|
|
|
# obs-group-list = 1*([CFWS] ",") [CFWS]) |
293
|
629
|
50
|
33
|
|
|
24821
|
if ( !$in_group && $line =~ s/^($RE{'display-name'})://o ) { |
294
|
0
|
0
|
|
|
|
0
|
push @res, {type => 'group start', value => $1 } |
295
|
|
|
|
|
|
|
unless $args{'skip_groups'}; |
296
|
0
|
|
|
|
|
0
|
$in_group = 1; next; |
|
0
|
|
|
|
|
0
|
|
297
|
|
|
|
|
|
|
} |
298
|
629
|
50
|
33
|
|
|
3563
|
if ( $in_group && $line =~ s/^;// ) { |
299
|
0
|
0
|
|
|
|
0
|
push @res, {type => 'group end'} unless $args{'skip_groups'}; |
300
|
0
|
|
|
|
|
0
|
$in_group = 0; next; |
|
0
|
|
|
|
|
0
|
|
301
|
|
|
|
|
|
|
} |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
# now we got rid of groups and cfws, 'address = mailbox' |
304
|
|
|
|
|
|
|
# mailbox-list = (mailbox *("," mailbox)) / obs-mbox-list |
305
|
|
|
|
|
|
|
# obs-mbox-list = *([CFWS] ",") mailbox *("," [mailbox / CFWS])) |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
# so address-list is now comma separated list of mailboxes: |
308
|
|
|
|
|
|
|
# address-list = (mailbox *("," mailbox)) |
309
|
629
|
|
|
|
|
841
|
my $obsolete = 0; |
310
|
629
|
100
|
100
|
|
|
491359
|
if ( $line =~ s/^($CRE{'mailbox'})($RE{cfws}*)(?=,|;|$)//o |
|
|
|
66
|
|
|
|
|
311
|
|
|
|
|
|
|
|| ($line =~ s/^($CRE{'obs-mailbox'})($RE{cfws}*)(?=,|;|$)//o and $obsolete = 1) |
312
|
|
|
|
|
|
|
) { |
313
|
624
|
|
|
|
|
3505
|
my ($original, $phrase, $user, $host, @comments) = $self->_process_mailbox( |
314
|
|
|
|
|
|
|
$1,$2,$3,$4,$5,$6,$7,$8,$9 |
315
|
|
|
|
|
|
|
); |
316
|
624
|
50
|
|
|
|
2731
|
my $not_ascii = "$user\@$host" =~ /\P{ASCII}/? 1 : 0; |
317
|
624
|
50
|
33
|
|
|
1892
|
next if $not_ascii && $args{skip_not_ascii}; |
318
|
|
|
|
|
|
|
|
319
|
624
|
|
|
|
|
3806
|
push @res, { |
320
|
|
|
|
|
|
|
type => 'mailbox', |
321
|
|
|
|
|
|
|
value => Email::Address->new( |
322
|
|
|
|
|
|
|
$phrase, "$user\@$host", join(' ', @comments), |
323
|
|
|
|
|
|
|
$original, |
324
|
|
|
|
|
|
|
), |
325
|
|
|
|
|
|
|
obsolete => $obsolete, |
326
|
|
|
|
|
|
|
not_ascii => $not_ascii, |
327
|
|
|
|
|
|
|
}; |
328
|
624
|
|
|
|
|
14078
|
next; |
329
|
|
|
|
|
|
|
} |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
# if we got here then something unknown on our way |
332
|
|
|
|
|
|
|
# try to recorver |
333
|
5
|
50
|
|
|
|
167
|
if ($in_group) { |
334
|
0
|
0
|
|
|
|
0
|
if ( $line =~ s/^([^;,"\)]*(?:(?:$RE{'quoted-string'}|$RE{'comment'})[^;,"\)]*)*)(?=;|,)//o ) { |
335
|
0
|
0
|
|
|
|
0
|
push @res, { type => 'unknown', value => $1 } unless $args{'skip_unknown'}; |
336
|
0
|
|
|
|
|
0
|
next; |
337
|
|
|
|
|
|
|
} |
338
|
|
|
|
|
|
|
} else { |
339
|
5
|
100
|
|
|
|
482
|
if ( $line =~ s/^([^,"\)]*(?:(?:$RE{'quoted-string'}|$RE{'comment'})[^,"\)]*)*)(?=,)//o ) { |
340
|
2
|
50
|
|
|
|
19
|
push @res, { type => 'unknown', value => $1 } unless $args{'skip_unknown'}; |
341
|
2
|
|
|
|
|
11
|
next; |
342
|
|
|
|
|
|
|
} |
343
|
|
|
|
|
|
|
} |
344
|
3
|
50
|
|
|
|
33
|
push @res, { type => 'unknown', value => $line } unless $args{'skip_unknown'}; |
345
|
3
|
|
|
|
|
11
|
last; |
346
|
|
|
|
|
|
|
} |
347
|
226
|
|
|
|
|
1019
|
return @res; |
348
|
|
|
|
|
|
|
} |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
my $dequote = sub { |
351
|
|
|
|
|
|
|
local $_ = shift; |
352
|
|
|
|
|
|
|
s/^"//; s/"$//; s/\\(.)/$1/g; |
353
|
|
|
|
|
|
|
return "$_"; |
354
|
|
|
|
|
|
|
}; |
355
|
|
|
|
|
|
|
my $quote = sub { |
356
|
|
|
|
|
|
|
local $_ = shift; |
357
|
|
|
|
|
|
|
s/([\\"])/\\$1/g; |
358
|
|
|
|
|
|
|
return qq{"$_"}; |
359
|
|
|
|
|
|
|
}; |
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
sub _process_mailbox { |
362
|
624
|
|
|
624
|
|
1055
|
my $self = shift; |
363
|
624
|
|
|
|
|
1401
|
my $original = shift; |
364
|
624
|
|
|
|
|
3746
|
my @rest = (@_); |
365
|
|
|
|
|
|
|
|
366
|
624
|
|
|
|
|
821
|
my @comments; |
367
|
624
|
|
|
|
|
2591
|
foreach ( grep defined, splice @rest ) { |
368
|
3410
|
|
|
|
|
23013
|
s{ ($RE{'quoted-string'}) | ($RE{comment}) } |
369
|
859
|
100
|
|
|
|
2399
|
{ $1? $1 : do { push @comments, $2; $comments[-1] =~ /^\s|\s$/? ' ' : '' } }xgoe; |
|
711
|
100
|
|
|
|
1814
|
|
|
711
|
|
|
|
|
8229
|
|
370
|
3410
|
|
|
|
|
5085
|
s/^\s+//; s/\s+$//; |
|
3410
|
|
|
|
|
6293
|
|
371
|
3410
|
100
|
|
|
|
8341
|
next unless length; |
372
|
|
|
|
|
|
|
|
373
|
1734
|
|
|
|
|
3927
|
push @rest, $_; |
374
|
|
|
|
|
|
|
} |
375
|
624
|
|
|
|
|
2481
|
my ($host, $user, $phrase) = reverse @rest; |
376
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
# deal with spaces out of quoted strings |
378
|
425
|
100
|
|
|
|
3526
|
s{ ($RE{'quoted-string'}) | \s+ }{ $1? $1 : ' ' }xgoe |
379
|
624
|
|
|
|
|
5023
|
foreach grep defined, $phrase; |
380
|
253
|
100
|
|
|
|
2288
|
s{ ($RE{'quoted-string'}) | \s+ }{ $1? $1 : '' }xgoe |
381
|
624
|
|
|
|
|
8922
|
foreach $user, $host; |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
# dequote |
384
|
148
|
|
|
|
|
459
|
s{ ($RE{'quoted-string'}) }{ $dequote->($1) }xgoe |
385
|
624
|
|
|
|
|
4045
|
foreach grep defined, $phrase, $user; |
386
|
624
|
50
|
|
|
|
7131
|
$user = $quote->($user) unless $user =~ /^$RE{'dot_atom'}$/; |
387
|
|
|
|
|
|
|
|
388
|
624
|
|
|
|
|
1465
|
@comments = grep length, map { s/^\s+//; s/\s+$//; $_ } grep defined, @comments; |
|
711
|
|
|
|
|
1908
|
|
|
711
|
|
|
|
|
1765
|
|
|
711
|
|
|
|
|
1982
|
|
389
|
624
|
|
|
|
|
4124
|
return $original, $phrase, $user, $host, @comments; |
390
|
|
|
|
|
|
|
} |
391
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
=head1 AUTHOR |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
Ruslan Zakirov Eruz@bestpractical.comE |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
=head1 LICENSE |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
Under the same terms as Perl itself. |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
=cut |
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
1; |