line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#!/usr/bin/perl -w |
2
|
|
|
|
|
|
|
# |
3
|
|
|
|
|
|
|
# Mail::Field::Received -- |
4
|
|
|
|
|
|
|
# mostly RFC822-compliant parser of Received headers |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Copyright (c) 2000 Adam Spiers . All rights |
7
|
|
|
|
|
|
|
# reserved. This program is free software; you can redistribute it and/or |
8
|
|
|
|
|
|
|
# modify it under the same terms as Perl itself. |
9
|
|
|
|
|
|
|
# |
10
|
|
|
|
|
|
|
# $Id: Received.pm,v 1.28 2003/03/17 23:45:17 adams Exp $ |
11
|
|
|
|
|
|
|
# |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
require 5.005; |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
package Mail::Field::Received; |
16
|
|
|
|
|
|
|
|
17
|
1
|
|
|
1
|
|
40316
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
35
|
|
18
|
|
|
|
|
|
|
|
19
|
1
|
|
|
1
|
|
3021
|
use Mail::Field (); |
|
1
|
|
|
|
|
5044
|
|
|
1
|
|
|
|
|
23
|
|
20
|
1
|
|
|
1
|
|
9
|
use Carp; |
|
1
|
|
|
|
|
8
|
|
|
1
|
|
|
|
|
69
|
|
21
|
|
|
|
|
|
|
|
22
|
1
|
|
|
1
|
|
5
|
use vars qw($VERSION @ISA @EXPORT_OK); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
226
|
|
23
|
|
|
|
|
|
|
@ISA = qw(Exporter Mail::Field Mail::Field::Generic); |
24
|
|
|
|
|
|
|
@EXPORT_OK = qw(%RC &diagnose); |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
$VERSION = '0.26'; |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
=head1 NAME |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
Mail::Field::Received -- mostly RFC822-compliant parser of Received headers |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
=head1 SYNOPSIS |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
use Mail::Field; |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
my $received = Mail::Field->new('Received', $header); |
37
|
|
|
|
|
|
|
my $results = $received->parse_tree(); |
38
|
|
|
|
|
|
|
my $parsed_ok = $received->parsed_ok(); |
39
|
|
|
|
|
|
|
my $diagnostics = $received->diagnostics(); |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
=head1 DESCRIPTION |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
I Instead ask Mail::Field for new |
44
|
|
|
|
|
|
|
instances based on the field name! |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
Mail::Field::Received provides subroutines for parsing Received |
47
|
|
|
|
|
|
|
headers from e-mails. It mostly complies with RFC822, but deviates to |
48
|
|
|
|
|
|
|
accomodate a number of broken MTAs which are in common use. It also |
49
|
|
|
|
|
|
|
attempts to extract useful information which MTAs often embed within |
50
|
|
|
|
|
|
|
the C<(comments)>. |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
It is a subclass derived from the Mail::Field and Mail::Field::Generic |
53
|
|
|
|
|
|
|
classes. |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
=head1 ROUTINES |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
=over 4 |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=cut |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
INIT: { |
62
|
|
|
|
|
|
|
bless([])->register('Received'); |
63
|
|
|
|
|
|
|
} |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
## |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=item * B |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
Returns current debugging level obtained via the C method. |
70
|
|
|
|
|
|
|
If a parameter is given, the debugging level is changed. The default |
71
|
|
|
|
|
|
|
level is 3. |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=cut |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
my $debug = 3; |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
sub debug { |
78
|
2
|
|
|
2
|
1
|
1780
|
my $self = shift; |
79
|
2
|
100
|
|
|
|
37
|
if (@_) { |
80
|
1
|
|
|
|
|
3
|
$debug = shift; |
81
|
|
|
|
|
|
|
} |
82
|
2
|
|
|
|
|
8
|
return $debug; |
83
|
|
|
|
|
|
|
} |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
## |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
=item * B |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
$received->diagnose("foo", "\n"); |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
Appends stuff to the parser's diagnostics buffer. |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
=cut |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
sub diagnose { |
96
|
70
|
|
|
70
|
1
|
107
|
my $self = shift; |
97
|
70
|
|
|
|
|
136
|
my (@msgs) = @_; |
98
|
70
|
|
|
|
|
231
|
$self->{Diags} .= join '', @msgs; |
99
|
|
|
|
|
|
|
} |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
=item * B |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
my $diagnostics = $received->diagnostics(); |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
Returns the contents of the parser's diagnostics buffer. |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
=cut |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
sub diagnostics { |
110
|
1
|
|
|
1
|
1
|
2
|
my $self = shift; |
111
|
1
|
|
50
|
|
|
17
|
return $self->{Diags} || ''; |
112
|
|
|
|
|
|
|
} |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
## |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
# Here be all the roughly (!) RFC822-compliant regexps. They |
117
|
|
|
|
|
|
|
# sometimes deviate from RFC822 to allow for many common MTAs which |
118
|
|
|
|
|
|
|
# don't comply either. |
119
|
|
|
|
|
|
|
# |
120
|
|
|
|
|
|
|
# N.B. we need lots of butt-ugly extra ()s to avoid a nasty bug with |
121
|
|
|
|
|
|
|
# (?-x:) in many recent Perls (fixed by 5.005_63 it seems, maybe earlier). |
122
|
|
|
|
|
|
|
|
123
|
1
|
|
|
1
|
|
4
|
use vars qw(%RC); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
5663
|
|
124
|
|
|
|
|
|
|
%RC = (); |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
# Atoms consist of all CHARs except SPACE, CTLs, and SPECIALs. |
127
|
|
|
|
|
|
|
$RC{atom} = qr/(?:[\041\043-\047\052\053\055-\071\075\077\101-\132\136-\176]+)/; |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
$RC{ctext} = qr/[\000-\014\016-\047\052-\133\135-\177]/; |
130
|
|
|
|
|
|
|
$RC{dtext} = qr/[\000-\014\016-\132\136-\177]/; |
131
|
|
|
|
|
|
|
$RC{quoted_pair} = qr/(?:\\[\000-\177])/; |
132
|
|
|
|
|
|
|
$RC{qtext} = qr/[\000-\014\016-\041\043-\133\135-\177]/; |
133
|
|
|
|
|
|
|
$RC{quoted_str} = qr/(?:"(?:$RC{qtext}|$RC{quoted_pair})*")/; |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
# Comments can be arbitrarily nested but I can't be bothered to |
136
|
|
|
|
|
|
|
# support that here; it's too much effort and no-one will nest more than |
137
|
|
|
|
|
|
|
# once ... I hope! |
138
|
|
|
|
|
|
|
$RC{comment_base}= qr/(\((?:$RC{ctext}|$RC{quoted_pair})*\))/; |
139
|
|
|
|
|
|
|
$RC{comment} = qr/(\((?:$RC{ctext}|$RC{quoted_pair}|$RC{comment_base})*\))/; |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
$RC{word} = qr/(?:$RC{atom}|$RC{quoted_str})/; |
142
|
|
|
|
|
|
|
$RC{words} = qr/($RC{atom}(\s+$RC{atom})*|$RC{quoted_str})/; |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
# ' 1' isn't 2DIGIT according to RFC822 but some MTAs use it anyway |
145
|
|
|
|
|
|
|
$RC{TWO_DIGIT} = qr/((?:\d|(?<= )| )\d)/; |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
# This could be improved upon. I left the common triples in, even |
148
|
|
|
|
|
|
|
# though [A-Z]{3} makes them redundant. |
149
|
|
|
|
|
|
|
$RC{zone_name} = qr/(UT|GMT|[CEMPW][DES]T|[A-Z]|[A-Z]{3})/; |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
$RC{zone} = qr/( |
152
|
|
|
|
|
|
|
([+-]?[01]\d(?:00|15|30|45))(?: |
153
|
|
|
|
|
|
|
)(?:\s(?:$RC{zone_name}|\($RC{zone_name}\)))? |
154
|
|
|
|
|
|
|
| |
155
|
|
|
|
|
|
|
(?:$RC{zone_name})(?: |
156
|
|
|
|
|
|
|
))/x; |
157
|
|
|
|
|
|
|
$RC{hms} = qr/($RC{TWO_DIGIT}:(\d\d)(?::(\d\d))?)/; |
158
|
|
|
|
|
|
|
# Note: case-insensitivity is not RFC-compliant here, but some MTAs |
159
|
|
|
|
|
|
|
# write days/months in all lower case. |
160
|
|
|
|
|
|
|
$RC{month} = qr/(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)/i; |
161
|
|
|
|
|
|
|
$RC{week_day} = qr/(Mon|Tue|Wed|Thu|Fri|Sat|Sun)/i; |
162
|
|
|
|
|
|
|
$RC{year} = qr/((?:19|20)?\d{2}|100)/; # god-DAMN the incompetence! |
163
|
|
|
|
|
|
|
$RC{year_day1} = qr/(?:$RC{TWO_DIGIT}\s$RC{month})/; |
164
|
|
|
|
|
|
|
$RC{year_day2} = qr/(?:$RC{month}\s$RC{TWO_DIGIT})/; |
165
|
|
|
|
|
|
|
$RC{day_of_year} = qr/(?:$RC{year_day1}|$RC{year_day2})/; |
166
|
|
|
|
|
|
|
$RC{date_time1} = qr/(?:$RC{hms}\s+$RC{year}\s+(?:$RC{zone})?)/; |
167
|
|
|
|
|
|
|
$RC{date_time2} = qr/(?:$RC{hms}\s+$RC{zone}\s+$RC{year})/; |
168
|
|
|
|
|
|
|
$RC{date_time3} = qr/(?:$RC{year}\s+$RC{hms}\s+(?:$RC{zone})?)/; |
169
|
|
|
|
|
|
|
$RC{date_time} = qr/( |
170
|
|
|
|
|
|
|
(?: $RC{week_day} ,? \s* )? |
171
|
|
|
|
|
|
|
($RC{day_of_year}) \s+ |
172
|
|
|
|
|
|
|
($RC{date_time1}|$RC{date_time2}|$RC{date_time3}) |
173
|
|
|
|
|
|
|
)/x; |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
$RC{ipv4_addr} = qr/(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})/; |
176
|
|
|
|
|
|
|
# check valid with inet_aton() |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
$RC{domain_lit} = qr/(?:\[(?:$RC{dtext}|$RC{quoted_pair})*\])/; |
179
|
|
|
|
|
|
|
$RC{sub_domain} = qr/(?:$RC{atom}|$RC{domain_lit})/; |
180
|
|
|
|
|
|
|
$RC{domain} = qr/(?:$RC{sub_domain}(?:\.$RC{sub_domain})*)/; |
181
|
|
|
|
|
|
|
$RC{local_part} = qr/(?:$RC{word}(?:\.$RC{word})*)/; |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
# This is the RFC822 addr-spec ... |
184
|
|
|
|
|
|
|
$RC{addr_spec} = qr/($RC{local_part})\@($RC{domain})/; |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
# ... but many MTAs are non-compliant: |
187
|
|
|
|
|
|
|
$RC{addr_spec2} = qr/($RC{local_part})(?:\@($RC{domain}))?/; |
188
|
|
|
|
|
|
|
$RC{addr_spec3} = qr/$RC{addr_spec2}|($RC{domain})/; |
189
|
|
|
|
|
|
|
$RC{addr_spec4} = qr/((?:$RC{words}\s+)?<$RC{addr_spec3}>|$RC{addr_spec3}) |
190
|
|
|
|
|
|
|
(?:,\s?\.\.\.)?/x; |
191
|
|
|
|
|
|
|
$RC{addr_spec5} = qr/(?:(?:($RC{local_part})\@)?($RC{domain}))/; |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
# RFC822 dictates that msg-id is "<" addr-spec ">" but in practice |
194
|
|
|
|
|
|
|
# many MTAs do not adhere to this for the "id" part of Received headers. |
195
|
|
|
|
|
|
|
$RC{msg_id} = qr/(<$RC{addr_spec2}>|\#?[\w\.-]+)/; |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
$RC{from1} = qr/((?i:from) \s+ (<$RC{addr_spec}>))/x; |
198
|
|
|
|
|
|
|
$RC{from2} = qr/((?i:from) \s+ ($RC{addr_spec5})?)/x; |
199
|
|
|
|
|
|
|
$RC{by} = qr/((?i:by) \s+ ($RC{domain}))/x; |
200
|
|
|
|
|
|
|
$RC{via} = qr/((?i:via) \s+ ($RC{atom}))/x; |
201
|
|
|
|
|
|
|
$RC{with} = qr/((?i:with) \s ($RC{atom})?)/x; # sometimes empty atom |
202
|
|
|
|
|
|
|
$RC{id} = qr/((?i:id) \s+ $RC{msg_id}(?::(\d+))?)/x; |
203
|
|
|
|
|
|
|
$RC{for} = qr/((?i:for) \s+ $RC{addr_spec4})/x; |
204
|
|
|
|
|
|
|
$RC{sent_by} = qr/((?i:sent \s by) \s+ $RC{addr_spec4})/x; |
205
|
|
|
|
|
|
|
$RC{convert} = qr/((?i:convert) \s+ ($RC{atom}))/x; |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
## |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
sub set { |
210
|
1
|
|
|
1
|
1
|
93
|
my $self = shift; |
211
|
1
|
|
|
|
|
7
|
return $self; |
212
|
|
|
|
|
|
|
} |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
## |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
=item * B |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
The actual parser. Returns the object (Mail::Field barfs otherwise). |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
=cut |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
sub parse { |
223
|
6
|
|
|
6
|
1
|
1374
|
my ($self, $recv) = @_; |
224
|
|
|
|
|
|
|
|
225
|
6
|
|
|
|
|
12
|
$self->{Text} = $recv; |
226
|
6
|
|
|
|
|
12
|
$self->{Diags} = ''; |
227
|
|
|
|
|
|
|
|
228
|
6
|
|
|
|
|
17
|
my %parsed = (whole => $recv); |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
# \234 sometimes crops up for some unknown reason. Huh?! |
231
|
6
|
|
|
|
|
20
|
$recv =~ tr/\234//d; |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
# From RFC822: |
234
|
|
|
|
|
|
|
# received = "Received" ":" ; one per relay |
235
|
|
|
|
|
|
|
# ["from" domain] ; sending host |
236
|
|
|
|
|
|
|
# ["by" domain] ; receiving host |
237
|
|
|
|
|
|
|
# ["via" atom] ; physical path |
238
|
|
|
|
|
|
|
# *("with" atom) ; link/mail protocol |
239
|
|
|
|
|
|
|
# ["id" msg-id] ; receiver msg id |
240
|
|
|
|
|
|
|
# ["for" addr-spec] ; initial form |
241
|
|
|
|
|
|
|
# ";" date-time ; time received |
242
|
|
|
|
|
|
|
# |
243
|
|
|
|
|
|
|
# Sadly many many MTAs are broken, however, so we have to deal with |
244
|
|
|
|
|
|
|
# a lot of special cases. Improvements to this section are very welcome. |
245
|
|
|
|
|
|
|
|
246
|
6
|
|
|
|
|
15
|
my %expecting = map { $_ => 1 } |
|
54
|
|
|
|
|
97
|
|
247
|
|
|
|
|
|
|
(qw/from by via with id convert for sent_by date_time/); |
248
|
|
|
|
|
|
|
|
249
|
6
|
|
|
|
|
17
|
for ($recv) { |
250
|
6
|
|
|
|
|
10
|
my $last_section = ''; |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
TOKEN: |
253
|
6
|
|
|
|
|
7
|
while (1) { |
254
|
43
|
50
|
|
|
|
377
|
$self->diagnose("---- Expecting: ", (join ' ', sort keys %expecting), |
255
|
|
|
|
|
|
|
"\n") if $debug >= 5; |
256
|
43
|
50
|
|
|
|
126
|
$self->diagnose("---- Last section: $last_section\n") |
257
|
|
|
|
|
|
|
if $debug >= 6; |
258
|
|
|
|
|
|
|
|
259
|
43
|
100
|
|
|
|
420
|
if (/\G$RC{comment}/cg) { |
260
|
7
|
|
|
|
|
20
|
my $comment = $1; |
261
|
7
|
50
|
|
|
|
37
|
$self->diagnose("Got comment $comment\n") if $debug >= 4; |
262
|
|
|
|
|
|
|
|
263
|
7
|
100
|
|
|
|
18
|
push @{$parsed{$last_section}{comments}}, $comment |
|
4
|
|
|
|
|
15
|
|
264
|
|
|
|
|
|
|
if $last_section; |
265
|
7
|
|
|
|
|
8
|
push @{$parsed{comments}}, $comment; |
|
7
|
|
|
|
|
19
|
|
266
|
|
|
|
|
|
|
|
267
|
7
|
100
|
|
|
|
20
|
if ($last_section eq 'from') { |
268
|
|
|
|
|
|
|
FROMCOMMENT: |
269
|
|
|
|
|
|
|
{ |
270
|
2
|
50
|
|
|
|
3
|
if ($comment =~ /\( |
|
2
|
|
|
|
|
177
|
|
271
|
|
|
|
|
|
|
(?:(?:($RC{local_part})\@)?($RC{domain})\s+)? |
272
|
|
|
|
|
|
|
(?:\[ $RC{ipv4_addr} \])(?: |
273
|
|
|
|
|
|
|
)\)/x) |
274
|
|
|
|
|
|
|
{ |
275
|
2
|
50
|
|
|
|
9
|
if ($1) { |
276
|
0
|
0
|
|
|
|
0
|
$self->diagnose("Got `from' ident in comments: $1\n") |
277
|
|
|
|
|
|
|
if $debug >= 3; |
278
|
0
|
|
|
|
|
0
|
$parsed{from}{ident} = $1; |
279
|
|
|
|
|
|
|
} |
280
|
|
|
|
|
|
|
|
281
|
2
|
50
|
|
|
|
9
|
if ($2) { |
282
|
2
|
50
|
|
|
|
16
|
$self->diagnose("Got `from' domain in comments: $2\n") |
283
|
|
|
|
|
|
|
if $debug >= 3; |
284
|
2
|
|
|
|
|
9
|
$parsed{from}{domain} = $2; |
285
|
|
|
|
|
|
|
} |
286
|
|
|
|
|
|
|
|
287
|
2
|
50
|
|
|
|
8
|
if ($3) { |
288
|
2
|
50
|
|
|
|
13
|
$self->diagnose("Got `from' IP address in comments: $3\n") |
289
|
|
|
|
|
|
|
if $debug >= 3; |
290
|
2
|
|
|
|
|
6
|
$parsed{from}{address} = $3; |
291
|
|
|
|
|
|
|
} |
292
|
|
|
|
|
|
|
|
293
|
2
|
|
|
|
|
5
|
last FROMCOMMENT; |
294
|
|
|
|
|
|
|
} |
295
|
|
|
|
|
|
|
|
296
|
0
|
0
|
|
|
|
0
|
if ($comment =~ /(HELO|EHLO)(?:\s+|=)($RC{domain})/i) { |
297
|
|
|
|
|
|
|
# HELO domain is in comments, not outside, so swap |
298
|
0
|
0
|
|
|
|
0
|
$self->diagnose("Got `from' $1 domain in comments: $2\n") |
299
|
|
|
|
|
|
|
if $debug >= 3; |
300
|
0
|
|
|
|
|
0
|
@{$parsed{from}}{qw/domain HELO/} |
|
0
|
|
|
|
|
0
|
|
301
|
|
|
|
|
|
|
= ($parsed{from}{HELO}, $2); |
302
|
|
|
|
|
|
|
} |
303
|
|
|
|
|
|
|
|
304
|
0
|
0
|
|
|
|
0
|
if ($comment =~ /$RC{ipv4_addr}\]?(?::(\d{1,5}))?/) { |
305
|
0
|
0
|
|
|
|
0
|
$self->diagnose("Got `from' IP address in comments: $1\n") |
306
|
|
|
|
|
|
|
if $debug >= 3; |
307
|
|
|
|
|
|
|
|
308
|
0
|
|
|
|
|
0
|
$parsed{from}{address} = $1; |
309
|
|
|
|
|
|
|
|
310
|
0
|
0
|
|
|
|
0
|
if ($2) { |
311
|
0
|
|
|
|
|
0
|
$parsed{from}{port} = $2; |
312
|
0
|
0
|
|
|
|
0
|
$self->diagnose("Got `from' port in comments: $1\n") |
313
|
|
|
|
|
|
|
if $debug >= 3; |
314
|
|
|
|
|
|
|
} |
315
|
|
|
|
|
|
|
} |
316
|
|
|
|
|
|
|
} |
317
|
2
|
|
|
|
|
10
|
$parsed{from}{whole} .= " $comment\n"; |
318
|
|
|
|
|
|
|
} |
319
|
|
|
|
|
|
|
|
320
|
7
|
|
|
|
|
18
|
next TOKEN; |
321
|
|
|
|
|
|
|
} |
322
|
|
|
|
|
|
|
|
323
|
36
|
100
|
|
|
|
117
|
if (/\G(\s+)/cg) { |
324
|
11
|
50
|
|
|
|
27
|
$self->diagnose("Got whitespace: <$1>\n") if $debug >= 7; |
325
|
11
|
|
|
|
|
21
|
next TOKEN; |
326
|
|
|
|
|
|
|
} |
327
|
|
|
|
|
|
|
|
328
|
25
|
50
|
66
|
|
|
328
|
if ($expecting{from} and /\G$RC{from1}/cg) { |
329
|
0
|
|
0
|
|
|
0
|
print map { ($_ || '__undef__') . "\n---\n" } $1, $2, $3, $4, $5, $6; |
|
0
|
|
|
|
|
0
|
|
330
|
0
|
0
|
|
|
|
0
|
$self->diagnose("Got from type1: $1\n") if $debug >= 2; |
331
|
0
|
|
|
|
|
0
|
$last_section = 'from'; |
332
|
|
|
|
|
|
|
|
333
|
0
|
|
|
|
|
0
|
$parsed{from}{whole} = $1; |
334
|
0
|
|
|
|
|
0
|
$parsed{from}{from} = $2; |
335
|
0
|
0
|
|
|
|
0
|
$parsed{from}{ident} = $3 if $3; |
336
|
0
|
|
|
|
|
0
|
$parsed{from}{HELO} = $4; |
337
|
|
|
|
|
|
|
|
338
|
0
|
|
|
|
|
0
|
delete $expecting{from}; |
339
|
0
|
|
|
|
|
0
|
delete @expecting{grep /^after_/, keys %expecting}; |
340
|
0
|
|
|
|
|
0
|
$expecting{after_from}++; |
341
|
0
|
|
|
|
|
0
|
next TOKEN; |
342
|
|
|
|
|
|
|
} |
343
|
|
|
|
|
|
|
|
344
|
25
|
100
|
100
|
|
|
369
|
if ($expecting{from} and /\G$RC{from2}/cg) { |
345
|
2
|
50
|
|
|
|
27
|
$self->diagnose("Got from type2: $1\n") if $debug >= 2; |
346
|
2
|
|
|
|
|
4
|
$last_section = 'from'; |
347
|
|
|
|
|
|
|
|
348
|
2
|
|
|
|
|
8
|
$parsed{from}{whole} = $1; |
349
|
2
|
|
|
|
|
6
|
$parsed{from}{from} = $2; |
350
|
2
|
50
|
|
|
|
9
|
$parsed{from}{ident} = $3 if $3; |
351
|
2
|
|
|
|
|
6
|
$parsed{from}{HELO} = $4; |
352
|
|
|
|
|
|
|
|
353
|
2
|
|
|
|
|
6
|
delete $expecting{from}; |
354
|
2
|
|
|
|
|
11
|
delete @expecting{grep /^after_/, keys %expecting}; |
355
|
2
|
|
|
|
|
17
|
$expecting{after_from}++; |
356
|
2
|
|
|
|
|
9
|
next TOKEN; |
357
|
|
|
|
|
|
|
} |
358
|
|
|
|
|
|
|
|
359
|
23
|
50
|
66
|
|
|
111
|
if ($expecting{after_from} and /\G($RC{domain_lit})/cg) { |
360
|
0
|
0
|
|
|
|
0
|
$self->diagnose("Got address from bad `from': $1\n") if $debug >= 3; |
361
|
0
|
|
|
|
|
0
|
$parsed{from}{address} = $1; |
362
|
0
|
|
|
|
|
0
|
delete $expecting{after_from}; |
363
|
0
|
|
|
|
|
0
|
next TOKEN; |
364
|
|
|
|
|
|
|
} |
365
|
|
|
|
|
|
|
|
366
|
23
|
50
|
66
|
|
|
78
|
if ($expecting{after_from} and $parsed{from}{whole} eq 'from mail' and |
|
|
|
33
|
|
|
|
|
367
|
|
|
|
|
|
|
/\G(pickup service)/cg) { |
368
|
0
|
0
|
|
|
|
0
|
$self->diagnose("Got bad `from': appending: $1\n") |
369
|
|
|
|
|
|
|
if $debug >= 3; |
370
|
0
|
|
|
|
|
0
|
$parsed{from}{whole} .= $1; |
371
|
0
|
|
|
|
|
0
|
delete $expecting{after_from}; |
372
|
0
|
|
|
|
|
0
|
next TOKEN; |
373
|
|
|
|
|
|
|
} |
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
# Deal with incompetence from the fucking /imbeciles/ at M$. |
376
|
23
|
50
|
66
|
|
|
69
|
if ($expecting{after_from} and $parsed{whole} =~ /Microsoft SMTPSVC/ and |
|
|
|
33
|
|
|
|
|
377
|
|
|
|
|
|
|
/\G-\s+$RC{ipv4_addr}/cg) { |
378
|
0
|
0
|
|
|
|
0
|
$self->diagnose("Got IP from bad M\$ from: $1\n") if $debug >= 3; |
379
|
0
|
|
|
|
|
0
|
$parsed{from}{address} = $1; |
380
|
0
|
|
|
|
|
0
|
delete $expecting{after_from}; |
381
|
0
|
|
|
|
|
0
|
next TOKEN; |
382
|
|
|
|
|
|
|
} |
383
|
|
|
|
|
|
|
|
384
|
23
|
50
|
66
|
|
|
111
|
if ($expecting{after_from} and /\G, claiming to be ($RC{word})/cg) { |
385
|
0
|
0
|
|
|
|
0
|
$self->diagnose("Got HELO: $1 from brain-dead MTA\n") if $debug >= 3; |
386
|
0
|
|
|
|
|
0
|
$parsed{allow_parse_fail}++; # More brain-dead MTAs |
387
|
0
|
|
|
|
|
0
|
$parsed{from}{HELO} = $1; |
388
|
0
|
|
|
|
|
0
|
delete $expecting{after_from}; |
389
|
0
|
|
|
|
|
0
|
next TOKEN; |
390
|
|
|
|
|
|
|
} |
391
|
|
|
|
|
|
|
|
392
|
23
|
100
|
100
|
|
|
183
|
if ($expecting{by} and /\G$RC{by},?/cg) { |
393
|
2
|
50
|
|
|
|
15
|
$self->diagnose("Got by: $1\n") if $debug >= 2; |
394
|
2
|
|
|
|
|
3
|
$last_section = 'by'; |
395
|
|
|
|
|
|
|
|
396
|
2
|
|
|
|
|
9
|
$parsed{by}{whole} = $1; |
397
|
2
|
|
|
|
|
8
|
$parsed{by}{domain} = $2; |
398
|
|
|
|
|
|
|
|
399
|
2
|
|
|
|
|
5
|
delete @expecting{qw/by/}; |
400
|
2
|
|
|
|
|
15
|
delete @expecting{grep /^after_/, keys %expecting}; |
401
|
2
|
|
|
|
|
5
|
$expecting{after_by}++; |
402
|
2
|
|
|
|
|
9
|
next TOKEN; |
403
|
|
|
|
|
|
|
} |
404
|
|
|
|
|
|
|
|
405
|
21
|
50
|
66
|
|
|
113
|
if ($expecting{after_by} and /\G($RC{domain_lit})/cg) { |
406
|
0
|
0
|
|
|
|
0
|
$self->diagnose("Got address from bad `by': $1\n") if $debug >= 3; |
407
|
0
|
|
|
|
|
0
|
$parsed{by}{address} = $1; |
408
|
0
|
|
|
|
|
0
|
delete $expecting{after_by}; |
409
|
0
|
|
|
|
|
0
|
next TOKEN; |
410
|
|
|
|
|
|
|
} |
411
|
|
|
|
|
|
|
|
412
|
21
|
50
|
66
|
|
|
61
|
if ($expecting{after_by} and /\G(Sendmail)/cg) { |
413
|
0
|
0
|
|
|
|
0
|
$self->diagnose("Got MTA from bad `by': $1\n") if $debug >= 3; |
414
|
0
|
|
|
|
|
0
|
$parsed{by}{MTA} = $1; |
415
|
|
|
|
|
|
|
|
416
|
0
|
0
|
|
|
|
0
|
if ($expecting{via}) { |
417
|
0
|
|
|
|
|
0
|
$parsed{via}{via} = $1; |
418
|
|
|
|
|
|
|
} |
419
|
|
|
|
|
|
|
|
420
|
0
|
|
|
|
|
0
|
delete $expecting{after_by}; |
421
|
0
|
|
|
|
|
0
|
next TOKEN; |
422
|
|
|
|
|
|
|
} |
423
|
|
|
|
|
|
|
|
424
|
21
|
50
|
66
|
|
|
129
|
if ($expecting{via} and /\G$RC{via}/cg) { |
425
|
0
|
0
|
|
|
|
0
|
$self->diagnose("Got via: $1\n") if $debug >= 2; |
426
|
0
|
|
|
|
|
0
|
$last_section = 'via'; |
427
|
|
|
|
|
|
|
|
428
|
0
|
|
|
|
|
0
|
$parsed{via}{whole} = $1; |
429
|
0
|
|
|
|
|
0
|
$parsed{via}{via} = $2; |
430
|
|
|
|
|
|
|
|
431
|
0
|
|
|
|
|
0
|
delete $expecting{via}; |
432
|
0
|
|
|
|
|
0
|
delete @expecting{grep /^after_/, keys %expecting}; |
433
|
0
|
|
|
|
|
0
|
$expecting{after_via}++; |
434
|
0
|
|
|
|
|
0
|
next TOKEN; |
435
|
|
|
|
|
|
|
} |
436
|
|
|
|
|
|
|
|
437
|
21
|
50
|
33
|
|
|
57
|
if ($expecting{after_via} and /\G\[$RC{ipv4_addr}\]/cg) { |
438
|
0
|
0
|
|
|
|
0
|
$self->diagnose("Got address from bad `via': $1\n") if $debug >= 3; |
439
|
0
|
|
|
|
|
0
|
$parsed{via}{address} = $1; |
440
|
0
|
|
|
|
|
0
|
delete $expecting{after_via}; |
441
|
0
|
|
|
|
|
0
|
next TOKEN; |
442
|
|
|
|
|
|
|
} |
443
|
|
|
|
|
|
|
|
444
|
21
|
50
|
66
|
|
|
82
|
if (! $expecting{from} and /\Gfrom\s+stdin/cg) { |
445
|
0
|
0
|
|
|
|
0
|
$self->diagnose("Got `from stdin'\n") if $debug >= 3; |
446
|
0
|
|
|
|
|
0
|
$parsed{from}{stdin} = 'yep'; |
447
|
0
|
|
|
|
|
0
|
next TOKEN; |
448
|
|
|
|
|
|
|
} |
449
|
|
|
|
|
|
|
|
450
|
21
|
50
|
66
|
|
|
77
|
if ($expecting{with} and |
451
|
|
|
|
|
|
|
m! |
452
|
|
|
|
|
|
|
\G((?i:with) \s |
453
|
|
|
|
|
|
|
(P:(stdio|smtp)/R:(inet|bind)_hosts/T:(smtp|inet_zone_bind_smtp))) |
454
|
|
|
|
|
|
|
!cgx) { |
455
|
0
|
0
|
|
|
|
0
|
$self->diagnose("Got weird with: $1\n") if $debug >= 2; |
456
|
0
|
|
|
|
|
0
|
$last_section = 'with'; |
457
|
|
|
|
|
|
|
|
458
|
0
|
|
|
|
|
0
|
$parsed{with}{whole} = $1; |
459
|
0
|
|
|
|
|
0
|
$parsed{with}{with} = $2; |
460
|
|
|
|
|
|
|
|
461
|
0
|
|
|
|
|
0
|
delete @expecting{grep /^after_/, keys %expecting}; |
462
|
0
|
|
|
|
|
0
|
$expecting{after_with}++; |
463
|
|
|
|
|
|
|
# I've seen the `from' bit come after the `with' bit sometimes. |
464
|
|
|
|
|
|
|
# Why oh why ... |
465
|
0
|
|
|
|
|
0
|
$expecting{from}++; |
466
|
0
|
|
|
|
|
0
|
next TOKEN; |
467
|
|
|
|
|
|
|
} |
468
|
|
|
|
|
|
|
|
469
|
21
|
100
|
100
|
|
|
173
|
if ($expecting{with} and /\G$RC{with}/cg) { |
470
|
2
|
50
|
|
|
|
79
|
$self->diagnose("Got with: $1\n") if $debug >= 2; |
471
|
2
|
|
|
|
|
5
|
$last_section = 'with'; |
472
|
|
|
|
|
|
|
|
473
|
2
|
|
|
|
|
27
|
$parsed{with}{whole} = $1; |
474
|
2
|
|
|
|
|
6
|
$parsed{with}{with} = $2; |
475
|
2
|
50
|
|
|
|
16
|
$parsed{with}{with} .= $3 if $3; |
476
|
|
|
|
|
|
|
|
477
|
2
|
|
|
|
|
16
|
delete @expecting{grep /^after_/, keys %expecting}; |
478
|
2
|
|
|
|
|
5
|
$expecting{after_with}++; |
479
|
|
|
|
|
|
|
# I've seen the `from' bit come after the `with' bit sometimes. |
480
|
|
|
|
|
|
|
# Why oh why ... |
481
|
2
|
|
|
|
|
5
|
$expecting{from}++; |
482
|
2
|
|
|
|
|
7
|
next TOKEN; |
483
|
|
|
|
|
|
|
} |
484
|
|
|
|
|
|
|
|
485
|
19
|
100
|
66
|
|
|
63
|
if ($expecting{after_with} && $parsed{with}{with}) { |
486
|
|
|
|
|
|
|
|
487
|
|
|
|
|
|
|
# Microsoft SMTPSVC uses two atoms -- yet /another/ example of |
488
|
|
|
|
|
|
|
# Microsoft not following standards ... *gasp* |
489
|
|
|
|
|
|
|
|
490
|
2
|
50
|
|
|
|
10
|
if ($parsed{with}{with} eq 'Microsoft') { |
491
|
0
|
0
|
|
|
|
0
|
if (/\GSMTPSVC(?:\(([\d\.]+)\))?/cg) { |
|
|
0
|
|
|
|
|
|
492
|
0
|
0
|
|
|
|
0
|
$self->diagnose("Got M\$ SMTPSVC version from bad `with'", |
|
|
0
|
|
|
|
|
|
493
|
|
|
|
|
|
|
$1 ? ": $1" : '', |
494
|
|
|
|
|
|
|
"\n") |
495
|
|
|
|
|
|
|
if $debug >= 3; |
496
|
0
|
|
|
|
|
0
|
delete $expecting{after_with}; |
497
|
0
|
|
|
|
|
0
|
next TOKEN; |
498
|
|
|
|
|
|
|
} |
499
|
|
|
|
|
|
|
elsif (/\GMAPI/cg) { |
500
|
0
|
0
|
|
|
|
0
|
$self->diagnose("Got Microsoft MAPI from bad `with'\n") |
501
|
|
|
|
|
|
|
if $debug >= 3; |
502
|
0
|
|
|
|
|
0
|
delete $expecting{after_with}; |
503
|
0
|
|
|
|
|
0
|
next TOKEN; |
504
|
|
|
|
|
|
|
} |
505
|
|
|
|
|
|
|
} |
506
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
# More brain damage ... |
508
|
|
|
|
|
|
|
|
509
|
2
|
50
|
33
|
|
|
10
|
if ($parsed{with}{with} eq 'Internet' and |
510
|
|
|
|
|
|
|
/\GMail Service\s*\(([\d\.]+)\)/cg) { |
511
|
0
|
0
|
|
|
|
0
|
$self->diagnose("Got Internet Mail Service version from bad `with': $1\n") |
512
|
|
|
|
|
|
|
if $debug >= 3; |
513
|
0
|
|
|
|
|
0
|
delete $expecting{after_with}; |
514
|
0
|
|
|
|
|
0
|
next TOKEN; |
515
|
|
|
|
|
|
|
} |
516
|
|
|
|
|
|
|
|
517
|
2
|
50
|
33
|
|
|
20
|
if ($parsed{with}{with} eq 'WorldClient' and |
518
|
|
|
|
|
|
|
/\G($RC{domain_lit})/cg) { |
519
|
0
|
0
|
|
|
|
0
|
$self->diagnose("Got WorldClient address from bad `with': $1\n") |
520
|
|
|
|
|
|
|
if $debug >= 3; |
521
|
0
|
|
|
|
|
0
|
delete $expecting{after_with}; |
522
|
0
|
|
|
|
|
0
|
next TOKEN; |
523
|
|
|
|
|
|
|
} |
524
|
|
|
|
|
|
|
|
525
|
2
|
50
|
33
|
|
|
9
|
if ($parsed{with}{with} eq 'Local' and |
526
|
|
|
|
|
|
|
/\GSMTP/cg) { |
527
|
0
|
0
|
|
|
|
0
|
$self->diagnose("Got Local SMTP from bad `with'\n") |
528
|
|
|
|
|
|
|
if $debug >= 3; |
529
|
0
|
|
|
|
|
0
|
delete $expecting{after_with}; |
530
|
0
|
|
|
|
|
0
|
next TOKEN; |
531
|
|
|
|
|
|
|
} |
532
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
} |
534
|
|
|
|
|
|
|
|
535
|
19
|
100
|
100
|
|
|
350
|
if ($expecting{id} and /\G$RC{id}/cg) { |
536
|
2
|
50
|
|
|
|
17
|
$self->diagnose("Got id: $1\n") if $debug >= 2; |
537
|
2
|
|
|
|
|
5
|
$last_section = 'id'; |
538
|
|
|
|
|
|
|
|
539
|
2
|
|
|
|
|
8
|
$parsed{id}{whole} = $1; |
540
|
2
|
|
|
|
|
6
|
$parsed{id}{id} = $2; |
541
|
2
|
50
|
|
|
|
9
|
$parsed{id}{port} = $3 if $3; |
542
|
|
|
|
|
|
|
|
543
|
2
|
|
|
|
|
7
|
delete @expecting{qw/by via with/}; |
544
|
2
|
|
|
|
|
17
|
delete @expecting{grep /^after_/, keys %expecting}; |
545
|
2
|
|
|
|
|
9
|
next TOKEN; |
546
|
|
|
|
|
|
|
} |
547
|
|
|
|
|
|
|
|
548
|
17
|
50
|
66
|
|
|
129
|
if ($expecting{convert} and /\G$RC{convert}/cg) { |
549
|
0
|
0
|
|
|
|
0
|
$self->diagnose("Got convert: $1\n") if $debug >= 2; |
550
|
0
|
|
|
|
|
0
|
$last_section = 'convert'; |
551
|
|
|
|
|
|
|
|
552
|
0
|
|
|
|
|
0
|
$parsed{convert}{whole} = $1; |
553
|
|
|
|
|
|
|
|
554
|
0
|
|
|
|
|
0
|
delete @expecting{qw/from by via with convert/}; |
555
|
0
|
|
|
|
|
0
|
delete @expecting{grep /^after_/, keys %expecting}; |
556
|
0
|
|
|
|
|
0
|
next TOKEN; |
557
|
|
|
|
|
|
|
} |
558
|
|
|
|
|
|
|
|
559
|
17
|
100
|
100
|
|
|
527
|
if ($expecting{for} and |
560
|
|
|
|
|
|
|
/\G$RC{for}(\s+bugtraq\@securityfocus\.com)?/cgi) { |
561
|
1
|
50
|
|
|
|
11
|
$self->diagnose("Got for: $1\n") if $debug >= 2; |
562
|
1
|
|
|
|
|
2
|
$last_section = 'for'; |
563
|
|
|
|
|
|
|
|
564
|
1
|
|
|
|
|
5
|
$parsed{for}{whole} = $1; |
565
|
1
|
|
|
|
|
4
|
$parsed{for}{for} = $2; |
566
|
1
|
50
|
|
|
|
6
|
$parsed{for}{bugtraq} = $3 if $3; |
567
|
|
|
|
|
|
|
|
568
|
1
|
|
|
|
|
5
|
delete @expecting{qw/from by convert for/}; |
569
|
1
|
|
|
|
|
5
|
delete @expecting{grep /^after_/, keys %expecting}; |
570
|
1
|
|
|
|
|
8
|
next TOKEN; |
571
|
|
|
|
|
|
|
} |
572
|
|
|
|
|
|
|
|
573
|
16
|
50
|
66
|
|
|
472
|
if ($expecting{sent_by} and /\G$RC{sent_by}/cg) { |
574
|
0
|
0
|
|
|
|
0
|
$self->diagnose("Got sent by: $1\n") if $debug >= 2; |
575
|
0
|
|
|
|
|
0
|
$last_section = 'sent_by'; |
576
|
|
|
|
|
|
|
|
577
|
0
|
|
|
|
|
0
|
$parsed{sent_by}{whole} = $1; |
578
|
0
|
|
|
|
|
0
|
$parsed{sent_by}{sent_by} = $2; |
579
|
|
|
|
|
|
|
|
580
|
0
|
|
|
|
|
0
|
delete @expecting{qw/from by via with convert for sent_by/}; |
581
|
0
|
|
|
|
|
0
|
delete @expecting{grep /^after_/, keys %expecting}; |
582
|
0
|
|
|
|
|
0
|
next TOKEN; |
583
|
|
|
|
|
|
|
} |
584
|
|
|
|
|
|
|
|
585
|
16
|
100
|
100
|
|
|
902
|
if ($expecting{date_time} and /\G((?:on\s+)?$RC{date_time})/cg) { |
586
|
5
|
50
|
|
|
|
32
|
$self->diagnose("Got date_time: $1\n") if $debug >= 2; |
587
|
5
|
|
|
|
|
8
|
$last_section = 'date_time'; |
588
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
# Eugh. This is horrible. Maybe I should have used |
590
|
|
|
|
|
|
|
# Parse::RecDescent after all ... |
591
|
|
|
|
|
|
|
|
592
|
5
|
|
|
|
|
11
|
@{$parsed{date_time}}{qw/whole date_time week_day day_of_year rest/} |
|
5
|
|
|
|
|
44
|
|
593
|
|
|
|
|
|
|
= ($1, $2, $3, $4, $9); |
594
|
|
|
|
|
|
|
|
595
|
5
|
50
|
|
|
|
56
|
if (" $parsed{date_time}{day_of_year}" =~ $RC{year_day1}) { |
|
|
0
|
|
|
|
|
|
596
|
5
|
|
|
|
|
8
|
@{$parsed{date_time}}{qw/month_day month/} = ($1, $2); |
|
5
|
|
|
|
|
23
|
|
597
|
|
|
|
|
|
|
} |
598
|
|
|
|
|
|
|
elsif (" $parsed{date_time}{day_of_year}" =~ $RC{year_day2}) { |
599
|
0
|
|
|
|
|
0
|
@{$parsed{date_time}}{qw/month month_day/} = ($1, $2); |
|
0
|
|
|
|
|
0
|
|
600
|
|
|
|
|
|
|
} |
601
|
|
|
|
|
|
|
else { |
602
|
0
|
|
|
|
|
0
|
$self->diagnose("Couldn't parse day_of_year: <$parsed{date_time}{day_of_year}>"); |
603
|
0
|
|
|
|
|
0
|
$parsed{parse_failed}++; |
604
|
|
|
|
|
|
|
} |
605
|
|
|
|
|
|
|
|
606
|
5
|
100
|
|
|
|
179
|
if ($parsed{date_time}{rest} =~ $RC{date_time1}) { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
607
|
1
|
|
|
|
|
4
|
@{$parsed{date_time}}{qw/hms hour minute second year/} |
|
1
|
|
|
|
|
9
|
|
608
|
|
|
|
|
|
|
= ($1, $2, $3, $4, $5); |
609
|
1
|
50
|
|
|
|
7
|
$parsed{date_time}{zone} = $6 if defined $6; |
610
|
|
|
|
|
|
|
} |
611
|
|
|
|
|
|
|
elsif ($parsed{date_time}{rest} =~ $RC{date_time2}) { |
612
|
1
|
|
|
|
|
3
|
@{$parsed{date_time}}{qw/hms hour minute second zone year/} |
|
1
|
|
|
|
|
29
|
|
613
|
|
|
|
|
|
|
= ($1, $2, $3, $4, $5, $10); |
614
|
|
|
|
|
|
|
} |
615
|
|
|
|
|
|
|
elsif ($parsed{date_time}{rest} =~ $RC{date_time3}) { |
616
|
3
|
|
|
|
|
13
|
@{$parsed{date_time}}{qw/year hms hour minute second/} |
|
3
|
|
|
|
|
29
|
|
617
|
|
|
|
|
|
|
= ($1, $2, $3, $4, $5); |
618
|
3
|
50
|
|
|
|
20
|
$parsed{date_time}{zone} = $6 if defined $6; |
619
|
|
|
|
|
|
|
} |
620
|
|
|
|
|
|
|
else { |
621
|
0
|
|
|
|
|
0
|
$self->diagnose("Couldn't parse rest of date_time: <$parsed{date_time}{rest}>"); |
622
|
0
|
|
|
|
|
0
|
$parsed{parse_failed}++; |
623
|
|
|
|
|
|
|
} |
624
|
|
|
|
|
|
|
|
625
|
5
|
|
|
|
|
21
|
%expecting = (after_date_time => 1); |
626
|
5
|
|
|
|
|
19
|
next TOKEN; |
627
|
|
|
|
|
|
|
} |
628
|
|
|
|
|
|
|
|
629
|
11
|
50
|
66
|
|
|
59
|
if ($expecting{after_date_time} and /\G((mail.from|env.from).+)/cg) { |
630
|
0
|
0
|
|
|
|
0
|
$self->diagnose("Got random crap after date: $1\n") if $debug >= 3; |
631
|
0
|
|
|
|
|
0
|
$parsed{after_date_time} = $1; |
632
|
0
|
|
|
|
|
0
|
next TOKEN; |
633
|
|
|
|
|
|
|
} |
634
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
# Reluctantly allow semi-colons in random places |
636
|
11
|
100
|
|
|
|
41
|
if (/\G(;\s+)/cg) { |
637
|
5
|
50
|
|
|
|
11
|
$self->diagnose("Got semi-colon: <$1>\n") if $debug >= 7; |
638
|
5
|
|
|
|
|
17
|
next TOKEN; |
639
|
|
|
|
|
|
|
} |
640
|
|
|
|
|
|
|
|
641
|
6
|
|
100
|
|
|
88
|
my $old_pos = pos() || 0; |
642
|
6
|
|
|
|
|
19
|
my @start = ($old_pos - 35, $old_pos); |
643
|
6
|
100
|
|
|
|
17
|
$start[0] = 0 if $start[0] < 0; |
644
|
6
|
|
|
|
|
10
|
my $length = $old_pos - $start[0]; |
645
|
6
|
100
|
|
|
|
19
|
if (/\G(.{1,35})/cg) { |
646
|
1
|
50
|
|
|
|
12
|
$self->diagnose("** Ran out of things to match at position $old_pos:\n", |
647
|
|
|
|
|
|
|
substr($_, $start[0], $length), "<<<\n", |
648
|
|
|
|
|
|
|
' ' x ($length - 3), ">>>$1\n\n") |
649
|
|
|
|
|
|
|
if $debug >= 1; |
650
|
1
|
|
|
|
|
3
|
$parsed{parse_failed}++; |
651
|
|
|
|
|
|
|
} |
652
|
6
|
|
|
|
|
26
|
last TOKEN; |
653
|
|
|
|
|
|
|
} |
654
|
|
|
|
|
|
|
} |
655
|
|
|
|
|
|
|
|
656
|
6
|
|
|
|
|
19
|
$self->{parse_tree} = \%parsed; |
657
|
|
|
|
|
|
|
|
658
|
6
|
|
66
|
|
|
45
|
my $failed = $parsed{parse_failed} && ! $parsed{allow_parse_fail}; |
659
|
6
|
100
|
|
|
|
18
|
$self->{parsed_ok} = $failed ? 0 : 1; |
660
|
6
|
|
|
|
|
31
|
return $self; |
661
|
|
|
|
|
|
|
} |
662
|
|
|
|
|
|
|
|
663
|
|
|
|
|
|
|
## |
664
|
|
|
|
|
|
|
|
665
|
|
|
|
|
|
|
=item * B |
666
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
if ($received->parsed_ok()) { |
668
|
|
|
|
|
|
|
... |
669
|
|
|
|
|
|
|
} |
670
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
Returns true if the parse succeed, or if it failed, but was permitted |
672
|
|
|
|
|
|
|
to fail for some reason, such as encountering evidence of a known |
673
|
|
|
|
|
|
|
broken (non-RFC822-compliant) format mid-parse. |
674
|
|
|
|
|
|
|
|
675
|
|
|
|
|
|
|
=cut |
676
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
sub parsed_ok { |
678
|
6
|
|
|
6
|
1
|
17
|
my $self = shift; |
679
|
6
|
50
|
|
|
|
19
|
croak "Header not parsed yet" unless $self->{parse_tree}; |
680
|
6
|
|
|
|
|
33
|
return $self->{parsed_ok}; |
681
|
|
|
|
|
|
|
} |
682
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
## |
684
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
=item * B |
686
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
my $parse_tree = $received->parse_tree(); |
688
|
|
|
|
|
|
|
|
689
|
|
|
|
|
|
|
Returns the actual parse tree, which is where you get all the useful |
690
|
|
|
|
|
|
|
information. It is returned as a hashref whose keys are strings like |
691
|
|
|
|
|
|
|
`from', `by', `with', `id', `via' etc., corresponding to the |
692
|
|
|
|
|
|
|
components of Received headers as defined by RFC822: |
693
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
received = "Received" ":" ; one per relay |
695
|
|
|
|
|
|
|
["from" domain] ; sending host |
696
|
|
|
|
|
|
|
["by" domain] ; receiving host |
697
|
|
|
|
|
|
|
["via" atom] ; physical path |
698
|
|
|
|
|
|
|
*("with" atom) ; link/mail protocol |
699
|
|
|
|
|
|
|
["id" msg-id] ; receiver msg id |
700
|
|
|
|
|
|
|
["for" addr-spec] ; initial form |
701
|
|
|
|
|
|
|
";" date-time ; time received |
702
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
The corresponding values are more hashrefs which are mini-parse-trees |
704
|
|
|
|
|
|
|
for these individual components. A typical parse tree looks something |
705
|
|
|
|
|
|
|
like: |
706
|
|
|
|
|
|
|
|
707
|
|
|
|
|
|
|
{ |
708
|
|
|
|
|
|
|
'by' => { |
709
|
|
|
|
|
|
|
'domain' => 'host5.hostingcheck.com', |
710
|
|
|
|
|
|
|
'whole' => 'by host5.hostingcheck.com', |
711
|
|
|
|
|
|
|
'comments' => [ |
712
|
|
|
|
|
|
|
'(8.9.3/8.9.3)' |
713
|
|
|
|
|
|
|
], |
714
|
|
|
|
|
|
|
}, |
715
|
|
|
|
|
|
|
'date_time' => { |
716
|
|
|
|
|
|
|
'year' => 2000, |
717
|
|
|
|
|
|
|
'week_day' => 'Tue', |
718
|
|
|
|
|
|
|
'minute' => 57, |
719
|
|
|
|
|
|
|
'day_of_year' => '1 Feb', |
720
|
|
|
|
|
|
|
'month_day' => ' 1', |
721
|
|
|
|
|
|
|
'zone' => '-0500', |
722
|
|
|
|
|
|
|
'second' => 18, |
723
|
|
|
|
|
|
|
'hms' => '21:57:18', |
724
|
|
|
|
|
|
|
'date_time' => 'Tue, 1 Feb 2000 21:57:18 -0500', |
725
|
|
|
|
|
|
|
'hour' => 21, |
726
|
|
|
|
|
|
|
'month' => 'Feb', |
727
|
|
|
|
|
|
|
'rest' => '2000 21:57:18 -0500', |
728
|
|
|
|
|
|
|
'whole' => 'Tue, 1 Feb 2000 21:57:18 -0500' |
729
|
|
|
|
|
|
|
}, |
730
|
|
|
|
|
|
|
'with' => { |
731
|
|
|
|
|
|
|
'with' => 'ESMTP', |
732
|
|
|
|
|
|
|
'whole' => 'with ESMTP' |
733
|
|
|
|
|
|
|
}, |
734
|
|
|
|
|
|
|
'from' => { |
735
|
|
|
|
|
|
|
'domain' => 'mediacons.tecc.co.uk', |
736
|
|
|
|
|
|
|
'HELO' => 'tr909.mediaconsult.com', |
737
|
|
|
|
|
|
|
'from' => 'tr909.mediaconsult.com', |
738
|
|
|
|
|
|
|
'address' => '193.128.6.132', |
739
|
|
|
|
|
|
|
'comments' => [ |
740
|
|
|
|
|
|
|
'(mediacons.tecc.co.uk [193.128.6.132])', |
741
|
|
|
|
|
|
|
], |
742
|
|
|
|
|
|
|
'whole' => 'from tr909.mediaconsult.com (mediacons.tecc.co.uk [193.128.6.132]) |
743
|
|
|
|
|
|
|
' |
744
|
|
|
|
|
|
|
}, |
745
|
|
|
|
|
|
|
'id' => { |
746
|
|
|
|
|
|
|
'id' => 'VAA24164', |
747
|
|
|
|
|
|
|
'whole' => 'id VAA24164' |
748
|
|
|
|
|
|
|
}, |
749
|
|
|
|
|
|
|
'comments' => [ |
750
|
|
|
|
|
|
|
'(mediacons.tecc.co.uk [193.128.6.132])', |
751
|
|
|
|
|
|
|
'(8.9.3/8.9.3)' |
752
|
|
|
|
|
|
|
], |
753
|
|
|
|
|
|
|
'for' => { |
754
|
|
|
|
|
|
|
'for' => '', |
755
|
|
|
|
|
|
|
'whole' => 'for ' |
756
|
|
|
|
|
|
|
}, |
757
|
|
|
|
|
|
|
'whole' => 'from tr909.mediaconsult.com (mediacons.tecc.co.uk [193.128.6.132]) by host5.hostingcheck.com (8.9.3/8.9.3) with ESMTP id VAA24164 for ; Tue, 1 Feb 2000 21:57:18 -0500' |
758
|
|
|
|
|
|
|
} |
759
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
=cut |
761
|
|
|
|
|
|
|
|
762
|
|
|
|
|
|
|
sub parse_tree { |
763
|
5
|
|
|
5
|
1
|
12
|
my $self = shift; |
764
|
5
|
50
|
|
|
|
23
|
croak "Header not parsed yet" unless $self->{parse_tree}; |
765
|
5
|
|
|
|
|
26
|
return $self->{parse_tree}; |
766
|
|
|
|
|
|
|
} |
767
|
|
|
|
|
|
|
|
768
|
|
|
|
|
|
|
=back |
769
|
|
|
|
|
|
|
|
770
|
|
|
|
|
|
|
=head1 BUGS |
771
|
|
|
|
|
|
|
|
772
|
|
|
|
|
|
|
Doesn't use Parse::RecDescent, which it maybe should. |
773
|
|
|
|
|
|
|
|
774
|
|
|
|
|
|
|
Doesn't offer a `strict RFC822' parsing mode. To implement that would |
775
|
|
|
|
|
|
|
be a royal pain in the arse, unless we move to Parse::RecDescent. |
776
|
|
|
|
|
|
|
|
777
|
|
|
|
|
|
|
=head1 SEE ALSO |
778
|
|
|
|
|
|
|
|
779
|
|
|
|
|
|
|
L, L |
780
|
|
|
|
|
|
|
|
781
|
|
|
|
|
|
|
=head1 AUTHOR |
782
|
|
|
|
|
|
|
|
783
|
|
|
|
|
|
|
Adam Spiers |
784
|
|
|
|
|
|
|
|
785
|
|
|
|
|
|
|
=head1 LICENSE |
786
|
|
|
|
|
|
|
|
787
|
|
|
|
|
|
|
All rights reserved. This program is free software; you can redistribute |
788
|
|
|
|
|
|
|
it and/or modify it under the same terms as Perl itself. |
789
|
|
|
|
|
|
|
|
790
|
|
|
|
|
|
|
=cut |