| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Sisimai::RFC5322; |
|
2
|
91
|
|
|
91
|
|
123347
|
use v5.26; |
|
|
91
|
|
|
|
|
340
|
|
|
3
|
91
|
|
|
91
|
|
590
|
use strict; |
|
|
91
|
|
|
|
|
154
|
|
|
|
91
|
|
|
|
|
3198
|
|
|
4
|
91
|
|
|
91
|
|
470
|
use warnings; |
|
|
91
|
|
|
|
|
151
|
|
|
|
91
|
|
|
|
|
6304
|
|
|
5
|
91
|
|
|
91
|
|
37862
|
use Sisimai::RFC791; |
|
|
91
|
|
|
|
|
291
|
|
|
|
91
|
|
|
|
|
3752
|
|
|
6
|
91
|
|
|
91
|
|
45036
|
use Sisimai::Address; |
|
|
91
|
|
|
|
|
273
|
|
|
|
91
|
|
|
|
|
7949
|
|
|
7
|
|
|
|
|
|
|
|
|
8
|
91
|
|
|
|
|
10312
|
use constant HEADERTABLE => { |
|
9
|
|
|
|
|
|
|
'messageid' => ['message-id'], |
|
10
|
|
|
|
|
|
|
'subject' => ['subject'], |
|
11
|
|
|
|
|
|
|
'listid' => ['list-id'], |
|
12
|
|
|
|
|
|
|
'date' => [qw|date posted-date posted resent-date|], |
|
13
|
|
|
|
|
|
|
'addresser' => [qw|from return-path reply-to errors-to reverse-path x-postfix-sender envelope-from x-envelope-from|], |
|
14
|
|
|
|
|
|
|
'recipient' => [qw|to delivered-to forward-path envelope-to x-envelope-to resent-to apparently-to|], |
|
15
|
91
|
|
|
91
|
|
827
|
}; |
|
|
91
|
|
|
|
|
172
|
|
|
16
|
91
|
|
|
|
|
5569
|
use constant FIELDINDEX => [ |
|
17
|
|
|
|
|
|
|
# The following fields are not referred in Sisimai |
|
18
|
|
|
|
|
|
|
# Resent-From Resent-Sender Resent-Cc Cc Bcc Resent-Bcc In-Reply-To References |
|
19
|
|
|
|
|
|
|
# Comments Keywords |
|
20
|
|
|
|
|
|
|
qw|Resent-Date From Sender Reply-To To Message-ID Subject Return-Path Received Date X-Mailer |
|
21
|
|
|
|
|
|
|
Content-Type Content-Transfer-Encoding Content-Description Content-Disposition| |
|
22
|
91
|
|
|
91
|
|
576
|
]; |
|
|
91
|
|
|
|
|
170
|
|
|
23
|
|
|
|
|
|
|
# The part of "Received:" headers generated by qmail or qmail-clone |
|
24
|
|
|
|
|
|
|
# - Received: (qmail 2202 invoked from network); 15 Oct 2015 06:22:22 -0000 |
|
25
|
|
|
|
|
|
|
# - Received: (qmail 2220 invoked by uid 2); 17 Jul 2014 08:30:40 -0000 |
|
26
|
|
|
|
|
|
|
# Do not exclude the following strings: |
|
27
|
|
|
|
|
|
|
# - Received: (qmail 2204 invoked for bounce); 29 Apr 2010 00:00:00 -000 |
|
28
|
91
|
|
|
91
|
|
582
|
use constant woReceived => [" invoked by uid", " invoked from network"]; |
|
|
91
|
|
|
|
|
726
|
|
|
|
91
|
|
|
|
|
120232
|
|
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
sub HEADERFIELDS { |
|
31
|
|
|
|
|
|
|
# Grouped RFC822 headers |
|
32
|
|
|
|
|
|
|
# @param [String] group RFC822 Header group name |
|
33
|
|
|
|
|
|
|
# @return [Array] RFC822 Header list |
|
34
|
2
|
|
|
2
|
0
|
238514
|
my $class = shift; |
|
35
|
2
|
|
50
|
|
|
11
|
my $group = shift || return []; |
|
36
|
2
|
100
|
|
|
|
11
|
return HEADERTABLE->{ $group } if exists HEADERTABLE->{ $group }; |
|
37
|
1
|
|
|
|
|
3
|
return []; |
|
38
|
|
|
|
|
|
|
} |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
sub received { |
|
41
|
|
|
|
|
|
|
# Convert Received headers to a structured data |
|
42
|
|
|
|
|
|
|
# @param [String] argv1 Received header |
|
43
|
|
|
|
|
|
|
# @return [Array] Each item in the Received header order by the following: |
|
44
|
|
|
|
|
|
|
# 0: (from) "hostname" |
|
45
|
|
|
|
|
|
|
# 1: (by) "hostname" |
|
46
|
|
|
|
|
|
|
# 2: (via) "protocol/tcp" |
|
47
|
|
|
|
|
|
|
# 3: (with) "protocol/smtp" |
|
48
|
|
|
|
|
|
|
# 4: (id) "queue-id" |
|
49
|
|
|
|
|
|
|
# 5: (for) "envelope-to address" |
|
50
|
4455
|
|
|
4455
|
1
|
132955
|
my $class = shift; |
|
51
|
4455
|
100
|
100
|
|
|
15094
|
my $argv1 = shift || return []; return [] if ref $argv1; |
|
|
4449
|
|
|
|
|
11816
|
|
|
52
|
4448
|
100
|
100
|
|
|
26577
|
return [] if index($argv1, woReceived->[0]) > 0 || index($argv1, woReceived->[1]) > 0; |
|
53
|
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
# - https://datatracker.ietf.org/doc/html/rfc5322 |
|
55
|
|
|
|
|
|
|
# received = "Received:" *received-token ";" date-time CRLF |
|
56
|
|
|
|
|
|
|
# received-token = word / angle-addr / addr-spec / domain |
|
57
|
|
|
|
|
|
|
# |
|
58
|
|
|
|
|
|
|
# - Appendix A.4. Message with Trace Fields |
|
59
|
|
|
|
|
|
|
# Received: |
|
60
|
|
|
|
|
|
|
# from x.y.test |
|
61
|
|
|
|
|
|
|
# by example.net |
|
62
|
|
|
|
|
|
|
# via TCP |
|
63
|
|
|
|
|
|
|
# with ESMTP |
|
64
|
|
|
|
|
|
|
# id ABC12345 |
|
65
|
|
|
|
|
|
|
# for ; 21 Nov 1997 10:05:43 -0600 |
|
66
|
4446
|
|
|
|
|
51073
|
my $recvd = [split(' ', $argv1)]; |
|
67
|
4446
|
|
|
|
|
26307
|
my $label = [qw|from by via with id for|]; |
|
68
|
4446
|
|
|
|
|
8663
|
my $token = {}; |
|
69
|
4446
|
|
|
|
|
6365
|
my $other = []; |
|
70
|
4446
|
|
|
|
|
5897
|
my $alter = []; |
|
71
|
4446
|
|
|
|
|
6569
|
my $right = 0; |
|
72
|
4446
|
|
|
|
|
8195
|
my $range = scalar @$recvd; |
|
73
|
4446
|
|
|
|
|
6634
|
my $index = -1; |
|
74
|
|
|
|
|
|
|
|
|
75
|
4446
|
|
|
|
|
8346
|
for my $e ( @$recvd ) { |
|
76
|
|
|
|
|
|
|
# Look up each label defined in $label from Received header |
|
77
|
78658
|
50
|
|
|
|
126877
|
last unless ++$index < $range; my $f = lc $e; |
|
|
78658
|
|
|
|
|
101794
|
|
|
78
|
78658
|
100
|
|
|
|
99007
|
next unless grep { $f eq $_ } @$label; |
|
|
471948
|
|
|
|
|
709944
|
|
|
79
|
|
|
|
|
|
|
|
|
80
|
18397
|
|
100
|
|
|
61563
|
$token->{ $f } = $recvd->[$index + 1] || next; |
|
81
|
18386
|
|
|
|
|
47640
|
$token->{ $f } = lc $token->{ $f }; |
|
82
|
18386
|
|
|
|
|
33015
|
$token->{ $f } =~ y/();//d; |
|
83
|
|
|
|
|
|
|
|
|
84
|
18386
|
100
|
|
|
|
39778
|
next unless $f eq 'from'; |
|
85
|
3718
|
100
|
|
|
|
9157
|
last unless $index + 2 < $range; |
|
86
|
3713
|
100
|
|
|
|
14970
|
next unless index($recvd->[$index + 2], '(') == 0; |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
# Get and keep a hostname in the comment as follows: |
|
89
|
|
|
|
|
|
|
# from mx1.example.com (c213502.kyoto.example.ne.jp [192.0.2.135]) by mx.example.jp (V8/cf) |
|
90
|
|
|
|
|
|
|
# [ |
|
91
|
|
|
|
|
|
|
# "from", # index + 0 |
|
92
|
|
|
|
|
|
|
# "mx1.example.com", # index + 1 |
|
93
|
|
|
|
|
|
|
# "(c213502.kyoto.example.ne.jp", # index + 2 |
|
94
|
|
|
|
|
|
|
# "[192.0.2.135])", # index + 3 |
|
95
|
|
|
|
|
|
|
# "by", |
|
96
|
|
|
|
|
|
|
# "mx.example.jp", |
|
97
|
|
|
|
|
|
|
# "(V8/cf)", |
|
98
|
|
|
|
|
|
|
# ... |
|
99
|
|
|
|
|
|
|
# ] |
|
100
|
|
|
|
|
|
|
# The 2nd element after the current element is NOT a continuation of the current element |
|
101
|
|
|
|
|
|
|
# such as "(c213502.kyoto.example.ne.jp)" |
|
102
|
3126
|
|
|
|
|
9328
|
push @$other, $recvd->[$index + 2]; $other->[0] =~ y/();//d; |
|
|
3126
|
|
|
|
|
7343
|
|
|
103
|
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
# The 2nd element after the current element is a continuation of the current element. |
|
105
|
|
|
|
|
|
|
# such as "(c213502.kyoto.example.ne.jp", "[192.0.2.135])" |
|
106
|
3126
|
50
|
|
|
|
7722
|
last unless $index + 3 < $range; |
|
107
|
3126
|
|
|
|
|
6707
|
push @$other, $recvd->[$index + 3]; |
|
108
|
3126
|
|
|
|
|
9272
|
$other->[1] =~ y/();//d; |
|
109
|
|
|
|
|
|
|
} |
|
110
|
|
|
|
|
|
|
|
|
111
|
4446
|
|
|
|
|
8916
|
for my $e ( @$other ) { |
|
112
|
|
|
|
|
|
|
# Check alternatives in $other, and then delete uninformative values. |
|
113
|
6252
|
100
|
100
|
|
|
24233
|
next if length $e < 4 || $e eq 'unknown'; |
|
114
|
5289
|
100
|
100
|
|
|
28786
|
next if $e eq 'localhost' || $e eq '[127.0.0.1]' || $e eq '[IPv6:::1]'; |
|
|
|
|
100
|
|
|
|
|
|
115
|
4447
|
100
|
100
|
|
|
19838
|
next if index($e, '.') == -1 || index($e, '=') > 1; |
|
116
|
3869
|
|
|
|
|
9644
|
push @$alter, $e; |
|
117
|
|
|
|
|
|
|
} |
|
118
|
|
|
|
|
|
|
|
|
119
|
4446
|
|
|
|
|
8437
|
for my $e ('from', 'by') { |
|
120
|
|
|
|
|
|
|
# Remove square brackets from the IP address such as "[192.0.2.25]" |
|
121
|
8892
|
100
|
|
|
|
22422
|
next unless defined $token->{ $e }; |
|
122
|
8018
|
50
|
|
|
|
16175
|
next unless length $token->{ $e }; |
|
123
|
8018
|
100
|
|
|
|
20803
|
next unless index($token->{ $e }, '[') == 0; |
|
124
|
230
|
|
100
|
|
|
1713
|
$token->{ $e } = shift Sisimai::RFC791->find($token->{ $e })->@* || ''; |
|
125
|
|
|
|
|
|
|
} |
|
126
|
|
|
|
|
|
|
|
|
127
|
4446
|
|
100
|
|
|
14563
|
$token->{'from'} ||= ''; |
|
128
|
4446
|
|
|
|
|
5969
|
while(1) { |
|
129
|
|
|
|
|
|
|
# Prefer hostnames over IP addresses, except for localhost.localdomain and similar. |
|
130
|
4446
|
100
|
100
|
|
|
23413
|
last if $token->{'from'} eq 'localhost' || $token->{'from'} eq 'localhost.localdomain'; |
|
131
|
4064
|
100
|
|
|
|
13575
|
last if index($token->{'from'}, '.') < 0; # A hostname without a domain name |
|
132
|
2883
|
100
|
|
|
|
18349
|
last if scalar Sisimai::RFC791->find($token->{'from'})->@*; |
|
133
|
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
# No need to rewrite $token->{'from'} |
|
135
|
2473
|
|
|
|
|
4454
|
$right = 1; |
|
136
|
2473
|
|
|
|
|
8660
|
last; |
|
137
|
|
|
|
|
|
|
} |
|
138
|
4446
|
|
|
|
|
6124
|
while(1) { |
|
139
|
|
|
|
|
|
|
# Try to rewrite uninformative hostnames and IP addresses in $token->{'from'} |
|
140
|
4446
|
100
|
|
|
|
9096
|
last if $right; # There is no need to rewrite |
|
141
|
1973
|
100
|
|
|
|
5014
|
last if scalar @$alter == 0; # There is no alternative to rewriting |
|
142
|
579
|
100
|
|
|
|
2612
|
last if index($alter->[0], $token->{'from'}) > -1; |
|
143
|
|
|
|
|
|
|
|
|
144
|
358
|
100
|
|
|
|
2005
|
if( index($token->{'from'}, 'localhost') == 0 ) { |
|
|
|
100
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
# localhost or localhost.localdomain |
|
146
|
25
|
|
|
|
|
65
|
$token->{'from'} = $alter->[0]; |
|
147
|
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
} elsif( index($token->{'from'}, '.') == -1 ) { |
|
149
|
|
|
|
|
|
|
# A hostname without a domain name such as "mail", "mx", or "mbox" |
|
150
|
80
|
50
|
|
|
|
430
|
$token->{'from'} = $alter->[0] if index($alter->[0], '.') > 0; |
|
151
|
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
} else { |
|
153
|
|
|
|
|
|
|
# An IPv4 address |
|
154
|
253
|
|
|
|
|
687
|
$token->{'from'} = $alter->[0]; |
|
155
|
|
|
|
|
|
|
} |
|
156
|
358
|
|
|
|
|
553
|
last; |
|
157
|
|
|
|
|
|
|
} |
|
158
|
|
|
|
|
|
|
|
|
159
|
4446
|
100
|
|
|
|
15825
|
delete $token->{'by'} unless defined $token->{'by'}; |
|
160
|
4446
|
50
|
|
|
|
12243
|
delete $token->{'from'} unless defined $token->{'from'}; |
|
161
|
4446
|
100
|
|
|
|
19584
|
$token->{'for'} = Sisimai::Address->s3s4($token->{'for'}) if exists $token->{'for'}; |
|
162
|
4446
|
|
|
|
|
25688
|
for my $e ( keys %$token ) { |
|
163
|
|
|
|
|
|
|
# Delete an invalid value |
|
164
|
19044
|
50
|
|
|
|
42595
|
$token->{ $e } = '' if index($token->{ $e }, ' ') > -1; |
|
165
|
19044
|
|
|
|
|
32713
|
$token->{ $e } =~ y/[]//d; # Remove "[]" from the IP address |
|
166
|
|
|
|
|
|
|
} |
|
167
|
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
return [ |
|
169
|
|
|
|
|
|
|
$token->{'from'} || '', |
|
170
|
|
|
|
|
|
|
$token->{'by'} || '', |
|
171
|
|
|
|
|
|
|
$token->{'via'} || '', |
|
172
|
|
|
|
|
|
|
$token->{'with'} || '', |
|
173
|
|
|
|
|
|
|
$token->{'id'} || '', |
|
174
|
4446
|
|
100
|
|
|
84179
|
$token->{'for'} || '', |
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
175
|
|
|
|
|
|
|
]; |
|
176
|
|
|
|
|
|
|
} |
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
sub part { |
|
179
|
|
|
|
|
|
|
# Split given entire message body into error message lines and the original message part only |
|
180
|
|
|
|
|
|
|
# include email headers |
|
181
|
|
|
|
|
|
|
# @param [String] email Entire message body |
|
182
|
|
|
|
|
|
|
# @param [Array] cutby List of strings which is a boundary of the original message part |
|
183
|
|
|
|
|
|
|
# @param [Bool] keeps Flag for keeping strings after "\n\n" |
|
184
|
|
|
|
|
|
|
# @return [Array] [Error message lines, The original message] |
|
185
|
|
|
|
|
|
|
# @since v5.0.0 |
|
186
|
4458
|
|
|
4458
|
1
|
15888
|
my $class = shift; |
|
187
|
4458
|
|
100
|
|
|
12075
|
my $email = shift || return undef; |
|
188
|
4457
|
|
100
|
|
|
12582
|
my $cutby = shift || return undef; |
|
189
|
4456
|
|
100
|
|
|
17394
|
my $keeps = shift // 0; |
|
190
|
|
|
|
|
|
|
|
|
191
|
4456
|
|
|
|
|
8577
|
my $boundaryor = ''; # A boundary string divides the error message part and the original message part |
|
192
|
4456
|
|
|
|
|
7113
|
my $positionor = -1; # A Position of the boundary string |
|
193
|
4456
|
|
|
|
|
7259
|
my $formerpart = ''; # The error message part |
|
194
|
4456
|
|
|
|
|
8562
|
my $latterpart = ''; # The original message part |
|
195
|
|
|
|
|
|
|
|
|
196
|
4456
|
|
|
|
|
11183
|
for my $e ( @$cutby ) { |
|
197
|
|
|
|
|
|
|
# Find a boundary string(2nd argument) from the 1st argument |
|
198
|
6376
|
100
|
|
|
|
15576
|
$positionor = index($$email, $e); next if $positionor == -1; |
|
|
6376
|
|
|
|
|
15049
|
|
|
199
|
3437
|
|
|
|
|
6017
|
$boundaryor = $e; |
|
200
|
3437
|
|
|
|
|
6176
|
last; |
|
201
|
|
|
|
|
|
|
} |
|
202
|
|
|
|
|
|
|
|
|
203
|
4456
|
100
|
|
|
|
12468
|
if( $positionor > 0 ) { |
|
204
|
|
|
|
|
|
|
# There is the boundary string in the message body |
|
205
|
3392
|
|
|
|
|
13123
|
$formerpart = substr($$email, 0, $positionor); |
|
206
|
3392
|
|
50
|
|
|
17493
|
$latterpart = substr($$email, ($positionor + length($boundaryor) + 1), ) || ''; |
|
207
|
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
} else { |
|
209
|
|
|
|
|
|
|
# Substitute the entire message to the former part when the boundary string is not included |
|
210
|
|
|
|
|
|
|
# the $$email |
|
211
|
1064
|
|
|
|
|
3596
|
$formerpart = $$email; |
|
212
|
|
|
|
|
|
|
} |
|
213
|
|
|
|
|
|
|
|
|
214
|
4456
|
100
|
|
|
|
13175
|
if( length $latterpart > 0 ) { |
|
215
|
|
|
|
|
|
|
# Remove blank lines, the message body of the original message, and append "\n" at the end |
|
216
|
|
|
|
|
|
|
# of the original message headers |
|
217
|
|
|
|
|
|
|
# 1. Remove leading blank lines |
|
218
|
|
|
|
|
|
|
# 2. Remove text after the first blank line: \n\n |
|
219
|
|
|
|
|
|
|
# 3. Append "\n" at the end of test block when the last character is not "\n" |
|
220
|
3392
|
|
|
|
|
23717
|
$latterpart =~ s/\A[\r\n\s]+//m; |
|
221
|
3392
|
100
|
|
|
|
10198
|
if( $keeps == 0 ) { |
|
222
|
|
|
|
|
|
|
# Remove text after the first blank line: \n\n when $keeps is 0 |
|
223
|
3374
|
50
|
|
|
|
19704
|
substr($latterpart, index($latterpart, "\n\n") + 1, length($latterpart), '') if index($latterpart, "\n\n"); |
|
224
|
|
|
|
|
|
|
} |
|
225
|
3392
|
100
|
|
|
|
10323
|
$latterpart .= "\n" unless substr($latterpart, -1, 1) eq "\n"; |
|
226
|
|
|
|
|
|
|
} |
|
227
|
4456
|
|
|
|
|
19912
|
return [$formerpart, $latterpart]; |
|
228
|
|
|
|
|
|
|
} |
|
229
|
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
1; |
|
231
|
|
|
|
|
|
|
__END__ |