File Coverage

lib/Sisimai/RFC5322.pm
Criterion Covered Total %
statement 111 111 100.0
branch 61 68 89.7
condition 46 48 95.8
subroutine 11 11 100.0
pod 2 3 66.6
total 231 241 95.8


line stmt bran cond sub pod time code
1             package Sisimai::RFC5322;
2 91     91   123347 use v5.26;
  91         340  
3 91     91   590 use strict;
  91         154  
  91         3198  
4 91     91   470 use warnings;
  91         151  
  91         6304  
5 91     91   37862 use Sisimai::RFC791;
  91         291  
  91         3752  
6 91     91   45036 use Sisimai::Address;
  91         273  
  91         7949  
7              
8 91         10312 use constant HEADERTABLE => {
9             'messageid' => ['message-id'],
10             'subject' => ['subject'],
11             'listid' => ['list-id'],
12             'date' => [qw|date posted-date posted resent-date|],
13             'addresser' => [qw|from return-path reply-to errors-to reverse-path x-postfix-sender envelope-from x-envelope-from|],
14             'recipient' => [qw|to delivered-to forward-path envelope-to x-envelope-to resent-to apparently-to|],
15 91     91   827 };
  91         172  
16 91         5569 use constant FIELDINDEX => [
17             # The following fields are not referred in Sisimai
18             # Resent-From Resent-Sender Resent-Cc Cc Bcc Resent-Bcc In-Reply-To References
19             # Comments Keywords
20             qw|Resent-Date From Sender Reply-To To Message-ID Subject Return-Path Received Date X-Mailer
21             Content-Type Content-Transfer-Encoding Content-Description Content-Disposition|
22 91     91   576 ];
  91         170  
23             # The part of "Received:" headers generated by qmail or qmail-clone
24             # - Received: (qmail 2202 invoked from network); 15 Oct 2015 06:22:22 -0000
25             # - Received: (qmail 2220 invoked by uid 2); 17 Jul 2014 08:30:40 -0000
26             # Do not exclude the following strings:
27             # - Received: (qmail 2204 invoked for bounce); 29 Apr 2010 00:00:00 -000
28 91     91   582 use constant woReceived => [" invoked by uid", " invoked from network"];
  91         726  
  91         120232  
29              
30             sub HEADERFIELDS {
31             # Grouped RFC822 headers
32             # @param [String] group RFC822 Header group name
33             # @return [Array] RFC822 Header list
34 2     2 0 238514 my $class = shift;
35 2   50     11 my $group = shift || return [];
36 2 100       11 return HEADERTABLE->{ $group } if exists HEADERTABLE->{ $group };
37 1         3 return [];
38             }
39              
40             sub received {
41             # Convert Received headers to a structured data
42             # @param [String] argv1 Received header
43             # @return [Array] Each item in the Received header order by the following:
44             # 0: (from) "hostname"
45             # 1: (by) "hostname"
46             # 2: (via) "protocol/tcp"
47             # 3: (with) "protocol/smtp"
48             # 4: (id) "queue-id"
49             # 5: (for) "envelope-to address"
50 4455     4455 1 132955 my $class = shift;
51 4455 100 100     15094 my $argv1 = shift || return []; return [] if ref $argv1;
  4449         11816  
52 4448 100 100     26577 return [] if index($argv1, woReceived->[0]) > 0 || index($argv1, woReceived->[1]) > 0;
53              
54             # - https://datatracker.ietf.org/doc/html/rfc5322
55             # received = "Received:" *received-token ";" date-time CRLF
56             # received-token = word / angle-addr / addr-spec / domain
57             #
58             # - Appendix A.4. Message with Trace Fields
59             # Received:
60             # from x.y.test
61             # by example.net
62             # via TCP
63             # with ESMTP
64             # id ABC12345
65             # for ; 21 Nov 1997 10:05:43 -0600
66 4446         51073 my $recvd = [split(' ', $argv1)];
67 4446         26307 my $label = [qw|from by via with id for|];
68 4446         8663 my $token = {};
69 4446         6365 my $other = [];
70 4446         5897 my $alter = [];
71 4446         6569 my $right = 0;
72 4446         8195 my $range = scalar @$recvd;
73 4446         6634 my $index = -1;
74              
75 4446         8346 for my $e ( @$recvd ) {
76             # Look up each label defined in $label from Received header
77 78658 50       126877 last unless ++$index < $range; my $f = lc $e;
  78658         101794  
78 78658 100       99007 next unless grep { $f eq $_ } @$label;
  471948         709944  
79              
80 18397   100     61563 $token->{ $f } = $recvd->[$index + 1] || next;
81 18386         47640 $token->{ $f } = lc $token->{ $f };
82 18386         33015 $token->{ $f } =~ y/();//d;
83              
84 18386 100       39778 next unless $f eq 'from';
85 3718 100       9157 last unless $index + 2 < $range;
86 3713 100       14970 next unless index($recvd->[$index + 2], '(') == 0;
87              
88             # Get and keep a hostname in the comment as follows:
89             # from mx1.example.com (c213502.kyoto.example.ne.jp [192.0.2.135]) by mx.example.jp (V8/cf)
90             # [
91             # "from", # index + 0
92             # "mx1.example.com", # index + 1
93             # "(c213502.kyoto.example.ne.jp", # index + 2
94             # "[192.0.2.135])", # index + 3
95             # "by",
96             # "mx.example.jp",
97             # "(V8/cf)",
98             # ...
99             # ]
100             # The 2nd element after the current element is NOT a continuation of the current element
101             # such as "(c213502.kyoto.example.ne.jp)"
102 3126         9328 push @$other, $recvd->[$index + 2]; $other->[0] =~ y/();//d;
  3126         7343  
103              
104             # The 2nd element after the current element is a continuation of the current element.
105             # such as "(c213502.kyoto.example.ne.jp", "[192.0.2.135])"
106 3126 50       7722 last unless $index + 3 < $range;
107 3126         6707 push @$other, $recvd->[$index + 3];
108 3126         9272 $other->[1] =~ y/();//d;
109             }
110              
111 4446         8916 for my $e ( @$other ) {
112             # Check alternatives in $other, and then delete uninformative values.
113 6252 100 100     24233 next if length $e < 4 || $e eq 'unknown';
114 5289 100 100     28786 next if $e eq 'localhost' || $e eq '[127.0.0.1]' || $e eq '[IPv6:::1]';
      100        
115 4447 100 100     19838 next if index($e, '.') == -1 || index($e, '=') > 1;
116 3869         9644 push @$alter, $e;
117             }
118              
119 4446         8437 for my $e ('from', 'by') {
120             # Remove square brackets from the IP address such as "[192.0.2.25]"
121 8892 100       22422 next unless defined $token->{ $e };
122 8018 50       16175 next unless length $token->{ $e };
123 8018 100       20803 next unless index($token->{ $e }, '[') == 0;
124 230   100     1713 $token->{ $e } = shift Sisimai::RFC791->find($token->{ $e })->@* || '';
125             }
126              
127 4446   100     14563 $token->{'from'} ||= '';
128 4446         5969 while(1) {
129             # Prefer hostnames over IP addresses, except for localhost.localdomain and similar.
130 4446 100 100     23413 last if $token->{'from'} eq 'localhost' || $token->{'from'} eq 'localhost.localdomain';
131 4064 100       13575 last if index($token->{'from'}, '.') < 0; # A hostname without a domain name
132 2883 100       18349 last if scalar Sisimai::RFC791->find($token->{'from'})->@*;
133              
134             # No need to rewrite $token->{'from'}
135 2473         4454 $right = 1;
136 2473         8660 last;
137             }
138 4446         6124 while(1) {
139             # Try to rewrite uninformative hostnames and IP addresses in $token->{'from'}
140 4446 100       9096 last if $right; # There is no need to rewrite
141 1973 100       5014 last if scalar @$alter == 0; # There is no alternative to rewriting
142 579 100       2612 last if index($alter->[0], $token->{'from'}) > -1;
143              
144 358 100       2005 if( index($token->{'from'}, 'localhost') == 0 ) {
    100          
145             # localhost or localhost.localdomain
146 25         65 $token->{'from'} = $alter->[0];
147              
148             } elsif( index($token->{'from'}, '.') == -1 ) {
149             # A hostname without a domain name such as "mail", "mx", or "mbox"
150 80 50       430 $token->{'from'} = $alter->[0] if index($alter->[0], '.') > 0;
151              
152             } else {
153             # An IPv4 address
154 253         687 $token->{'from'} = $alter->[0];
155             }
156 358         553 last;
157             }
158              
159 4446 100       15825 delete $token->{'by'} unless defined $token->{'by'};
160 4446 50       12243 delete $token->{'from'} unless defined $token->{'from'};
161 4446 100       19584 $token->{'for'} = Sisimai::Address->s3s4($token->{'for'}) if exists $token->{'for'};
162 4446         25688 for my $e ( keys %$token ) {
163             # Delete an invalid value
164 19044 50       42595 $token->{ $e } = '' if index($token->{ $e }, ' ') > -1;
165 19044         32713 $token->{ $e } =~ y/[]//d; # Remove "[]" from the IP address
166             }
167              
168             return [
169             $token->{'from'} || '',
170             $token->{'by'} || '',
171             $token->{'via'} || '',
172             $token->{'with'} || '',
173             $token->{'id'} || '',
174 4446   100     84179 $token->{'for'} || '',
      100        
      100        
      100        
      100        
      100        
175             ];
176             }
177              
178             sub part {
179             # Split given entire message body into error message lines and the original message part only
180             # include email headers
181             # @param [String] email Entire message body
182             # @param [Array] cutby List of strings which is a boundary of the original message part
183             # @param [Bool] keeps Flag for keeping strings after "\n\n"
184             # @return [Array] [Error message lines, The original message]
185             # @since v5.0.0
186 4458     4458 1 15888 my $class = shift;
187 4458   100     12075 my $email = shift || return undef;
188 4457   100     12582 my $cutby = shift || return undef;
189 4456   100     17394 my $keeps = shift // 0;
190              
191 4456         8577 my $boundaryor = ''; # A boundary string divides the error message part and the original message part
192 4456         7113 my $positionor = -1; # A Position of the boundary string
193 4456         7259 my $formerpart = ''; # The error message part
194 4456         8562 my $latterpart = ''; # The original message part
195              
196 4456         11183 for my $e ( @$cutby ) {
197             # Find a boundary string(2nd argument) from the 1st argument
198 6376 100       15576 $positionor = index($$email, $e); next if $positionor == -1;
  6376         15049  
199 3437         6017 $boundaryor = $e;
200 3437         6176 last;
201             }
202              
203 4456 100       12468 if( $positionor > 0 ) {
204             # There is the boundary string in the message body
205 3392         13123 $formerpart = substr($$email, 0, $positionor);
206 3392   50     17493 $latterpart = substr($$email, ($positionor + length($boundaryor) + 1), ) || '';
207              
208             } else {
209             # Substitute the entire message to the former part when the boundary string is not included
210             # the $$email
211 1064         3596 $formerpart = $$email;
212             }
213              
214 4456 100       13175 if( length $latterpart > 0 ) {
215             # Remove blank lines, the message body of the original message, and append "\n" at the end
216             # of the original message headers
217             # 1. Remove leading blank lines
218             # 2. Remove text after the first blank line: \n\n
219             # 3. Append "\n" at the end of test block when the last character is not "\n"
220 3392         23717 $latterpart =~ s/\A[\r\n\s]+//m;
221 3392 100       10198 if( $keeps == 0 ) {
222             # Remove text after the first blank line: \n\n when $keeps is 0
223 3374 50       19704 substr($latterpart, index($latterpart, "\n\n") + 1, length($latterpart), '') if index($latterpart, "\n\n");
224             }
225 3392 100       10323 $latterpart .= "\n" unless substr($latterpart, -1, 1) eq "\n";
226             }
227 4456         19912 return [$formerpart, $latterpart];
228             }
229              
230             1;
231             __END__