File Coverage

lib/Sisimai/RFC5322.pm
Criterion Covered Total %
statement 111 111 100.0
branch 61 68 89.7
condition 46 48 95.8
subroutine 11 11 100.0
pod 2 3 66.6
total 231 241 95.8


line stmt bran cond sub pod time code
1             package Sisimai::RFC5322;
2 93     93   124429 use v5.26;
  93         253  
3 93     93   355 use strict;
  93         111  
  93         2200  
4 93     93   323 use warnings;
  93         119  
  93         3641  
5 93     93   28160 use Sisimai::RFC791;
  93         201  
  93         2846  
6 93     93   35655 use Sisimai::Address;
  93         196  
  93         5855  
7              
8 93         7151 use constant HEADERTABLE => {
9             'messageid' => ['message-id'],
10             'subject' => ['subject'],
11             'listid' => ['list-id'],
12             'date' => [qw|date posted-date posted resent-date|],
13             'addresser' => [qw|from return-path reply-to errors-to reverse-path x-postfix-sender envelope-from x-envelope-from|],
14             'recipient' => [qw|to delivered-to forward-path envelope-to x-envelope-to resent-to apparently-to|],
15 93     93   524 };
  93         125  
16 93         3968 use constant FIELDINDEX => [
17             # The following fields are not referred in Sisimai
18             # Resent-From Resent-Sender Resent-Cc Cc Bcc Resent-Bcc In-Reply-To References
19             # Comments Keywords
20             qw|Resent-Date From Sender Reply-To To Message-ID Subject Return-Path Received Date X-Mailer
21             Content-Type Content-Transfer-Encoding Content-Description Content-Disposition|
22 93     93   413 ];
  93         118  
23             # The part of "Received:" headers generated by qmail or qmail-clone
24             # - Received: (qmail 2202 invoked from network); 15 Oct 2015 06:22:22 -0000
25             # - Received: (qmail 2220 invoked by uid 2); 17 Jul 2014 08:30:40 -0000
26             # Do not exclude the following strings:
27             # - Received: (qmail 2204 invoked for bounce); 29 Apr 2010 00:00:00 -000
28 93     93   313 use constant woReceived => [" invoked by uid", " invoked from network"];
  93         564  
  93         85484  
29              
30             sub HEADERFIELDS {
31             # Grouped RFC822 headers
32             # @param [String] group RFC822 Header group name
33             # @return [Array] RFC822 Header list
34 2     2 0 199140 my $class = shift;
35 2   50     5 my $group = shift || return [];
36 2 100       9 return HEADERTABLE->{ $group } if exists HEADERTABLE->{ $group };
37 1         2 return [];
38             }
39              
40             sub received {
41             # Convert Received headers to a structured data
42             # @param [String] argv1 Received header
43             # @return [Array] Each item in the Received header order by the following:
44             # 0: (from) "hostname"
45             # 1: (by) "hostname"
46             # 2: (via) "protocol/tcp"
47             # 3: (with) "protocol/smtp"
48             # 4: (id) "queue-id"
49             # 5: (for) "envelope-to address"
50 4525     4525 1 104233 my $class = shift;
51 4525 100 100     7133 my $argv1 = shift || return []; return [] if ref $argv1;
  4519         7768  
52 4518 100 100     19376 return [] if index($argv1, woReceived->[0]) > 0 || index($argv1, woReceived->[1]) > 0;
53              
54             # - https://datatracker.ietf.org/doc/html/rfc5322
55             # received = "Received:" *received-token ";" date-time CRLF
56             # received-token = word / angle-addr / addr-spec / domain
57             #
58             # - Appendix A.4. Message with Trace Fields
59             # Received:
60             # from x.y.test
61             # by example.net
62             # via TCP
63             # with ESMTP
64             # id ABC12345
65             # for ; 21 Nov 1997 10:05:43 -0600
66 4516         32600 my $recvd = [split(' ', $argv1)];
67 4516         13635 my $label = [qw|from by via with id for|];
68 4516         5129 my $token = {};
69 4516         3982 my $other = [];
70 4516         3801 my $alter = [];
71 4516         4232 my $right = 0;
72 4516         4749 my $range = scalar @$recvd;
73 4516         4137 my $index = -1;
74              
75 4516         5802 for my $e ( @$recvd ) {
76             # Look up each label defined in $label from Received header
77 80068 50       83474 last unless ++$index < $range; my $f = lc $e;
  80068         67952  
78 80068 100       66390 next unless grep { $f eq $_ } @$label;
  480408         453062  
79              
80 18747   100     37006 $token->{ $f } = $recvd->[$index + 1] || next;
81 18736         24737 $token->{ $f } = lc $token->{ $f };
82 18736         20327 $token->{ $f } =~ y/();//d;
83              
84 18736 100       24434 next unless $f eq 'from';
85 3788 100       5649 last unless $index + 2 < $range;
86 3783 100       7284 next unless index($recvd->[$index + 2], '(') == 0;
87              
88             # Get and keep a hostname in the comment as follows:
89             # from mx1.example.com (c213502.kyoto.example.ne.jp [192.0.2.135]) by mx.example.jp (V8/cf)
90             # [
91             # "from", # index + 0
92             # "mx1.example.com", # index + 1
93             # "(c213502.kyoto.example.ne.jp", # index + 2
94             # "[192.0.2.135])", # index + 3
95             # "by",
96             # "mx.example.jp",
97             # "(V8/cf)",
98             # ...
99             # ]
100             # The 2nd element after the current element is NOT a continuation of the current element
101             # such as "(c213502.kyoto.example.ne.jp)"
102 3196         6587 push @$other, $recvd->[$index + 2]; $other->[0] =~ y/();//d;
  3196         5287  
103              
104             # The 2nd element after the current element is a continuation of the current element.
105             # such as "(c213502.kyoto.example.ne.jp", "[192.0.2.135])"
106 3196 50       4693 last unless $index + 3 < $range;
107 3196         3984 push @$other, $recvd->[$index + 3];
108 3196         5542 $other->[1] =~ y/();//d;
109             }
110              
111 4516         5744 for my $e ( @$other ) {
112             # Check alternatives in $other, and then delete uninformative values.
113 6392 100 100     14598 next if length $e < 4 || $e eq 'unknown';
114 5409 100 100     18179 next if $e eq 'localhost' || $e eq '[127.0.0.1]' || $e eq '[IPv6:::1]';
      100        
115 4547 100 100     12044 next if index($e, '.') == -1 || index($e, '=') > 1;
116 3969         6055 push @$alter, $e;
117             }
118              
119 4516         5458 for my $e ('from', 'by') {
120             # Remove square brackets from the IP address such as "[192.0.2.25]"
121 9032 100       14316 next unless defined $token->{ $e };
122 8158 50       11325 next unless length $token->{ $e };
123 8158 100       13194 next unless index($token->{ $e }, '[') == 0;
124 235   100     1026 $token->{ $e } = shift Sisimai::RFC791->find($token->{ $e })->@* || '';
125             }
126              
127 4516   100     10975 $token->{'from'} ||= '';
128 4516         5497 while(1) {
129             # Prefer hostnames over IP addresses, except for localhost.localdomain and similar.
130 4516 100 100     13565 last if $token->{'from'} eq 'localhost' || $token->{'from'} eq 'localhost.localdomain';
131 4129 100       7139 last if index($token->{'from'}, '.') < 0; # A hostname without a domain name
132 2948 100       10623 last if scalar Sisimai::RFC791->find($token->{'from'})->@*;
133              
134             # No need to rewrite $token->{'from'}
135 2533         3494 $right = 1;
136 2533         3697 last;
137             }
138 4516         4866 while(1) {
139             # Try to rewrite uninformative hostnames and IP addresses in $token->{'from'}
140 4516 100       6376 last if $right; # There is no need to rewrite
141 1983 100       3566 last if scalar @$alter == 0; # There is no alternative to rewriting
142 584 100       1571 last if index($alter->[0], $token->{'from'}) > -1;
143              
144 358 100       1292 if( index($token->{'from'}, 'localhost') == 0 ) {
    100          
145             # localhost or localhost.localdomain
146 25         52 $token->{'from'} = $alter->[0];
147              
148             } elsif( index($token->{'from'}, '.') == -1 ) {
149             # A hostname without a domain name such as "mail", "mx", or "mbox"
150 80 50       287 $token->{'from'} = $alter->[0] if index($alter->[0], '.') > 0;
151              
152             } else {
153             # An IPv4 address
154 253         514 $token->{'from'} = $alter->[0];
155             }
156 358         420 last;
157             }
158              
159 4516 100       8506 delete $token->{'by'} unless defined $token->{'by'};
160 4516 50       7333 delete $token->{'from'} unless defined $token->{'from'};
161 4516 100       11989 $token->{'for'} = Sisimai::Address->s3s4($token->{'for'}) if exists $token->{'for'};
162 4516         10191 for my $e ( keys %$token ) {
163             # Delete an invalid value
164 19394 50       24718 $token->{ $e } = '' if index($token->{ $e }, ' ') > -1;
165 19394         21546 $token->{ $e } =~ y/[]//d; # Remove "[]" from the IP address
166             }
167              
168             return [
169             $token->{'from'} || '',
170             $token->{'by'} || '',
171             $token->{'via'} || '',
172             $token->{'with'} || '',
173             $token->{'id'} || '',
174 4516   100     52676 $token->{'for'} || '',
      100        
      100        
      100        
      100        
      100        
175             ];
176             }
177              
178             sub part {
179             # Split given entire message body into error message lines and the original message part only
180             # include email headers
181             # @param [String] email Entire message body
182             # @param [Array] cutby List of strings which is a boundary of the original message part
183             # @param [Bool] keeps Flag for keeping strings after "\n\n"
184             # @return [Array] [Error message lines, The original message]
185             # @since v5.0.0
186 4468     4468 1 15197 my $class = shift;
187 4468   100     7969 my $email = shift || return undef;
188 4467   100     7330 my $cutby = shift || return undef;
189 4466   100     11039 my $keeps = shift // 0;
190              
191 4466         5117 my $boundaryor = ''; # A boundary string divides the error message part and the original message part
192 4466         4527 my $positionor = -1; # A Position of the boundary string
193 4466         4061 my $formerpart = ''; # The error message part
194 4466         5326 my $latterpart = ''; # The original message part
195              
196 4466         6381 for my $e ( @$cutby ) {
197             # Find a boundary string(2nd argument) from the 1st argument
198 6382 100       10360 $positionor = index($$email, $e); next if $positionor == -1;
  6382         9262  
199 3454         3646 $boundaryor = $e;
200 3454         3912 last;
201             }
202              
203 4466 100       6905 if( $positionor > 0 ) {
204             # There is the boundary string in the message body
205 3409         8379 $formerpart = substr($$email, 0, $positionor);
206 3409   50     11472 $latterpart = substr($$email, ($positionor + length($boundaryor) + 1), ) || '';
207              
208             } else {
209             # Substitute the entire message to the former part when the boundary string is not included
210             # the $$email
211 1057         2427 $formerpart = $$email;
212             }
213              
214 4466 100       7561 if( length $latterpart > 0 ) {
215             # Remove blank lines, the message body of the original message, and append "\n" at the end
216             # of the original message headers
217             # 1. Remove leading blank lines
218             # 2. Remove text after the first blank line: \n\n
219             # 3. Append "\n" at the end of test block when the last character is not "\n"
220 3409         14355 $latterpart =~ s/\A[\r\n\s]+//m;
221 3409 100       5754 if( $keeps == 0 ) {
222             # Remove text after the first blank line: \n\n when $keeps is 0
223 3390 50       13346 substr($latterpart, index($latterpart, "\n\n") + 1, length($latterpart), '') if index($latterpart, "\n\n");
224             }
225 3409 100       7529 $latterpart .= "\n" unless substr($latterpart, -1, 1) eq "\n";
226             }
227 4466         14562 return [$formerpart, $latterpart];
228             }
229              
230             1;
231             __END__