File Coverage

lib/Sisimai/Message.pm
Criterion Covered Total %
statement 240 240 100.0
branch 100 108 92.5
condition 53 61 86.8
subroutine 16 16 100.0
pod 1 5 20.0
total 410 430 95.3


line stmt bran cond sub pod time code
1             package Sisimai::Message;
2 89     89   92174 use v5.26;
  89         236  
3 89     89   352 use strict;
  89         109  
  89         1803  
4 89     89   317 use warnings;
  89         115  
  89         3562  
5 89     89   32540 use Sisimai::RFC1894;
  89         237  
  89         2954  
6 89     89   35761 use Sisimai::RFC2045;
  89         210  
  89         3096  
7 89     89   32128 use Sisimai::RFC5322;
  89         340  
  89         2822  
8 89     89   30331 use Sisimai::RFC5965;
  89         196  
  89         2408  
9 89     89   357 use Sisimai::Address;
  89         120  
  89         1329  
10 89     89   235 use Sisimai::String;
  89         94  
  89         1067  
11 89     89   29960 use Sisimai::Order;
  89         192  
  89         2652  
12 89     89   404 use Sisimai::Lhost;
  89         98  
  89         231963  
13              
14             state $Fields1894 = Sisimai::RFC1894->FIELDINDEX;
15             state $Fields5322 = Sisimai::RFC5322->FIELDINDEX;
16             state $Fields5965 = Sisimai::RFC5965->FIELDINDEX;
17             state $FieldTable = { map { lc $_ => $_ } ($Fields1894->@*, $Fields5322->@*, $Fields5965->@*) };
18             state $Boundaries = ["Content-Type: message/rfc822", "Content-Type: text/rfc822-headers"];
19             state $MediaTypes = [
20             ["message/xdelivery-status", "message/delivery-status"],
21             ["message/disposition-notification", "message/delivery-status"],
22             ["message/global-delivery-status", "message/delivery-status"],
23             ["message/global-disposition-notification", "message/delivery-status"],
24             ["message/global-delivery-status", "message/delivery-status"],
25             ["message/global-headers", "text/rfc822-headers"],
26             ["message/global", "message/rfc822"],
27             ];
28              
29             my $TryOnFirst = [];
30              
31             sub rise {
32             # Constructor of Sisimai::Message
33             # @param [Hash] argvs Email text data
34             # @options argvs [String] data Entire email message
35             # @options argvs [Code] hook Reference to callback method
36             # @return [Hash] Structured email data
37             # [Undef] If each value of the arguments are missing
38 3521     3521 1 243943 my $class = shift;
39 3521   100     6321 my $argvs = shift || return undef;
40 3520   100     8357 my $email = $argvs->{'data'} || return undef;
41 3519         15999 my $thing = {'from' => '', 'header' => {}, 'rfc822' => '', 'ds' => [], 'catch' => undef};
42 3519         4458 my $param = {};
43              
44 3519         3869 my $aftersplit = undef;
45 3519         3591 my $beforefact = undef;
46 3519         4790 my $parseagain = 0;
47              
48 3519         6708 while($parseagain < 2) {
49             # 1. Split email data to headers and a body part.
50 3537 100       9041 last unless $aftersplit = __PACKAGE__->part(\$email);
51              
52             # 2. Convert email headers from text to hash reference
53 3536         6170 $thing->{'from'} = $aftersplit->[0];
54 3536         9565 $thing->{'header'} = __PACKAGE__->makemap(\$aftersplit->[1]);
55              
56             # 3. Decode and rewrite the "Subject:" header
57 3536 100       8065 if( $thing->{'header'}->{'subject'} ) {
58             # Decode MIME-Encoded "Subject:" header
59 3518         6505 my $cv = $thing->{'header'}->{'subject'};
60 3518 100       19898 my $cq = Sisimai::RFC2045->is_encoded(\$cv) ? Sisimai::RFC2045->decodeH([split(/[ ]/, $cv)]) : $cv;
61 3518         6359 my $cl = lc $cq;
62 3518 100       5500 my $p1 = index($cl, 'fwd:'); $p1 = index($cl, 'fw:') if $p1 < 0;
  3518         6828  
63              
64             # Remove "Fwd:" string from the "Subject:" header
65 3518 100       6565 if( $p1 > -1 ) {
66             # Delete quoted strings, quote symbols(>)
67 30         69 $cq = substr($cq, index($cq, ':') + 1,);
68 30         284 s/^[>][ ]//gm, s/^[>]$//gm for $aftersplit->[2];
69             }
70 3518         6213 $thing->{'header'}->{'subject'} = $cq;
71             }
72              
73             # 4. Rewrite message body for detecting the bounce reason
74 3536         18385 $TryOnFirst = Sisimai::Order->make($thing->{'header'}->{'subject'});
75 3536   100     18181 $param = {'hook' => $argvs->{'hook'} || undef, 'mail' => $thing, 'body' => \$aftersplit->[2]};
76 3536 100       12125 last if $beforefact = __PACKAGE__->sift(%$param);
77 58 100       129 last unless grep { index($aftersplit->[2], $_) > -1 } @$Boundaries;
  116         271  
78              
79             # 5. Try to sift again
80             # There is a bounce message inside of mutipart/*, try to sift the first message/rfc822
81             # part as a entire message body again. rfc3464/1086-a847b090.eml is the email but the
82             # results decodd by sisimai are unstable.
83 18         27 $parseagain++;
84 18         68 $email = Sisimai::RFC5322->part(\$aftersplit->[2], $Boundaries, 1)->[1];
85 18         44 $email =~ s/\A[\r\n\s]+//m;
86 18 50       60 last unless length $email > 128;
87             }
88 3519 100       8559 return undef unless $beforefact;
89              
90             # 6. Rewrite headers of the original message in the body part
91 3478         14858 $thing->{ $_ } = $beforefact->{ $_ } for ('ds', 'catch', 'rfc822');
92 3478   66     10632 my $r = $beforefact->{'rfc822'} || $aftersplit->[2];
93 3478 50       15025 $thing->{'rfc822'} = ref $r ? $r : __PACKAGE__->makemap(\$r, 1);
94              
95 3478         21113 return $thing;
96             }
97              
98             sub part {
99             # Divide email data up headers and a body part.
100             # @param [String] email Email data
101             # @return [Array] Email data after split
102             # @since v4.14.0
103 3539     3539 0 5821 my $class = shift;
104 3539   100     6620 my $email = shift // return undef;
105 3537         7278 my $parts = ['', '', '']; # 0:From, 1:Header, 2:Body
106              
107 3537         12685 $$email =~ s/\A\s+//m;
108 3537 100       35319 $$email =~ s/\r\n/\n/gm if rindex($$email, "\r\n") > -1;
109              
110 3537   100     22786 ($parts->[1], $parts->[2]) = split(/\n\n/, $$email, 2); $parts->[2] ||= "";
  3537         7606  
111 3537 100 66     15007 return undef if $parts->[1] eq "" || $parts->[2] eq "";
112              
113 3536 100       10119 if( substr($parts->[1], 0, 5) eq 'From ' ) {
114             # From MAILER-DAEMON Tue Feb 11 00:00:00 2014
115 461         1739 $parts->[0] = [split(/\n/, $parts->[1], 2)]->[0];
116 461         1388 $parts->[0] =~ y/\r\n//d;
117              
118             } else {
119             # Set pseudo UNIX From line
120 3075         5187 $parts->[0] = 'MAILER-DAEMON Tue Feb 11 00:00:00 2014';
121             }
122 3536 50       10496 $parts->[1] .= "\n" unless substr($parts->[1], -1, 1) eq "\n";
123              
124 3536         7472 for my $e ('image/', 'application/', 'text/html') {
125             # https://github.com/sisimai/p5-sisimai/issues/492, Reduce email size
126 10608 100       9063 my $p0 = 0; my $p1 = 0; my $ep = $e eq 'text/html' ? '' : "--\n";
  10608         8530  
  10608         15235  
127 10608         11436 while(1) {
128             # Remove each part from "Content-Type: image/..." to "--\n" (the end of each boundary)
129 11222 100       21133 $p0 = index($parts->[2], 'Content-Type: '.$e, $p0); last if $p0 < 0;
  11222         17596  
130 837 100       2968 $p1 = index($parts->[2], $ep, $p0 + 32); last if $p1 < 0;
  837         1978  
131 614         1245 substr($parts->[2], $p0, $p1 - $p0, '');
132             }
133             }
134 3536         6110 $parts->[2] .= "\n";
135 3536         7952 return $parts;
136             }
137              
138             sub makemap {
139             # Convert a text including email headers to a hash reference
140             # @param [String] argv0 Email header data
141             # @param [Bool] argv1 Decode "Subject:" header
142             # @return [Hash] Structured email header data
143             # @since v4.25.6
144 7015     7015 0 8225 my $class = shift;
145 7015   100     11905 my $argv0 = shift || return {}; $$argv0 =~ s/^[>]+[ ]//mg; # Remove '>' indent symbols
  7014         14235  
146 7014   100     14423 my $argv1 = shift || 0;
147              
148             # Select and convert all the headers in $argv0. The following regular expression is based on
149             # https://gist.github.com/xtetsuji/b080e1f5551d17242f6415aba8a00239
150 7014         207792 my $firstpairs = {$$argv0 =~ /^([\w-]+):[ ]*(.*?)\n(?![\s\t])/gms};
151 7014         23498 my $headermaps = {'subject' => ''};
152 7014         88296 $headermaps->{ lc $_ } = $firstpairs->{ $_ } for keys %$firstpairs;
153 7014         11873 my $receivedby = [];
154              
155 7014         12947 for my $e ( values %$headermaps ) { s/\n\s+/ /, y/\t / /s for $e }
  81023         156841  
156 7014 100 100     22566 if( index($$argv0, "\nReceived:") > 0 || index($$argv0, "Received:") == 0 ) {
157             # Capture values of each Received: header
158 6216         45967 my $re = [$$argv0 =~ /^Received:[ ]*(.*?)\n(?![\s\t])/gms];
159 6216         10328 for my $e ( @$re ) {
160             # 1. Exclude the Received header including "(qmail ** invoked from network)".
161             # 2. Convert all consecutive spaces and line breaks into a single space character.
162 11743 100 100     59808 next if index($e, Sisimai::RFC5322->woReceived->[0]) > 0 || index($e, Sisimai::RFC5322->woReceived->[1]) > 0;
163              
164 11488         29933 $e =~ s/\n\s+/ /;
165 11488         18918 $e =~ y/\n\t / /s;
166 11488         21224 push @$receivedby, $e;
167             }
168             }
169 7014         11747 $headermaps->{'received'} = $receivedby;
170              
171 7014 100       26775 return $headermaps unless $argv1;
172 3478 100       9186 return $headermaps unless length $headermaps->{'subject'};
173              
174             # Convert MIME-Encoded subject
175 3172 100       13036 if( Sisimai::String->is_8bit(\$headermaps->{'subject'}) ) {
176             # The value of ``Subject'' header is including multibyte character, is not MIME-Encoded text.
177 44         81 eval {
178             # Remove invalid byte sequence
179 44         643 Encode::decode_utf8($headermaps->{'subject'});
180 44         440 Encode::encode_utf8($headermaps->{'subject'});
181             };
182 44 50       108 $headermaps->{'subject'} = 'MULTIBYTE CHARACTERS HAVE BEEN REMOVED' if $@;
183              
184             } else {
185             # MIME-Encoded subject field or ASCII characters only
186 3128         3613 my $r = [];
187 3128 100       11821 if( Sisimai::RFC2045->is_encoded(\$headermaps->{'subject'}) ) {
188             # split the Subject: field by " "
189 251         740 for my $v ( split(/ /, $headermaps->{'subject'}) ) {
190             # Insert value to the array if the string is MIME encoded text
191 313 100       547 push @$r, $v if Sisimai::RFC2045->is_encoded(\$v);
192             }
193             } else {
194             # Subject line is not MIME encoded
195 2877         5847 $r = [$headermaps->{'subject'}];
196             }
197 3128         8061 $headermaps->{'subject'} = Sisimai::RFC2045->decodeH($r);
198             }
199 3172         16746 return $headermaps;
200             }
201              
202             sub tidy {
203             # Tidy up each field name and format
204             # @param [String] argv0 Strings including field and value used at an email
205             # @return [String] Strings tidied up
206             # @since v5.0.0
207 3558     3558 0 44418 my $class = shift;
208 3558   100     5484 my $argv0 = shift || return '';
209 3557         4119 my $email = '';
210 3557         46524 my @lines = split("\n", $$argv0);
211 3557         5411 my $index = -1;
212              
213 3557         5136 for my $e ( @lines ) {
214             # Find and tidy up fields defined in RFC5322, RFC1894, and RFC5965
215             # 1. Find a field label defined in RFC5322, RFC1894, or RFC5965 from this line
216 196471         176338 my $p0 = index($e, ':');
217 196471 100       191636 my $cf = substr(lc $e, 0, $p0); chop $cf if substr($cf, -1, 1) eq ' ';
  196471         226652  
218 196471   100     305810 my $fn = $FieldTable->{ $cf } || '';
219              
220             # There is neither ":" character nor the field listed in $FieldTable
221 196471         139917 $index++;
222 196471 100       197429 if( $fn eq '' ){ $email .= $e."\n"; next }
  134031         126016  
  134031         125722  
223              
224             # 2. Tidy up a sub type of each field defined in RFC1894 such as Reporting-MTA: DNS;...
225 62440         61530 my $ab = [];
226 62440         60584 my $bf = substr($e, $p0 + 1,);
227 62440         54573 my $p1 = index($bf, ';');
228 62440         45110 while(1) {
229             # Such as Diagnostic-Code, Remote-MTA, and so on
230             # - Before: Diagnostic-Code: SMTP;550 User unknown
231             # - After: Diagnostic-Code: smtp; 550 User unknown
232 62440 100       59036 last unless grep { $fn eq $_ } (@$Fields1894, 'Content-Type');
  811720         743740  
233              
234 27261 100       32908 if( $p1 > 0 ) {
235             # The field including one or more ";"
236 14793         22520 for my $f (split(';', $bf)) {
237             # 2-1. Trim leading and trailing space characters from the current buffer
238 29041         37890 while( index($f, ' ') == 0 ) { $f = substr($f, 1,) }
  26933         38718  
239 29041         36597 while( substr($f, -1, 1) eq ' '){ $f = substr($f, 0, length($f) - 1) }
  61         167  
240 29041         23913 my $ps = '';
241              
242             # 2-2. Convert some parameters to the lower-cased string
243 29041         21134 while(1) {
244             # For example,
245             # - Content-Type: Message/delivery-status => message/delivery-status
246             # - Content-Type: Charset=UTF8 => charset=utf8
247             # - Reporting-MTA: DNS; ... => dns
248             # - Final-Recipient: RFC822; ... => rfc822
249 29041 100       36746 last if index($f, ' ') > 0;
250              
251 26717 100       23495 my $p2 = index($f, '='); if( $p2 > 0 ) {
  26717         29291  
252             # charset=, boundary=, and other pairs divided by "="
253 4764         6972 $ps = lc substr($f, 0, $p2);
254 4764         6081 substr($f, 0, $p2, $ps);
255             }
256 26717 100       35839 $f = lc $f if $ps ne 'boundary';
257 26717 100       28911 $f = 'rfc822' if $f eq 'rfc/822';
258 26717         22884 last;
259             }
260 29041         33629 push @$ab, $f;
261             }
262              
263 14793 100       20276 if( $fn eq 'Diagnostic-Code' ) {
264             # Diagnostic-Code: x-unix;
265             # /var/email/kijitora/Maildir/tmp/1000000000.A000000B00000.neko22:
266             # Disk quota exceeded
267 2026 100 66     4406 push @$ab, '' if scalar @$ab == 1 && index($lines[$index + 1], " ") == 0;
268             }
269 14793         17771 $bf = join('; ', @$ab); $ab = []; # Insert " " (space characer) immediately after ";"
  14793         13798  
270              
271             } else {
272             # There is no ";" in the field
273             # Arrival-Date, Last-Attempt-Date
274             # X-Original-Message-ID
275 12468 100 100     29633 last if index($fn, '-Date') > 0 || index($fn, '-Message-ID') > 0;
276 9451         9854 $bf = lc $bf;
277             }
278 24244         22236 last;
279             }
280              
281             # 3. Tidy up a value, and a parameter of Content-Type: field
282 62440 100       68249 if( $fn eq "Content-Type" ) {
283             # Replace the value of "Content-Type" field
284 10282         11474 for my $f ( @$MediaTypes ) {
285             # - Before: Content-Type: message/xdelivery-status; ...
286             # - After: Content-Type: message/delivery-status; ...
287 71974 100       66736 $p1 = index($bf, $f->[0]); next if $p1 < 0;
  71974         76766  
288 6         15 substr($bf, $p1, length $f->[0], $f->[1]);
289             }
290             }
291              
292             # 4. Concatenate the field name and the field value
293 62440         86015 for my $f ( split(' ', $bf) ) {
294             # Remove redundant space characters
295 162452 50       245783 push @$ab, $f if length $f > 0;
296             }
297 62440         128528 $email .= sprintf("%s: %s\n", $fn, join(' ', @$ab));
298             }
299              
300             # 5. Convert the lower-cased SMTP command to the upper-cased.
301 3557         12422 $email =~ s/after end of data:/after end of DATA:/g;
302 3557 50       8725 $email .= "\n" if substr($email, -2, 2) ne "\n\n";
303 3557         26339 return \$email;
304             }
305              
306             sub sift {
307             # Sift a bounce mail with each MTA module
308             # @param [Hash] argvs Processing message entity.
309             # @param options argvs [Hash] mail Email message entity
310             # @param options mail [String] from From line of mbox
311             # @param options mail [Hash] header Email header data
312             # @param options mail [String] rfc822 Original message part
313             # @param options mail [Array] ds Delivery status list(decoded data)
314             # @param options argvs [String] body Email message body
315             # @param options argvs [Code] hook Hook method to be called
316             # @return [Hash] Decoded and structured bounce mails
317 3538     3538 0 4806 my $class = shift;
318 3538         7667 my $argvs = { @_ };
319              
320 3538   100     7212 my $mailheader = $argvs->{'mail'}->{'header'} || return undef;
321 3537   100     6534 my $bodystring = $argvs->{'body'} || return undef;
322 3536   100     8138 my $hookmethod = $argvs->{'hook'} || undef;
323 3536         3812 my $havecaught = undef;
324              
325 3536         5522 state $defaultset = Sisimai::Order->another;
326 3536         4037 state $lhosttable = Sisimai::Lhost->path;
327              
328 3536   100     7443 $mailheader->{'from'} //= '';
329 3536   50     7640 $mailheader->{'subject'} //= '';
330 3536   100     7352 $mailheader->{'content-type'} //= '';
331              
332             # Tidy up each field name and value in the entire message body
333 3536         7776 $$bodystring = __PACKAGE__->tidy($bodystring)->$*;
334              
335             # Decode BASE64 Encoded message body
336 3536   100     12767 my $mesgformat = lc($mailheader->{'content-type'} || '');
337 3536   100     8888 my $ctencoding = lc($mailheader->{'content-transfer-encoding'} || '');
338              
339 3536 100 66     16084 if( index($mesgformat, 'text/plain') == 0 || index($mesgformat, 'text/html') == 0 ) {
    100          
340             # Content-Type: text/plain; charset=UTF-8
341 458 100       1582 if( $ctencoding eq 'base64' ) {
    100          
342             # Content-Transfer-Encoding: base64
343 11         32 $bodystring = Sisimai::RFC2045->decodeB($bodystring);
344              
345             } elsif( $ctencoding eq 'quoted-printable' ) {
346             # Content-Transfer-Encoding: quoted-printable
347 60         213 $bodystring = Sisimai::RFC2045->decodeQ($bodystring);
348             }
349              
350             # Content-Type: text/html;...
351 458 50       1266 $bodystring = Sisimai::String->to_plain($bodystring, 1) if index($mesgformat, 'text/html') > -1;
352              
353             } elsif( index($mesgformat, 'multipart/') == 0 ) {
354             # In case of Content-Type: multipart/*
355 2410         10769 my $p = Sisimai::RFC2045->makeflat($mailheader->{'content-type'}, $bodystring);
356 2410 100 66     8787 $bodystring = $p if defined $p && length $$p;
357             }
358 3536         14852 $$bodystring =~ tr/\r//d;
359 3536         19782 $$bodystring =~ s/\t/ /g;
360              
361 3536 100       6959 if( ref $hookmethod eq 'CODE' ) {
362             # Call hook method
363 713         2715 my $p = {'headers' => $mailheader, 'message' => $$bodystring};
364 713         1050 eval { $havecaught = $hookmethod->($p) };
  713         2026  
365 713 50       15384 warn sprintf(" ***warning: Something is wrong in hook method 'hook': %s", $@) if $@;
366             }
367              
368 3536         3922 my $haveloaded = {};
369 3536         4465 my $havesifted = undef;
370 3536         4428 my $modulename = '';
371 3536         4015 DECODER: while(1) {
372             # 1. MTA Module Candidates to be tried on first
373             # 2. Sisimai::Lhost::*
374             # 3. Sisimai::RFC3464
375             # 4. Sisimai::ARF
376             # 5. Sisimai::RFC3834
377 3536         7461 TRY_ON_FIRST_AND_DEFAULTS: for my $r ( @$TryOnFirst, @$defaultset ) {
378             # Try MTA module candidates
379 39575 100       52933 next if exists $haveloaded->{ $r };
380 36205         623853 require $lhosttable->{ $r };
381 36205         183419 $havesifted = $r->inquire($mailheader, $bodystring);
382 36205         50888 $haveloaded->{ $r } = 1;
383 36205         31552 $modulename = $r;
384 36205 100       53374 last(DECODER) if $havesifted;
385             }
386              
387             # When the all of Sisimai::Lhost::* modules did not return bounce data, call Sisimai::RFC3464;
388 853         7707 require Sisimai::RFC3464;
389 853         4609 $havesifted = Sisimai::RFC3464->inquire($mailheader, $bodystring);
390 853 100       1791 if( $havesifted ){ $modulename = 'RFC3464'; last(DECODER) }
  739         1018  
  739         1212  
391              
392             # Feedback Loop message
393 114         2031 require Sisimai::ARF;
394 114         637 $havesifted = Sisimai::ARF->inquire($mailheader, $bodystring);
395 114 100       234 if( $havesifted ){ $modulename = "ARF"; last(DECODER) }
  40         66  
  40         81  
396              
397             # Try to sift the message as auto reply message defined in RFC3834
398 74         3269 require Sisimai::RFC3834;
399 74         343 $havesifted = Sisimai::RFC3834->inquire($mailheader, $bodystring);
400 74 100       143 if( $havesifted ){ $modulename = 'RFC3834'; last(DECODER) }
  16         18  
  16         24  
401              
402 58         61 last; # as of now, we have no sample email for coding this block
403              
404             } # End of while(DECODER)
405 3536 100       6755 return undef unless $havesifted;
406              
407 3478         6369 $havesifted->{'catch'} = $havecaught;
408 3478   33     14115 $modulename =~ s/\A.+:://; $_->{'agent'} ||= $modulename for $havesifted->{'ds'}->@*;
  3478         16360  
409 3478         20323 return $havesifted;
410             }
411              
412             1;
413             __END__