File Coverage

lib/Sisimai/Message.pm
Criterion Covered Total %
statement 240 240 100.0
branch 100 108 92.5
condition 54 61 88.5
subroutine 16 16 100.0
pod 1 5 20.0
total 411 430 95.5


line stmt bran cond sub pod time code
1             package Sisimai::Message;
2 87     87   98267 use v5.26;
  87         310  
3 87     87   457 use strict;
  87         143  
  87         2262  
4 87     87   386 use warnings;
  87         160  
  87         4945  
5 87     87   56983 use Sisimai::RFC1894;
  87         282  
  87         3894  
6 87     87   48155 use Sisimai::RFC2045;
  87         282  
  87         4315  
7 87     87   40643 use Sisimai::RFC5322;
  87         404  
  87         3911  
8 87     87   39947 use Sisimai::RFC5965;
  87         264  
  87         3026  
9 87     87   544 use Sisimai::Address;
  87         162  
  87         1894  
10 87     87   401 use Sisimai::String;
  87         125  
  87         1746  
11 87     87   38127 use Sisimai::Order;
  87         231  
  87         3525  
12 87     87   546 use Sisimai::Lhost;
  87         149  
  87         319820  
13              
14             state $Fields1894 = Sisimai::RFC1894->FIELDINDEX;
15             state $Fields5322 = Sisimai::RFC5322->FIELDINDEX;
16             state $Fields5965 = Sisimai::RFC5965->FIELDINDEX;
17             state $FieldTable = { map { lc $_ => $_ } ($Fields1894->@*, $Fields5322->@*, $Fields5965->@*) };
18             state $Boundaries = ["Content-Type: message/rfc822", "Content-Type: text/rfc822-headers"];
19             state $MediaTypes = [
20             ["message/xdelivery-status", "message/delivery-status"],
21             ["message/disposition-notification", "message/delivery-status"],
22             ["message/global-delivery-status", "message/delivery-status"],
23             ["message/global-disposition-notification", "message/delivery-status"],
24             ["message/global-delivery-status", "message/delivery-status"],
25             ["message/global-headers", "text/rfc822-headers"],
26             ["message/global", "message/rfc822"],
27             ];
28              
29             my $TryOnFirst = [];
30              
31             sub rise {
32             # Constructor of Sisimai::Message
33             # @param [Hash] argvs Email text data
34             # @options argvs [String] data Entire email message
35             # @options argvs [Code] hook Reference to callback method
36             # @return [Hash] Structured email data
37             # [Undef] If each value of the arguments are missing
38 3509     3509 1 417455 my $class = shift;
39 3509   100     10647 my $argvs = shift || return undef;
40 3508   100     11064 my $email = $argvs->{'data'} || return undef;
41 3507         25280 my $thing = {'from' => '', 'header' => {}, 'rfc822' => '', 'ds' => [], 'catch' => undef};
42 3507         8292 my $param = {};
43              
44 3507         6524 my $aftersplit = undef;
45 3507         6358 my $beforefact = undef;
46 3507         9181 my $parseagain = 0;
47              
48 3507         14111 while($parseagain < 2) {
49             # 1. Split email data to headers and a body part.
50 3524 100       15300 last unless $aftersplit = __PACKAGE__->part(\$email);
51              
52             # 2. Convert email headers from text to hash reference
53 3523         10160 $thing->{'from'} = $aftersplit->[0];
54 3523         14682 $thing->{'header'} = __PACKAGE__->makemap(\$aftersplit->[1]);
55              
56             # 3. Decode and rewrite the "Subject:" header
57 3523 100       12663 if( $thing->{'header'}->{'subject'} ) {
58             # Decode MIME-Encoded "Subject:" header
59 3506         8892 my $cv = $thing->{'header'}->{'subject'};
60 3506 100       39151 my $cq = Sisimai::RFC2045->is_encoded(\$cv) ? Sisimai::RFC2045->decodeH([split(/[ ]/, $cv)]) : $cv;
61 3506         9329 my $cl = lc $cq;
62 3506 100       9024 my $p1 = index($cl, 'fwd:'); $p1 = index($cl, 'fw:') if $p1 < 0;
  3506         25277  
63              
64             # Remove "Fwd:" string from the "Subject:" header
65 3506 100       10695 if( $p1 > -1 ) {
66             # Delete quoted strings, quote symbols(>)
67 30         314 $cq = Sisimai::String->sweep(substr($cq, index($cq, ':') + 1,));
68 30         470 s/^[>][ ]//gm, s/^[>]$//gm for $aftersplit->[2];
69             }
70 3506         10896 $thing->{'header'}->{'subject'} = $cq;
71             }
72              
73             # 4. Rewrite message body for detecting the bounce reason
74 3523         30394 $TryOnFirst = Sisimai::Order->make($thing->{'header'}->{'subject'});
75 3523   100     33989 $param = {'hook' => $argvs->{'hook'} || undef, 'mail' => $thing, 'body' => \$aftersplit->[2]};
76 3523 100       19392 last if $beforefact = __PACKAGE__->sift(%$param);
77 60 100       253 last unless grep { index($aftersplit->[2], $_) > -1 } @$Boundaries;
  120         466  
78              
79             # 5. Try to sift again
80             # There is a bounce message inside of mutipart/*, try to sift the first message/rfc822
81             # part as a entire message body again. rfc3464/1086-a847b090.eml is the email but the
82             # results decodd by sisimai are unstable.
83 17         42 $parseagain++;
84 17         109 $email = Sisimai::RFC5322->part(\$aftersplit->[2], $Boundaries, 1)->[1];
85 17         64 $email =~ s/\A[\r\n\s]+//m;
86 17 50       93 last unless length $email > 128;
87             }
88 3507 100       12075 return undef unless $beforefact;
89              
90             # 6. Rewrite headers of the original message in the body part
91 3463         22036 $thing->{ $_ } = $beforefact->{ $_ } for ('ds', 'catch', 'rfc822');
92 3463   66     13261 my $r = $beforefact->{'rfc822'} || $aftersplit->[2];
93 3463 50       20817 $thing->{'rfc822'} = ref $r ? $r : __PACKAGE__->makemap(\$r, 1);
94              
95 3463         31673 return $thing;
96             }
97              
98             sub part {
99             # Divide email data up headers and a body part.
100             # @param [String] email Email data
101             # @return [Array] Email data after split
102             # @since v4.14.0
103 3526     3526 0 8050 my $class = shift;
104 3526   100     11315 my $email = shift // return undef;
105 3524         12294 my $parts = ['', '', '']; # 0:From, 1:Header, 2:Body
106              
107 3524         20725 $$email =~ s/\A\s+//m;
108 3524 100       53378 $$email =~ s/\r\n/\n/gm if rindex($$email, "\r\n") > -1;
109              
110 3524   100     32241 ($parts->[1], $parts->[2]) = split(/\n\n/, $$email, 2); $parts->[2] ||= "";
  3524         13638  
111 3524 100 66     21701 return undef if $parts->[1] eq "" || $parts->[2] eq "";
112              
113 3523 100       15602 if( substr($parts->[1], 0, 5) eq 'From ' ) {
114             # From MAILER-DAEMON Tue Feb 11 00:00:00 2014
115 461         2646 $parts->[0] = [split(/\n/, $parts->[1], 2)]->[0];
116 461         2050 $parts->[0] =~ y/\r\n//d;
117              
118             } else {
119             # Set pseudo UNIX From line
120 3062         7850 $parts->[0] = 'MAILER-DAEMON Tue Feb 11 00:00:00 2014';
121             }
122 3523 50       15939 $parts->[1] .= "\n" unless substr($parts->[1], -1, 1) eq "\n";
123              
124 3523         10231 for my $e ('image/', 'application/', 'text/html') {
125             # https://github.com/sisimai/p5-sisimai/issues/492, Reduce email size
126 10569 100       15844 my $p0 = 0; my $p1 = 0; my $ep = $e eq 'text/html' ? '' : "--\n";
  10569         12900  
  10569         22389  
127 10569         12916 while(1) {
128             # Remove each part from "Content-Type: image/..." to "--\n" (the end of each boundary)
129 11182 100       29556 $p0 = index($parts->[2], 'Content-Type: '.$e, $p0); last if $p0 < 0;
  11182         29517  
130 826 100       4196 $p1 = index($parts->[2], $ep, $p0 + 32); last if $p1 < 0;
  826         2660  
131 613         4525 substr($parts->[2], $p0, $p1 - $p0, '');
132             }
133             }
134 3523         9239 $parts->[2] .= "\n";
135 3523         13913 return $parts;
136             }
137              
138             sub makemap {
139             # Convert a text including email headers to a hash reference
140             # @param [String] argv0 Email header data
141             # @param [Bool] argv1 Decode "Subject:" header
142             # @return [Hash] Structured email header data
143             # @since v4.25.6
144 6987     6987 0 14771 my $class = shift;
145 6987   100     22739 my $argv0 = shift || return {}; $$argv0 =~ s/^[>]+[ ]//mg; # Remove '>' indent symbols
  6986         21561  
146 6986   100     20468 my $argv1 = shift || 0;
147              
148             # Select and convert all the headers in $argv0. The following regular expression is based on
149             # https://gist.github.com/xtetsuji/b080e1f5551d17242f6415aba8a00239
150 6986         310372 my $firstpairs = {$$argv0 =~ /^([\w-]+):[ ]*(.*?)\n(?![\s\t])/gms};
151 6986         36339 my $headermaps = {'subject' => ''};
152 6986         144942 $headermaps->{ lc $_ } = $firstpairs->{ $_ } for keys %$firstpairs;
153 6986         19263 my $receivedby = [];
154              
155 6986         22384 for my $e ( values %$headermaps ) { s/\n\s+/ /, y/\t / /s for $e }
  80577         239841  
156 6986 100 100     41201 if( index($$argv0, "\nReceived:") > 0 || index($$argv0, "Received:") == 0 ) {
157             # Capture values of each Received: header
158 6163         67195 my $re = [$$argv0 =~ /^Received:[ ]*(.*?)\n(?![\s\t])/gms];
159 6163         17766 for my $e ( @$re ) {
160             # 1. Exclude the Received header including "(qmail ** invoked from network)".
161             # 2. Convert all consecutive spaces and line breaks into a single space character.
162 11599 100 100     102870 next if index($e, Sisimai::RFC5322->woReceived->[0]) > 0 || index($e, Sisimai::RFC5322->woReceived->[1]) > 0;
163              
164 11344         51026 $e =~ s/\n\s+/ /;
165 11344         30320 $e =~ y/\n\t / /s;
166 11344         37224 push @$receivedby, $e;
167             }
168             }
169 6986         20241 $headermaps->{'received'} = $receivedby;
170              
171 6986 100       41081 return $headermaps unless $argv1;
172 3463 100       13070 return $headermaps unless length $headermaps->{'subject'};
173              
174             # Convert MIME-Encoded subject
175 3131 100       19196 if( Sisimai::String->is_8bit(\$headermaps->{'subject'}) ) {
176             # The value of ``Subject'' header is including multibyte character, is not MIME-Encoded text.
177 44         96 eval {
178             # Remove invalid byte sequence
179 44         959 Encode::decode_utf8($headermaps->{'subject'});
180 44         568 Encode::encode_utf8($headermaps->{'subject'});
181             };
182 44 50       169 $headermaps->{'subject'} = 'MULTIBYTE CHARACTERS HAVE BEEN REMOVED' if $@;
183              
184             } else {
185             # MIME-Encoded subject field or ASCII characters only
186 3087         5135 my $r = [];
187 3087 100       15884 if( Sisimai::RFC2045->is_encoded(\$headermaps->{'subject'}) ) {
188             # split the Subject: field by " "
189 251         1261 for my $v ( split(/ /, $headermaps->{'subject'}) ) {
190             # Insert value to the array if the string is MIME encoded text
191 313 100       1167 push @$r, $v if Sisimai::RFC2045->is_encoded(\$v);
192             }
193             } else {
194             # Subject line is not MIME encoded
195 2836         8290 $r = [$headermaps->{'subject'}];
196             }
197 3087         13964 $headermaps->{'subject'} = Sisimai::RFC2045->decodeH($r);
198             }
199 3131         27185 return $headermaps;
200             }
201              
202             sub tidy {
203             # Tidy up each field name and format
204             # @param [String] argv0 Strings including field and value used at an email
205             # @return [String] Strings tidied up
206             # @since v5.0.0
207 3545     3545 0 38168 my $class = shift;
208 3545   100     12378 my $argv0 = shift || return '';
209 3544         7706 my $email = '';
210 3544         78501 my @lines = split("\n", $$argv0);
211 3544         9380 my $index = -1;
212              
213 3544         8043 for my $e ( @lines ) {
214             # Find and tidy up fields defined in RFC5322, RFC1894, and RFC5965
215             # 1. Find a field label defined in RFC5322, RFC1894, or RFC5965 from this line
216 195601         266860 my $p0 = index($e, ':');
217 195601 100       302820 my $cf = substr(lc $e, 0, $p0); chop $cf if substr($cf, -1, 1) eq ' ';
  195601         335883  
218 195601   100     492461 my $fn = $FieldTable->{ $cf } || '';
219              
220             # There is neither ":" character nor the field listed in $FieldTable
221 195601         216439 $index++;
222 195601 100       321680 if( $fn eq '' ){ $email .= $e."\n"; next }
  133334         194261  
  133334         217331  
223              
224             # 2. Tidy up a sub type of each field defined in RFC1894 such as Reporting-MTA: DNS;...
225 62267         88919 my $ab = [];
226 62267         88891 my $bf = substr($e, $p0 + 1,);
227 62267         87637 my $p1 = index($bf, ';');
228 62267         70291 while(1) {
229             # Such as Diagnostic-Code, Remote-MTA, and so on
230             # - Before: Diagnostic-Code: SMTP;550 User unknown
231             # - After: Diagnostic-Code: smtp; 550 User unknown
232 62267 100       104938 last unless grep { $fn eq $_ } (@$Fields1894, 'Content-Type');
  809471         1174365  
233              
234 27233 100       51710 if( $p1 > 0 ) {
235             # The field including one or more ";"
236 14767         39430 for my $f (split(';', $bf)) {
237             # 2-1. Trim leading and trailing space characters from the current buffer
238 29010         57751 while( index($f, ' ') == 0 ) { $f = substr($f, 1,) }
  26902         61604  
239 29010         57411 while( substr($f, -1, 1) eq ' '){ $f = substr($f, 0, length($f) - 1) }
  61         206  
240 29010         35480 my $ps = '';
241              
242             # 2-2. Convert some parameters to the lower-cased string
243 29010         35586 while(1) {
244             # For example,
245             # - Content-Type: Message/delivery-status => message/delivery-status
246             # - Content-Type: Charset=UTF8 => charset=utf8
247             # - Reporting-MTA: DNS; ... => dns
248             # - Final-Recipient: RFC822; ... => rfc822
249 29010 100       51632 last if index($f, ' ') > 0;
250              
251 26686 100       36880 my $p2 = index($f, '='); if( $p2 > 0 ) {
  26686         44976  
252             # charset=, boundary=, and other pairs divided by "="
253 4759         12013 $ps = lc substr($f, 0, $p2);
254 4759         10662 substr($f, 0, $p2, $ps);
255             }
256 26686 100       56928 $f = lc $f if $ps ne 'boundary';
257 26686 100       43773 $f = 'rfc822' if $f eq 'rfc/822';
258 26686         34950 last;
259             }
260 29010         58283 push @$ab, $f;
261             }
262              
263 14767 100       33536 if( $fn eq 'Diagnostic-Code' ) {
264             # Diagnostic-Code: x-unix;
265             # /var/email/kijitora/Maildir/tmp/1000000000.A000000B00000.neko22:
266             # Disk quota exceeded
267 2026 100 66     7342 push @$ab, '' if scalar @$ab == 1 && index($lines[$index + 1], " ") == 0;
268             }
269 14767         32081 $bf = join('; ', @$ab); $ab = []; # Insert " " (space characer) immediately after ";"
  14767         27440  
270              
271             } else {
272             # There is no ";" in the field
273             # Arrival-Date, Last-Attempt-Date
274             # X-Original-Message-ID
275 12466 100 100     49242 last if index($fn, '-Date') > 0 || index($fn, '-Message-ID') > 0;
276 9449         16638 $bf = lc $bf;
277             }
278 24216         38903 last;
279             }
280              
281             # 3. Tidy up a value, and a parameter of Content-Type: field
282 62267 100       111166 if( $fn eq "Content-Type" ) {
283             # Replace the value of "Content-Type" field
284 10254         17566 for my $f ( @$MediaTypes ) {
285             # - Before: Content-Type: message/xdelivery-status; ...
286             # - After: Content-Type: message/delivery-status; ...
287 71778 100       109280 $p1 = index($bf, $f->[0]); next if $p1 < 0;
  71778         118833  
288 6         19 substr($bf, $p1, length $f->[0], $f->[1]);
289             }
290             }
291              
292             # 4. Concatenate the field name and the field value
293 62267         141243 for my $f ( split(' ', $bf) ) {
294             # Remove redundant space characters
295 161985 50       367696 push @$ab, $f if length $f > 0;
296             }
297 62267         221660 $email .= sprintf("%s: %s\n", $fn, join(' ', @$ab));
298             }
299              
300             # 5. Convert the lower-cased SMTP command to the upper-cased.
301 3544         19051 $email =~ s/after end of data:/after end of DATA:/g;
302 3544 50       12974 $email .= "\n" if substr($email, -2, 2) ne "\n\n";
303 3544         40276 return \$email;
304             }
305              
306             sub sift {
307             # Sift a bounce mail with each MTA module
308             # @param [Hash] argvs Processing message entity.
309             # @param options argvs [Hash] mail Email message entity
310             # @param options mail [String] from From line of mbox
311             # @param options mail [Hash] header Email header data
312             # @param options mail [String] rfc822 Original message part
313             # @param options mail [Array] ds Delivery status list(decoded data)
314             # @param options argvs [String] body Email message body
315             # @param options argvs [Code] hook Hook method to be called
316             # @return [Hash] Decoded and structured bounce mails
317 3525     3525 0 8518 my $class = shift;
318 3525         15782 my $argvs = { @_ };
319              
320 3525   100     14116 my $mailheader = $argvs->{'mail'}->{'header'} || return undef;
321 3524   100     11230 my $bodystring = $argvs->{'body'} || return undef;
322 3523   100     15155 my $hookmethod = $argvs->{'hook'} || undef;
323 3523         6426 my $havecaught = undef;
324              
325 3523         6193 state $defaultset = Sisimai::Order->another;
326 3523         8999 state $lhosttable = Sisimai::Lhost->path;
327              
328 3523   100     11770 $mailheader->{'from'} //= '';
329 3523   50     10323 $mailheader->{'subject'} //= '';
330 3523   100     11657 $mailheader->{'content-type'} //= '';
331              
332             # Tidy up each field name and value in the entire message body
333 3523         12519 $$bodystring = __PACKAGE__->tidy($bodystring)->$*;
334              
335             # Decode BASE64 Encoded message body
336 3523   100     19141 my $mesgformat = lc($mailheader->{'content-type'} || '');
337 3523   100     16551 my $ctencoding = lc($mailheader->{'content-transfer-encoding'} || '');
338              
339 3523 100 66     29160 if( index($mesgformat, 'text/plain') == 0 || index($mesgformat, 'text/html') == 0 ) {
    100          
340             # Content-Type: text/plain; charset=UTF-8
341 458 100       3254 if( $ctencoding eq 'base64' ) {
    100          
342             # Content-Transfer-Encoding: base64
343 11         70 $bodystring = Sisimai::RFC2045->decodeB($bodystring);
344              
345             } elsif( $ctencoding eq 'quoted-printable' ) {
346             # Content-Transfer-Encoding: quoted-printable
347 60         368 $bodystring = Sisimai::RFC2045->decodeQ($bodystring);
348             }
349              
350             # Content-Type: text/html;...
351 458 50       1528 $bodystring = Sisimai::String->to_plain($bodystring, 1) if index($mesgformat, 'text/html') > -1;
352              
353             } elsif( index($mesgformat, 'multipart/') == 0 ) {
354             # In case of Content-Type: multipart/*
355 2409         16894 my $p = Sisimai::RFC2045->makeflat($mailheader->{'content-type'}, $bodystring);
356 2409 100 100     13279 $bodystring = $p if defined $p && length $$p;
357             }
358 3523         24670 $$bodystring =~ tr/\r//d;
359 3523         28399 $$bodystring =~ s/\t/ /g;
360              
361 3523 100       11171 if( ref $hookmethod eq 'CODE' ) {
362             # Call hook method
363 711         4935 my $p = {'headers' => $mailheader, 'message' => $$bodystring};
364 711         1859 eval { $havecaught = $hookmethod->($p) };
  711         4349  
365 711 50       29103 warn sprintf(" ***warning: Something is wrong in hook method 'hook': %s", $@) if $@;
366             }
367              
368 3523         9042 my $haveloaded = {};
369 3523         6793 my $havesifted = undef;
370 3523         9798 my $modulename = '';
371 3523         5966 DECODER: while(1) {
372             # 1. MTA Module Candidates to be tried on first
373             # 2. Sisimai::Lhost::*
374             # 3. Sisimai::RFC3464
375             # 4. Sisimai::ARF
376             # 5. Sisimai::RFC3834
377 3523         11762 TRY_ON_FIRST_AND_DEFAULTS: for my $r ( @$TryOnFirst, @$defaultset ) {
378             # Try MTA module candidates
379 38610 100       79850 next if exists $haveloaded->{ $r };
380 35270         809534 require $lhosttable->{ $r };
381 35270         269909 $havesifted = $r->inquire($mailheader, $bodystring);
382 35270         78753 $haveloaded->{ $r } = 1;
383 35270         58454 $modulename = $r;
384 35270 100       92139 last(DECODER) if $havesifted;
385             }
386              
387             # When the all of Sisimai::Lhost::* modules did not return bounce data, call Sisimai::RFC3464;
388 855         9761 require Sisimai::RFC3464;
389 855         9307 $havesifted = Sisimai::RFC3464->inquire($mailheader, $bodystring);
390 855 100       2893 if( $havesifted ){ $modulename = 'RFC3464'; last(DECODER) }
  739         1529  
  739         1815  
391              
392             # Feedback Loop message
393 116         2295 require Sisimai::ARF;
394 116         887 $havesifted = Sisimai::ARF->inquire($mailheader, $bodystring);
395 116 100       413 if( $havesifted ){ $modulename = "ARF"; last(DECODER) }
  40         112  
  40         120  
396              
397             # Try to sift the message as auto reply message defined in RFC3834
398 76         3579 require Sisimai::RFC3834;
399 76         641 $havesifted = Sisimai::RFC3834->inquire($mailheader, $bodystring);
400 76 100       252 if( $havesifted ){ $modulename = 'RFC3834'; last(DECODER) }
  16         26  
  16         39  
401              
402 60         123 last; # as of now, we have no sample email for coding this block
403              
404             } # End of while(DECODER)
405 3523 100       10075 return undef unless $havesifted;
406              
407 3463         8989 $havesifted->{'catch'} = $havecaught;
408 3463   33     19726 $modulename =~ s/\A.+:://; $_->{'agent'} ||= $modulename for $havesifted->{'ds'}->@*;
  3463         24637  
409 3463         32914 return $havesifted;
410             }
411              
412             1;
413             __END__