File Coverage

lib/Sisimai/ARF.pm
Criterion Covered Total %
statement 103 107 96.2
branch 54 64 84.3
condition 25 32 78.1
subroutine 8 8 100.0
pod 0 3 0.0
total 190 214 88.7


line stmt bran cond sub pod time code
1             package Sisimai::ARF;
2 7     7   1648 use v5.26;
  7         22  
3 7     7   36 use strict;
  7         8  
  7         325  
4 7     7   32 use warnings;
  7         11  
  7         361  
5 7     7   374 use Sisimai::Lhost;
  7         15  
  7         182  
6 7     7   27 use Sisimai::RFC5322;
  7         8  
  7         9150  
7              
8 1     1 0 6 sub description { return 'Abuse Feedback Reporting Format' }
9             sub is_arf {
10             # Email is a Feedback-Loop message or not
11             # @param [Hash] heads Email header including "Content-Type", "From" and "Subject" field
12             # @return [Integer] 1: Feedback Loop
13             # 0: is not Feedback loop
14 164     164 0 795 my $class = shift;
15 164   100     530 my $heads = shift || return 0;
16 163         547 my $abuse = ['staff@hotmail.com', 'complaints@email-abuse.amazonses.com'];
17 163   100     702 my $ctype = $heads->{"content-type"} || "";
18              
19             # Content-Type: multipart/report; report-type=feedback-report; ...
20 163 100       1136 return 1 if Sisimai::String->aligned(\$ctype, ["report-type=", "feedback-report"]);
21              
22 97 100       461 if( index($ctype, "multipart/mixed") > -1 ) {
23             # Microsoft (Hotmail, MSN, Live, Outlook) uses its own report format.
24             # Amazon SES Complaints bounces
25 32 100       141 if( index($heads->{"subject"}, "complaint about message from ") > -1 ) {
26             # From: staff@hotmail.com
27             # From: complaints@email-abuse.amazonses.com
28             # Subject: complaint about message from 192.0.2.1
29 30 50       68 return 1 if grep { index($heads->{"from"}, $_) > -1 } @$abuse;
  60         273  
30             }
31             }
32              
33             # X-Apple-Unsubscribe: true
34 67 100       394 return 0 unless exists $heads->{"x-apple-unsubscribe"};
35 5 50       33 return 1 if $heads->{"x-apple-unsubscribe"} eq "true";
36 0         0 return 0;
37             }
38              
39             sub inquire {
40             # Detect an error for Feedback Loop
41             # @param [Hash] mhead Message headers of a bounce email
42             # @param [String] mbody Message body of a bounce email
43             # @return [Hash] Bounce data list and message/rfc822 part
44             # @return [undef] failed to decode or the arguments are missing
45 164     164 0 1572 my $class = shift;
46 164 100 100     578 my $mhead = shift // return undef; return undef unless is_arf(undef, $mhead);
  163         519  
47 101   50     470 my $mbody = shift // return undef;
48              
49             # http://tools.ietf.org/html/rfc5965
50             # http://en.wikipedia.org/wiki/Feedback_loop_(email)
51             # http://en.wikipedia.org/wiki/Abuse_Reporting_Format
52             #
53             # Netease DMARC uses: This is a spf/dkim authentication-failure report for an email message received from IP
54             # OpenDMARC 1.3.0 uses: This is an authentication failure report for an email message received from IP
55             # Abusix ARF uses: this is an autogenerated email abuse complaint regarding your network.
56 101         204 state $indicators = Sisimai::Lhost->INDICATORS;
57 101         227 state $reportfrom = "Content-Type: message/feedback-report";
58 101         186 state $boundaries = [
59             "Content-Type: message/rfc822",
60             "Content-Type: text/rfc822-headers",
61             "Content-Type: text/rfc822-header", # ??
62             ];
63 101         166 state $arfpreface = [
64             ["this is a", "abuse report"],
65             ["this is a", "authentication", "failure report"],
66             ["this is a", " report for"],
67             ["this is an authentication", "failure report"],
68             ["this is an autogenerated email abuse complaint"],
69             ["this is an email abuse report"],
70             ];
71              
72 101         611 my $dscontents = [Sisimai::Lhost->DELIVERYSTATUS]; my $v = $dscontents->[-1];
  101         252  
73 101         173 my $reportpart = 0;
74 101         476 my $emailparts = Sisimai::RFC5322->part($mbody, $boundaries);
75 101         248 my $readcursor = 0; # Points the current cursor position
76 101         289 my $recipients = 0; # The number of "Final-Recipient" header
77 101         198 my $timestamp0 = ""; # The value of "Arrival-Date" or "Received-Date"
78 101         246 my $remotehost = ""; # The value of "Source-IP" field
79 101         156 my $reportedby = ""; # The value of "Reporting-MTA" field
80 101         198 my $anotherone = ""; # Other fields(append to Diagnosis)
81              
82             # 3.1. Required Fields
83             #
84             # The following report header fields MUST appear exactly once:
85             #
86             # o "Feedback-Type" contains the type of feedback report (as defined
87             # in the corresponding IANA registry and later in this memo). This
88             # is intended to let report parsers distinguish among different
89             # types of reports.
90             #
91             # o "User-Agent" indicates the name and version of the software
92             # program that generated the report. The format of this field MUST
93             # follow section 14.43 of [HTTP]. This field is for documentation
94             # only; there is no registry of user agent names or versions, and
95             # report receivers SHOULD NOT expect user agent names to belong to a
96             # known set.
97             #
98             # o "Version" indicates the version of specification that the report
99             # generator is using to generate the report. The version number in
100             # this specification is set to "1".
101             #
102 101         962 for my $e ( split("\n", $emailparts->[0]) ) {
103             # Read error messages and delivery status lines from the head of the email to the
104             # previous line of the beginning of the original message.
105 1721 100       3221 unless( $readcursor ) {
106             # Beginning of the bounce message or message/delivery-status part
107 761         1463 my $r = lc $e;
108 761         1206 for my $f ( @$arfpreface ) {
109             # Hello,
110             # this is an autogenerated email abuse complaint regarding your network.
111 4276 100       11936 next unless Sisimai::String->aligned(\$r, $f);
112 66         228 $readcursor |= $indicators->{'deliverystatus'};
113 66         273 $v->{"diagnosis"} .= " ".$e;
114 66         120 last;
115             }
116 761         2810 next;
117             }
118 960 100 66     3272 next if ($readcursor & $indicators->{'deliverystatus'}) == 0 || $e eq "";
119 810 100       1473 if( $e eq $reportfrom ) { $reportpart = 1; next }
  66         103  
  66         142  
120              
121 744 100       1206 if( $reportpart ) {
122             # Content-Type: message/feedback-report
123             # MIME-Version: 1.0
124             #
125             # Feedback-Type: abuse
126             # User-Agent: SomeGenerator/1.0
127             # Version: 0.1
128             # Original-Mail-From:
129             # Original-Rcpt-To:
130             # Received-Date: Thu, 29 Apr 2009 00:00:00 JST
131             # Source-IP: 192.0.2.1
132 573 100 100     6206 if( index($e, "Original-Rcpt-To: ") == 0 || index($e, "Removal-Recipient: ") == 0 ) {
    100 100        
    100 100        
    100 100        
    50          
    100          
133             # Original-Rcpt-To header field is optional and may appear any number of times as appropriate:
134             # Original-Rcpt-To:
135             # Removal-Recipient: user@example.com
136 70 50       572 my $cv = Sisimai::Address->s3s4(substr($e, index($e, " ") + 1,)); next unless Sisimai::Address->is_emailaddress($cv);
  70         337  
137 70 50 33     203 my $cw = scalar @$dscontents; next if $cw > 0 && $cv eq $dscontents->[$cw - 1]->{"recipient"};
  70         396  
138              
139 70 100       212 if( $v->{"recipient"} ) {
140             # There are multiple recipient addresses in the message body.
141 35         129 push @$dscontents, Sisimai::Lhost->DELIVERYSTATUS;
142 35         73 $v = $dscontents->[-1];
143             }
144 70         342 $v->{"recipient"} = Sisimai::Address->s3s4(substr($e, index($e, " ") +1,));
145 70         209 $recipients++;
146              
147             } elsif( index($e, "Feedback-Type: ") == 0 ) {
148             # The header field MUST appear exactly once.
149             # Feedback-Type: abuse
150 66         305 $v->{"feedbacktype"} = substr($e, index($e, " ") + 1,);
151              
152             } elsif( index($e, "Authentication-Results: ") == 0 || index($e, "User-Agent: ") == 0 || index($e, "Original-Mail-From: ") == 0 ) {
153             # "Authentication-Results" indicates the result of one or more authentication checks
154             # run by the report generator.
155             # - Authentication-Results: mail.example.com;
156             # - spf=fail smtp.mail=somespammer@example.com
157             #
158             # The header field MUST appear exactly once.
159             # - User-Agent: SomeGenerator/1.0
160             #
161             # The header is optional and MUST NOT appear more than once.
162             # - Original-Mail-From:
163 141         339 $anotherone .= $e.", ";
164              
165             } elsif( index($e, "Received-Date: ") == 0 || index($e, "Arrival-Date: ") == 0 ) {
166             # Arrival-Date header is optional and MUST NOT appear more than once.
167             # Received-Date: Thu, 29 Apr 2010 00:00:00 JST
168             # Arrival-Date: Thu, 29 Apr 2010 00:00:00 +0000
169 51         151 $timestamp0 = substr($e, index($e, " ") + 1,);
170              
171             } elsif( index($e, "Reporting-MTA: ") == 0 ) {
172             # The header is optional and MUST NOT appear more than once.
173             # Reporting-MTA: dns; mx.example.jp
174 0 0       0 my $cv = Sisimai::RFC1894->field($e); next if scalar(@$cv) == 0;
  0         0  
175 0         0 $reportedby = $cv->[2];
176              
177             } elsif( index($e, "Source-IP: ") == 0 ) {
178             # The header is optional and MUST NOT appear more than once.
179             # Source-IP: 192.0.2.45
180 46         171 $remotehost = substr($e, index($e, ' ') + 1,);
181             }
182             } else {
183             # Messages before "Content-Type: message/feedback-report" part
184 171         401 $v->{"diagnosis"} .= " ".$e;
185             }
186             }
187              
188 101         11147 while( $recipients == 0 ) {
189             # There is no recipient address in the message
190 66 100       247 if( exists $mhead->{"x-apple-unsubscribe"} ) {
191             # X-Apple-Unsubscribe: true
192 5 50 33     46 last if $mhead->{"x-apple-unsubscribe"} ne "true" || index($mhead->{"from"}, "@") < 1;
193 5         17 $dscontents->[0]->{"recipient"} = $mhead->{"from"};
194 5         25 $dscontents->[0]->{"diagnosis"} = Sisimai::String->sweep($emailparts->[0]);
195 5         28 $dscontents->[0]->{"feedbacktype"} = "opt-out";
196              
197             # Addpend To: field as a pseudo header
198 5 50       31 $emailparts->[1] = sprintf("To: <%s>\n", $mhead->{"from"}) if $emailparts->[1] eq "";
199              
200             } else {
201             # Pick it from the original message part
202 61 100       189 my $p1 = index($emailparts->[1], "\nTo:"); last if $p1 < 0;
  61         171  
203 31 50       79 my $p2 = index($emailparts->[1], "\n", $p1 + 4); last if $p2 < 0;
  31         81  
204 31         381 my $cv = Sisimai::Address->s3s4(substr($emailparts->[1], $p1 + 4, $p2 - $p1));
205              
206             # There is no valid email address in the To: header of the original message such as
207             # To:
208 31 100       207 $cv = Sisimai::Address->undisclosed("r") unless Sisimai::Address->is_emailaddress($cv);
209 31         148 $dscontents->[0]->{"recipient"} = $cv;
210             }
211 36         150 $recipients++;
212             }
213 101 100       698 return undef if $recipients == 0;
214              
215 71 100       493 $anotherone = ": ".Sisimai::String->sweep($anotherone) if $anotherone ne "";
216 71 100       355 substr($anotherone, -1, 1, "") if substr($anotherone, -1, 1) eq ",";
217              
218 71         163 my $j = -1; for my $e ( @$dscontents ) {
  71         168  
219             # Tidy up the error message in e.Diagnosis, Try to detect the bounce reason.
220 106         204 $j++;
221 106         503 $e->{"diagnosis"} = Sisimai::String->sweep($e->{"diagnosis"}.$anotherone);
222 106         253 $e->{"reason"} = "feedback";
223 106         251 $e->{"rhost"} = $remotehost;
224 106         224 $e->{"lhost"} = $reportedby;
225 106         218 $e->{"date"} = $timestamp0;
226              
227             # Copy some values from the previous element when the report have 2 or more email address
228 106 100 66     426 next if $j == 0 || scalar(@$dscontents) == 1;
229 35         86 $e->{"diagnosis"} = $dscontents->[$j - 1]->{"diagnosis"};
230 35         89 $e->{"feedbacktype"} = $dscontents->[$j - 1]->{"feedbacktype"};
231             }
232 71         595 return {"ds" => $dscontents, "rfc822" => $emailparts->[1]};
233             }
234              
235             1;
236             __END__