File Coverage

lib/Sisimai/RFC2045.pm
Criterion Covered Total %
statement 184 184 100.0
branch 110 122 90.1
condition 55 64 85.9
subroutine 16 16 100.0
pod 9 9 100.0
total 374 395 94.6


line stmt bran cond sub pod time code
1             package Sisimai::RFC2045;
2 90     90   86916 use v5.26;
  90         244  
3 90     90   344 use strict;
  90         109  
  90         1596  
4 90     90   279 use warnings;
  90         500  
  90         3776  
5 90     90   925 use Encode;
  90         14687  
  90         7757  
6 90     90   36024 use MIME::Base64 ();
  90         56017  
  90         2327  
7 90     90   34240 use MIME::QuotedPrint ();
  90         21267  
  90         1868  
8 90     90   738 use Sisimai::String;
  90         101  
  90         194636  
9              
10             sub is_encoded {
11             # Check that the argument is MIME-Encoded string or not
12             # @param [String] argv0 String to be checked
13             # @return [Boolean] 0: Not MIME encoded string
14             # 1: MIME encoded string
15 10477     10477 1 266932 my $class = shift;
16 10477   100     13929 my $argv0 = shift || return 0;
17 10476         10771 my $text1 = $$argv0; $text1 =~ y/"//d;
  10476         10916  
18 10476         16529 my @piece = ($text1);
19 10476         9404 my $mime1 = 0;
20              
21             # Multiple MIME-Encoded strings in a line
22 10476 100       22777 @piece = split(' ', $text1) if rindex($text1, ' ') > -1;
23 10476         18208 while( my $e = shift @piece ) {
24             # Check all the string in the array
25 24518 100       50423 next unless $e =~ /[ \t]*=[?][-_0-9A-Za-z]+[?][BbQq][?].+[?]=?[ \t]*/;
26 1295         2225 $mime1 = 1;
27             }
28 10476         22777 return $mime1;
29             }
30              
31             sub decodeH {
32             # Decode MIME-Encoded string in an email header
33             # @param [Array] argvs Reference to an array including MIME-Encoded text
34             # @return [String] MIME-Decoded text
35 3292     3292 1 9149 my $class = shift;
36 3292   100     5069 my $argvs = shift || return '';
37              
38 3291         3484 my $ctxcharset = '';
39 3291         3835 my $qbencoding = '';
40 3291         3505 my @textblocks;
41              
42 3291         6613 while( my $e = shift @$argvs ) {
43             # Check and decode each element
44 3510         13201 s/\A[ \t]+//g, s/[ \t]+\z//g, y/"//d for $e;
45              
46 3510 100       6693 if( __PACKAGE__->is_encoded(\$e) ) {
47             # =?utf-8?B?55m954yr44Gr44KD44KT44GT?=
48 496 100       2417 next unless $e =~ m{\A(.*)=[?]([-_0-9A-Za-z]+)[?]([BbQq])[?](.+)[?]=?(.*)\z};
49 476   66     2108 $ctxcharset ||= lc $2;
50 476   66     1649 $qbencoding ||= uc $3;
51              
52 476         943 push @textblocks, $1;
53 476 100       2235 push @textblocks, $qbencoding eq 'B'
54             ? MIME::Base64::decode($4)
55             : MIME::QuotedPrint::decode($4);
56 476         782 $textblocks[-1] =~ y/\r\n//d;
57 476         1587 push @textblocks, $5;
58              
59             } else {
60 3014 100       9177 push @textblocks, scalar @textblocks ? ' '.$e : $e;
61             }
62             }
63 3291 50       6717 return '' unless scalar @textblocks;
64              
65 3291         6654 my $p = join('', @textblocks);
66 3291 100 66     6985 if( $ctxcharset && $qbencoding ) {
67             # utf-8 => utf8
68 412 100       853 $ctxcharset = 'utf8' if $ctxcharset eq 'utf-8';
69              
70 412 100       910 unless( $ctxcharset eq 'utf8' ) {
71             # Characterset is not UTF-8
72 128         231 eval { Encode::from_to($p, $ctxcharset, 'utf8') };
  128         1379  
73 128 50       29812 $p = 'FAILED TO CONVERT THE SUBJECT' if $@;
74             }
75             }
76 3291         8705 return $p;
77             }
78              
79             sub decodeB {
80             # Decode MIME BASE64 Encoded string
81             # @param [String] argv0 MIME Encoded text
82             # @return [String] MIME-Decoded text
83 47     47 1 329 my $class = shift;
84 47   100     205 my $argv0 = shift // return "";
85              
86 46 50       663 my $p = $$argv0 =~ m|([+/=0-9A-Za-z\r\n]+)| ? MIME::Base64::decode($1) : '';
87 46         136 return \$p;
88             }
89              
90             sub decodeQ {
91             # Decode MIME Quoted-Printable Encoded string
92             # @param [String] argv0 Entire MIME-Encoded text
93             # @param [String] argv1 The value of Content-Type: header
94             # @return [String] MIME Decoded text
95 286     286 1 2705 my $class = shift;
96 286   100     670 my $argv0 = shift // return "";
97              
98 285   100     4193 my $p = MIME::QuotedPrint::decode($$argv0) || '';
99 285         685 return \$p;
100             }
101              
102             sub parameter {
103             # Find a value of specified field name from Content-Type: header
104             # @param [String] argv0 The value of Content-Type: header
105             # @param [String] argv1 Lower-cased attribute name of the parameter
106             # @return [String] The value of the parameter
107             # @since v5.0.0
108 12864     12864 1 11809 my $class = shift;
109 12864   100     20720 my $argv0 = shift || return "";
110 10458   100     18288 my $argv1 = shift || '';
111              
112 10458 100       15518 my $parameterq = length $argv1 > 0 ? $argv1.'=' : '';
113 10458 100       13452 my $paramindex = length $argv1 > 0 ? index($argv0, $parameterq) : 0;
114 10458 100       14360 return '' if $paramindex == -1;
115              
116             # Find the value of the parameter name specified in $argv1
117 10308         22733 my $foundtoken = [split(';', substr($argv0, $paramindex + length($parameterq)), 2)]->[0];
118 10308 100       18730 $foundtoken = lc $foundtoken unless $argv1 eq 'boundary';
119 10308         12538 $foundtoken =~ y/"'//d;
120 10308         18879 return $foundtoken;
121             }
122              
123             sub boundary {
124             # Get a boundary string
125             # @param [String] argv0 The value of Content-Type header
126             # @param [Integer] start -1: boundary string itself
127             # 0: Start of boundary
128             # 1: End of boundary
129             # @return [String] Boundary string
130 4355     4355 1 5418 my $class = shift;
131 4355   100     6532 my $argv0 = shift || return "";
132 4314   100     6276 my $start = shift // -1;
133 4314   100     8285 my $btext = __PACKAGE__->parameter($argv0, 'boundary') || return '';
134              
135             # Content-Type: multipart/mixed; boundary=Apple-Mail-5--931376066
136             # Content-Type: multipart/report; report-type=delivery-status;
137             # boundary="n6H9lKZh014511.1247824040/mx.example.jp"
138 4166 100       9094 $btext = '--'.$btext if $start > -1;
139 4166 100       5832 $btext = $btext.'--' if $start > 0;
140 4166         7805 return $btext;
141             }
142              
143             sub haircut {
144             # Cut header fields except Content-Type, Content-Transfer-Encoding from multipart/* block
145             # @param [String] block multipart/* block text
146             # @param [Boolean] heads 1 = Returns only Content-(Type|Transfer-Encoding) headers
147             # @return [Array] Two headers and body part of multipart/* block
148             # @since v5.0.0
149 9011     9011 1 14930 my $class = shift;
150 9011   100     10736 my $block = shift // return undef;
151 9010   100     15874 my $heads = shift // undef;
152              
153 9010         22809 my($upperchunk, $lowerchunk) = split("\n\n", $$block, 2);
154 9010 50       11845 return ['', ''] unless $upperchunk;
155 9010 100       12797 return ['', ''] unless $lowerchunk;
156 7450 100       11691 return ['', ''] unless index($upperchunk, 'Content-Type:') > -1;
157              
158 6604         8187 my $headerpart = ['', '']; # ["text/plain; charset=iso-2022-jp; ...", "quoted-printable"]
159 6604         6297 my $multipart1 = []; # [@$headerpart, "body"]
160              
161 6604         10994 for my $e ( split("\n", $upperchunk) ) {
162             # Remove fields except Content-Type:, and Content-Transfer-Encoding: in each part
163             # of multipart/* block such as the following:
164             # Date: Thu, 29 Apr 2018 22:22:22 +0900
165             # MIME-Version: 1.0
166             # Message-ID: ...
167             # Content-Transfer-Encoding: quoted-printable
168             # Content-Type: text/plain; charset=us-ascii
169 12617 100 100     28567 if( index($e, 'Content-Type:') == 0 ) {
    100          
    100          
170             # Content-Type: ***
171 6604         11923 my $v = [split(' ', $e, 2)]->[-1];
172 6604 100       16620 $headerpart->[0] = index($v, 'boundary=') > -1 ? $v : lc $v;
173              
174             } elsif( index($e, 'Content-Transfer-Encoding:') == 0 ) {
175             # Content-Transfer-Encoding: ***
176 1463         4441 $headerpart->[1] = lc [split(' ', $e, 2)]->[-1];
177              
178             } elsif( index($e, 'boundary=') > -1 || index($e, 'charset=') > -1 ) {
179             # "Content-Type" field has boundary="..." or charset="utf-8"
180 273 50       804 next unless length $headerpart->[0];
181 273         570 $headerpart->[0] .= " ".$e;
182 273         1641 $headerpart->[0] =~ s/\s\s+/ /g;
183             }
184             }
185 6604 100       9705 return $headerpart if $heads;
186              
187 6603         7820 my $mediatypev = lc $headerpart->[0];
188 6603         6608 my $ctencoding = $headerpart->[1];
189 6603         9909 push @$multipart1, @$headerpart, '';
190              
191 6603         6129 UPPER: while(1) {
192             # Make a body part at the 2nd element of $multipart1
193 6603         8013 $multipart1->[2] = sprintf("Content-Type: %s\n", $headerpart->[0]);
194              
195             # Do not append Content-Transfer-Encoding: header when the part is the original message:
196             # Content-Type is message/rfc822 or text/rfc822-headers, or message/delivery-status or
197             # message/feedback-report
198 6603 100       9857 last if index($mediatypev, '/rfc822') > -1;
199 4411 100       7424 last if index($mediatypev, '/delivery-status') > -1;
200 2410 100       3988 last if index($mediatypev, '/feedback-report') > -1;
201 2344 100       4480 last if length $ctencoding == 0;
202              
203 795         1470 $multipart1->[2] .= sprintf("Content-Transfer-Encoding: %s\n", $ctencoding);
204 795         1042 last;
205             }
206              
207             # Append LF before the lower chunk into the 2nd element of $multipart1
208 6603 100 66     18040 $multipart1->[2] .= "\n" if $lowerchunk ne "" && substr($lowerchunk, 0, 1) ne "\n";
209 6603         12952 $multipart1->[2] .= $lowerchunk;
210 6603         10213 return $multipart1;
211             }
212              
213             sub levelout {
214             # Split argv1: multipart/* blocks by a boundary string in argv0
215             # @param [String] argv0 The value of Content-Type header
216             # @param [String] argv1 A pointer to multipart/* message blocks
217             # @return [Array] List of each part of multipart/*
218             # @since v5.0.0
219 2876     2876 1 7422 my $class = shift;
220 2876 50 100     4637 my $argv0 = shift || return []; return [] unless length $argv0;
  2875         5108  
221 2875 50 100     4730 my $argv1 = shift || return []; return [] unless length $$argv1;
  2874         4895  
222              
223 2874   100     7668 my $boundary01 = __PACKAGE__->boundary($argv0, 0) || return [];
224 2859         64835 my $multiparts = [split(/\Q$boundary01\E\n/, $$argv1)];
225 2859         5698 my $partstable = [];
226              
227             # Remove empty or useless preamble and epilogue of multipart/* block
228 2859 100       6542 shift @$multiparts if length $multiparts->[0] < 8;
229 2859 100       5360 return [] if scalar @$multiparts == 0;
230 2857 100       4955 pop @$multiparts if length $multiparts->[-1] < 8;
231              
232 2857         7869 while( my $e = shift @$multiparts ) {
233             # Check each part and breaks up internal multipart/* block
234 9008         14413 my $f = __PACKAGE__->haircut(\$e);
235 9008 100       13714 if( index($f->[0], 'multipart/') > -1 ) {
236             # There is nested multipart/* block
237 462   50     1097 my $boundary02 = __PACKAGE__->boundary($f->[0], -1) || next;
238 462         2093 my $bodyinside = [split(/\n\n/, $f->[-1], 2)]->[-1];
239 462 50 33     1865 next if length $bodyinside < 9 || index($bodyinside, $boundary02) < 0;
240              
241 462         1224 my $v = __PACKAGE__->levelout($f->[0], \$bodyinside);
242 462 50       1845 push @$partstable, @$v if scalar @$v;
243              
244             } else {
245             # The part is not a multipart/* block
246 8546 100       13337 my $b = length $f->[-1] ? $f->[-1] : $e;
247 8546 100       24952 my $v = [$f->[0], $f->[1], length $f->[0] ? [split("\n\n", $b, 2)]->[-1] : $b];
248 8546         24629 push @$partstable, $v;
249             }
250             }
251 2857 50       4466 return [] unless scalar @$partstable;
252              
253             # Remove $boundary01.'--' and strings from the boundary to the end of the body part.
254 2857         3860 chomp $boundary01;
255 2857         4317 my $b = $partstable->[-1]->[2];
256 2857         5618 my $p = index($b, $boundary01.'--');
257 2857 100       6804 substr($partstable->[-1]->[2], $p, length $b, "") if $p > -1;
258              
259 2857         5003 return $partstable;
260             }
261              
262             sub makeflat {
263             # Make flat multipart/* part blocks and decode
264             # @param [String] argv0 The value of Content-Type header
265             # @param [String] argv1 A pointer to multipart/* message blocks
266             # @return [String] Message body
267 2413     2413 1 7924 my $class = shift;
268 2413   100     4500 my $argv0 = shift // return undef;
269 2412   100     3861 my $argv1 = shift // return undef;
270 2411 50 33     9783 return undef if index(lc $argv0, 'multipart/') < 0 || index(lc $argv0, 'boundary=') < 0;
271              
272 2411         10966 my $iso2022set = qr/charset=["']?(iso-2022-[-a-z0-9]+)['"]?\b/;
273 2411         7086 my $multiparts = __PACKAGE__->levelout($argv0, $argv1);
274 2411         3055 my $flattenout = '';
275 2411         4837 my $delimiters = ["/delivery-status", "/rfc822", "/feedback-report", "/partial"];
276              
277 2411         5794 while( my $e = shift @$multiparts ) {
278             # Pick only the following parts Sisimai::Lhost will use, and decode each part
279             # - text/plain, text/rfc822-headers
280             # - message/delivery-status, message/rfc822, message/partial, message/feedback-report
281 8544         7569 my $istexthtml = 0;
282 8544   100     12597 my $mediatypev = __PACKAGE__->parameter($e->[0]) || 'text/plain';
283 8544 100 100     19241 next if index($mediatypev, 'text/') != 0 && index($mediatypev, 'message/') != 0;
284              
285 8510 100       11299 if( $mediatypev eq 'text/html' ) {
286             # Skip text/html part when the value of Content-Type: header in an internal part of
287             # multipart/* includes multipart/alternative;
288 12 50       41 next if index($argv0, 'multipart/alternative') > -1;
289 12         16 $istexthtml = 1;
290             }
291 8510   100     14751 my $ctencoding = $e->[1] || '';
292 8510         7730 my $bodyinside = $e->[2];
293 8510         7829 my $bodystring = '';
294              
295 8510 100       9755 if( length $ctencoding ) {
296             # Check the value of Content-Transfer-Encoding: header
297 1427 100       4013 if( $ctencoding eq 'base64' ) {
    100          
    100          
298             # Content-Transfer-Encoding: base64
299 34         102 $bodystring = __PACKAGE__->decodeB(\$bodyinside)->$*;
300 34         71 my $dontset = 0; while( my $first10 = substr($bodystring, 0, 10) ) {
  34         128  
301             # Don't pick the decoded part as an error message when the part is
302             # - BASE64 encoded.
303             # - the value of the charset is not utf-8.
304             # - NOT a plain text.
305 34 100       244 last if Sisimai::String->aligned(\$e->[0], ['charset', '=', 'utf-8']);
306 10 100       52 last unless $first10 =~ /[\x00-\x08\x0E-\x1F\x7F-]/;
307 5         8 $dontset = 1; last;
  5         10  
308             }
309 34 100       90 next if $dontset;
310              
311             } elsif( $ctencoding eq 'quoted-printable') {
312             # Content-Transfer-Encoding: quoted-printable
313 222         688 $bodystring = __PACKAGE__->decodeQ(\$bodyinside)->$*;
314              
315             } elsif( $ctencoding eq '7bit' ) {
316             # Content-Transfer-Encoding: 7bit
317 882 100       3961 if( lc $e->[0] =~ $iso2022set ) {
318             # Content-Type: text/plain; charset=ISO-2022-JP
319 67         525 $bodystring = Sisimai::String->to_utf8(\$bodyinside, $1)->$*;
320              
321             } else {
322             # No "charset" parameter in the value of Content-Type: header
323 815         1213 $bodystring = $bodyinside;
324             }
325             } else {
326             # Content-Transfer-Encoding: 8bit, binary, and so on
327 289         392 $bodystring = $bodyinside;
328             }
329              
330             # Try to delete HTML tags inside of text/html part whenever possible
331 1422 100       2792 $bodystring = Sisimai::String->to_plain(\$bodystring)->$* if $istexthtml;
332 1422 100       2414 next unless $bodystring;
333 1411 100       2837 $bodystring =~ s|\r\n|\n|g if index($bodystring, "\r\n") > -1; # Convert CRLF to LF
334              
335             } else {
336             # There is no Content-Transfer-Encoding header in the part
337 7083         7539 $bodystring .= $bodyinside;
338             }
339            
340 8494 100       9306 if( grep { index($mediatypev, $_) > 0 } @$delimiters ) {
  33976         41911  
341             # Add Content-Type: header of each part (will be used as a delimiter at Sisimai::Lhost) into
342             # the body inside when the value of Content-Type: is message/delivery-status, message/rfc822,
343             # or text/rfc822-headers
344 4264         6284 $bodystring = sprintf("Content-Type: %s\n%s", $mediatypev, $bodystring);
345             }
346              
347             # Append "\n" when the last character of $bodystring is not LF
348 8494 100       12438 $bodystring .= "\n\n" unless substr($bodystring, -2, 2) eq "\n\n";
349 8494         28928 $flattenout .= $bodystring;
350             }
351 2411         10033 return \$flattenout;
352             }
353              
354             1;
355             __END__