File Coverage

lib/Sisimai/Address.pm
Criterion Covered Total %
statement 226 242 93.3
branch 150 186 80.6
condition 58 88 65.9
subroutine 15 15 100.0
pod 7 10 70.0
total 456 541 84.2


line stmt bran cond sub pod time code
1             package Sisimai::Address;
2 93     93   84275 use v5.26;
  93         230  
3 93     93   332 use strict;
  93         104  
  93         1433  
4 93     93   259 use warnings;
  93         675  
  93         5933  
5             use Class::Accessor::Lite (
6 93         925 'new' => 0,
7             'ro' => [
8             'address', # [String] Email address
9             'user', # [String] local part of the email address
10             'host', # [String] domain part of the email address
11             'verp', # [String] VERP
12             'alias', # [String] alias of the email address
13             ],
14             'rw' => [
15             'name', # [String] Display name
16             'comment', # [String] (Comment)
17             ]
18 93     93   5287 );
  93         7840  
19 93     93   45930 use Sisimai::RFC1123;
  93         200  
  93         248824  
20              
21             sub undisclosed {
22             # Return pseudo recipient or sender address
23             # @param [String] argv0 Address type: true = recipient, false = sender
24             # @return [String] Pseudo recipient address or sender address or an empty value when
25             # the $argv0 is neither 'r' nor 's'
26 14     14 0 1037 my $class = shift;
27 14   100     44 my $argv0 = shift // 0;
28 14 100       197 my $local = $argv0 ? 'recipient' : 'sender';
29 14         60 return sprintf("undisclosed-%s-in-headers%slibsisimai.org.invalid", $local, '@');
30             }
31              
32             sub is_emailaddress {
33             # Check that the argument is an email address or not
34             # @param [String] email Email address string
35             # @return [Integer] 0: Not email address
36             # 1: Email address
37 10761     10761 1 46211 my $class = shift;
38 10761 100 100     15882 my $email = shift // return 0; return 0 if length($email) < 5;
  10640         14092  
39 10637         25561 $email =~ s/\A[\s\t]+//; $email =~ s/[\s\t]+\z//;
  10637         17108  
40              
41 10637         9866 my $width = length($email);
42 10637         12923 my $lasta = rindex($email, '@');
43              
44 10637 100       13530 return 0 if $width > 254; # The maximum length of an email address is 254
45 10636 100 66     26287 return 0 if $lasta < 1 || $lasta > 64; # The maximum length of a local part is 64
46 10618 50       14957 return 0 if $width - $lasta > 253; # The maximum length of a domain part is 252
47              
48 10618 100       17444 my $quote = __PACKAGE__->is_quotedaddress($email); unless( $quote ) {
  10618         14635  
49             # The email address is not a quoted address
50 10600 100       16912 return 0 if index($email, '@') != $lasta; # There are 2 or more '@'.
51 10580 50       14932 return 0 if index($email, ' ') > 0; # There is 1 or more ' '.
52             }
53 10598         23029 my $ipv46 = Sisimai::RFC1123->is_domainliteral($email);
54              
55 10598         10451 my $j = -1; for my $e ( split('', $email) ) {
  10598         51950  
56             # 31 < The ASCII code of each character < 127
57 255255         202184 my $p = ord($e); $j++;
  255255         176446  
58              
59 255255 100       220452 if( $j < $lasta ) {
60             # A local part of the email address: string before the last "@"
61 106771 50 33     184074 return 0 if $p < 32 || $p > 126; # Before ' ', After '~'
62 106771 100       109176 next if $j == 0; # The character is the first character
63              
64 96173 100       88145 if( $quote ) {
65             # The email address has quoted local part like "neko@cat"@example.org
66 264         290 my $jp = substr($email, $j - 1, 1);
67 264 50       263 if( ord($jp) == 92 ) { # 92 = '\'
68             # When the previous character IS '\', only the followings are allowed: '\', '"'
69 0 0 0     0 return 0 if $p != 92 && $p != 34;
70              
71             } else {
72             # When the previous character IS NOT '\'
73 264 50 66     417 return 0 if $p == 34 && $j + 1 < $lasta;
74             }
75             } else {
76             # The local part is not quoted
77             # ".." is not allowed in a local part when the local part is not quoted by "" but
78             # Non-RFC compliant email addresses still persist in the world.
79             #
80             # The following characters are not allowed in a local part without "..."@example.jp
81 95909 50 33     370662 return 0 if $e eq ',' || $e eq '@' || $e eq ':' || $e eq ';' || $e eq '(';
      33        
      33        
      33        
82 95909 50 33     402982 return 0 if $e eq ')' || $e eq '<' || $e eq '>' || $e eq '[' || $e eq ']';
      33        
      33        
      33        
83             }
84             } else {
85             # A domain part of the email address: string after the last "@"
86 148484 100       143712 next if $p == 64; # '@'
87 137886 50       143214 return 0 if $p < 45; # Before '-'
88 137886 50       130119 return 0 if $p == 47; # Equals '/'
89 137886 50       136489 return 0 if $p == 92; # Equals '\'
90 137886 50       133834 return 0 if $p > 122; # After 'z'
91              
92 137886 100       120573 if( $ipv46 == 0 ) {
93             # Such as "example.jp", "neko.example.org"
94 137816 100 100     231945 return 0 if $p > 57 && $p < 64; # ':' to '?'
95 137801 50 66     243369 return 0 if $p > 90 && $p < 97; # '[' to '`'
96              
97             } else {
98             # Such as "[IPv4:192.0.2.25]"
99 70 50 66     106 return 0 if $p > 59 && $p < 64; # ';' to '?'
100 70 50 66     101 return 0 if $p > 93 && $p < 97; # '^' to '`'
101             }
102             }
103             }
104 10583 100       31935 return 1 if $ipv46;
105 10579         34216 return Sisimai::RFC1123->is_internethost(substr($email, $lasta + 1,));
106             }
107              
108             sub is_quotedaddress {
109             # Checks that the local part of the argument is quoted address or not.
110             # @param [String] argv0 Email address
111             # @return [Integer] 0: is not a quoted address
112             # 1: is a quoted address
113 10618     10618 1 10190 my $class = shift;
114 10618         9874 my $argv0 = shift;
115 10618 100 66     19988 return 1 if index($argv0, '"') == 0 && index($argv0, '"@') > 1;
116 10600         14148 return 0;
117             }
118              
119             sub is_mailerdaemon {
120             # Check that the argument is mailer-daemon or not
121             # @param [String] argv0 Email address
122             # @return [Integer] 0: Not mailer-daemon
123             # 1: Mailer-daemon
124 1601     1601 1 16337223 my $class = shift;
125 1601   100     6066 my $argv0 = shift // return 0;
126 1600         4129 my $email = lc $argv0;
127              
128 1600         2735 state $postmaster = [
129             'mailer-daemon@', '', '(mailer-daemon)', ' mailer-daemon ',
130             'postmaster@', '', '(postmaster)'
131             ];
132 1600 100       3941 return 1 if grep { index($email, $_) > -1 } @$postmaster;
  11200         20190  
133 1521 100 100     8420 return 1 if $email eq 'mailer-daemon' || $email eq 'postmaster';
134 1484         3488 return 0;
135             }
136              
137             sub new {
138             # Constructor of Sisimai::Address
139             # @param [Hash] argvs Email address, name, and other elements
140             # @return [Sisimai::Address] Object or undef when the email address was not valid
141             # @since v4.22.1
142 7424     7424 0 218736 my $class = shift;
143 7424 100 50     10417 my $argvs = shift // return undef; return undef if ref $argvs ne 'HASH';
  7424         12660  
144 7344         39436 my $thing = {
145             'address' => '', # Entire email address
146             'user' => '', # Local part
147             'host' => '', # Domain part
148             'verp' => '', # VERP
149             'alias' => '', # Alias
150             'comment' => '', # Comment
151             'name' => '', # Display name
152             };
153              
154 7344 50       15086 return undef unless exists $argvs->{'address'};
155 7344 100       11283 return undef unless $argvs->{'address'};
156              
157 7302         9500 my $heads = ['<'];
158 7302         11469 my $tails = ['>', ',', '.', ';'];
159 7302         9950 my $point = rindex($argvs->{'address'}, '@');
160              
161 7302 100       9896 if( $point > 0 ) {
162             # Get the local part and the domain part from the email address
163 7280         10709 my $lpart = substr($argvs->{'address'}, 0, $point);
164 7280         10371 my $dpart = substr($argvs->{'address'}, $point+1,);
165 7280   100     27518 my $email = __PACKAGE__->expand_verp($argvs->{'address'}) || '';
166 7280         8856 my $alias = 0;
167              
168 7280 100       9619 unless( $email ) {
169             # Is not VERP address, try to expand the address as an alias
170 7279   100     12196 $email = __PACKAGE__->expand_alias($argvs->{'address'}) || '';
171 7279 100       11288 $alias = 1 if $email;
172             }
173              
174 7280 100       11598 if( index($email, '@') > 0 ) {
175             # The address is a VERP or an alias
176 12 100       33 if( $alias ) {
177             # The address is an alias: neko+nyaan@example.jp
178 11         26 $thing->{'alias'} = $argvs->{'address'};
179              
180             } else {
181             # The address is a VERP: b+neko=example.jp@example.org
182 1         2 $thing->{'verp'} = $argvs->{'address'};
183             }
184             }
185              
186 7280         10837 do { while( substr($lpart, 0, 1) eq $_ ) { substr($lpart, 0, 1, '') }} for @$heads;
  7280         15555  
  0         0  
187 7280         8536 do { while( substr($dpart, -1, 1) eq $_ ) { substr($dpart, -1, 1, '') }} for @$tails;
  29120         38804  
  15         37  
188 7280         11829 $thing->{'user'} = $lpart;
189 7280         8250 $thing->{'host'} = $dpart;
190 7280         13417 $thing->{'address'} = $lpart.'@'.$dpart;
191              
192             } else {
193             # The argument does not include "@"
194 22 100       67 return undef unless __PACKAGE__->is_mailerdaemon($argvs->{'address'});
195 18 50       56 return undef if rindex($argvs->{'address'}, ' ') > -1;
196              
197             # The argument does not include " "
198 18         39 $thing->{'user'} = $argvs->{'address'};
199 18         33 $thing->{'address'} = $argvs->{'address'};
200             }
201              
202 7298   100     17349 $thing->{'name'} = $argvs->{'name'} || '';
203 7298   100     18307 $thing->{'comment'} = $argvs->{'comment'} || '';
204 7298         26813 return bless($thing, __PACKAGE__);
205             }
206              
207             sub find {
208             # Email address parser with a name and a comment
209             # @param [String] argv1 String including email address
210             # @param [Boolean] addrs 0 = Returns list including all the elements
211             # 1 = Returns list including email addresses only
212             # @return [Array] Email address list or undef when there is no email address in the argument
213             # @since v4.22.0
214 9681     9681 1 299682 my $class = shift;
215 9681   100     16436 my $argv1 = shift // return undef; y/\r//d, y/\n//d for $argv1; # Remove CR, NL
  9680         19999  
216 9680   100     15759 my $addrs = shift // undef;
217              
218 9680         42637 require Sisimai::String;
219 9680         10236 state $indicators = {
220             'email-address' => (1 << 0), #
221             'quoted-string' => (1 << 1), # "Neko, Nyaan"
222             'comment-block' => (1 << 2), # (neko)
223             };
224 9680         8768 state $delimiters = {'<' => 1, '>' => 1, '(' => 1, ')' => 1, '"' => 1, ',' => 1};
225 9680         7844 state $validemail = qr{(?>
226             (?:([^\s]+|["].+?["])) # local part
227             [@]
228             (?:([^@\s]+|[0-9A-Za-z:\.]+)) # domain part
229             )
230             }x;
231              
232 9680         30342 my $emailtable = {'address' => '', 'name' => '', 'comment' => ''};
233 9680         9991 my $addrtables = [];
234 9680         8900 my @readbuffer;
235 9680         7954 my $readcursor = 0;
236 9680         8357 my $v = $emailtable; # temporary buffer
237 9680         9912 my $p = ''; # current position
238              
239 9680         59009 for my $e ( split('', $argv1) ) {
240             # Check each characters
241 268986 100       256337 if( $delimiters->{ $e } ) {
242             # The character is a delimiter character
243 14742 100       21098 if( $e eq ',' ) {
244             # Separator of email addresses or not
245 640 100 66     3089 if( index($v->{'address'}, '<') == 0 &&
      66        
246             rindex($v->{'address'}, '@') > -1 &&
247             substr($v->{'address'}, -1, 1) eq '>' ) {
248             # An email address has already been picked
249 2 50       7 if( $readcursor & $indicators->{'comment-block'} ) {
    50          
250             # The cursor is in the comment block (Neko, Nyaan)
251 0         0 $v->{'comment'} .= $e;
252              
253             } elsif( $readcursor & $indicators->{'quoted-string'} ) {
254             # "Neko, Nyaan"
255 0         0 $v->{'name'} .= $e;
256              
257             } else {
258             # The cursor is not in neither the quoted-string nor the comment block
259 2         3 $readcursor = 0; # reset cursor position
260 2         2 push @readbuffer, $v;
261 2         8 $v = {'address' => '', 'name' => '', 'comment' => ''};
262 2         4 $p = '';
263             }
264             } else {
265             # "Neko, Nyaan" OR <"neko,nyaan"@example.org>
266 638 100       1542 $p ? ($v->{ $p } .= $e) : ($v->{'name'} .= $e);
267             }
268 640         811 next;
269             } # End of if(',')
270              
271 14102 100       18603 if( $e eq '<' ) {
272             # <: The beginning of an email address or not
273 6270 100       9653 if( $v->{'address'} ) {
274 5 50       13 $p ? ($v->{ $p } .= $e) : ($v->{'name'} .= $e);
275              
276             } else {
277             #
278 6265         8779 $readcursor |= $indicators->{'email-address'};
279 6265         11162 $v->{'address'} .= $e;
280 6265         14209 $p = 'address';
281             }
282 6270         7179 next;
283             } # End of if('<')
284              
285 7832 100       9968 if( $e eq '>' ) {
286             # >: The end of an email address or not
287 5620 100       8900 if( $readcursor & $indicators->{'email-address'} ) {
288             #
289 5594         7879 $readcursor &= ~$indicators->{'email-address'};
290 5594         5966 $v->{'address'} .= $e;
291 5594         5444 $p = '';
292              
293             } else {
294             # a comment block or a display name
295 26 50       60 $p ? ($v->{'comment'} .= $e) : ($v->{'name'} .= $e);
296             }
297 5620         6205 next;
298             } # End of if('>')
299              
300 2212 100       3145 if( $e eq '(' ) {
301             # The beginning of a comment block or not
302 125 100       535 if( $readcursor & $indicators->{'email-address'} ) {
    50          
    50          
303             # <"neko(nyaan)"@example.org> or
304 2 50       5 if( rindex($v->{'address'}, '"') > -1 ) {
305             # Quoted local part: <"neko(nyaan)"@example.org>
306 0         0 $v->{'address'} .= $e;
307              
308             } else {
309             # Comment:
310 2         4 $readcursor |= $indicators->{'comment-block'};
311 2 50       8 $v->{'comment'} .= ' ' if substr($v->{'comment'}, -1, 1) eq ')';
312 2         55 $v->{'comment'} .= $e;
313 2         5 $p = 'comment';
314             }
315             } elsif( $readcursor & $indicators->{'comment-block'} ) {
316             # Comment at the outside of an email address (...(...)
317 0 0       0 $v->{'comment'} .= ' ' if substr($v->{'comment'}, -1, 1) eq ')';
318 0         0 $v->{'comment'} .= $e;
319              
320             } elsif( $readcursor & $indicators->{'quoted-string'} ) {
321             # "Neko, Nyaan(cat)", Deal as a display name
322 0         0 $v->{'name'} .= $e;
323              
324             } else {
325             # The beginning of a comment block
326 123         175 $readcursor |= $indicators->{'comment-block'};
327 123 100       333 $v->{'comment'} .= ' ' if substr($v->{'comment'}, -1, 1) eq ')';
328 123         209 $v->{'comment'} .= $e;
329 123         145 $p = 'comment';
330             }
331 125         163 next;
332             } # End of if('(')
333              
334 2087 100       2824 if( $e eq ')' ) {
335             # The end of a comment block or not
336 131 100       328 if( $readcursor & $indicators->{'email-address'} ) {
    100          
337             # <"neko(nyaan)"@example.org> OR
338 2 50       6 if( rindex($v->{'address'}, '"') > -1 ) {
339             # Quoted string in the local part: <"neko(nyaan)"@example.org>
340 0         0 $v->{'address'} .= $e;
341              
342             } else {
343             # Comment:
344 2         4 $readcursor &= ~$indicators->{'comment-block'};
345 2         3 $v->{'comment'} .= $e;
346 2         3 $p = 'address';
347             }
348             } elsif( $readcursor & $indicators->{'comment-block'} ) {
349             # Comment at the outside of an email address (...(...)
350 123         195 $readcursor &= ~$indicators->{'comment-block'};
351 123         170 $v->{'comment'} .= $e;
352 123         145 $p = '';
353              
354             } else {
355             # Deal as a display name
356 6         26 $readcursor &= ~$indicators->{'comment-block'};
357 6         25 $v->{'name'} .= $e;
358 6         12 $p = '';
359             }
360 131         160 next;
361             } # End of if(')')
362              
363 1956 50       2566 if( $e eq '"' ) {
364             # The beginning or the end of a quoted-string
365 1956 100       2724 if( $p ) {
366             # email-address or comment-block
367 10         21 $v->{ $p } .= $e;
368              
369             } else {
370             # Display name like "Neko, Nyaan"
371 1946         2647 $v->{'name'} .= $e;
372 1946 50       3787 next unless $readcursor & $indicators->{'quoted-string'};
373 0 0       0 next if substr($v->{'name'}, -2, 2) eq qq|\x5c"|; # "Neko, Nyaan \"...
374 0         0 $readcursor &= ~$indicators->{'quoted-string'};
375 0         0 $p = '';
376             }
377 10         14 next;
378             } # End of if('"')
379             } else {
380             # The character is not a delimiter
381 254244 100       261239 $p ? ($v->{ $p } .= $e) : ($v->{'name'} .= $e);
382 254244         206816 next;
383             }
384             }
385              
386 9680 100       29162 if( $v->{'address'} ) {
387             # Push the latest values
388 6263         8182 push @readbuffer, $v;
389              
390             } else {
391             # No email address like in the argument
392 3417 100       31996 if( $v->{'name'} =~ $validemail ) {
    100          
393             # String like an email address will be set to the value of "address"
394 3242         12789 $v->{'address'} = $1.'@'.$2;
395              
396             } elsif( __PACKAGE__->is_mailerdaemon($v->{'name'}) ) {
397             # Allow if the argument is MAILER-DAEMON
398 4         6 $v->{'address'} = $v->{'name'};
399             }
400              
401 3417 100       6650 if( $v->{'address'} ) {
402             # Remove the comment from the address
403 3246 50       13496 if( Sisimai::String->aligned(\$v->{'address'}, ['(', ')']) ) {
404             # (nyaan)nekochan@example.org, nekochan(nyaan)cat@example.org or
405             # nekochan(nyaan)@example.org
406 0         0 my $p1 = index($v->{'address'}, '(');
407 0         0 my $p2 = index($v->{'address'}, ')');
408 0         0 $v->{'address'} = substr($v->{'address'}, 0, $p1).substr($v->{'address'}, $p2 + 1,);
409 0         0 $v->{'comment'} = substr($v->{'address'}, $p1, $p2 - $p1 - 1);
410             }
411 3246         6610 push @readbuffer, $v;
412             }
413             }
414              
415 9680         13327 for my $e ( @readbuffer ) {
416             # The element must not include any character except from 0x20 to 0x7e.
417 9511 50       24998 next if $e->{'address'} =~ /[^\x20-\x7e]/;
418 9511 100       16443 if( index($e->{'address'}, '@') == -1 ) {
419             # Allow if the argument is MAILER-DAEMON
420 29 100       96 next unless __PACKAGE__->is_mailerdaemon($e->{'address'});
421             }
422              
423             # Remove angle brackets, other brackets, and quotations: []<>{}'` except a domain part is
424             # an IP address like neko@[192.0.2.222]
425 9506         48391 s/\A[\[<{('`]//g, s/[.,'`>});]\z//g for $e->{'address'};
426 9506 100       27588 $e->{'address'} =~ s/[^A-Za-z]\z//g unless index($e->{'address'}, '@[') > 1;
427              
428 9506 100       17729 if( index($e->{'address'}, '"@') < 0 ) {
429             # Remove double-quotations
430 9485 100       18698 substr($e->{'address'}, 0, 1, '') if substr($e->{'address'}, 0, 1) eq '"';
431 9485 100       16937 substr($e->{'address'}, -1, 1, '') if substr($e->{'address'}, -1, 1) eq '"';
432             }
433              
434 9506 100       12996 if( $addrs ) {
435             # Almost compatible with parse() method, returns email address only
436 5797         7910 delete $e->{'name'};
437 5797         7120 delete $e->{'comment'};
438              
439             } else {
440             # Remove double-quotations, trailing spaces.
441 3709         4995 for my $f ('name', 'comment') { s/\A[ ]//g, s/[ ]\z//g for $e->{ $f } }
  7418         18442  
442 3709 100       9421 $e->{'comment'} = '' unless $e->{'comment'} =~ /\A[(].+[)]\z/;
443 3709 100       10237 $e->{'name'} =~ y/ //s unless $e->{'name'} =~ /\A["].+["]\z/;
444 3709 100       9635 $e->{'name'} =~ s/\A["]// unless $e->{'name'} =~ /\A["].+["][@]/;
445 3709 100       9427 substr($e->{'name'}, -1, 1, '') if substr($e->{'name'}, -1, 1) eq '"';
446             }
447 9506         15767 push @$addrtables, $e;
448             }
449              
450 9680 100       14221 return undef unless scalar @$addrtables;
451 9504         29040 return $addrtables;
452             }
453              
454             sub s3s4 {
455             # Runs like ruleset 3,4 of sendmail.cf
456             # @param [String] input Text including an email address
457             # @return [String] Email address without comment, brackets
458 5923     5923 1 211543 my $class = shift;
459 5923   100     11742 my $input = shift // return "";
460 5923   100     13238 my $addrs = __PACKAGE__->find($input, 1) || [];
461 5923 100       9112 return $input unless scalar @$addrs;
462 5752         20518 return $addrs->[0]->{'address'};
463             }
464              
465             sub expand_verp {
466             # Expand VERP: Get the original recipient address from VERP
467             # @param [String] email VERP Address
468             # @return [String] Email address
469 7282     7282 1 7361 my $class = shift;
470 7282   100     11068 my $email = shift // return "";
471 7281         16117 my $local = (split('@', $email, 2))[0];
472              
473             # bounce+neko=example.org@example.org => neko@example.org
474 7281 100       26086 return "" unless $local =~ /\A[-_\w]+?[+](\w[-._\w]+\w)[=](\w[-.\w]+\w)\z/;
475 2         8 my $verp0 = $1.'@'.$2;
476 2 50       7 return $verp0 if __PACKAGE__->is_emailaddress($verp0);
477             }
478              
479             sub expand_alias {
480             # Expand alias: remove from '+' to '@'
481             # @param [String] email Email alias string
482             # @return [String] Expanded email address
483 7281     7281 1 7387 my $class = shift;
484 7281 100 100     10443 my $email = shift // return ""; return "" unless __PACKAGE__->is_emailaddress($email);
  7280         13652  
485              
486             # neko+straycat@example.org => neko@example.org
487 7260         17825 my @local = split('@', $email);
488 7260 100       29518 return "" unless $local[0] =~ /\A([-_\w]+?)[+].+\z/;
489 12         71 return $1.'@'.$local[1];
490             }
491              
492             sub TO_JSON {
493             # Instance method for JSON::encode()
494             # @return [String] The value of "address" accessor
495 1385     1385 0 35560 my $self = shift; return $self->address;
  1385         2048  
496             }
497              
498             1;
499             __END__