File Coverage

lib/Sisimai/Address.pm
Criterion Covered Total %
statement 225 242 92.9
branch 149 186 80.1
condition 57 88 64.7
subroutine 15 15 100.0
pod 7 10 70.0
total 453 541 83.7


line stmt bran cond sub pod time code
1             package Sisimai::Address;
2 91     91   146511 use v5.26;
  91         410  
3 91     91   474 use strict;
  91         147  
  91         2147  
4 91     91   404 use warnings;
  91         725  
  91         8119  
5             use Class::Accessor::Lite (
6 91         1244 'new' => 0,
7             'ro' => [
8             'address', # [String] Email address
9             'user', # [String] local part of the email address
10             'host', # [String] domain part of the email address
11             'verp', # [String] VERP
12             'alias', # [String] alias of the email address
13             ],
14             'rw' => [
15             'name', # [String] Display name
16             'comment', # [String] (Comment)
17             ]
18 91     91   5508 );
  91         16854  
19 91     91   59658 use Sisimai::RFC1123;
  91         299  
  91         355923  
20              
21             sub undisclosed {
22             # Return pseudo recipient or sender address
23             # @param [String] argv0 Address type: true = recipient, false = sender
24             # @return [String] Pseudo recipient address or sender address or an empty value when
25             # the $argv0 is neither 'r' nor 's'
26 14     14 0 926 my $class = shift;
27 14   100     53 my $argv0 = shift // 0;
28 14 100       44 my $local = $argv0 ? 'recipient' : 'sender';
29 14         65 return sprintf("undisclosed-%s-in-headers%slibsisimai.org.invalid", $local, '@');
30             }
31              
32             sub is_emailaddress {
33             # Check that the argument is an email address or not
34             # @param [String] email Email address string
35             # @return [Integer] 0: Not email address
36             # 1: Email address
37 10691     10691 1 68936 my $class = shift;
38 10691 100 100     27563 my $email = shift // return 0; return 0 if length($email) < 5;
  10570         24062  
39 10567         42780 $email =~ s/\A[\s\t]+//; $email =~ s/[\s\t]+\z//;
  10567         27504  
40              
41 10567         17770 my $width = length($email);
42 10567         17947 my $lasta = rindex($email, '@');
43              
44 10567 100       22060 return 0 if $width > 254; # The maximum length of an email address is 254
45 10566 100 66     40678 return 0 if $lasta < 1 || $lasta > 64; # The maximum length of a local part is 64
46 10548 50       22750 return 0 if $width - $lasta > 253; # The maximum length of a domain part is 252
47              
48 10548 100       30356 my $quote = __PACKAGE__->is_quotedaddress($email); unless( $quote ) {
  10548         23764  
49             # The email address is not a quoted address
50 10530 100       28349 return 0 if index($email, '@') != $lasta; # There are 2 or more '@'.
51 10510 50       22975 return 0 if index($email, ' ') > 0; # There is 1 or more ' '.
52             }
53 10528         38081 my $ipv46 = Sisimai::RFC1123->is_domainliteral($email);
54              
55 10528         18115 my $j = -1; for my $e ( split('', $email) ) {
  10528         83782  
56             # 31 < The ASCII code of each character < 127
57 253680         297210 my $p = ord($e); $j++;
  253680         267333  
58              
59 253680 100       349167 if( $j < $lasta ) {
60             # A local part of the email address: string before the last "@"
61 106156 50 33     301712 return 0 if $p < 32 || $p > 126; # Before ' ', After '~'
62 106156 100       210877 next if $j == 0; # The character is the first character
63              
64 95628 100       156500 if( $quote ) {
65             # The email address has quoted local part like "neko@cat"@example.org
66 264         450 my $jp = substr($email, $j - 1, 1);
67 264 50       443 if( ord($jp) == 92 ) { # 92 = '\'
68             # When the previous character IS '\', only the followings are allowed: '\', '"'
69 0 0 0     0 return 0 if $p != 92 && $p != 34;
70              
71             } else {
72             # When the previous character IS NOT '\'
73 264 50 66     608 return 0 if $p == 34 && $j + 1 < $lasta;
74             }
75             } else {
76             # The local part is not quoted
77             # ".." is not allowed in a local part when the local part is not quoted by "" but
78             # Non-RFC compliant email addresses still persist in the world.
79             #
80             # The following characters are not allowed in a local part without "..."@example.jp
81 95364 50 33     592803 return 0 if $e eq ',' || $e eq '@' || $e eq ':' || $e eq ';' || $e eq '(';
      33        
      33        
      33        
82 95364 50 33     586502 return 0 if $e eq ')' || $e eq '<' || $e eq '>' || $e eq '[' || $e eq ']';
      33        
      33        
      33        
83             }
84             } else {
85             # A domain part of the email address: string after the last "@"
86 147524 100       240541 next if $p == 64; # '@'
87 136996 50       217127 return 0 if $p < 45; # Before '-'
88 136996 50       204968 return 0 if $p == 47; # Equals '/'
89 136996 50       200751 return 0 if $p == 92; # Equals '\'
90 136996 50       211311 return 0 if $p > 122; # After 'z'
91              
92 136996 100       183014 if( $ipv46 == 0 ) {
93             # Such as "example.jp", "neko.example.org"
94 136926 50 66     354446 return 0 if $p > 57 && $p < 64; # ':' to '?'
95 136926 50 66     371500 return 0 if $p > 90 && $p < 97; # '[' to '`'
96              
97             } else {
98             # Such as "[IPv4:192.0.2.25]"
99 70 50 66     176 return 0 if $p > 59 && $p < 64; # ';' to '?'
100 70 50 66     165 return 0 if $p > 93 && $p < 97; # '^' to '`'
101             }
102             }
103             }
104 10528 100       50343 return 1 if $ipv46;
105 10524         62046 return Sisimai::RFC1123->is_internethost(substr($email, $lasta + 1,));
106             }
107              
108             sub is_quotedaddress {
109             # Checks that the local part of the argument is quoted address or not.
110             # @param [String] argv0 Email address
111             # @return [Integer] 0: is not a quoted address
112             # 1: is a quoted address
113 10548     10548 1 15631 my $class = shift;
114 10548         17970 my $argv0 = shift;
115 10548 100 66     32765 return 1 if index($argv0, '"') == 0 && index($argv0, '"@') > 1;
116 10530         20540 return 0;
117             }
118              
119             sub is_mailerdaemon {
120             # Check that the argument is mailer-daemon or not
121             # @param [String] argv0 Email address
122             # @return [Integer] 0: Not mailer-daemon
123             # 1: Mailer-daemon
124 1593     1593 1 20310726 my $class = shift;
125 1593   100     7973 my $argv0 = shift // return 0;
126 1592         6494 my $email = lc $argv0;
127              
128 1592         3642 state $postmaster = [
129             'mailer-daemon@', '', '(mailer-daemon)', ' mailer-daemon ',
130             'postmaster@', '', '(postmaster)'
131             ];
132 1592 100       5097 return 1 if grep { index($email, $_) > -1 } @$postmaster;
  11144         24971  
133 1513 100 100     14299 return 1 if $email eq 'mailer-daemon' || $email eq 'postmaster';
134 1476         4932 return 0;
135             }
136              
137             sub new {
138             # Constructor of Sisimai::Address
139             # @param [Hash] argvs Email address, name, and other elements
140             # @return [Sisimai::Address] Object or undef when the email address was not valid
141             # @since v4.22.1
142 7384     7384 0 355162 my $class = shift;
143 7384 100 50     16876 my $argvs = shift // return undef; return undef if ref $argvs ne 'HASH';
  7384         21055  
144 7304         61133 my $thing = {
145             'address' => '', # Entire email address
146             'user' => '', # Local part
147             'host' => '', # Domain part
148             'verp' => '', # VERP
149             'alias' => '', # Alias
150             'comment' => '', # Comment
151             'name' => '', # Display name
152             };
153              
154 7304 50       22546 return undef unless exists $argvs->{'address'};
155 7304 100       18733 return undef unless $argvs->{'address'};
156              
157 7262         13816 my $heads = ['<'];
158 7262         22071 my $tails = ['>', ',', '.', ';'];
159 7262         16058 my $point = rindex($argvs->{'address'}, '@');
160              
161 7262 100       15028 if( $point > 0 ) {
162             # Get the local part and the domain part from the email address
163 7240         15795 my $lpart = substr($argvs->{'address'}, 0, $point);
164 7240         17300 my $dpart = substr($argvs->{'address'}, $point+1,);
165 7240   100     24713 my $email = __PACKAGE__->expand_verp($argvs->{'address'}) || '';
166 7240         12779 my $alias = 0;
167              
168 7240 100       16708 unless( $email ) {
169             # Is not VERP address, try to expand the address as an alias
170 7239   100     19352 $email = __PACKAGE__->expand_alias($argvs->{'address'}) || '';
171 7239 100       17121 $alias = 1 if $email;
172             }
173              
174 7240 100       17477 if( index($email, '@') > 0 ) {
175             # The address is a VERP or an alias
176 12 100       43 if( $alias ) {
177             # The address is an alias: neko+nyaan@example.jp
178 11         80 $thing->{'alias'} = $argvs->{'address'};
179              
180             } else {
181             # The address is a VERP: b+neko=example.jp@example.org
182 1         4 $thing->{'verp'} = $argvs->{'address'};
183             }
184             }
185              
186 7240         18945 do { while( substr($lpart, 0, 1) eq $_ ) { substr($lpart, 0, 1, '') }} for @$heads;
  7240         23181  
  0         0  
187 7240         13160 do { while( substr($dpart, -1, 1) eq $_ ) { substr($dpart, -1, 1, '') }} for @$tails;
  28960         67515  
  0         0  
188 7240         17270 $thing->{'user'} = $lpart;
189 7240         13251 $thing->{'host'} = $dpart;
190 7240         19904 $thing->{'address'} = $lpart.'@'.$dpart;
191              
192             } else {
193             # The argument does not include "@"
194 22 100       85 return undef unless __PACKAGE__->is_mailerdaemon($argvs->{'address'});
195 18 50       79 return undef if rindex($argvs->{'address'}, ' ') > -1;
196              
197             # The argument does not include " "
198 18         91 $thing->{'user'} = $argvs->{'address'};
199 18         42 $thing->{'address'} = $argvs->{'address'};
200             }
201              
202 7258   100     26135 $thing->{'name'} = $argvs->{'name'} || '';
203 7258   100     25458 $thing->{'comment'} = $argvs->{'comment'} || '';
204 7258         46523 return bless($thing, __PACKAGE__);
205             }
206              
207             sub find {
208             # Email address parser with a name and a comment
209             # @param [String] argv1 String including email address
210             # @param [Boolean] addrs 0 = Returns list including all the elements
211             # 1 = Returns list including email addresses only
212             # @return [Array] Email address list or undef when there is no email address in the argument
213             # @since v4.22.0
214 9591     9591 1 456925 my $class = shift;
215 9591   100     24662 my $argv1 = shift // return undef; y/\r//d, y/\n//d for $argv1; # Remove CR, NL
  9590         34596  
216 9590   100     26663 my $addrs = shift // undef;
217              
218 9590         59363 require Sisimai::String;
219 9590         17208 state $indicators = {
220             'email-address' => (1 << 0), #
221             'quoted-string' => (1 << 1), # "Neko, Nyaan"
222             'comment-block' => (1 << 2), # (neko)
223             };
224 9590         13486 state $delimiters = {'<' => 1, '>' => 1, '(' => 1, ')' => 1, '"' => 1, ',' => 1};
225 9590         17906 state $validemail = qr{(?>
226             (?:([^\s]+|["].+?["])) # local part
227             [@]
228             (?:([^@\s]+|[0-9A-Za-z:\.]+)) # domain part
229             )
230             }x;
231              
232 9590         47888 my $emailtable = {'address' => '', 'name' => '', 'comment' => ''};
233 9590         15025 my $addrtables = [];
234 9590         13321 my @readbuffer;
235 9590         12066 my $readcursor = 0;
236 9590         13406 my $v = $emailtable; # temporary buffer
237 9590         14226 my $p = ''; # current position
238              
239 9590         89600 for my $e ( split('', $argv1) ) {
240             # Check each characters
241 266236 100       413101 if( $delimiters->{ $e } ) {
242             # The character is a delimiter character
243 14549 100       37579 if( $e eq ',' ) {
244             # Separator of email addresses or not
245 629 100 66     3620 if( index($v->{'address'}, '<') == 0 &&
      66        
246             rindex($v->{'address'}, '@') > -1 &&
247             substr($v->{'address'}, -1, 1) eq '>' ) {
248             # An email address has already been picked
249 2 50       10 if( $readcursor & $indicators->{'comment-block'} ) {
    50          
250             # The cursor is in the comment block (Neko, Nyaan)
251 0         0 $v->{'comment'} .= $e;
252              
253             } elsif( $readcursor & $indicators->{'quoted-string'} ) {
254             # "Neko, Nyaan"
255 0         0 $v->{'name'} .= $e;
256              
257             } else {
258             # The cursor is not in neither the quoted-string nor the comment block
259 2         4 $readcursor = 0; # reset cursor position
260 2         4 push @readbuffer, $v;
261 2         8 $v = {'address' => '', 'name' => '', 'comment' => ''};
262 2         5 $p = '';
263             }
264             } else {
265             # "Neko, Nyaan" OR <"neko,nyaan"@example.org>
266 627 100       1616 $p ? ($v->{ $p } .= $e) : ($v->{'name'} .= $e);
267             }
268 629         1033 next;
269             } # End of if(',')
270              
271 13920 100       29951 if( $e eq '<' ) {
272             # <: The beginning of an email address or not
273 6190 100       16094 if( $v->{'address'} ) {
274 5 50       16 $p ? ($v->{ $p } .= $e) : ($v->{'name'} .= $e);
275              
276             } else {
277             #
278 6185         13281 $readcursor |= $indicators->{'email-address'};
279 6185         14217 $v->{'address'} .= $e;
280 6185         11814 $p = 'address';
281             }
282 6190         10767 next;
283             } # End of if('<')
284              
285 7730 100       18797 if( $e eq '>' ) {
286             # >: The end of an email address or not
287 5540 100       14970 if( $readcursor & $indicators->{'email-address'} ) {
288             #
289 5514         12590 $readcursor &= ~$indicators->{'email-address'};
290 5514         11176 $v->{'address'} .= $e;
291 5514         10202 $p = '';
292              
293             } else {
294             # a comment block or a display name
295 26 50       77 $p ? ($v->{'comment'} .= $e) : ($v->{'name'} .= $e);
296             }
297 5540         9971 next;
298             } # End of if('>')
299              
300 2190 100       5003 if( $e eq '(' ) {
301             # The beginning of a comment block or not
302 125 100       778 if( $readcursor & $indicators->{'email-address'} ) {
    50          
    50          
303             # <"neko(nyaan)"@example.org> or
304 2 50       10 if( rindex($v->{'address'}, '"') > -1 ) {
305             # Quoted local part: <"neko(nyaan)"@example.org>
306 0         0 $v->{'address'} .= $e;
307              
308             } else {
309             # Comment:
310 2         4 $readcursor |= $indicators->{'comment-block'};
311 2 50       10 $v->{'comment'} .= ' ' if substr($v->{'comment'}, -1, 1) eq ')';
312 2         5 $v->{'comment'} .= $e;
313 2         51 $p = 'comment';
314             }
315             } elsif( $readcursor & $indicators->{'comment-block'} ) {
316             # Comment at the outside of an email address (...(...)
317 0 0       0 $v->{'comment'} .= ' ' if substr($v->{'comment'}, -1, 1) eq ')';
318 0         0 $v->{'comment'} .= $e;
319              
320             } elsif( $readcursor & $indicators->{'quoted-string'} ) {
321             # "Neko, Nyaan(cat)", Deal as a display name
322 0         0 $v->{'name'} .= $e;
323              
324             } else {
325             # The beginning of a comment block
326 123         256 $readcursor |= $indicators->{'comment-block'};
327 123 100       446 $v->{'comment'} .= ' ' if substr($v->{'comment'}, -1, 1) eq ')';
328 123         295 $v->{'comment'} .= $e;
329 123         225 $p = 'comment';
330             }
331 125         257 next;
332             } # End of if('(')
333              
334 2065 100       4551 if( $e eq ')' ) {
335             # The end of a comment block or not
336 131 100       567 if( $readcursor & $indicators->{'email-address'} ) {
    100          
337             # <"neko(nyaan)"@example.org> OR
338 2 50       9 if( rindex($v->{'address'}, '"') > -1 ) {
339             # Quoted string in the local part: <"neko(nyaan)"@example.org>
340 0         0 $v->{'address'} .= $e;
341              
342             } else {
343             # Comment:
344 2         5 $readcursor &= ~$indicators->{'comment-block'};
345 2         4 $v->{'comment'} .= $e;
346 2         4 $p = 'address';
347             }
348             } elsif( $readcursor & $indicators->{'comment-block'} ) {
349             # Comment at the outside of an email address (...(...)
350 123         262 $readcursor &= ~$indicators->{'comment-block'};
351 123         227 $v->{'comment'} .= $e;
352 123         189 $p = '';
353              
354             } else {
355             # Deal as a display name
356 6         20 $readcursor &= ~$indicators->{'comment-block'};
357 6         52 $v->{'name'} .= $e;
358 6         21 $p = '';
359             }
360 131         231 next;
361             } # End of if(')')
362              
363 1934 50       4125 if( $e eq '"' ) {
364             # The beginning or the end of a quoted-string
365 1934 100       3828 if( $p ) {
366             # email-address or comment-block
367 10         24 $v->{ $p } .= $e;
368              
369             } else {
370             # Display name like "Neko, Nyaan"
371 1924         4862 $v->{'name'} .= $e;
372 1924 50       5855 next unless $readcursor & $indicators->{'quoted-string'};
373 0 0       0 next if substr($v->{'name'}, -2, 2) eq qq|\x5c"|; # "Neko, Nyaan \"...
374 0         0 $readcursor &= ~$indicators->{'quoted-string'};
375 0         0 $p = '';
376             }
377 10         17 next;
378             } # End of if('"')
379             } else {
380             # The character is not a delimiter
381 251687 100       385283 $p ? ($v->{ $p } .= $e) : ($v->{'name'} .= $e);
382 251687         307339 next;
383             }
384             }
385              
386 9590 100       47424 if( $v->{'address'} ) {
387             # Push the latest values
388 6183         36909 push @readbuffer, $v;
389              
390             } else {
391             # No email address like in the argument
392 3407 100       50840 if( $v->{'name'} =~ $validemail ) {
    100          
393             # String like an email address will be set to the value of "address"
394 3232         20926 $v->{'address'} = $1.'@'.$2;
395              
396             } elsif( __PACKAGE__->is_mailerdaemon($v->{'name'}) ) {
397             # Allow if the argument is MAILER-DAEMON
398 4         14 $v->{'address'} = $v->{'name'};
399             }
400              
401 3407 100       10862 if( $v->{'address'} ) {
402             # Remove the comment from the address
403 3236 50       22191 if( Sisimai::String->aligned(\$v->{'address'}, ['(', ')']) ) {
404             # (nyaan)nekochan@example.org, nekochan(nyaan)cat@example.org or
405             # nekochan(nyaan)@example.org
406 0         0 my $p1 = index($v->{'address'}, '(');
407 0         0 my $p2 = index($v->{'address'}, ')');
408 0         0 $v->{'address'} = substr($v->{'address'}, 0, $p1).substr($v->{'address'}, $p2 + 1,);
409 0         0 $v->{'comment'} = substr($v->{'address'}, $p1, $p2 - $p1 - 1);
410             }
411 3236         9249 push @readbuffer, $v;
412             }
413             }
414              
415 9590         21097 for my $e ( @readbuffer ) {
416             # The element must not include any character except from 0x20 to 0x7e.
417 9421 50       42573 next if $e->{'address'} =~ /[^\x20-\x7e]/;
418 9421 100       29035 if( index($e->{'address'}, '@') == -1 ) {
419             # Allow if the argument is MAILER-DAEMON
420 29 100       148 next unless __PACKAGE__->is_mailerdaemon($e->{'address'});
421             }
422              
423             # Remove angle brackets, other brackets, and quotations: []<>{}'` except a domain part is
424             # an IP address like neko@[192.0.2.222]
425 9416         72570 s/\A[\[<{('`]//g, s/[.,'`>});]\z//g for $e->{'address'};
426 9416 100       41661 $e->{'address'} =~ s/[^A-Za-z]\z//g unless index($e->{'address'}, '@[') > 1;
427              
428 9416 100       23057 if( index($e->{'address'}, '"@') < 0 ) {
429             # Remove double-quotations
430 9395 100       28497 substr($e->{'address'}, 0, 1, '') if substr($e->{'address'}, 0, 1) eq '"';
431 9395 100       23886 substr($e->{'address'}, -1, 1, '') if substr($e->{'address'}, -1, 1) eq '"';
432             }
433              
434 9416 100       18594 if( $addrs ) {
435             # Almost compatible with parse() method, returns email address only
436 5727         11676 delete $e->{'name'};
437 5727         9674 delete $e->{'comment'};
438              
439             } else {
440             # Remove double-quotations, trailing spaces.
441 3689         9060 for my $f ('name', 'comment') { s/\A[ ]//g, s/[ ]\z//g for $e->{ $f } }
  7378         28277  
442 3689 100       13219 $e->{'comment'} = '' unless $e->{'comment'} =~ /\A[(].+[)]\z/;
443 3689 100       19101 $e->{'name'} =~ y/ //s unless $e->{'name'} =~ /\A["].+["]\z/;
444 3689 100       24062 $e->{'name'} =~ s/\A["]// unless $e->{'name'} =~ /\A["].+["][@]/;
445 3689 100       13451 substr($e->{'name'}, -1, 1, '') if substr($e->{'name'}, -1, 1) eq '"';
446             }
447 9416         23817 push @$addrtables, $e;
448             }
449              
450 9590 100       22809 return undef unless scalar @$addrtables;
451 9414         40841 return $addrtables;
452             }
453              
454             sub s3s4 {
455             # Runs like ruleset 3,4 of sendmail.cf
456             # @param [String] input Text including an email address
457             # @return [String] Email address without comment, brackets
458 5853     5853 1 307842 my $class = shift;
459 5853   100     18142 my $input = shift // return "";
460 5853   100     20362 my $addrs = __PACKAGE__->find($input, 1) || [];
461 5853 100       20622 return $input unless scalar @$addrs;
462 5682         35497 return $addrs->[0]->{'address'};
463             }
464              
465             sub expand_verp {
466             # Expand VERP: Get the original recipient address from VERP
467             # @param [String] email VERP Address
468             # @return [String] Email address
469 7242     7242 1 10583 my $class = shift;
470 7242   100     16361 my $email = shift // return "";
471 7241         24856 my $local = (split('@', $email, 2))[0];
472              
473             # bounce+neko=example.org@example.org => neko@example.org
474 7241 100       41463 return "" unless $local =~ /\A[-_\w]+?[+](\w[-._\w]+\w)[=](\w[-.\w]+\w)\z/;
475 2         11 my $verp0 = $1.'@'.$2;
476 2 50       13 return $verp0 if __PACKAGE__->is_emailaddress($verp0);
477             }
478              
479             sub expand_alias {
480             # Expand alias: remove from '+' to '@'
481             # @param [String] email Email alias string
482             # @return [String] Expanded email address
483 7241     7241 1 13739 my $class = shift;
484 7241 100 100     16587 my $email = shift // return ""; return "" unless __PACKAGE__->is_emailaddress($email);
  7240         21267  
485              
486             # neko+straycat@example.org => neko@example.org
487 7235         28159 my @local = split('@', $email);
488 7235 100       53248 return "" unless $local[0] =~ /\A([-_\w]+?)[+].+\z/;
489 12         133 return $1.'@'.$local[1];
490             }
491              
492             sub TO_JSON {
493             # Instance method for JSON::encode()
494             # @return [String] The value of "address" accessor
495 1377     1377 0 48684 my $self = shift; return $self->address;
  1377         3186  
496             }
497              
498             1;
499             __END__