File Coverage

blib/lib/Cookie/Domain.pm
Criterion Covered Total %
statement 230 337 68.2
branch 79 190 41.5
condition 19 79 24.0
subroutine 37 41 90.2
pod 15 15 100.0
total 380 662 57.4


line stmt bran cond sub pod time code
1             ##----------------------------------------------------------------------------
2             ## Cookies API for Server & Client - ~/lib/Cookie/Domain.pm
3             ## Version v0.1.7
4             ## Copyright(c) 2024 DEGUEST Pte. Ltd.
5             ## Author: Jacques Deguest <jack@deguest.jp>
6             ## Created 2021/05/06
7             ## Modified 2025/07/30
8             ## You can use, copy, modify and redistribute this package and associated
9             ## files under the same terms as Perl itself.
10             ##----------------------------------------------------------------------------
11             package Cookie::Domain;
12             BEGIN
13             {
14 3     3   152467 use strict;
  3         5  
  3         132  
15 3     3   13 use warnings;
  3         43  
  3         156  
16 3     3   14 use warnings::register;
  3         7  
  3         185  
17 3     3   521 use parent qw( Module::Generic );
  3         402  
  3         18  
18 3     3   272148 use vars qw( $DOMAIN_RE $PUBLIC_SUFFIX_DATA $VERSION );
  3         21  
  3         149  
19 3     3   1787 use DateTime;
  3         645052  
  3         131  
20 3     3   748 use DateTime::Format::Strptime;
  3         74501  
  3         33  
21 3     3   5136 use Module::Generic::File qw( tempfile );
  3         259145  
  3         58  
22 3     3   4333 use JSON;
  3         31023  
  3         23  
23 3     3   2428 use Net::IDN::Encode ();
  3         253253  
  3         135  
24 3     3   31 use Wanted;
  3         8  
  3         335  
25 3     3   18 use constant URL => 'https://publicsuffix.org/list/effective_tld_names.dat';
  3         6  
  3         1252  
26             # Properly formed domain name according to rfc1123
27 3     3   27 our $DOMAIN_RE = qr/^
28             (?:
29             [[:alnum:]]
30             (?:
31             (?:[[:alnum:]-]){0,61}
32             [[:alnum:]]
33             )?
34             (?:
35             \.[[:alnum:]]
36             (?:
37             (?:[[:alnum:]-]){0,61}
38             [[:alnum:]]
39             )?
40             )*
41             )
42             $/x;
43 3         70 our $VERSION = 'v0.1.7';
44             };
45              
46 3     3   19 use strict;
  3         11  
  3         80  
47 3     3   13 use warnings;
  3         12  
  3         9976  
48              
49             sub init
50             {
51 11     11 1 285965 my $self = shift( @_ );
52 11         85 my $base = Module::Generic::File::file( __FILE__ )->parent;
53 11         3485021 $self->{file} = $base->child( 'public_suffix_list.txt' );
54 11         920528 $self->{json_file} = Module::Generic::File->sys_tmpdir->child( 'public_suffix.json' );
55 11         2464175 $self->{meta} = {};
56 11         111874 $self->{min_suffix} = 0;
57 11         79 $self->{suffixes} = {};
58 11         179 $self->{_init_strict_use_sub} = 1;
59 11 50       182 $self->SUPER::init( @_ ) || return( $self->pass_error );
60 11 50       1405 unless( $self->{no_load} )
61             {
62 11 50       195 $self->load || return( $self->pass_error );
63             }
64 11         107 return( $self );
65             }
66              
67             sub cron_fetch
68             {
69             # Cookie::Domain->cron_fetch
70             # $obj->cron_fetch
71             # Cookie::Domain->cron_fetch( $hash_ref );
72             # $obj->cron_fetch( $hash_ref );
73             # Cookie::Domain->cron_fetch( %options );
74             # $obj->cron_fetch( %options );
75 0     0 1 0 my( $this, $self );
76 0         0 my $opts = {};
77 0 0 0     0 if( scalar( @_ ) && ( ref( $_[0] ) eq __PACKAGE__ || $_[0] eq __PACKAGE__ ) )
      0        
78             {
79 0         0 $this = shift( @_ );
80             }
81 0 0 0     0 if( @_ == 1 && ref( $_[0] ) eq 'HASH' )
    0          
82             {
83 0         0 $opts = shift( @_ );
84             }
85             elsif( !( scalar( @_ ) % 2 ) )
86             {
87 0         0 $opts = { @_ };
88             }
89            
90 0 0       0 if( ref( $this ) )
91             {
92 0         0 $self = $this;
93             }
94             else
95             {
96 0   0     0 $this //= __PACKAGE__;
97 0         0 $self = $this->new( $opts );
98             }
99 0   0     0 $opts->{file} //= '';
100 0   0     0 my $file = $opts->{file} || $self->file;
101 0 0       0 $file = $self->_is_a( $file, 'Module::Generic::File' ) ? $file : Module::Generic::File::file( $file );
102 0         0 require LWP::UserAgent;
103 0         0 my $ua = LWP::UserAgent->new(
104             agent => "Cookie::Domain/" . $VERSION,
105             );
106 0         0 my $meta = $self->meta;
107 0         0 my $req_headers = {};
108 0         0 my $dont_have_etag = 0;
109             my $mtime = $meta->{db_last_modified}
110             ? $meta->{db_last_modified}
111 0 0 0     0 : ( $file->exists && !$file->is_empty )
    0          
112             ? $file->mtime
113             : undef;
114             # If we have already a local file and it is not empty, let's use the etag when making the request
115 0 0 0     0 if( $meta->{etag} && $file->exists && !$file->is_empty )
    0 0        
116             {
117 0         0 $meta->{etag} =~ s/^\"([^"]+)\"$/$1/;
118 0         0 $req_headers->{'If-None-Match'} = qq{"$meta->{etag}"};
119             }
120             elsif( !$meta->{etag} )
121             {
122 0         0 $dont_have_etag = 1;
123             # <https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-Modified-Since>
124 0 0 0     0 if( defined( $mtime ) && $mtime )
125             {
126 0         0 my $dt = $self->_parse_timestamp( $mtime );
127 0 0       0 if( $dt )
128             {
129             # HTTP Date format
130 0         0 my $dt_fmt = DateTime::Format::Strptime->new(
131             pattern => '%a, %d %b %Y %H:%M:%S GMT',
132             locale => 'en_GB',
133             time_zone => 'GMT',
134             );
135 0         0 $dt->set_formatter( $dt_fmt );
136 0         0 $req_headers->{ 'If-Modified-Since' } = $dt;
137             }
138             }
139             }
140            
141             # try-catch
142 0         0 local $@;
143             my $resp = eval
144 0         0 {
145 0         0 $ua->get( URL, %$req_headers );
146             };
147 0 0       0 if( $@ )
148             {
149 0         0 return( $self->error( "Error trying to perform an HTTP GET request to ", URL, ": $@" ) );
150             }
151 0         0 my $code = $resp->code;
152             # try-catch
153             my $data = eval
154 0         0 {
155 0         0 $resp->decoded_content( default_charset => 'utf-8', alt_charset => 'utf-8' );
156             };
157 0 0       0 if( $@ )
158             {
159 0         0 return( $self->error( "Error decoding response content: $@" ) );
160             }
161 0         0 my $last_mod = $resp->header( 'Last-Modified' );
162              
163 0         0 my $tz;
164             # DateTime::TimeZone::Local will die ungracefully if the local timezeon is not set with the error:
165             # "Cannot determine local time zone"
166             # try-catch
167             $tz = eval
168 0         0 {
169 0         0 DateTime::TimeZone->new( name => 'local' );
170             };
171 0 0       0 if( $@ )
172             {
173 0         0 $tz = DateTime::TimeZone->new( name => 'UTC' );
174             }
175            
176 0 0       0 if( $last_mod )
177             {
178 0         0 $last_mod = $self->_parse_timestamp( $last_mod )->set_time_zone( $tz );
179             }
180             else
181             {
182 0         0 $last_mod = DateTime->now( time_zone => $tz );
183             }
184 0         0 my $epoch = $last_mod->epoch;
185 0 0       0 if( $resp->header( 'etag' ) )
186             {
187 0 0 0     0 $dont_have_etag = $resp->header( 'etag' ) eq ( $meta->{etag} // '' ) ? 0 : 1;
188 0         0 $meta->{etag} = $resp->header( 'etag' );
189 0         0 $meta->{etag} =~ s/^\"([^"]+)\"$/$1/;
190             }
191            
192 0 0 0     0 if( $code == 304 ||
    0 0        
    0 0        
193             ( !$file->is_empty && $mtime && $mtime == $epoch ) )
194             {
195 0 0       0 if( !$self->suffixes->length )
196             {
197 0 0       0 $self->load_public_suffix || return( $self->pass_error );
198             }
199             # Did not have an etag, but I do have one now
200 0 0 0     0 if( $dont_have_etag && $meta->{etag} )
201             {
202 0 0       0 $self->save_as_json || return( $self->pass_error );
203             }
204 0         0 return( $self );
205             }
206             elsif( $code ne 200 )
207             {
208 0         0 return( $self->error( "Failed to get the remote public domain list. Server responded with code '$code': ", $resp->as_string ) );
209             }
210             elsif( !length( $data ) )
211             {
212 0         0 return( $self->error( "Remote server returned no data." ) );
213             }
214 0 0       0 $file->unload_utf8( $data, { lock => 1 } ) || return( $self->error( "Unable to open public suffix data file \"$file\" in write mode: ", $file->error ) );
215 0         0 $file->unlock;
216 0         0 $file->utime( $epoch, $epoch );
217 0 0       0 $self->load_public_suffix || return( $self->pass_error );
218 0 0       0 $self->save_as_json || return( $self->pass_error );
219              
220 0         0 return( $self );
221             }
222              
223             sub decode
224             {
225 0     0 1 0 my $self = shift( @_ );
226 0         0 my $name = shift( @_ );
227 0 0       0 return( '' ) if( !length( $name ) );
228             # try-catch
229 0         0 local $@;
230             my $rv = eval
231 0         0 {
232 0         0 return( Net::IDN::Encode::domain_to_ascii( $name ) );
233             };
234 0 0       0 if( $@ )
235             {
236 0         0 return( $self->error( "An unexpected error occurred while decoding a domain name: $@" ) );
237             }
238 0         0 return( $rv );
239             }
240              
241             sub encode
242             {
243 0     0 1 0 my $self = shift( @_ );
244 0         0 my $name = shift( @_ );
245 0 0       0 return( '' ) if( !length( $name ) );
246             # try-catch
247 0         0 local $@;
248             my $rv = eval
249 0         0 {
250 0         0 return( Net::IDN::Encode::domain_to_unicode( $name ) );
251             };
252 0 0       0 if( $@ )
253             {
254 0         0 return( $self->error( "An unexpected error occurred while encoding a domain name: $@" ) );
255             }
256 0         0 return( $rv );
257             }
258              
259 14     14 1 1707 sub file { return( shift->_set_get_object_without_init( 'file', 'Module::Generic::File', @_ ) ); }
260              
261 11     11 1 44 sub json_file { return( shift->_set_get_object_without_init( 'json_file', 'Module::Generic::File', @_ ) ); }
262              
263             sub load
264             {
265 11     11 1 42 my $self = shift( @_ );
266 11         79 my $f = $self->file;
267 11         1318 my $json_file = $self->json_file;
268 11 100 66     1035 if( defined( $PUBLIC_SUFFIX_DATA ) && ref( $PUBLIC_SUFFIX_DATA ) eq 'HASH' )
    100 66        
269             {
270 9         207 $self->suffixes( $PUBLIC_SUFFIX_DATA );
271 9         327921 $self->meta( {} );
272             }
273             elsif( $json_file && $json_file->exists )
274             {
275 1 50       121 $self->load_json( $json_file ) || return( $self->pass_error );
276 1         6 my $meta = $self->meta;
277 1 50 33     915 if( $f && $f->exists )
278             {
279 1 50 33     114 if( defined( $meta->{db_last_modified} ) && $meta->{db_last_modified} =~ /^\d{10}$/ )
280             {
281 1         101 my $mtime = $f->mtime;
282 1 50       10761 if( $mtime > $meta->{db_last_modified} )
283             {
284 0 0       0 $self->load_public_suffix( $f ) || return( $self->pass_error );
285 0 0       0 $self->save_as_json( $json_file ) || return( $self->pass_error );
286             }
287             }
288             else
289             {
290 0 0       0 $self->load_public_suffix( $f ) || return( $self->pass_error );
291 0 0       0 $self->save_as_json( $json_file ) || return( $self->pass_error );
292             }
293             }
294             }
295             else
296             {
297 1 0 33     61 return( $self->error( "No public suffix data file or json cache data file was specified." ) ) if( !$json_file && !$f );
298 1 50       12 $self->load_public_suffix( $f ) || return( $self->pass_error );
299 1 50       16 $self->save_as_json( $json_file ) || return( $self->pass_error );
300             }
301 11         27600 return( $self );
302             }
303              
304             sub load_json
305             {
306 1     1 1 9 my $self = shift( @_ );
307 1   50     8 my $file = shift( @_ ) || $self->json_file || return( $self->error( "No json file was specified." ) );
308 1 50       60 $file = $self->_is_a( $file, 'Module::Generic::File' ) ? $file : Module::Generic::File::file( "$file" );
309             # Basic error checking
310 1 50       68 if( !$file->exists )
    50          
    50          
311             {
312 0         0 return( $self->error( "Json data file provided \"$file\" does not exist." ) );
313             }
314             elsif( !$file->can_read )
315             {
316 0         0 return( $self->error( "Json data file provided \"$file\" lacks enough permission to read." ) );
317             }
318             elsif( $file->is_empty )
319             {
320 0         0 return( $self->error( "Json data file provided \"$file\" is empty." ) );
321             }
322 1         53680 my $json = $file->load_utf8;
323 1 50       13959 return( $self->error( "Unable to open the public suffix json data file in read mode: $!" ) ) if( !defined( $json ) );
324 1 50       11 return( $self->error( "No data found from public domain json file \"$file\"." ) ) if( !CORE::length( $json ) );
325             # try-catch
326 1         3 local $@;
327             my $ref = eval
328 1         5 {
329 1         85 my $j = JSON->new->relaxed;
330 1         11253 return( $j->decode( $json ) );
331             };
332 1 50       8 if( $@ )
333             {
334 0         0 return( $self->error( "An unexpected error occurred while trying to load json data of public suffixes: $@" ) );
335             }
336 1 50       7 if( ref( $ref->{suffixes} ) eq 'HASH' )
337             {
338 1         8 $PUBLIC_SUFFIX_DATA = $ref->{suffixes};
339 1         14 $self->suffixes( $ref->{suffixes} );
340             }
341 1 50       393901 $ref->{meta} = {} if( ref( $ref->{meta} ) ne 'HASH' );
342 1         17 $self->meta( $ref->{metadata} );
343 1         2981 return( $self );
344             }
345              
346             sub load_public_suffix
347             {
348 1     1 1 4 my $self = shift( @_ );
349 1   50     4 my $file = shift( @_ ) || $self->file || return( $self->error( "No public suffix data file was provided." ) );
350 1 50       35 $file = $self->_is_a( $file, 'Module::Generic::File' ) ? $file : Module::Generic::File::file( "$file" );
351             # Basic error checking
352 1 50       41 if( !$file->exists )
    50          
    50          
353             {
354 0         0 return( $self->error( "Public suffix data file provided \"$file\" does not exist." ) );
355             }
356             elsif( !$file->can_read )
357             {
358 0         0 return( $self->error( "Public suffix data file provided \"$file\" lacks enough permission to read." ) );
359             }
360             elsif( $file->is_empty )
361             {
362 0         0 return( $self->error( "Public suffix data file provided \"$file\" is empty." ) );
363             }
364 1 50       31097 $file->open( '<', { binmode => 'utf-8' }) || return( $self->error( "Unable to open the public suffix data file in read mode: ", $file->error ) );
365 1         6912 my $ref = {};
366             $file->line(sub
367             {
368 15420     15420   22648210 my $l = shift( @_ );
369 15420         40424 chomp( $l );
370 15420         74523 $l =~ s,//.*$,,;
371 15420         36254 $l =~ s,[[:blank:]\h]+$,,g;
372 15420 100       65878 return(1) if( !CORE::length( $l ) );
373 9568         13367 my $orig;
374 9568 100       45025 if( $l !~ /^[\x00-\x7f]*$/ )
375             {
376 460         903 $orig = $l;
377             # try-catch
378 460         618 local $@;
379             $l = eval
380 460         718 {
381 460         1542 Net::IDN::Encode::domain_to_ascii( $l );
382             };
383 460 50       205443 if( $@ )
384             {
385 0         0 return( $self->error( "An unexpected error occurred while parsing the public suffix data file content: $@" ) );
386             }
387             }
388 9568         20812 my $is_neg = $l =~ s,^\!,,;
389 9568         35581 my @labels = split( /\./, $l );
390 9568         13668 my $h = $ref;
391 9568         20902 foreach my $label ( reverse( @labels ) )
392             {
393 21603   100     109355 $h = $h->{ $label } ||= {};
394             }
395 9568 100       20368 $h->{_is_neg} = $is_neg if( $is_neg );
396 9568 100       44861 $h->{_original} = $orig if( defined( $orig ) );
397 1         24 });
398              
399 1         1848 $file->close;
400             # Although this is a private extension, it is still valid nevertheless, and is missing as of 2024-02-02
401 1 50       11575 if( !CORE::exists( $ref->{test} ) )
402             {
403 1         15 $ref->{test} = {};
404             }
405 1         35 $self->suffixes( $ref );
406 1         638280 $PUBLIC_SUFFIX_DATA = $ref;
407 1         19 return( $self );
408             }
409              
410 12     12 1 77 sub meta { return( shift->_set_get_hash_as_mix_object( 'meta', @_ ) ); }
411              
412 82     82 1 923 sub min_suffix { return( shift->_set_get_number( 'min_suffix', @_ ) ); }
413              
414 0     0 1 0 sub no_load { return( shift->_set_get_boolean( 'no_load', @_ ) ); }
415              
416             sub save_as_json
417             {
418 1     1 1 4 my $self = shift( @_ );
419 1   50     28 my $file = shift( @_ ) || $self->json_file || return( $self->error( "No json file was specified." ) );
420 1 50       103 $file = $self->_is_a( $file, 'Module::Generic::File' ) ? $file : Module::Generic::File::file( "$file" );
421 1         101 my $data = $self->suffixes;
422 1         1077 my $tz;
423             # DateTime::TimeZone::Local will die ungracefully if the local timezeon is not set with the error:
424             # "Cannot determine local time zone"
425             # try-catch
426 1         3 local $@;
427             $tz = eval
428 1         2 {
429 1         138 DateTime::TimeZone->new( name => 'local' );
430             };
431 1 50       9263 if( $@ )
432             {
433 1         10 $tz = DateTime::TimeZone->new( name => 'UTC' );
434             }
435 1         1701 my $dt_fmt = DateTime::Format::Strptime->new(
436             pattern => '%FT%T%z',
437             locale => 'en_GB',
438             time_zone => $tz->name,
439             );
440 1         7770 my $today = DateTime->now( time_zone => $tz, formatter => $dt_fmt );
441 1         1568 my $meta = $self->meta;
442             my $ref =
443             {
444             metadata =>
445             {
446             created => $today->stringify,
447             module => 'Cookie::Domain',
448             ( $self->file && $self->file->exists ? ( db_last_modified => $self->file->mtime ) : () ),
449 1 50 33     2885 ( $meta->{etag} ? ( etag => $meta->{etag} ) : () ),
    50          
450             },
451             suffixes => $data
452             };
453 1         33622 my $j = JSON->new->canonical->pretty->convert_blessed;
454             # try-catch
455             my $json = eval
456 1         5 {
457 1         51 $j->encode( $ref );
458             };
459 1 50       650738 if( $@ )
460             {
461 0         0 return( $self->error( "An error occurred while trying to save data to json file \"$file\": $@" ) );
462             }
463 1 50       45 $file->unload_utf8( $json ) ||
464             return( $self->error( "Unable to write json data to file \"$file\": ", $file->error ) );
465 1         62747 return( $self );
466             }
467              
468             sub stat
469             {
470 97     97 1 419643 my $self = shift( @_ );
471 97   100     708 my $name = shift( @_ ) || return( $self->error( "No host name was provided" ) );
472 95         649 my $opts = $self->_get_args_as_hash( @_ );
473 95 100       15743 $opts->{min_suffix} = $self->min_suffix if( !exists( $opts->{min_suffix} ) );
474 95         575654 my $idn;
475             # Punnycode
476 95 100       1015 if( $name !~ /^[\x00-\x7f]*$/ )
477             {
478 12         33 $idn = $name;
479 12         78 $name = Net::IDN::Encode::domain_to_ascii( $name );
480 12         7765 $name = lc( $name );
481 12         97 $name =~ s/^[[:blank:]\h]+|[[:blank:]\h]+$//g;
482 12         43 $name =~s/\.$//;
483             }
484             else
485             {
486 83         561 $name =~ s/^\.|\.$//g;
487 83         373 $name = lc( $name );
488             }
489 95 50       1579 return( $self->error( "Malformed domain name \"$name\"" ) ) if( $name !~ /$DOMAIN_RE/ );
490 95         983 my $labels = $self->new_array( [split( /\./, $name )] );
491 95         70143 my $any = {};
492 95         172 my $host = {};
493 95         205 my $expt = {};
494 95         444 my $ref = $self->suffixes;
495 95         80233 my $def = $ref;
496 95         289 my $stack = [];
497             # The following algorithm is borrowed from IO-Socket-SSL
498             # for( my $i = 0; $i < scalar( @$labels ); $i++ )
499             # $labels->reverse->for(sub
500 95         896 my $reverse = $labels->reverse;
501 95         3272 for( my $i = 0; $i < scalar( @$reverse ); $i++ )
502             {
503 201         556 my $label = $reverse->[$i];
504             # my( $i, $label ) = @_;
505 201         338 my $buff = [];
506 201 100       1359 if( my $public_label_def = $def->{ $label } )
    100          
507             {
508             # name match, continue with next path element
509 121         3808 push( @$buff, $public_label_def );
510 121 100 66     712 if( exists( $public_label_def->{_is_neg} ) && $public_label_def->{_is_neg} )
511             {
512 4         223 $expt->{ $i + 1 }->{ $i + 1 } = -1;
513             }
514             else
515             {
516 117         3777 $host->{ $i + 1 }->{ $i + 1 } = 1;
517             }
518             }
519             elsif( exists( $def->{ '*' } ) )
520             {
521 8         402 my $public_label_def = $def->{ '*' };
522 8         215 push( @$buff, $public_label_def );
523 8 50 33     31 if( exists( $public_label_def->{_is_neg} ) && $public_label_def->{_is_neg} )
524             {
525 0         0 $expt->{ $i + 1 }->{ $i + 1 } = -1;
526             }
527             else
528             {
529 8         223 $any->{ $i + 1 }->{ $i + 1 } = 1;
530             }
531             }
532            
533 3     3   25 no warnings 'exiting';
  3         5  
  3         1537  
534             LABEL:
535             # We found something
536 201 100       4183 if( @$buff )
537             {
538             # take out the one we just added
539 129         333 $def = shift( @$buff );
540             # if we are circling within the next_choice loop, add the previous step to $stack
541 129 50       320 push( @$stack, [ $buff, $i ] ) if( @$buff );
542             # go deeper
543 129         497 next;
544             # The following works too by the way, but let's keep it traditional
545             # return(1);
546             }
547              
548             # We did not find anything, so we backtrack
549 72 50       325 last if( !scalar( @$stack ) );
550             # The following works too by the way, but let's keep it traditional
551             # return if( !scalar( @$stack ) );
552             # Recall our last entry
553 0         0 ( $buff, $_[0] ) = @{ pop( @$stack ) };
  0         0  
554 0         0 goto LABEL;
555             # });
556             }
557            
558             # remove all exceptions from wildcards
559 95 100       389 delete( @$any{ keys( %$expt ) } ) if( scalar( keys( %$expt ) ) );
560             # get longest match
561 49         238 my( $len ) = sort{ $b <=> $a } (
562 95         628 keys( %$any ), keys( %$host ), map{ $_-1 } keys( %$expt )
  4         30  
563             );
564 95 100       339 $len = $opts->{min_suffix} if( !defined( $len ) );
565 95 100       409 $len += int( $opts->{add} ) if( $opts->{add} );
566 95         290 my $suffix;
567             my $sub;
568 95 100       652 if( $len < $labels->length )
    50          
569             {
570 71         2655475 $suffix = $self->new_array( [ $labels->splice( -$len, $len ) ] );
571             }
572             elsif( $len > 0 )
573             {
574 24         906341 $suffix = $labels;
575 24         172 $labels = $self->new_array;
576             }
577             else
578             {
579 0         0 $suffix = $self->new_array;
580             }
581 95 100       89747 if( !$suffix->length )
582             {
583 10 50       414791 if( want( 'OBJECT' ) )
584             {
585 0         0 rreturn( Module::Generic::Null->new );
586             }
587             else
588             {
589 10         943 return( '' );
590             }
591             }
592 85         3390861 $suffix = $suffix->join( '.' );
593 85         218556 $name = $labels->pop;
594 85 100       11245 $sub = $labels->join( '.' ) if( $labels->length );
595 85 100       3095568 if( defined( $idn ) )
596             {
597 12         28630 $suffix = Net::IDN::Encode::domain_to_unicode( $suffix );
598 12 100       5680 $name = Net::IDN::Encode::domain_to_unicode( $name ) if( defined( $name ) );
599 12 100       4457 $sub = Net::IDN::Encode::domain_to_unicode( $sub ) if( defined( $sub ) );
600             }
601 85         167363 return(Cookie::Domain::Result->new({ name => $name, sub => $sub, suffix => $suffix }));
602             }
603              
604 107     107 1 899 sub suffixes { return( shift->_set_get_hash_as_mix_object( 'suffixes', @_ ) ); }
605              
606             # NOTE: Cookie::Domain::Result class
607             {
608             package
609             Cookie::Domain::Result;
610             BEGIN
611             {
612 3     3   22 use strict;
  3         5  
  3         63  
613 3     3   10 use warnings;
  3         6  
  3         267  
614 3     3   54 use parent qw( Module::Generic::Hash );
  3         11  
  3         38  
615 3     3   415173 use Wanted;
  3         8  
  3         269  
616 3     3   612 our $VERSION = 'v0.1.0';
617             };
618            
619             sub domain
620             {
621 15     15   12725 my $self = shift( @_ );
622 15 0 33     123 if( !$self->name->length && !$self->suffix->length )
623             {
624 0         0 return( Module::Generic::Scalar->new( '' ) );
625             }
626 15         562484 return( $self->name->join( '.', $self->suffix ) );
627             }
628            
629 105     105   196386 sub name { return( shift->_set_get_scalar_as_object( 'name', @_ ) ); }
630              
631 75     75   304514 sub sub { return( shift->_set_get_scalar_as_object( 'sub', @_ ) ); }
632              
633 90     90   286571 sub suffix { return( shift->_set_get_scalar_as_object( 'suffix', @_ ) ); }
634             }
635              
636             1;
637             # NOTE: POD
638             __END__
639              
640             =encoding utf-8
641              
642             =head1 NAME
643              
644             Cookie::Domain - Domain Name Public Suffix Query Interface
645              
646             =head1 SYNOPSIS
647              
648             use Cookie::Domain;
649             my $dom = Cookie::Domain->new( min_suffix => 1, debug => 3 ) ||
650             die( Cookie::Domain->error, "\n" );
651             my $res = $dom->stat( 'www.example.or.uk' ) || die( $dom->error, "\n" );
652             # Check for potential errors;
653             die( $dom->error ) if( !defined( $res ) );
654             # stat() returns an empty string if nothing was found and undef upon error
655             print( "Nothing found\n" ), exit(0) if( !$res );
656             print( $res->domain, "\n" ); # example.co.uk
657             print( $res->name, "\n" ); # example
658             print( $res->sub, "\n" ); # www
659             print( $res->suffix, "\n" ); # co.uk
660              
661             # Load the public suffix. This is done automatically, so no need to do it
662             $dom->load_public_suffix( '/some/path/on/the/filesystem/data.txt' ) ||
663             die( $dom->error );
664             # Then, save it as json data for next time
665             $dom->save_as_json( '/var/domain/public_suffix.json' ) ||
666             die( $dom->error, "\n" );
667             say $dom->suffixes->length, " suffixes data loaded.";
668              
669             =head1 VERSION
670              
671             v0.1.7
672              
673             =head1 DESCRIPTION
674              
675             This is an interface to query the C<Public Suffix> list courtesy of the mozilla project.
676              
677             This list contains all the top level domains, a.k.a. zones and is used to determine what part of a domain name constitute the top level domain, what part is the domain, a.k.a. C<label> and what part (the rest) constitute the subdomain.
678              
679             Consider C<www.example.org>. In this example, C<org> is the top level domain, C<example> is the name, C<example.org> is the domain, and C<www> is the subdomain.
680              
681             This is easy enough, but there are cases where it is tricky to know which label (or part) is the domain part or the top level domain part. For example, C<www.example.com.sg>, C<com.sg> is the top level domain, C<example> the name, C<example.com.sg> is the domain, and C<www> the subdomain.
682              
683             This module will use a json cache data file to speed up the loading of the suffixes, a.k.a, top level domains, data.
684              
685             By default the location of this json file will be C<public_suffix.json> under your system temporary directory, but you can override this by specifying your own location upon object instantiation:
686              
687             my $dom = Cookie::Domain->new( json_file => '/home/joe/var/public_suffix.json' );
688              
689             =head1 METHODS
690              
691             =head2 new
692              
693             This initiates the package and take the following parameters either as an hash or hash reference:
694              
695             =over 4
696              
697             =item * C<debug>
698              
699             Optional. If set with a positive integer, this will activate verbose debugging message
700              
701             =item * C<file>
702              
703             Specify the location of the Public Suffix data file. The default one is under the same directory as this module with the file name C<public_suffix_list.txt>
704              
705             You can download a different (new) version and specify with this parameter where it will be found.
706              
707             =item * C<json_file>
708              
709             Specify the location of the json cache data file. The default location is set using L<Module::Generic::File> to get the system temporary directory and the file name C<public_suffix.json>.
710              
711             This json file is created once upon initiating an object and if it does not already exist. See the L</json_file> method for more information.
712              
713             =item * C<min_suffix>
714              
715             Sets the minimum suffix length required. Default to 0.
716              
717             =item * C<no_load>
718              
719             If this is set to true, this will prevent the object instantiation method from loading the public suffix file upon object instantiation. Normally you would not want to do that, unless you want to control when the file is loaded before you call L</stat>. This is primarily used by L</cron_fetch>
720              
721             =back
722              
723             =head2 cron_fetch
724              
725             You need to have installed the package L<LWP::UserAgent> to use this method.
726              
727             This method can also be called as a package subroutine, such as C<Cookie::Domain::cron_fetch>
728            
729             Its purpose is to perform a remote connection to L<https://publicsuffix.org/list/effective_tld_names.dat> and check for an updated copy of the public suffix data file.
730              
731             It checks if the remote file has changed by using the http header field C<Last-Modified> in the server response, or if there is already an C<etag> stored in the cache, it performs a conditional http query using C<If-None-Matched>. See L<Mozilla documentation|https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/ETag> for more information on those types of query.
732              
733             This is important to save bandwidth and useless processing.
734              
735             If the file has indeed changed, L</save_as_json> is invoked to refresh the cache.
736              
737             It returns the object it was called with for chaining.
738              
739             =head2 decode
740              
741             Takes a domain name, or rather called a host name, such as C<www.東京.jp> or C<今何年.jp> and this will return its punycode ascii representation prefixed with a so-called ASCII Compatible Encoding, a.k.a. C<ACE>. Thus, using our previous examples, this would produce respectively C<www.xn--1lqs71d.jp> and C<xn--wmq0m700b.jp>
742              
743             Even if the host name contains non-ascii dots, they will be recognised. For example C<www。東京。jp> would still be successfully decoded to C<www.xn--1lqs71d.jp>
744              
745             If the host name provided is not an international domain name (a.k.a. IDN), it is simply returned as is. Thus, if C<www.example.org> is provided, it would return C<www.example.org>
746              
747             If an error occurred, it sets an error object and returns L<perlfunc/undef>. The error can then be retrieved using L<Module::Generic/error> inherited by this module.
748              
749             It uses L<Net::IDN::Encode/domain_to_ascii> to perform the actual decoding.
750              
751             =head2 encode
752              
753             This does the reverse operation from L</decode>.
754              
755             It takes a domain name, or rather called a host name, already decoded, and with its so called ASCII Compatible Encoding a.k.a. C<ACE> prefix C<xn--> such as C<xn--wmq0m700b.jp> and returns its encoded version in perl internal utf8 encoding. Using the previous example, and this would return C<今何年.jp>. The C<ACE> prefix is required to tell apart international domain name (a.k.a. IDN) from other pure ascii domain names.
756              
757             Just like in L</decode>, if a non-international domain name is provided, it is returned as is. Thus, if C<www.example.org> is provided, it would return C<www.example.org>
758              
759             Note that this returns the name in perl's internal utf8 encoding, so if you need to save it to an utf8 file or print it out as utf8 string, you still need to encode it in utf8 before. For example:
760              
761             use Cookie::Domain;
762             use open ':std' => ':utf8';
763             my $d = Cookie::Domain->new;
764             say $d->encode( "xn--wmq0m700b.jp" );
765              
766             Or
767              
768             use Cookie::Domain;
769             use Encode;
770             my $d = Cookie::Domain->new;
771             my $encoded = $d->encode( "xn--wmq0m700b.jp" );
772             say Encode::encode_utf8( $encoded );
773              
774             If an error occurred, it sets an error object and returns L<perlfunc/undef>. The error can then be retrieved using L<Module::Generic/error> inherited by this module.
775              
776             It uses L<Net::IDN::Encode/domain_to_unicode> to perform the actual encoding.
777              
778             =head2 file
779              
780             Sets the file path to the Public Suffix file. This file is a public domain file at the initiative of Mozilla Foundation and its latest version can be accessed here: L<https://publicsuffix.org/list/>
781              
782             =head2 json_file
783              
784             Sets the file path of the json cache data file. THe purpose of this file is to contain a json representation of the parsed data from the Public Suffix data file. This is to avoid re-parsing it each time and instead load the json file using the XS module L<JSON>
785              
786             =head2 load
787              
788             This method takes no parameter and relies on the properties set with L</file> and L</json_file>.
789              
790             If the hash data is already accessibly in a module-wide variable, the data is taken from it.
791              
792             Otherwise, if json_file is set and accessible, this will load the data from it, otherwise, it will load the data from the file specified with L</file> and save it as json.
793              
794             If the json file meta data enclosed, specifically the property I<db_last_modified> has a unix timestamp value lower than the last modification timestamp of the public suffix data file, then, L</load> will reload that data file and save it as json again.
795              
796             That way, all you need to do is set up a crontab to fetch the latest version of that public suffix data file.
797              
798             For example, to fetch it every day at 1:00 in the morning:
799              
800             0 1 * * * perl -MCookie::Domain -e 'Cookie::Domain::cron_fetch' >/dev/null 2>&1
801              
802             But if you want to store the public suffix data file somewhere other than the default location:
803              
804             0 1 * * * perl -MCookie::Domain -e 'my $d=Cookie::Domain->new(file=>"/some/system/file.txt"); $d->cron_fetch' >/dev/null 2>&1
805              
806             See your machine manpage for C<crontab> for more detail.
807              
808             The data read are loaded into L</suffixes>.
809              
810             It returns the current object for chaining.
811              
812             =head2 load_json
813              
814             This takes a file path to the json cache data as the only argument, and attempt to read its content and set it onto the data accessible with L</suffixes>.
815              
816             If an error occurs, it set an error object using L<Module::Generic/error> and returns L<perlfunc/undef>
817              
818             It returns its current object for chaining.
819              
820             =head2 load_public_suffix
821              
822             This is similar to the method L</load_json> above.
823              
824             This takes a file path to the Public Suffix data as the only argument, read its content, parse it using the algorithm described at L<https://publicsuffix.org/list/> and set it onto the data accessible with L</suffixes> and also onto the package-wide global variable to make the data available across object instantiations.
825              
826             If an error occurs, it set an error object using L<Module::Generic/error> and returns L<perlfunc/undef>
827              
828             It returns its current object for chaining.
829              
830             =head2 meta
831              
832             Returns an L<hash object|Module::Generic::Hash> of meta information pertaining to the public suffix file. This is used primarily by L</cron_fetch>
833              
834             =head2 min_suffix
835              
836             Sets or gets the minimum suffix required as an integer value.
837              
838             It returns the current value as a L<Module::Generic::Number> object.
839              
840             =head2 no_load
841              
842             If this is set to true, this will prevent the object instantiation method from loading the public suffix file upon object instantiation. Normally you would not want to do that, unless you want to control when the file is loaded before you call L</stat>. This is primarily used by L</cron_fetch>
843              
844             =head2 save_as_json
845              
846             This takes as sole argument the file path where to save the json cache data and save the data accessible with L</suffixes>.
847              
848             It returns the current object for chaining.
849              
850             If an error occurs, it set an error object using L<Module::Generic/error> and returns L<perlfunc/undef>
851              
852             =head2 stat
853              
854             This takes a domain name, such as C<www.example.org> and optionally an hash reference of options and returns:
855              
856             =over 4
857              
858             =item C<undef()>
859              
860             If an error occurred.
861              
862             my $rv = $d->stat( 'www.example.org' );
863             die( "Error: ", $d->error ) if( !defined( $rv ) );
864              
865             =item empty string
866              
867             If there is no data available such as when querying a non existing top level domain.
868              
869             =item A C<Cookie::Domain::Result> object
870              
871             An object with the following properties and methods, although not all are necessarily defined, depending on the results.
872              
873             Accessed as an hash property and this return a regular string, but accessed as a method and they will return a L<Module::Generic::Scalar> object.
874              
875             =over 8
876              
877             =item I<name>
878              
879             The label that immediately follows the suffix (i.e. on its lefthand side).
880              
881             For example, in C<www.example.org>, the I<name> would be C<example>
882              
883             my $res = $dom->stat( 'www.example.org' ) || die( $dom->error );
884             say $res->{name}; # example
885             # or alternatively
886             say $res->name; # example
887              
888             =item I<sub>
889              
890             The sub domain or sub domains that follows the domain on its lefthand side.
891              
892             For example, in C<www.paris.example.fr>, C<www.paris> is the I<sub> and C<example> the I<name>
893              
894             my $res = $dom->stat( 'www.paris.example.fr' ) || die( $dom->error );
895             say $res->{sub}; # www.paris
896             # or alternatively
897             say $res->sub; # www.paris
898              
899             =item I<suffix>
900              
901             The top level domain or I<suffix>. For example, in C<example.com.sg>, C<com.sg> is the suffix and C<example> the I<name>
902              
903             my $res = $dom->stat( 'example.com.sg' ) || die( $dom->error );
904             say $res->{suffix}; # com.sg
905             # or alternatively
906             say $res->suffix; # com.sg
907              
908             What constitute a suffix varies from zone to zone or country to country, hence the necessity of this public domain suffix data file.
909              
910             =back
911              
912             C<Cookie::Domain::Result> objects inherit from L<Module::Generic::Hash>, thus you can do:
913              
914             my $res = $dom->stat( 'www.example.org' ) || die( $dom->error );
915             say $res->length, " properties set.";
916             # which should say 3 since we alway return suffix, name and sub
917              
918             The following additional method is also available as a convenience:
919              
920             =over 8
921              
922             =item I<domain>
923              
924             This is a read only method which returns and empty L<Module::Generic::Scalar> object if the I<name> property is empty, or the properties I<name> and I<suffix> join by a dot '.' and returned as a new L<Module::Generic::Scalar> object.
925              
926             my $res = $dom->stat( 'www.example.com.sg' ) || die( $dom->error );
927             say $res->domain; # example.com.sg
928             say $res->domain->length; # 14
929              
930             =back
931              
932             =back
933              
934             The options accepted are:
935              
936             =over 4
937              
938             =item I<add>
939              
940             This is an integer, and represent the additional length to be added, for the domain name.
941              
942             =item I<min_suffix>
943              
944             This is an integer, and if provided, will override the default value set with L</min_suffix>
945              
946             =back
947              
948             =head2 suffixes
949              
950             This method is used to access the hash repository of all the public suffix data.
951              
952             It is actually an L<Module::Generic::Hash> object. So you could do:
953              
954             say "There are ", $dom->suffixes->length, " rules.";
955              
956             =head1 AUTHOR
957              
958             Jacques Deguest E<lt>F<jack@deguest.jp>E<gt>
959              
960             =head1 SEE ALSO
961              
962             L<Cookie>, L<Cookie::Jar>, L<Mozilla::PublicSuffix>, L<Domain::PublicSuffix>, L<Net::PublicSuffixList>
963              
964             L<https://publicsuffix.org/list/>
965              
966             =head1 COPYRIGHT & LICENSE
967              
968             Copyright (c) 2021 DEGUEST Pte. Ltd.
969              
970             You can use, copy, modify and redistribute this package and associated files under the same terms as Perl itself.
971              
972             =cut