|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
  
 
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # The (extremely complex) rules for domain delegation.  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # <@LICENSE>  | 
| 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Licensed to the Apache Software Foundation (ASF) under one or more  | 
| 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # contributor license agreements.  See the NOTICE file distributed with  | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # this work for additional information regarding copyright ownership.  | 
| 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # The ASF licenses this file to you under the Apache License, Version 2.0  | 
| 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # (the "License"); you may not use this file except in compliance with  | 
| 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # the License.  You may obtain a copy of the License at:  | 
| 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #     http://www.apache.org/licenses/LICENSE-2.0  | 
| 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Unless required by applicable law or agreed to in writing, software  | 
| 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # distributed under the License is distributed on an "AS IS" BASIS,  | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  | 
| 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # See the License for the specific language governing permissions and  | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # limitations under the License.  | 
| 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # </@LICENSE>  | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 NAME  | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Mail::SpamAssassin::RegistryBoundaries - domain delegation rules  | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 use strict;  | 
| 
28
 | 
41
 | 
 
 | 
 
 | 
  
41
  
 | 
 
 | 
279
 | 
 use warnings;  | 
| 
 
 | 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
88
 | 
    | 
| 
 
 | 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1361
 | 
    | 
| 
29
 | 
41
 | 
 
 | 
 
 | 
  
41
  
 | 
 
 | 
262
 | 
 # use bytes;  | 
| 
 
 | 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
114
 | 
    | 
| 
 
 | 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1467
 | 
    | 
| 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 use re 'taint';  | 
| 
31
 | 
41
 | 
 
 | 
 
 | 
  
41
  
 | 
 
 | 
256
 | 
    | 
| 
 
 | 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
108
 | 
    | 
| 
 
 | 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2356
 | 
    | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our @ISA = qw();  | 
| 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 use Mail::SpamAssassin::Logger;  | 
| 
35
 | 
41
 | 
 
 | 
 
 | 
  
41
  
 | 
 
 | 
278
 | 
 use Mail::SpamAssassin::Constants qw(:ip);  | 
| 
 
 | 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
100
 | 
    | 
| 
 
 | 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2682
 | 
    | 
| 
36
 | 
41
 | 
 
 | 
 
 | 
  
41
  
 | 
 
 | 
274
 | 
 use Mail::SpamAssassin::Util qw(is_fqdn_valid);  | 
| 
 
 | 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
122
 | 
    | 
| 
 
 | 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5015
 | 
    | 
| 
37
 | 
41
 | 
 
 | 
 
 | 
  
41
  
 | 
 
 | 
300
 | 
    | 
| 
 
 | 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
90
 | 
    | 
| 
 
 | 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
45237
 | 
    | 
| 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 my $IP_ADDRESS = IP_ADDRESS;  | 
| 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # called from SpamAssassin->init() to create $self->{util_rb}  | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $class = shift;  | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $class = ref($class) || $class;  | 
| 
43
 | 
91
 | 
 
 | 
 
 | 
  
91
  
 | 
  
0
  
 | 
233
 | 
    | 
| 
44
 | 
91
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
623
 | 
   my ($main) = @_;  | 
| 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $self = {  | 
| 
46
 | 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
243
 | 
     'main'              => $main,  | 
| 
47
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     'conf'              => $main->{conf},  | 
| 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   };  | 
| 
49
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   bless ($self, $class);  | 
| 
50
 | 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
387
 | 
    | 
| 
51
 | 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
397
 | 
   # Initialize valid_tlds_re for schemeless uri parsing, FreeMail etc  | 
| 
52
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if ($self->{conf}->{valid_tlds} && %{$self->{conf}->{valid_tlds}}) {  | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # International domain names are already in ASCII-compatible encoding (ACE)  | 
| 
54
 | 
91
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
711
 | 
     my $tlds =   | 
| 
 
 | 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
457
 | 
    | 
| 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       '(?<![a-zA-Z0-9-])(?:'. # make sure tld starts at boundary  | 
| 
56
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       join('|', keys %{$self->{conf}->{valid_tlds}}).  | 
| 
57
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       ')(?!(?:[a-zA-Z0-9-]|\.[a-zA-Z0-9]))'; # make sure it ends  | 
| 
58
 | 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
256
 | 
     # Perl 5.10+ trie optimizes lists, no need for fancy regex optimizing  | 
| 
 
 | 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
35828
 | 
    | 
| 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if (eval { $self->{valid_tlds_re} = qr/$tlds/i; 1; }) {  | 
| 
60
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       dbg("config: registryboundaries: %d tlds loaded",  | 
| 
61
 | 
91
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
3412
 | 
         scalar keys %{$self->{conf}->{valid_tlds}});  | 
| 
 
 | 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
449117
 | 
    | 
| 
 
 | 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7149
 | 
    | 
| 
62
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } else {  | 
| 
63
 | 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
227
 | 
       warn "config: registryboundaries: failed to compile valid_tlds_re: $@\n";  | 
| 
 
 | 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
953
 | 
    | 
| 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       $self->{valid_tlds_re} = qr/no_tlds_defined/;  | 
| 
65
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     }  | 
| 
66
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
   }  | 
| 
67
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   else {  | 
| 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Failsafe in case no tlds defined, we don't want this to match everything..  | 
| 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $self->{valid_tlds_re} = qr/no_tlds_defined/;  | 
| 
70
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     warn "config: registryboundaries: no tlds defined, need to run sa-update\n"  | 
| 
71
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       if !$self->{main}->{ignore_site_cf_files};  | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
73
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $self;  | 
| 
75
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
76
 | 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
596
 | 
    | 
| 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # This is required because the .us domain is nuts. See split_domain.  | 
| 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our %US_STATES = qw(  | 
| 
79
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ak 1 al 1 ar 1 az 1 ca 1 co 1 ct 1 dc 1 de 1 fl 1 ga 1 gu 1 hi 1 ia 1 id 1 il 1 in 1 ks 1 ky 1 la 1 ma 1 md 1 me 1 mi 1  | 
| 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   mn 1 mo 1 ms 1 mt 1 nc 1 nd 1 ne 1 nh 1 nj 1 nm 1 nv 1 ny 1 oh 1 ok 1 or 1 pa 1 pr 1 ri 1 sc 1 sd 1 tn 1 tx 1 ut 1 va 1  | 
| 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   vi 1 vt 1 wa 1 wi 1 wv 1 wy 1  | 
| 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   );  | 
| 
83
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
84
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ###########################################################################  | 
| 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 METHODS  | 
| 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
88
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
90
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item ($hostname, $domain) = split_domain ($fqdn)  | 
| 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Cut a fully-qualified hostname into the hostname part and the domain  | 
| 
93
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 part, splitting at the DNS registry boundary.  | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
95
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Examples:  | 
| 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
97
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "www.foo.com" => ( "www", "foo.com" )  | 
| 
98
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "www.foo.co.uk" => ( "www", "foo.co.uk" )  | 
| 
99
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $self = shift;  | 
| 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $domain = lc shift;  | 
| 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
105
 | 
550
 | 
 
 | 
 
 | 
  
550
  
 | 
  
1
  
 | 
621
 | 
   my $hostname = '';  | 
| 
106
 | 
550
 | 
 
 | 
 
 | 
 
 | 
 
 | 
808
 | 
    | 
| 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (defined $domain && $domain ne '') {  | 
| 
108
 | 
550
 | 
 
 | 
 
 | 
 
 | 
 
 | 
759
 | 
     # www..spamassassin.org -> www.spamassassin.org  | 
| 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $domain =~ tr/././s;  | 
| 
110
 | 
550
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
1616
 | 
    | 
| 
111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # leading/trailing dots  | 
| 
112
 | 
550
 | 
 
 | 
 
 | 
 
 | 
 
 | 
872
 | 
     $domain =~ s/^\.+//;  | 
| 
113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $domain =~ s/\.+$//;  | 
| 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
115
 | 
550
 | 
 
 | 
 
 | 
 
 | 
 
 | 
815
 | 
     # Split scalar domain into components  | 
| 
116
 | 
550
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1024
 | 
     my @domparts = split(/\./, $domain);  | 
| 
117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my @hostname;  | 
| 
118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
119
 | 
550
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1115
 | 
     while (@domparts > 1) { # go until we find the TLD  | 
| 
120
 | 
550
 | 
 
 | 
 
 | 
 
 | 
 
 | 
641
 | 
       if (@domparts == 4) {  | 
| 
121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         if ($domparts[3] eq 'us' &&  | 
| 
122
 | 
550
 | 
 
 | 
 
 | 
 
 | 
 
 | 
975
 | 
             (($domparts[0] eq 'pvt' && $domparts[1] eq 'k12') ||  | 
| 
123
 | 
885
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2125
 | 
              ($domparts[0] =~ /^c[io]$/)))  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
124
 | 
27
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
69
 | 
         {  | 
| 
 
 | 
 
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           # http://www.neustar.us/policies/docs/rfc_1480.txt  | 
| 
126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           # "Fire-Dept.CI.Los-Angeles.CA.US"  | 
| 
127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           # "<school-name>.PVT.K12.<state>.US"  | 
| 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           last if ($US_STATES{$domparts[2]});  | 
| 
129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
131
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
       elsif (@domparts == 3) {  | 
| 
132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # http://www.neustar.us/policies/docs/rfc_1480.txt  | 
| 
133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # demon.co.uk  | 
| 
134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # esc.edu.ar  | 
| 
135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # [^\.]+\.${US_STATES}\.us  | 
| 
136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         if ($domparts[2] eq 'us') {  | 
| 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           last if ($US_STATES{$domparts[1]});  | 
| 
138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
139
 | 
302
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
529
 | 
         else {  | 
| 
140
 | 
5
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
23
 | 
           my $temp = join(".", @domparts);  | 
| 
141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           last if ($self->{conf}->{three_level_domains}{$temp});  | 
| 
142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
143
 | 
297
 | 
 
 | 
 
 | 
 
 | 
 
 | 
590
 | 
       }  | 
| 
144
 | 
297
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
761
 | 
       elsif (@domparts == 2) {  | 
| 
145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # co.uk, etc.  | 
| 
146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my $temp = join(".", @domparts);  | 
| 
147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         last if ($self->{conf}->{two_level_domains}{$temp});  | 
| 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
149
 | 
545
 | 
 
 | 
 
 | 
 
 | 
 
 | 
879
 | 
       push(@hostname, shift @domparts);  | 
| 
150
 | 
545
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1332
 | 
     }  | 
| 
151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
152
 | 
860
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1880
 | 
     # Look for a sub-delegated TLD  | 
| 
153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # use @domparts to skip trying to match on TLDs that can't possibly  | 
| 
154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # match, but keep in mind that the hostname can be blank, so 4TLD needs 4,  | 
| 
155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # 3TLD needs 3, 2TLD needs 2 ...  | 
| 
156
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #  | 
| 
157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     unshift @domparts, pop @hostname if @hostname;  | 
| 
158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $domain = join(".", @domparts);  | 
| 
159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $hostname = join(".", @hostname);  | 
| 
160
 | 
550
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1184
 | 
   }  | 
| 
161
 | 
550
 | 
 
 | 
 
 | 
 
 | 
 
 | 
952
 | 
    | 
| 
162
 | 
550
 | 
 
 | 
 
 | 
 
 | 
 
 | 
987
 | 
   ($hostname, $domain);  | 
| 
163
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
165
 | 
550
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1312
 | 
 ###########################################################################  | 
| 
166
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
167
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item $domain = trim_domain($fqdn)  | 
| 
168
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Cut a fully-qualified hostname into the hostname part and the domain  | 
| 
170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 part, returning just the domain.  | 
| 
171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
172
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Examples:  | 
| 
173
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
174
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "www.foo.com" => "foo.com"  | 
| 
175
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "www.foo.co.uk" => "foo.co.uk"  | 
| 
176
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $self = shift;  | 
| 
180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $domain = shift;  | 
| 
181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
182
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my ($host, $dom) = $self->split_domain($domain);  | 
| 
183
 | 
550
 | 
 
 | 
 
 | 
  
550
  
 | 
  
1
  
 | 
40606
 | 
   return $dom;  | 
| 
184
 | 
550
 | 
 
 | 
 
 | 
 
 | 
 
 | 
639
 | 
 }  | 
| 
185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
186
 | 
550
 | 
 
 | 
 
 | 
 
 | 
 
 | 
985
 | 
 ###########################################################################  | 
| 
187
 | 
550
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1020
 | 
    | 
| 
188
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item $ok = is_domain_valid($dom)  | 
| 
189
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
190
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Return C<1> if the domain is valid, C<undef> otherwise.  A valid domain  | 
| 
191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (a) does not contain whitespace, (b) contains at least one dot, and (c)  | 
| 
192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 uses a valid TLD or ccTLD.  | 
| 
193
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
194
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
195
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
196
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
198
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my ($self, $dom) = @_;  | 
| 
199
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
200
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return 0 unless defined $dom;  | 
| 
201
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # domains don't have whitespace  | 
| 
203
 | 
607
 | 
 
 | 
 
 | 
  
607
  
 | 
  
1
  
 | 
1016
 | 
   return 0 if ($dom =~ /\s/);  | 
| 
204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
205
 | 
607
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
983
 | 
   # ensure it ends in a known-valid TLD, and has at least 1 dot  | 
| 
206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return 0 unless ($dom =~ /\.([^.]+)$/);  | 
| 
207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return 0 unless ($self->{conf}->{valid_tlds}{lc $1});  | 
| 
208
 | 
607
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
1201
 | 
    | 
| 
209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return 1;     # nah, it's ok.  | 
| 
210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
211
 | 
607
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1885
 | 
    | 
| 
212
 | 
605
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2119
 | 
 #  | 
| 
213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
214
 | 
572
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1250
 | 
   my $self = shift;  | 
| 
215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $uri = lc shift;  | 
| 
216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # Javascript is not going to help us, so return.  | 
| 
218
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # Likewise ignore cid, file  | 
| 
219
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return if ($uri =~ /^(?:javascript|cid|file):/);  | 
| 
220
 | 
749
 | 
 
 | 
 
 | 
  
749
  
 | 
  
0
  
 | 
13016
 | 
    | 
| 
221
 | 
749
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1303
 | 
   if ($uri =~ s/^mailto://) { # handle mailto: specially  | 
| 
222
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $uri =~ s/\?.*//;			# drop parameters ?subject= etc  | 
| 
223
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # note above, Outlook linkifies foo@bar%2Ecom&x.com to foo@bar.com !!  | 
| 
224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # uri_list_canonicalize should have made versions without ? &  | 
| 
225
 | 
749
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2015
 | 
     # Keep testing with & here just in case..  | 
| 
226
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return if $uri =~ /\@.*?\@/;	# abort if multiple @  | 
| 
227
 | 
743
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1472
 | 
     return unless $uri =~ s/.*@//;	# drop username or abort  | 
| 
228
 | 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
89
 | 
   } else {  | 
| 
229
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $uri =~ s{^[a-z]+:/{0,2}}{}gs;	# drop the protocol  | 
| 
230
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # strip path, CGI params, fragment.  note: bug 4213 shows that "&" should  | 
| 
231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # *not* be likewise stripped here -- it's permitted in hostnames by  | 
| 
232
 | 
42
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
103
 | 
     # some common MUAs!  | 
| 
233
 | 
39
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
146
 | 
     $uri =~ s{[/?#].*}{}gs;                | 
| 
234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $uri =~ s{^[^/]*\@}{}gs;		# drop username/passwd  | 
| 
235
 | 
701
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2054
 | 
     $uri =~ s{:\d*$}{}gs;		# port, bug 4191: sometimes the # is missing  | 
| 
236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
238
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # skip undecoded URIs if the encoded bits shouldn't be.  | 
| 
239
 | 
701
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1408
 | 
   # we'll see the decoded version as well.  see url_encode()  | 
| 
240
 | 
701
 | 
 
 | 
 
 | 
 
 | 
 
 | 
942
 | 
   return if $uri =~ /\%(?:2[1-9a-f]|[3-6][0-9a-f]|7[0-9a-e])/;  | 
| 
241
 | 
701
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1027
 | 
    | 
| 
242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $host = $uri;  # unstripped/full domain name  | 
| 
243
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $domain = $host;  | 
| 
244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
245
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # keep IPs intact  | 
| 
246
 | 
735
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1314
 | 
   if ($host !~ /^$IP_ADDRESS$/) {   | 
| 
247
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # check that it's a valid hostname/fqdn  | 
| 
248
 | 
734
 | 
 
 | 
 
 | 
 
 | 
 
 | 
937
 | 
     return unless is_fqdn_valid($host);  | 
| 
249
 | 
734
 | 
 
 | 
 
 | 
 
 | 
 
 | 
915
 | 
     # ignore invalid TLDs  | 
| 
250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return unless $self->is_domain_valid($host);  | 
| 
251
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # get rid of hostname part of domain, understanding delegation  | 
| 
252
 | 
734
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
9116
 | 
     $domain = $self->trim_domain($host);  | 
| 
253
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
254
 | 
679
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1965
 | 
     | 
| 
255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # $uri is now the domain only, optionally return unstripped host name  | 
| 
256
 | 
573
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1213
 | 
   return !wantarray ? $domain : ($domain, $host);  | 
| 
257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
258
 | 
541
 | 
 
 | 
 
 | 
 
 | 
 
 | 
915
 | 
    | 
| 
259
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1;  | 
| 
260
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    |