| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Net::Whois::Parser; | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 1 |  |  | 1 |  | 78808 | use strict; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 24 |  | 
| 4 |  |  |  |  |  |  |  | 
| 5 | 1 |  |  | 1 |  | 4 | use utf8; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 4 |  | 
| 6 | 1 |  |  | 1 |  | 14 | use Net::Whois::Raw; | 
|  | 1 |  |  |  |  | 6 |  | 
|  | 1 |  |  |  |  | 4 |  | 
| 7 | 1 |  |  | 1 |  | 531 | use Data::Dumper; | 
|  | 1 |  |  |  |  | 6121 |  | 
|  | 1 |  |  |  |  | 190 |  | 
| 8 |  |  |  |  |  |  |  | 
| 9 |  |  |  |  |  |  | our $VERSION = '0.08'; | 
| 10 |  |  |  |  |  |  |  | 
| 11 |  |  |  |  |  |  | our @EXPORT = qw( parse_whois ); | 
| 12 |  |  |  |  |  |  |  | 
| 13 |  |  |  |  |  |  | our $DEBUG = 0; | 
| 14 |  |  |  |  |  |  |  | 
| 15 |  |  |  |  |  |  | # parsers for parse whois text to data structure | 
| 16 |  |  |  |  |  |  | our %PARSERS = ( | 
| 17 |  |  |  |  |  |  | 'DEFAULT' => \&_default_parser, | 
| 18 |  |  |  |  |  |  | ); | 
| 19 |  |  |  |  |  |  |  | 
| 20 |  |  |  |  |  |  | # rules to convert diferent names of same fields to standard name | 
| 21 |  |  |  |  |  |  | our %FIELD_NAME_CONV = ( | 
| 22 |  |  |  |  |  |  |  | 
| 23 |  |  |  |  |  |  | # nameservers | 
| 24 |  |  |  |  |  |  | nserver          => 'nameservers', | 
| 25 |  |  |  |  |  |  | name_server      => 'nameservers', | 
| 26 |  |  |  |  |  |  | name_serever     => 'nameservers', | 
| 27 |  |  |  |  |  |  | name_server      => 'nameservers', | 
| 28 |  |  |  |  |  |  | nameserver       => 'nameservers', | 
| 29 |  |  |  |  |  |  | dns1             => 'nameservers', | 
| 30 |  |  |  |  |  |  | dns2             => 'nameservers', | 
| 31 |  |  |  |  |  |  | primary_server   => 'nameservers', | 
| 32 |  |  |  |  |  |  | secondary_server => 'nameservers', | 
| 33 |  |  |  |  |  |  |  | 
| 34 |  |  |  |  |  |  | # domain | 
| 35 |  |  |  |  |  |  | domain_name   => 'domain', | 
| 36 |  |  |  |  |  |  | domainname    => 'domain', | 
| 37 |  |  |  |  |  |  |  | 
| 38 |  |  |  |  |  |  | # creation_date | 
| 39 |  |  |  |  |  |  | created                  => 'creation_date', | 
| 40 |  |  |  |  |  |  | created_on               => 'creation_date', | 
| 41 |  |  |  |  |  |  | creation_date            => 'creation_date', | 
| 42 |  |  |  |  |  |  | domain_registration_date => 'creation_date', | 
| 43 |  |  |  |  |  |  | domain_created           => 'creation_date', | 
| 44 |  |  |  |  |  |  |  | 
| 45 |  |  |  |  |  |  | #expiration_date | 
| 46 |  |  |  |  |  |  | expire                 => 'expiration_date', | 
| 47 |  |  |  |  |  |  | expire_date            => 'expiration_date', | 
| 48 |  |  |  |  |  |  | expires                => 'expiration_date', | 
| 49 |  |  |  |  |  |  | expires_at             => 'expiration_date', | 
| 50 |  |  |  |  |  |  | expires_on             => 'expiration_date', | 
| 51 |  |  |  |  |  |  | expiry_date            => 'expiration_date', | 
| 52 |  |  |  |  |  |  | domain_expiration_date => 'expiration_date', | 
| 53 |  |  |  |  |  |  |  | 
| 54 |  |  |  |  |  |  | ); | 
| 55 |  |  |  |  |  |  |  | 
| 56 |  |  |  |  |  |  | # You can turn this flag to get | 
| 57 |  |  |  |  |  |  | # all values of field in all whois answers | 
| 58 |  |  |  |  |  |  | our $GET_ALL_VALUES = 0; | 
| 59 |  |  |  |  |  |  |  | 
| 60 |  |  |  |  |  |  | # hooks for formating values | 
| 61 |  |  |  |  |  |  | our %HOOKS = ( | 
| 62 |  |  |  |  |  |  | nameservers => [ \&format_nameservers ], | 
| 63 |  |  |  |  |  |  | emails => [ sub {my $value = shift; ref $value ? $value : [$value] } ], | 
| 64 |  |  |  |  |  |  | ); | 
| 65 |  |  |  |  |  |  |  | 
| 66 |  |  |  |  |  |  | # From Net::Whois::Raw | 
| 67 |  |  |  |  |  |  | sub import { | 
| 68 | 1 |  |  | 1 |  | 8 | my $mypkg = shift; | 
| 69 | 1 |  |  |  |  | 2 | my $callpkg = caller; | 
| 70 |  |  |  |  |  |  |  | 
| 71 | 1 |  |  | 1 |  | 6 | no strict 'refs'; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 1504 |  | 
| 72 |  |  |  |  |  |  |  | 
| 73 |  |  |  |  |  |  | # export subs | 
| 74 | 1 |  |  |  |  | 2 | *{"$callpkg\::$_"} = \&{"$mypkg\::$_"} foreach ((@EXPORT, @_)); | 
|  | 1 |  |  |  |  | 1549 |  | 
|  | 1 |  |  |  |  | 4 |  | 
| 75 |  |  |  |  |  |  | } | 
| 76 |  |  |  |  |  |  |  | 
| 77 |  |  |  |  |  |  | # fetches whois text | 
| 78 |  |  |  |  |  |  | sub _fetch_whois { | 
| 79 | 2 |  |  | 2 |  | 5 | my %args = @_; | 
| 80 |  |  |  |  |  |  |  | 
| 81 | 2 |  |  |  |  | 6 | local $Net::Whois::Raw::CHECK_FAIL = 1; | 
| 82 |  |  |  |  |  |  |  | 
| 83 | 2 |  |  |  |  | 3 | my @res = eval { | 
| 84 |  |  |  |  |  |  | Net::Whois::Raw::whois( | 
| 85 |  |  |  |  |  |  | $args{domain}, | 
| 86 |  |  |  |  |  |  | $args{server} || undef, | 
| 87 | 2 |  | 50 |  |  | 22 | $args{which_whois} || 'QRY_ALL' | 
|  |  |  | 50 |  |  |  |  | 
| 88 |  |  |  |  |  |  | ) | 
| 89 |  |  |  |  |  |  | }; | 
| 90 | 2 | 50 |  |  |  | 645292 | return undef if $@; | 
| 91 |  |  |  |  |  |  |  | 
| 92 | 2 | 50 |  |  |  | 12 | my $res = ref $res[0] ? $res[0] : [ { text => $res[0], srv => $res[1] } ]; | 
| 93 | 2 |  |  |  |  | 6 | @$res = grep { $_->{text} } @$res; | 
|  | 2 |  |  |  |  | 13 |  | 
| 94 |  |  |  |  |  |  |  | 
| 95 | 2 | 100 |  |  |  | 14 | return scalar @$res ? $res : undef; | 
| 96 |  |  |  |  |  |  | } | 
| 97 |  |  |  |  |  |  |  | 
| 98 |  |  |  |  |  |  | sub parse_whois { | 
| 99 |  |  |  |  |  |  | #TODO warn: Odd number of elements in hash assignment | 
| 100 | 7 |  |  | 7 | 1 | 285370 | my %args = @_; | 
| 101 |  |  |  |  |  |  |  | 
| 102 | 7 | 100 |  |  |  | 23 | if ( $args{raw} ) { | 
|  |  | 50 |  |  |  |  |  | 
| 103 |  |  |  |  |  |  |  | 
| 104 |  |  |  |  |  |  | my $server = | 
| 105 |  |  |  |  |  |  | $args{server} || | 
| 106 | 5 |  | 50 |  |  | 24 | Net::Whois::Raw::Common::get_server($args{domain}) || | 
| 107 |  |  |  |  |  |  | 'DEFAULT'; | 
| 108 |  |  |  |  |  |  |  | 
| 109 | 5 | 100 |  |  |  | 25253 | my $whois = ref $args{raw} ? $args{raw} : [ { text => $args{raw}, srv => $server } ]; | 
| 110 |  |  |  |  |  |  |  | 
| 111 | 5 |  |  |  |  | 13 | return _process_parse($whois); | 
| 112 |  |  |  |  |  |  |  | 
| 113 |  |  |  |  |  |  | } | 
| 114 |  |  |  |  |  |  | elsif ( $args{domain} ) { | 
| 115 | 2 |  |  |  |  | 6 | my $whois = _fetch_whois(%args); | 
| 116 | 2 | 100 |  |  |  | 23 | return $whois ? _process_parse($whois) : undef; | 
| 117 |  |  |  |  |  |  | } | 
| 118 |  |  |  |  |  |  |  | 
| 119 | 0 |  |  |  |  | 0 | undef; | 
| 120 |  |  |  |  |  |  | } | 
| 121 |  |  |  |  |  |  |  | 
| 122 |  |  |  |  |  |  | sub _process_parse { | 
| 123 | 6 |  |  | 6 |  | 11 | my ( $whois ) = @_; | 
| 124 |  |  |  |  |  |  |  | 
| 125 | 6 |  |  |  |  | 11 | my @data = (); | 
| 126 | 6 |  |  |  |  | 12 | for my $ans ( @$whois ) { | 
| 127 |  |  |  |  |  |  |  | 
| 128 |  |  |  |  |  |  | my $parser = | 
| 129 |  |  |  |  |  |  | $ans->{srv} && $PARSERS{$ans->{srv}} ? | 
| 130 | 8 | 50 | 66 |  |  | 48 | $PARSERS{$ans->{srv}} : $PARSERS{DEFAULT}; | 
| 131 |  |  |  |  |  |  |  | 
| 132 | 8 |  |  |  |  | 21 | push @data, $parser->($ans->{text}); | 
| 133 |  |  |  |  |  |  | } | 
| 134 |  |  |  |  |  |  |  | 
| 135 | 6 |  |  |  |  | 17 | _post_parse(\@data); | 
| 136 |  |  |  |  |  |  | } | 
| 137 |  |  |  |  |  |  |  | 
| 138 |  |  |  |  |  |  | # standardize data structure | 
| 139 |  |  |  |  |  |  | sub _post_parse { | 
| 140 | 6 |  |  | 6 |  | 10 | my ( $data )  = @_; | 
| 141 |  |  |  |  |  |  |  | 
| 142 | 6 |  |  |  |  | 11 | my %res = (); | 
| 143 | 6 |  |  |  |  | 8 | my $count = 0; | 
| 144 | 6 |  |  |  |  | 7 | my %flag = (); | 
| 145 |  |  |  |  |  |  |  | 
| 146 | 6 |  |  |  |  | 10 | for my $hash ( @$data ) { | 
| 147 |  |  |  |  |  |  |  | 
| 148 | 8 |  |  |  |  | 7 | $count++; | 
| 149 |  |  |  |  |  |  |  | 
| 150 | 8 |  |  |  |  | 30 | for my $key ( keys %$hash ) { | 
| 151 | 59 | 50 |  |  |  | 83 | next unless $hash->{$key}; | 
| 152 |  |  |  |  |  |  |  | 
| 153 |  |  |  |  |  |  | # change keys to standard names | 
| 154 | 59 |  |  |  |  | 50 | my $new_key = lc $key; | 
| 155 | 59 |  |  |  |  | 181 | $new_key =~ s/\s+|\t+|-/_/g; | 
| 156 | 59 |  |  |  |  | 55 | $new_key =~ s/\.+$//; | 
| 157 | 59 | 100 |  |  |  | 83 | if ( exists $FIELD_NAME_CONV{$new_key} ) { | 
| 158 | 8 |  |  |  |  | 19 | $new_key =  $FIELD_NAME_CONV{$new_key}; | 
| 159 |  |  |  |  |  |  | } | 
| 160 |  |  |  |  |  |  |  | 
| 161 | 59 | 100 |  |  |  | 67 | unless ( $GET_ALL_VALUES ) { | 
| 162 | 53 | 50 | 33 |  |  | 104 | if ( exists $res{$new_key} && !$flag{$new_key} ) { | 
| 163 | 0 |  |  |  |  | 0 | delete $res{$new_key}; | 
| 164 | 0 |  |  |  |  | 0 | $flag{$new_key} = 1; | 
| 165 |  |  |  |  |  |  | } | 
| 166 |  |  |  |  |  |  | } | 
| 167 |  |  |  |  |  |  |  | 
| 168 |  |  |  |  |  |  | # add values to result hash | 
| 169 | 59 | 100 |  |  |  | 58 | if ( exists $res{$new_key} ) { | 
| 170 | 4 |  |  |  |  | 4 | push @{$res{$new_key}}, @{$hash->{$key}}; | 
|  | 4 |  |  |  |  | 4 |  | 
|  | 4 |  |  |  |  | 7 |  | 
| 171 |  |  |  |  |  |  | } | 
| 172 |  |  |  |  |  |  | else { | 
| 173 | 55 | 50 |  |  |  | 113 | $res{$new_key} = ref $hash->{$key} ? $hash->{$key} : [$hash->{$key}]; | 
| 174 |  |  |  |  |  |  | } | 
| 175 |  |  |  |  |  |  |  | 
| 176 |  |  |  |  |  |  | } | 
| 177 |  |  |  |  |  |  | } | 
| 178 |  |  |  |  |  |  |  | 
| 179 |  |  |  |  |  |  | # make unique and process hooks | 
| 180 | 6 |  |  |  |  | 23 | while ( my ( $key, $value ) = each %res ) { | 
| 181 |  |  |  |  |  |  |  | 
| 182 | 55 | 100 |  |  |  | 52 | if ( scalar @$value > 1 ) { | 
| 183 | 5 |  |  |  |  | 9 | @$value = _make_unique(@$value); | 
| 184 |  |  |  |  |  |  | } | 
| 185 |  |  |  |  |  |  | else { | 
| 186 | 50 |  |  |  |  | 63 | $value = $value->[0]; | 
| 187 |  |  |  |  |  |  | } | 
| 188 |  |  |  |  |  |  |  | 
| 189 | 55 | 100 |  |  |  | 77 | if ( exists $HOOKS{$key} ) { | 
| 190 | 10 |  |  |  |  | 7 | for my $hook ( @{$HOOKS{$key}} ) { $value = $hook->($value) } | 
|  | 10 |  |  |  |  | 20 |  | 
|  | 10 |  |  |  |  | 19 |  | 
| 191 |  |  |  |  |  |  | } | 
| 192 |  |  |  |  |  |  |  | 
| 193 | 55 |  |  |  |  | 105 | $res{$key} = $value; | 
| 194 |  |  |  |  |  |  |  | 
| 195 |  |  |  |  |  |  | } | 
| 196 |  |  |  |  |  |  |  | 
| 197 | 6 |  |  |  |  | 49 | \%res; | 
| 198 |  |  |  |  |  |  | } | 
| 199 |  |  |  |  |  |  |  | 
| 200 |  |  |  |  |  |  | sub _make_unique { | 
| 201 | 5 |  |  | 5 |  | 5 | my %vals; | 
| 202 | 5 |  |  |  |  | 22 | grep { not $vals{$_} ++ } @_; | 
|  | 19 |  |  |  |  | 70 |  | 
| 203 |  |  |  |  |  |  | } | 
| 204 |  |  |  |  |  |  |  | 
| 205 |  |  |  |  |  |  | ## PARSERS ## | 
| 206 |  |  |  |  |  |  |  | 
| 207 |  |  |  |  |  |  | # Regular expression built using Jeffrey Friedl's example in | 
| 208 |  |  |  |  |  |  | # _Mastering Regular Expressions_ (http://www.ora.com/catalog/regexp/). | 
| 209 |  |  |  |  |  |  |  | 
| 210 |  |  |  |  |  |  | my $RFC822PAT = <<'EOF'; | 
| 211 |  |  |  |  |  |  | [\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\ | 
| 212 |  |  |  |  |  |  | xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\xf | 
| 213 |  |  |  |  |  |  | f\n\015()]*)*\)[\040\t]*)*(?:(?:[^(\040)<>@,;:".\\\[\]\000-\037\x80-\x | 
| 214 |  |  |  |  |  |  | ff]+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff])|"[^\\\x80-\xff\n\015 | 
| 215 |  |  |  |  |  |  | "]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015"]*)*")[\040\t]*(?:\([^\\\x80-\ | 
| 216 |  |  |  |  |  |  | xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80 | 
| 217 |  |  |  |  |  |  | -\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t]* | 
| 218 |  |  |  |  |  |  | )*(?:\.[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\ | 
| 219 |  |  |  |  |  |  | \\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\ | 
| 220 |  |  |  |  |  |  | x80-\xff\n\015()]*)*\)[\040\t]*)*(?:[^(\040)<>@,;:".\\\[\]\000-\037\x8 | 
| 221 |  |  |  |  |  |  | 0-\xff]+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff])|"[^\\\x80-\xff\n | 
| 222 |  |  |  |  |  |  | \015"]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015"]*)*")[\040\t]*(?:\([^\\\x | 
| 223 |  |  |  |  |  |  | 80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^ | 
| 224 |  |  |  |  |  |  | \x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040 | 
| 225 |  |  |  |  |  |  | \t]*)*)*@[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([ | 
| 226 |  |  |  |  |  |  | ^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\ | 
| 227 |  |  |  |  |  |  | \\x80-\xff\n\015()]*)*\)[\040\t]*)*(?:[^(\040)<>@,;:".\\\[\]\000-\037\ | 
| 228 |  |  |  |  |  |  | x80-\xff]+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff])|\[(?:[^\\\x80- | 
| 229 |  |  |  |  |  |  | \xff\n\015\[\]]|\\[^\x80-\xff])*\])[\040\t]*(?:\([^\\\x80-\xff\n\015() | 
| 230 |  |  |  |  |  |  | ]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\ | 
| 231 |  |  |  |  |  |  | x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t]*)*(?:\.[\04 | 
| 232 |  |  |  |  |  |  | 0\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\ | 
| 233 |  |  |  |  |  |  | n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\ | 
| 234 |  |  |  |  |  |  | 015()]*)*\)[\040\t]*)*(?:[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+(?! | 
| 235 |  |  |  |  |  |  | [^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff])|\[(?:[^\\\x80-\xff\n\015\[\ | 
| 236 |  |  |  |  |  |  | ]]|\\[^\x80-\xff])*\])[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\ | 
| 237 |  |  |  |  |  |  | x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\01 | 
| 238 |  |  |  |  |  |  | 5()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t]*)*)*|(?:[^(\040)<>@,;:". | 
| 239 |  |  |  |  |  |  | \\\[\]\000-\037\x80-\xff]+(?![^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff] | 
| 240 |  |  |  |  |  |  | )|"[^\\\x80-\xff\n\015"]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015"]*)*")[^ | 
| 241 |  |  |  |  |  |  | ()<>@,;:".\\\[\]\x80-\xff\000-\010\012-\037]*(?:(?:\([^\\\x80-\xff\n\0 | 
| 242 |  |  |  |  |  |  | 15()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][ | 
| 243 |  |  |  |  |  |  | ^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\)|"[^\\\x80-\xff\ | 
| 244 |  |  |  |  |  |  | n\015"]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015"]*)*")[^()<>@,;:".\\\[\]\ | 
| 245 |  |  |  |  |  |  | x80-\xff\000-\010\012-\037]*)*<[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(? | 
| 246 |  |  |  |  |  |  | :(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80- | 
| 247 |  |  |  |  |  |  | \xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t]*)*(?:@[\040\t]* | 
| 248 |  |  |  |  |  |  | (?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015 | 
| 249 |  |  |  |  |  |  | ()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015() | 
| 250 |  |  |  |  |  |  | ]*)*\)[\040\t]*)*(?:[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+(?![^(\0 | 
| 251 |  |  |  |  |  |  | 40)<>@,;:".\\\[\]\000-\037\x80-\xff])|\[(?:[^\\\x80-\xff\n\015\[\]]|\\ | 
| 252 |  |  |  |  |  |  | [^\x80-\xff])*\])[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\ | 
| 253 |  |  |  |  |  |  | xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]* | 
| 254 |  |  |  |  |  |  | )*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t]*)*(?:\.[\040\t]*(?:\([^\\\x80 | 
| 255 |  |  |  |  |  |  | -\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x | 
| 256 |  |  |  |  |  |  | 80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t | 
| 257 |  |  |  |  |  |  | ]*)*(?:[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+(?![^(\040)<>@,;:".\\ | 
| 258 |  |  |  |  |  |  | \[\]\000-\037\x80-\xff])|\[(?:[^\\\x80-\xff\n\015\[\]]|\\[^\x80-\xff]) | 
| 259 |  |  |  |  |  |  | *\])[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x | 
| 260 |  |  |  |  |  |  | 80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80 | 
| 261 |  |  |  |  |  |  | -\xff\n\015()]*)*\)[\040\t]*)*)*(?:,[\040\t]*(?:\([^\\\x80-\xff\n\015( | 
| 262 |  |  |  |  |  |  | )]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\ | 
| 263 |  |  |  |  |  |  | \x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t]*)*@[\040\t | 
| 264 |  |  |  |  |  |  | ]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\0 | 
| 265 |  |  |  |  |  |  | 15()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015 | 
| 266 |  |  |  |  |  |  | ()]*)*\)[\040\t]*)*(?:[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+(?![^( | 
| 267 |  |  |  |  |  |  | \040)<>@,;:".\\\[\]\000-\037\x80-\xff])|\[(?:[^\\\x80-\xff\n\015\[\]]| | 
| 268 |  |  |  |  |  |  | \\[^\x80-\xff])*\])[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80 | 
| 269 |  |  |  |  |  |  | -\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015() | 
| 270 |  |  |  |  |  |  | ]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t]*)*(?:\.[\040\t]*(?:\([^\\\x | 
| 271 |  |  |  |  |  |  | 80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^ | 
| 272 |  |  |  |  |  |  | \x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040 | 
| 273 |  |  |  |  |  |  | \t]*)*(?:[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+(?![^(\040)<>@,;:". | 
| 274 |  |  |  |  |  |  | \\\[\]\000-\037\x80-\xff])|\[(?:[^\\\x80-\xff\n\015\[\]]|\\[^\x80-\xff | 
| 275 |  |  |  |  |  |  | ])*\])[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\ | 
| 276 |  |  |  |  |  |  | \x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x | 
| 277 |  |  |  |  |  |  | 80-\xff\n\015()]*)*\)[\040\t]*)*)*)*:[\040\t]*(?:\([^\\\x80-\xff\n\015 | 
| 278 |  |  |  |  |  |  | ()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\ | 
| 279 |  |  |  |  |  |  | \\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t]*)*)?(?:[^ | 
| 280 |  |  |  |  |  |  | (\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+(?![^(\040)<>@,;:".\\\[\]\000- | 
| 281 |  |  |  |  |  |  | \037\x80-\xff])|"[^\\\x80-\xff\n\015"]*(?:\\[^\x80-\xff][^\\\x80-\xff\ | 
| 282 |  |  |  |  |  |  | n\015"]*)*")[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]| | 
| 283 |  |  |  |  |  |  | \([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\)) | 
| 284 |  |  |  |  |  |  | [^\\\x80-\xff\n\015()]*)*\)[\040\t]*)*(?:\.[\040\t]*(?:\([^\\\x80-\xff | 
| 285 |  |  |  |  |  |  | \n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\x | 
| 286 |  |  |  |  |  |  | ff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t]*)*( | 
| 287 |  |  |  |  |  |  | ?:[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+(?![^(\040)<>@,;:".\\\[\]\ | 
| 288 |  |  |  |  |  |  | 000-\037\x80-\xff])|"[^\\\x80-\xff\n\015"]*(?:\\[^\x80-\xff][^\\\x80-\ | 
| 289 |  |  |  |  |  |  | xff\n\015"]*)*")[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\x | 
| 290 |  |  |  |  |  |  | ff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*) | 
| 291 |  |  |  |  |  |  | *\))[^\\\x80-\xff\n\015()]*)*\)[\040\t]*)*)*@[\040\t]*(?:\([^\\\x80-\x | 
| 292 |  |  |  |  |  |  | ff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80- | 
| 293 |  |  |  |  |  |  | \xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t]*) | 
| 294 |  |  |  |  |  |  | *(?:[^(\040)<>@,;:".\\\[\]\000-\037\x80-\xff]+(?![^(\040)<>@,;:".\\\[\ | 
| 295 |  |  |  |  |  |  | ]\000-\037\x80-\xff])|\[(?:[^\\\x80-\xff\n\015\[\]]|\\[^\x80-\xff])*\] | 
| 296 |  |  |  |  |  |  | )[\040\t]*(?:\([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80- | 
| 297 |  |  |  |  |  |  | \xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\x | 
| 298 |  |  |  |  |  |  | ff\n\015()]*)*\)[\040\t]*)*(?:\.[\040\t]*(?:\([^\\\x80-\xff\n\015()]*( | 
| 299 |  |  |  |  |  |  | ?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()]*(?:\\[^\x80-\xff][^\\\x80 | 
| 300 |  |  |  |  |  |  | -\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*)*\)[\040\t]*)*(?:[^(\040)< | 
| 301 |  |  |  |  |  |  | >@,;:".\\\[\]\000-\037\x80-\xff]+(?![^(\040)<>@,;:".\\\[\]\000-\037\x8 | 
| 302 |  |  |  |  |  |  | 0-\xff])|\[(?:[^\\\x80-\xff\n\015\[\]]|\\[^\x80-\xff])*\])[\040\t]*(?: | 
| 303 |  |  |  |  |  |  | \([^\\\x80-\xff\n\015()]*(?:(?:\\[^\x80-\xff]|\([^\\\x80-\xff\n\015()] | 
| 304 |  |  |  |  |  |  | *(?:\\[^\x80-\xff][^\\\x80-\xff\n\015()]*)*\))[^\\\x80-\xff\n\015()]*) | 
| 305 |  |  |  |  |  |  | *\)[\040\t]*)*)*>) | 
| 306 |  |  |  |  |  |  | EOF | 
| 307 |  |  |  |  |  |  |  | 
| 308 |  |  |  |  |  |  | $RFC822PAT =~ s/\n//g; | 
| 309 |  |  |  |  |  |  |  | 
| 310 |  |  |  |  |  |  |  | 
| 311 |  |  |  |  |  |  | sub _default_parser { | 
| 312 | 8 |  |  | 8 |  | 11 | my ( $raw ) = @_; | 
| 313 | 8 |  |  |  |  | 9 | my %data; | 
| 314 |  |  |  |  |  |  |  | 
| 315 |  |  |  |  |  |  | # transform data to key => value | 
| 316 | 8 |  |  |  |  | 45 | for my $line ( split /\n/, $raw ) { | 
| 317 |  |  |  |  |  |  |  | 
| 318 | 88 |  |  |  |  | 59 | chomp $line; | 
| 319 | 88 |  |  |  |  | 98 | $line =~ s/^\s+//; | 
| 320 | 88 |  |  |  |  | 150 | $line =~ s/\s+$//; | 
| 321 |  |  |  |  |  |  |  | 
| 322 | 88 |  |  |  |  | 175 | my ( $key, $value ) = $line =~ /^\s*([\d\w\s._-]+):\s*(.+)$/; | 
| 323 | 88 | 100 | 66 |  |  | 209 | next if  !$line || !$value; | 
| 324 | 63 |  |  |  |  | 62 | $key =~ s/\s+$//; | 
| 325 | 63 |  |  |  |  | 64 | $value =~ s/\s+$//; | 
| 326 |  |  |  |  |  |  |  | 
| 327 |  |  |  |  |  |  | # if we have more then one value for one field we push them into array | 
| 328 |  |  |  |  |  |  | $data{$key} = ref $data{$key} eq 'ARRAY' ? | 
| 329 | 63 | 100 |  |  |  | 163 | [ @{$data{$key}}, $value ] : [ $value ]; | 
|  | 12 |  |  |  |  | 34 |  | 
| 330 |  |  |  |  |  |  |  | 
| 331 |  |  |  |  |  |  | } | 
| 332 |  |  |  |  |  |  |  | 
| 333 |  |  |  |  |  |  | # find all emails in the text | 
| 334 | 8 |  |  |  |  | 10705 | my @emails = $raw =~ /($RFC822PAT)/gso; | 
| 335 | 8 |  |  |  |  | 32 | @emails = map { $_ =~ s/\s+//g; ($_) } @emails; | 
|  | 0 |  |  |  |  | 0 |  | 
|  | 0 |  |  |  |  | 0 |  | 
| 336 |  |  |  |  |  |  | $data{emails} = exists $data{emails} ? | 
| 337 | 8 | 50 |  |  |  | 25 | [ @{$data{emails}}, @emails ] : \@emails; | 
|  | 0 |  |  |  |  | 0 |  | 
| 338 |  |  |  |  |  |  |  | 
| 339 | 8 |  |  |  |  | 26 | \%data; | 
| 340 |  |  |  |  |  |  | } | 
| 341 |  |  |  |  |  |  |  | 
| 342 |  |  |  |  |  |  | ## FORMATERS ## | 
| 343 |  |  |  |  |  |  |  | 
| 344 |  |  |  |  |  |  | sub format_nameservers { | 
| 345 | 4 |  |  | 4 | 0 | 4 | my ( $value ) = @_; | 
| 346 |  |  |  |  |  |  |  | 
| 347 | 4 | 50 |  |  |  | 9 | $value = [$value] unless ref $value; | 
| 348 |  |  |  |  |  |  |  | 
| 349 | 4 |  |  |  |  | 5 | my @nss; | 
| 350 | 4 |  |  |  |  | 7 | for my $ns ( @$value ) { | 
| 351 | 16 |  |  |  |  | 50 | my ( $domain, $ip ) = split /\s+/, $ns; | 
| 352 |  |  |  |  |  |  |  | 
| 353 | 16 |  | 33 |  |  | 24 | $domain ||= $ns; | 
| 354 | 16 |  |  |  |  | 33 | $domain =~ s/\.$//; | 
| 355 | 16 |  |  |  |  | 15 | $domain = lc $domain; | 
| 356 |  |  |  |  |  |  |  | 
| 357 | 16 | 50 |  |  |  | 41 | push @nss, { | 
| 358 |  |  |  |  |  |  | domain => $domain, | 
| 359 |  |  |  |  |  |  | ( $ip ? (ip => $ip) : () ) | 
| 360 |  |  |  |  |  |  | }; | 
| 361 |  |  |  |  |  |  | } | 
| 362 |  |  |  |  |  |  |  | 
| 363 | 4 |  |  |  |  | 7 | \@nss; | 
| 364 |  |  |  |  |  |  | } | 
| 365 |  |  |  |  |  |  |  | 
| 366 |  |  |  |  |  |  | 1; | 
| 367 |  |  |  |  |  |  |  | 
| 368 |  |  |  |  |  |  | =head1 NAME | 
| 369 |  |  |  |  |  |  |  | 
| 370 |  |  |  |  |  |  | Net::Whois::Parser - module for parsing whois information | 
| 371 |  |  |  |  |  |  |  | 
| 372 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 373 |  |  |  |  |  |  |  | 
| 374 |  |  |  |  |  |  | use Net::Whois::Parser; | 
| 375 |  |  |  |  |  |  |  | 
| 376 |  |  |  |  |  |  | my $info = parse_whois( domain => $domain ); | 
| 377 |  |  |  |  |  |  | my $info = parse_whois( raw => $whois_raw_text, domain => $domain  ); | 
| 378 |  |  |  |  |  |  | my $info = parse_whois( raw => $whois_raw_text, server => $whois_server  ); | 
| 379 |  |  |  |  |  |  |  | 
| 380 |  |  |  |  |  |  | $info = { | 
| 381 |  |  |  |  |  |  | nameservers => [ | 
| 382 |  |  |  |  |  |  | { domain => 'ns.example.com', ip => '123.123.123.123' }, | 
| 383 |  |  |  |  |  |  | { domain => 'ns.example.com' }, | 
| 384 |  |  |  |  |  |  | ], | 
| 385 |  |  |  |  |  |  | emails => [ 'admin@example.com' ], | 
| 386 |  |  |  |  |  |  | domain => 'example.com', | 
| 387 |  |  |  |  |  |  | somefield1 => 'value', | 
| 388 |  |  |  |  |  |  | somefield2 => [ 'value', 'value2' ], | 
| 389 |  |  |  |  |  |  | ... | 
| 390 |  |  |  |  |  |  | }; | 
| 391 |  |  |  |  |  |  |  | 
| 392 |  |  |  |  |  |  | # Your own parsers | 
| 393 |  |  |  |  |  |  |  | 
| 394 |  |  |  |  |  |  | sub my_parser { | 
| 395 |  |  |  |  |  |  | my ( $text ) = @_; | 
| 396 |  |  |  |  |  |  | return { | 
| 397 |  |  |  |  |  |  | nameservers => [ | 
| 398 |  |  |  |  |  |  | { domain => 'ns.example.com', ip => '123.123.123.123' }, | 
| 399 |  |  |  |  |  |  | { domain => 'ns.example.com' }, | 
| 400 |  |  |  |  |  |  | ], | 
| 401 |  |  |  |  |  |  | emails => [ 'admin@example.com' ], | 
| 402 |  |  |  |  |  |  | somefield => 'value', | 
| 403 |  |  |  |  |  |  | somefield2 => [ 'value', 'value2' ], | 
| 404 |  |  |  |  |  |  | }; | 
| 405 |  |  |  |  |  |  | } | 
| 406 |  |  |  |  |  |  |  | 
| 407 |  |  |  |  |  |  | $Net::Whois::Parser::PARSERS{'whois.example.com'} = \&my_parser; | 
| 408 |  |  |  |  |  |  | $Net::Whois::Parser::PARSERS{'DEFAULT'}           = \&my_default_parser; | 
| 409 |  |  |  |  |  |  |  | 
| 410 |  |  |  |  |  |  | # If you want to get all values of fields from all whois answers | 
| 411 |  |  |  |  |  |  | $Net::Whois::Parser::GET_ALL_VALUES = 1; | 
| 412 |  |  |  |  |  |  | # example | 
| 413 |  |  |  |  |  |  | # Net::Whois::Raw returns 2 answers | 
| 414 |  |  |  |  |  |  | $raw = [ { text => 'key: value1' }, { text => 'key: value2'}]; | 
| 415 |  |  |  |  |  |  | $data = parse_whois(raw => $raw); | 
| 416 |  |  |  |  |  |  | # If flag is off parser returns | 
| 417 |  |  |  |  |  |  | # { key => 'value2' }; | 
| 418 |  |  |  |  |  |  | # If flag is on parser returns | 
| 419 |  |  |  |  |  |  | # { key => [ 'value1', 'value2' ] }; | 
| 420 |  |  |  |  |  |  |  | 
| 421 |  |  |  |  |  |  | # If you want to convert some field name to another: | 
| 422 |  |  |  |  |  |  | $Net::Whois::Parser::FIELD_NAME_CONV{'Domain name'} = 'domain'; | 
| 423 |  |  |  |  |  |  |  | 
| 424 |  |  |  |  |  |  | # If you want to format some fields. | 
| 425 |  |  |  |  |  |  | # I think it is very useful for dates. | 
| 426 |  |  |  |  |  |  | $Net::Whois::Parser::HOOKS{'expiration_date'} = [ \&format_date ]; | 
| 427 |  |  |  |  |  |  |  | 
| 428 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 429 |  |  |  |  |  |  |  | 
| 430 |  |  |  |  |  |  | Net::Whois::Parser module provides Whois data parsing. | 
| 431 |  |  |  |  |  |  | You can add your own parsers for any whois server. | 
| 432 |  |  |  |  |  |  |  | 
| 433 |  |  |  |  |  |  | =head1 FUNCTIONS | 
| 434 |  |  |  |  |  |  |  | 
| 435 |  |  |  |  |  |  | =over 3 | 
| 436 |  |  |  |  |  |  |  | 
| 437 |  |  |  |  |  |  | =item parse_whois(%args) | 
| 438 |  |  |  |  |  |  |  | 
| 439 |  |  |  |  |  |  | Returns hash of whois data. Arguments: | 
| 440 |  |  |  |  |  |  |  | 
| 441 |  |  |  |  |  |  | C<'domain'> - | 
| 442 |  |  |  |  |  |  | domain | 
| 443 |  |  |  |  |  |  |  | 
| 444 |  |  |  |  |  |  | C<'raw'> - | 
| 445 |  |  |  |  |  |  | raw whois text | 
| 446 |  |  |  |  |  |  |  | 
| 447 |  |  |  |  |  |  | C<'server'> - | 
| 448 |  |  |  |  |  |  | whois server | 
| 449 |  |  |  |  |  |  |  | 
| 450 |  |  |  |  |  |  | C<'which_whois'> - | 
| 451 |  |  |  |  |  |  | option for Net::Whois::Raw::whois. Default value is QRY_ALL | 
| 452 |  |  |  |  |  |  |  | 
| 453 |  |  |  |  |  |  | =back | 
| 454 |  |  |  |  |  |  |  | 
| 455 |  |  |  |  |  |  | =head1 CHANGES | 
| 456 |  |  |  |  |  |  |  | 
| 457 |  |  |  |  |  |  | See file "Changes" in the distribution | 
| 458 |  |  |  |  |  |  |  | 
| 459 |  |  |  |  |  |  | =head1 AUTHOR | 
| 460 |  |  |  |  |  |  |  | 
| 461 |  |  |  |  |  |  | Ivan Sokolov, C<<  >> | 
| 462 |  |  |  |  |  |  |  | 
| 463 |  |  |  |  |  |  | =head1 COPYRIGHT & LICENSE | 
| 464 |  |  |  |  |  |  |  | 
| 465 |  |  |  |  |  |  | Copyright 2009 Ivan Sokolov | 
| 466 |  |  |  |  |  |  |  | 
| 467 |  |  |  |  |  |  | This program is free software; you can redistribute it and/or modify it | 
| 468 |  |  |  |  |  |  | under the same terms as Perl itself. | 
| 469 |  |  |  |  |  |  |  | 
| 470 |  |  |  |  |  |  |  | 
| 471 |  |  |  |  |  |  | =cut |