| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package URI::Title; | 
| 2 |  |  |  |  |  |  | $URI::Title::VERSION = '1.904'; | 
| 3 | 4 |  |  | 4 |  | 206944 | use 5.006; | 
|  | 4 |  |  |  |  | 39 |  | 
| 4 | 4 |  |  | 4 |  | 21 | use warnings; | 
|  | 4 |  |  |  |  | 7 |  | 
|  | 4 |  |  |  |  | 108 |  | 
| 5 | 4 |  |  | 4 |  | 21 | use strict; | 
|  | 4 |  |  |  |  | 4 |  | 
|  | 4 |  |  |  |  | 102 |  | 
| 6 |  |  |  |  |  |  |  | 
| 7 | 4 |  |  | 4 |  | 20 | use base qw(Exporter); | 
|  | 4 |  |  |  |  | 5 |  | 
|  | 4 |  |  |  |  | 651 |  | 
| 8 |  |  |  |  |  |  | our @EXPORT_OK = qw( title ); | 
| 9 |  |  |  |  |  |  |  | 
| 10 | 4 |  |  | 4 |  | 1890 | use Module::Pluggable (search_path => ['URI::Title'], require => 1 ); | 
|  | 4 |  |  |  |  | 43137 |  | 
|  | 4 |  |  |  |  | 29 |  | 
| 11 | 4 |  |  | 4 |  | 2628 | use File::Type; | 
|  | 4 |  |  |  |  | 71009 |  | 
|  | 4 |  |  |  |  | 195 |  | 
| 12 |  |  |  |  |  |  |  | 
| 13 | 4 |  |  | 4 |  | 2978 | use LWP::UserAgent; | 
|  | 4 |  |  |  |  | 209086 |  | 
|  | 4 |  |  |  |  | 155 |  | 
| 14 | 4 |  |  | 4 |  | 39 | use HTTP::Request; | 
|  | 4 |  |  |  |  | 9 |  | 
|  | 4 |  |  |  |  | 95 |  | 
| 15 | 4 |  |  | 4 |  | 23 | use HTTP::Response; | 
|  | 4 |  |  |  |  | 6 |  | 
|  | 4 |  |  |  |  | 3211 |  | 
| 16 |  |  |  |  |  |  |  | 
| 17 |  |  |  |  |  |  |  | 
| 18 |  |  |  |  |  |  | sub _ua { | 
| 19 | 3 |  |  | 3 |  | 28 | my $ua = LWP::UserAgent->new; | 
| 20 | 3 |  |  |  |  | 6238 | $ua->agent("URI::Title/$URI::Title::VERSION"); | 
| 21 | 3 |  |  |  |  | 205 | $ua->timeout(20); | 
| 22 | 3 |  |  |  |  | 56 | $ua->default_header('Accept-Encoding' => 'gzip'); | 
| 23 | 3 |  |  |  |  | 144 | return $ua; | 
| 24 |  |  |  |  |  |  | } | 
| 25 |  |  |  |  |  |  |  | 
| 26 |  |  |  |  |  |  | sub _get_limited { | 
| 27 | 2 |  |  | 2 |  | 4 | my $url = shift; | 
| 28 | 2 |  | 50 |  |  | 13 | my $size = shift || 32*1024; | 
| 29 | 2 |  |  |  |  | 6 | my $ua = _ua(); | 
| 30 | 2 |  |  |  |  | 10 | $ua->max_size($size); | 
| 31 | 2 |  |  |  |  | 33 | my $req = HTTP::Request->new(GET => $url); | 
| 32 | 2 |  |  |  |  | 12110 | $req->header( Range => "bytes=0-$size" ); | 
| 33 | 2 |  |  |  |  | 268 | $req->header( "Accept-Encoding" => "" ); # vox sends invalid gzipped data? | 
| 34 | 2 |  |  |  |  | 98 | my $res = eval { $ua->request($req) }; | 
|  | 2 |  |  |  |  | 18 |  | 
| 35 | 2 | 50 |  |  |  | 388950 | return unless $res; # useragent explodes for non-valid uris | 
| 36 |  |  |  |  |  |  |  | 
| 37 |  |  |  |  |  |  | # some servers don't like the Range header. If we | 
| 38 |  |  |  |  |  |  | # get an odd 4xx response that isn't 404, just try getting | 
| 39 |  |  |  |  |  |  | # the full thing. This may be a little impolite. | 
| 40 | 2 | 50 | 66 |  |  | 9 | return _get_all($url) if $res->code >= 400 and $res->code < 500 and $res->code != 404; | 
|  |  |  | 66 |  |  |  |  | 
| 41 | 1 | 50 |  |  |  | 20 | return unless $res->is_success; | 
| 42 | 1 | 50 |  |  |  | 18 | if (!wantarray) { | 
| 43 | 0 |  | 0 |  |  | 0 | return $res->decoded_content || $res->content; | 
| 44 |  |  |  |  |  |  | } | 
| 45 | 1 |  |  |  |  | 4 | my $cset = "iso-8859-1"; # default; | 
| 46 | 1 |  |  |  |  | 3 | my $ct = $res->header("Content-type"); | 
| 47 | 1 | 50 |  |  |  | 61 | if ($ct =~ /charset\s*=\>?\s*\"?([\w-]+)/i) { | 
| 48 | 0 |  |  |  |  | 0 | $cset = lc($1); | 
| 49 |  |  |  |  |  |  | #warn "Got charset $cset from URI headers\n"; | 
| 50 |  |  |  |  |  |  | } | 
| 51 | 1 |  | 33 |  |  | 7 | return ($res->decoded_content || $res->content, $cset); | 
| 52 |  |  |  |  |  |  | } | 
| 53 |  |  |  |  |  |  |  | 
| 54 |  |  |  |  |  |  | sub _get_end { | 
| 55 | 0 |  |  | 0 |  | 0 | my $url = shift; | 
| 56 | 0 |  | 0 |  |  | 0 | my $size = shift || 16*1024; | 
| 57 |  |  |  |  |  |  |  | 
| 58 | 0 |  |  |  |  | 0 | my $ua = _ua(); | 
| 59 |  |  |  |  |  |  |  | 
| 60 | 0 |  |  |  |  | 0 | my $request = HTTP::Request->new(HEAD => $url); | 
| 61 | 0 |  |  |  |  | 0 | my $response = $ua->request($request); | 
| 62 | 0 | 0 |  |  |  | 0 | return unless $response; # useragent explodes for non-valid uris | 
| 63 | 0 |  |  |  |  | 0 | my $length = $response->header('Content-Length'); | 
| 64 |  |  |  |  |  |  |  | 
| 65 | 0 | 0 |  |  |  | 0 | return unless $length; # We can't get the length, and we're _not_ | 
| 66 |  |  |  |  |  |  | # going to get the whole thing. | 
| 67 |  |  |  |  |  |  |  | 
| 68 | 0 |  |  |  |  | 0 | my $start = $length - $size; | 
| 69 |  |  |  |  |  |  |  | 
| 70 | 0 |  |  |  |  | 0 | $ua->max_size($size); | 
| 71 |  |  |  |  |  |  |  | 
| 72 | 0 |  |  |  |  | 0 | my $req = HTTP::Request->new(GET => $url); | 
| 73 | 0 |  |  |  |  | 0 | $req->header( Range => "bytes=$start-$length" ); | 
| 74 | 0 |  |  |  |  | 0 | my $res = $ua->request($req); | 
| 75 | 0 | 0 |  |  |  | 0 | return unless $res; # useragent explodes for non-valid uris | 
| 76 |  |  |  |  |  |  |  | 
| 77 | 0 | 0 |  |  |  | 0 | return unless $res->is_success; | 
| 78 | 0 | 0 |  |  |  | 0 | return $res->decoded_content unless wantarray; | 
| 79 | 0 |  |  |  |  | 0 | my $cset = "iso-8859-1"; # default; | 
| 80 | 0 |  |  |  |  | 0 | my $ct = $res->header("Content-type"); | 
| 81 | 0 | 0 |  |  |  | 0 | if ($ct =~ /charset=\"?(.*)\"?$/) { | 
| 82 | 0 |  |  |  |  | 0 | $cset = $1; | 
| 83 |  |  |  |  |  |  | } | 
| 84 | 0 |  |  |  |  | 0 | return ($res->decoded_content, $cset); | 
| 85 |  |  |  |  |  |  | } | 
| 86 |  |  |  |  |  |  |  | 
| 87 |  |  |  |  |  |  | sub _get_all { | 
| 88 | 1 |  |  | 1 |  | 46 | my $url = shift; | 
| 89 | 1 |  |  |  |  | 3 | my $ua = _ua(); | 
| 90 | 1 |  |  |  |  | 4 | my $req = HTTP::Request->new(GET => $url); | 
| 91 | 1 |  |  |  |  | 98 | my $res = $ua->request($req); | 
| 92 | 1 | 50 |  |  |  | 460 | return unless $res->is_success; | 
| 93 | 0 | 0 |  |  |  | 0 | return $res->decoded_content unless wantarray; | 
| 94 | 0 |  |  |  |  | 0 | my $cset = "iso-8859-1"; # default; | 
| 95 | 0 |  |  |  |  | 0 | my $ct = $res->header("Content-type"); | 
| 96 | 0 | 0 |  |  |  | 0 | if ($ct =~ /charset=\"?(.*)\"?$/) { | 
| 97 | 0 |  |  |  |  | 0 | $cset = $1; | 
| 98 |  |  |  |  |  |  | } | 
| 99 | 0 |  |  |  |  | 0 | return ($res->decoded_content, $cset); | 
| 100 |  |  |  |  |  |  | } | 
| 101 |  |  |  |  |  |  |  | 
| 102 |  |  |  |  |  |  | # cache | 
| 103 |  |  |  |  |  |  | our $HANDLERS; | 
| 104 |  |  |  |  |  |  | sub _handlers { | 
| 105 | 2 |  |  | 2 |  | 10 | my @plugins = plugins(); | 
| 106 | 2 | 50 |  |  |  | 8578 | return $HANDLERS if $HANDLERS; | 
| 107 | 2 |  |  |  |  | 7 | for my $plugin (@plugins) { | 
| 108 | 8 |  |  |  |  | 48 | for my $type ($plugin->types) { | 
| 109 | 18 |  |  |  |  | 42 | $HANDLERS->{$type} = $plugin; | 
| 110 |  |  |  |  |  |  | } | 
| 111 |  |  |  |  |  |  | } | 
| 112 | 2 |  |  |  |  | 8 | return $HANDLERS; | 
| 113 |  |  |  |  |  |  | } | 
| 114 |  |  |  |  |  |  |  | 
| 115 |  |  |  |  |  |  | sub title { | 
| 116 | 3 |  |  | 3 | 1 | 53678 | my $param = shift; | 
| 117 | 3 |  |  |  |  | 14 | my $data; | 
| 118 |  |  |  |  |  |  | my $url; | 
| 119 | 3 |  |  |  |  | 0 | my $type; | 
| 120 | 3 |  |  |  |  | 6 | my $cset = "iso-8859-1"; # default | 
| 121 |  |  |  |  |  |  |  | 
| 122 |  |  |  |  |  |  | # we can be passed a hashref. Keys are url, or data. | 
| 123 | 3 | 50 |  |  |  | 31 | if (ref($param)) { | 
| 124 | 0 | 0 |  |  |  | 0 | if ($param->{data}) { | 
|  |  | 0 |  |  |  |  |  | 
| 125 | 0 |  |  |  |  | 0 | $data = $param->{data}; | 
| 126 | 0 | 0 |  |  |  | 0 | $data = $$data if ref($data); # we can be passed a ref to the data | 
| 127 |  |  |  |  |  |  | } elsif ($param->{url}) { | 
| 128 | 0 |  |  |  |  | 0 | $url = $param->{url}; | 
| 129 |  |  |  |  |  |  | } else { | 
| 130 | 4 |  |  | 4 |  | 31 | use Carp qw(croak); | 
|  | 4 |  |  |  |  | 9 |  | 
|  | 4 |  |  |  |  | 1375 |  | 
| 131 | 0 |  |  |  |  | 0 | croak("Expected a single parameter, or an 'url' or 'data' key"); | 
| 132 |  |  |  |  |  |  | } | 
| 133 |  |  |  |  |  |  |  | 
| 134 |  |  |  |  |  |  | # otherwise, assume we're passed an url | 
| 135 |  |  |  |  |  |  | } else { | 
| 136 | 3 |  |  |  |  | 6 | $url = $param; | 
| 137 |  |  |  |  |  |  | } | 
| 138 |  |  |  |  |  |  |  | 
| 139 | 3 | 0 | 33 |  |  | 12 | if (!$url and !$data) { | 
| 140 | 0 |  |  |  |  | 0 | warn "Need at least an url or data"; | 
| 141 | 0 |  |  |  |  | 0 | return; | 
| 142 |  |  |  |  |  |  | } | 
| 143 |  |  |  |  |  |  |  | 
| 144 |  |  |  |  |  |  | # If we don't have data, we will have an url, so try to get data. | 
| 145 | 3 | 50 |  |  |  | 8 | if (!$data) { | 
| 146 |  |  |  |  |  |  | # url might be a filename | 
| 147 | 3 | 100 |  |  |  | 98 | if (-e $url) { | 
| 148 | 1 |  |  |  |  | 6 | local $/ = undef; | 
| 149 | 1 | 50 |  |  |  | 35 | unless (open DATA, $url) { | 
| 150 | 0 |  |  |  |  | 0 | warn "$url looks like a file and isn't"; | 
| 151 | 0 |  |  |  |  | 0 | return; | 
| 152 |  |  |  |  |  |  | } | 
| 153 | 1 |  |  |  |  | 63 | $data = ; | 
| 154 | 1 |  |  |  |  | 16 | close DATA; | 
| 155 |  |  |  |  |  |  |  | 
| 156 |  |  |  |  |  |  | # If not, assume it's an url | 
| 157 |  |  |  |  |  |  | } else { | 
| 158 |  |  |  |  |  |  | # special case for itms | 
| 159 | 2 | 50 |  |  |  | 14 | if ($url =~ s/^itms:/http:/) { | 
| 160 | 0 |  |  |  |  | 0 | $type = "itms"; | 
| 161 | 0 |  |  |  |  | 0 | $data = 1; # we don't need it, fake it. | 
| 162 |  |  |  |  |  |  |  | 
| 163 |  |  |  |  |  |  | } else { | 
| 164 | 2 |  |  |  |  | 6 | $url =~ s{#!}{?_escaped_fragment_=}; | 
| 165 |  |  |  |  |  |  |  | 
| 166 | 2 |  |  |  |  | 8 | ($data, $cset) = _get_limited($url); | 
| 167 |  |  |  |  |  |  | } | 
| 168 |  |  |  |  |  |  | } | 
| 169 |  |  |  |  |  |  | } | 
| 170 | 3 | 100 |  |  |  | 19159 | if (!$data) { | 
| 171 |  |  |  |  |  |  | #warn "Can't get content for $url"; | 
| 172 | 1 |  |  |  |  | 6 | return; | 
| 173 |  |  |  |  |  |  | } | 
| 174 |  |  |  |  |  |  |  | 
| 175 | 2 | 50 |  |  |  | 7 | return undef unless $data; | 
| 176 |  |  |  |  |  |  |  | 
| 177 | 2 |  | 33 |  |  | 27 | $type ||= File::Type->new->checktype_contents($data); | 
| 178 |  |  |  |  |  |  |  | 
| 179 | 2 |  |  |  |  | 1872 | my $handlers = _handlers(); | 
| 180 |  |  |  |  |  |  | my $handler = $handlers->{$type} || $handlers->{default} | 
| 181 | 2 | 50 | 33 |  |  | 16 | or return; | 
| 182 |  |  |  |  |  |  |  | 
| 183 | 2 |  |  |  |  | 15 | return $handler->title($url, $data, $type, $cset); | 
| 184 |  |  |  |  |  |  | } | 
| 185 |  |  |  |  |  |  |  | 
| 186 |  |  |  |  |  |  | 1; | 
| 187 |  |  |  |  |  |  |  | 
| 188 |  |  |  |  |  |  | __END__ |