File Coverage

lib/HTML/HTML5/Parser/UA.pm
Criterion Covered Total %
statement 56 69 81.1
branch 15 28 53.5
condition 9 29 31.0
subroutine 11 11 100.0
pod 1 1 100.0
total 92 138 66.6


line stmt bran cond sub pod time code
1             package HTML::HTML5::Parser::UA;
2              
3 3     3   172955 use 5.008001;
  3         16  
4 3     3   18 use strict;
  3         6  
  3         70  
5 3     3   13 use warnings;
  3         5  
  3         138  
6              
7             BEGIN {
8 3     3   10 $HTML::HTML5::Parser::UA::AUTHORITY = 'cpan:TOBYINK';
9 3         110 $HTML::HTML5::Parser::UA::VERSION = '0.992';
10             }
11              
12 3     3   621 use Encode qw(decode);
  3         17135  
  3         254  
13 3     3   2296 use HTTP::Tiny;
  3         122659  
  3         125  
14 3     3   702 use URI::file;
  3         5656  
  3         2686  
15              
16             our $NO_LWP = '0';
17              
18             sub get
19             {
20 8     8 1 46454 my ($class, $uri, $ua) = @_;
21              
22 8 0 33     160 if (ref $ua and $ua->isa('HTTP::Tiny') and $uri =~ /^https?:/i)
      33        
23 0         0 { goto \&_get_tiny }
24 8 50 33     106 if (ref $ua and $ua->isa('LWP::UserAgent'))
25 0         0 { goto \&_get_lwp }
26 8 100 66     396 if (UNIVERSAL::can('LWP::UserAgent', 'can') and not $NO_LWP)
27 5         86 { goto \&_get_lwp }
28 3 100       79 if ($uri =~ /^file:/i)
29 1         65 { goto \&_get_fs }
30              
31 2         20 goto \&_get_tiny;
32             }
33              
34             sub _get_lwp
35             {
36             eval "require LWP::UserAgent; 1"
37 5 50   5   773 or do {
38 0         0 require Carp;
39 0         0 Carp::croak("could not load LWP::UserAgent");
40             };
41            
42 5         44 my ($class, $uri, $ua) = @_;
43              
44 5   33     390 $ua ||= LWP::UserAgent->new(
45             agent => sprintf(
46             "%s/%s ",
47             'HTML::HTML5::Parser',
48             HTML::HTML5::Parser->VERSION,
49             ),
50             default_headers => HTTP::Headers->new(
51             'Accept' => join q(, ) => qw(
52             text/html
53             application/xhtml+xml;q=0.9
54             application/xml;q=0.1
55             text/xml;q=0.1
56             )
57             ),
58             parse_head => 0,
59             );
60            
61 5         2534 my $response = $ua->get($uri);
62            
63 5         149289 my $h = $response->headers;
64             my %header_hash =
65 5         117 map { lc($_) => $h->header($_); }
  24         908  
66             $h->header_field_names;
67            
68             return +{
69 5         204 success => $response->is_success,
70             status => $response->code,
71             reason => $response->message,
72             headers => \%header_hash,
73             content => $response->content,
74             decoded_content => $response->decoded_content,
75             };
76             }
77              
78             sub _get_tiny
79             {
80 2     2   15 my ($class, $uri, $ua) = @_;
81            
82 2   33     136 $ua ||= HTTP::Tiny->new(
83             agent => sprintf("%s/%s", 'HTML::HTML5::Parser', HTML::HTML5::Parser->VERSION),
84             default_headers => +{
85             'Accept' => join(q(, ) => qw(
86             text/html
87             application/xhtml+xml;q=0.9
88             application/xml;q=0.1
89             text/xml;q=0.1
90             )),
91             },
92             );
93            
94 2         331 my $response = $ua->get($uri);
95            
96 2 50       11522 if ($response->{headers}{'content-type'} =~ /charset=(\S+)/)
97             {
98 0         0 (my $encoding = $1) =~ s/["']//g;
99 0         0 $response->{decoded_content} = eval {
100             decode($encoding, $response->{content})
101 0         0 };
102             }
103            
104             $response->{decoded_content} = $response->{content}
105 2 50       12 unless defined $response->{decoded_content};
106 2         47 return $response;
107             }
108              
109             sub _get_fs
110             {
111 1     1   4 my $class = shift;
112 1 50       9 my ($uri) = map { ref() ? $_ : URI->new($_) } @_;
  1         17  
113 1         18 my $file = $uri->file;
114              
115 1         206 my ($status, $reason, $content, $content_type) = do {
116 1 50       60 if (not -e $file)
    50          
117 0         0 { (404 => 'Not Found', 'File not found.', 'text/plain') }
118             elsif (not -r $file)
119 0         0 { (403 => 'Forbidden', 'File not readable by effective guid.', 'text/plain') }
120             else
121 1         19 { (200 => 'OK') }
122             };
123            
124 1   33     14 $content ||= do {
125 1 50       118 if (open my $fh, '<', $file)
126 1         72 { local $/ = <$fh> }
127             else
128 0         0 { $status = 418; $reason = "I'm a teapot"; $content_type = 'text/plain'; $! }
  0         0  
  0         0  
  0         0  
129             };
130            
131 1 50 0     23 $content_type ||= 'text/xml' if $file =~ /\.xml$/i;
132 1 50 0     25 $content_type ||= 'application/xhtml+xml' if $file =~ /\.xht(ml)?$/i;
133 1 50 0     10 $content_type ||= 'text/html' if $file =~ /\.html?$/i;
134 1   50     31 $content_type ||= 'application/octet-stream';
135            
136             return +{
137 1         61 success => ($status == 200),
138             status => $status,
139             reason => $reason,
140             headers => +{
141             'content-type' => $content_type,
142             'content-length' => length($content),
143             },
144             content => $content,
145             decoded_content => $content,
146             };
147             }
148              
149             1;
150              
151             =head1 NAME
152              
153             HTML::HTML5::Parser::UA - simple web user agent class
154              
155             =head1 SYNOPSIS
156              
157             use aliased 'HTML::HTML5::Parser::UA';
158            
159             my $response = UA->get($url);
160             die unless $response->{success};
161            
162             print $response->{decoded_content};
163              
164             =head1 DESCRIPTION
165              
166             This is a simple wrapper around HTTP::Tiny and LWP::UserAgent to smooth out
167             the API differences between them. It only supports bog standard
168             C<< get($url) >> requests.
169              
170             If LWP::UserAgent is already in memory, this module will use that.
171              
172             If LWP::UserAgent is not in memory, then this module will use HTTP::Tiny (or
173             direct filesystem access for "file://" URLs).
174              
175             If LWP::UserAgent is not in memory, and you attempt to request a URL that
176             HTTP::Tiny cannot handle (e.g. an "ftp://" URL), then this module will load
177             LWP::UserAgent and die if it cannot be loaded (e.g. is not installed).
178              
179             HTML::HTML5::Parser::UA is used by the C method of
180             HTML::HTML5::Parser.
181              
182             =head2 Class Method
183              
184             =over
185              
186             =item C<< get($url, $ua) >>
187              
188             Gets the URL and returns a hashref similar to HTTP::Tiny's hashrefs, but
189             with an additional C key, which contains the response
190             body, decoded into a Perl character string (not a byte string).
191              
192             If $ua is given (it's optional), then this user agent will be used to
193             perform the actual request. Must be undef or an LWP::UserAgent object
194             (or a subclass) or an HTTP::Tiny object (or a subclass).
195              
196             =back
197              
198             =head2 Package Variable
199              
200             =over
201              
202             =item C<< $HTML::HTML5::Parser::NO_LWP >>
203              
204             If true, avoids using LWP::UserAgent.
205              
206             =back
207              
208             =head1 MOTIVATION
209              
210             L is a good piece of software but it has a dependency on
211             L. L is only used to provide one fairly
212             esoteric feature, which this package doesn't make use of. (It's the
213             C option.)
214              
215             Because of that, I don't especially want HTML::HTML5::Parser to have a
216             dependency on LWP::UserAgent. Hence this module.
217              
218             =head1 SEE ALSO
219              
220             L.
221              
222             =head1 AUTHOR
223              
224             Toby Inkster, Etobyink@cpan.orgE
225              
226             =head1 COPYRIGHT AND LICENSE
227              
228             Copyright (C) 2012 by Toby Inkster
229              
230             This library is free software; you can redistribute it and/or modify
231             it under the same terms as Perl itself.
232              
233             =head1 DISCLAIMER OF WARRANTIES
234              
235             THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
236             WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
237             MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
238