File Coverage

blib/lib/HTML/HTML5/Parser/UA.pm
Criterion Covered Total %
statement 17 67 25.3
branch 0 28 0.0
condition 0 29 0.0
subroutine 6 10 60.0
pod 1 1 100.0
total 24 135 17.7


line stmt bran cond sub pod time code
1             package HTML::HTML5::Parser::UA;
2              
3 1     1   24613 use 5.008001;
  1         4  
  1         48  
4 1     1   6 use strict;
  1         2  
  1         56  
5              
6             BEGIN {
7 1     1   2 $HTML::HTML5::Parser::UA::AUTHORITY = 'cpan:TOBYINK';
8 1         28 $HTML::HTML5::Parser::UA::VERSION = '0.301';
9             }
10              
11 1     1   1110 use Encode qw(decode);
  1         14364  
  1         112  
12 1     1   1191 use HTTP::Tiny;
  1         72510  
  1         52  
13 1     1   3506 use URI::file;
  1         24807  
  1         1284  
14              
15             our $NO_LWP = '0';
16              
17             sub get
18             {
19 0     0 1   my ($class, $uri, $ua) = @_;
20              
21 0 0 0       if (ref $ua and $ua->isa('HTTP::Tiny') and $uri =~ /^https?:/i)
      0        
22 0           { goto \&_get_tiny }
23 0 0 0       if (ref $ua and $ua->isa('LWP::UserAgent'))
24 0           { goto \&_get_lwp }
25 0 0 0       if (UNIVERSAL::can('LWP::UserAgent', 'can') and not $NO_LWP)
26 0           { goto \&_get_lwp }
27 0 0         if ($uri =~ /^file:/i)
28 0           { goto \&_get_fs }
29              
30 0           goto \&_get_tiny;
31             }
32              
33             sub _get_lwp
34             {
35             eval "require LWP::UserAgent; 1"
36 0 0   0     or do {
37 0           require Carp;
38 0           Carp::croak("could not load LWP::UserAgent");
39             };
40            
41 0           my ($class, $uri, $ua) = @_;
42              
43 0   0       $ua ||= LWP::UserAgent->new(
44             agent => sprintf(
45             "%s/%s ",
46             'HTML::HTML5::Parser',
47             HTML::HTML5::Parser->VERSION,
48             ),
49             default_headers => HTTP::Headers->new(
50             'Accept' => join q(, ) => qw(
51             text/html
52             application/xhtml+xml;q=0.9
53             application/xml;q=0.1
54             text/xml;q=0.1
55             )
56             ),
57             parse_head => 0,
58             );
59            
60 0           my $response = $ua->get($uri);
61            
62 0           my $h = $response->headers;
63 0           my %header_hash =
64 0           map { lc($_) => $h->header($_); }
65             $h->header_field_names;
66            
67             return +{
68 0           success => $response->is_success,
69             status => $response->code,
70             reason => $response->message,
71             headers => \%header_hash,
72             content => $response->content,
73             decoded_content => $response->decoded_content,
74             };
75             }
76              
77             sub _get_tiny
78             {
79 0     0     my ($class, $uri, $ua) = @_;
80            
81 0   0       $ua ||= HTTP::Tiny->new(
82             agent => sprintf("%s/%s", 'HTML::HTML5::Parser', HTML::HTML5::Parser->VERSION),
83             default_headers => +{
84             'Accept' => join(q(, ) => qw(
85             text/html
86             application/xhtml+xml;q=0.9
87             application/xml;q=0.1
88             text/xml;q=0.1
89             )),
90             },
91             );
92            
93 0           my $response = $ua->get($uri);
94            
95 0 0         if ($response->{headers}{'content-type'} =~ /charset=(\S+)/)
96             {
97 0           (my $encoding = $1) =~ s/["']//g;
98 0           $response->{decoded_content} = eval {
99 0           decode($encoding, $response->{content})
100             };
101             }
102            
103 0 0         $response->{decoded_content} = $response->{content}
104             unless defined $response->{decoded_content};
105 0           return $response;
106             }
107              
108             sub _get_fs
109             {
110 0     0     my $class = shift;
111 0 0         my ($uri) = map { ref() ? $_ : URI->new($_) } @_;
  0            
112 0           my $file = $uri->file;
113              
114 0           my ($status, $reason, $content, $content_type) = do {
115 0 0         if (not -e $file)
    0          
116 0           { (404 => 'Not Found', 'File not found.', 'text/plain') }
117             elsif (not -r $file)
118 0           { (403 => 'Forbidden', 'File not readable by effective guid.', 'text/plain') }
119             else
120 0           { (200 => 'OK') }
121             };
122            
123 0   0       $content ||= do {
124 0 0         if (open my $fh, '<', $file)
125 0           { local $/ = <$fh> }
126             else
127 0           { $status = 418; $reason = "I'm a teapot"; $content_type = 'text/plain'; $! }
  0            
  0            
  0            
128             };
129            
130 0 0 0       $content_type ||= 'text/xml' if $file =~ /\.xml$/i;
131 0 0 0       $content_type ||= 'application/xhtml+xml' if $file =~ /\.xht(ml)?$/i;
132 0 0 0       $content_type ||= 'text/html' if $file =~ /\.html?$/i;
133 0   0       $content_type ||= 'application/octet-stream';
134            
135             return +{
136 0           success => ($status == 200),
137             status => $status,
138             reason => $reason,
139             headers => +{
140             'content-type' => $content_type,
141             'content-length' => length($content),
142             },
143             content => $content,
144             decoded_content => $content,
145             };
146             }
147              
148             1;
149              
150             =head1 NAME
151              
152             HTML::HTML5::Parser::UA - simple web user agent class
153              
154             =head1 SYNOPSIS
155              
156             use aliased 'HTML::HTML5::Parser::UA';
157            
158             my $response = UA->get($url);
159             die unless $response->{success};
160            
161             print $response->{decoded_content};
162              
163             =head1 DESCRIPTION
164              
165             This is a simple wrapper around HTTP::Tiny and LWP::UserAgent to smooth out
166             the API differences between them. It only supports bog standard
167             C<< get($url) >> requests.
168              
169             If LWP::UserAgent is already in memory, this module will use that.
170              
171             If LWP::UserAgent is not in memory, then this module will use HTTP::Tiny (or
172             direct filesystem access for "file://" URLs).
173              
174             If LWP::UserAgent is not in memory, and you attempt to request a URL that
175             HTTP::Tiny cannot handle (e.g. an "ftp://" URL), then this module will load
176             LWP::UserAgent and die if it cannot be loaded (e.g. is not installed).
177              
178             HTML::HTML5::Parser::UA is used by the C<parse_file> method of
179             HTML::HTML5::Parser.
180              
181             =head2 Class Method
182              
183             =over
184              
185             =item C<< get($url, $ua) >>
186              
187             Gets the URL and returns a hashref similar to HTTP::Tiny's hashrefs, but
188             with an additional C<decoded_content> key, which contains the response
189             body, decoded into a Perl character string (not a byte string).
190              
191             If $ua is given (it's optional), then this user agent will be used to
192             perform the actual request. Must be undef or an LWP::UserAgent object
193             (or a subclass) or an HTTP::Tiny object (or a subclass).
194              
195             =back
196              
197             =head2 Package Variable
198              
199             =over
200              
201             =item C<< $HTML::HTML5::Parser::NO_LWP >>
202              
203             If true, avoids using LWP::UserAgent.
204              
205             =back
206              
207             =head1 MOTIVATION
208              
209             L<LWP::UserAgent> is a good piece of software but it has a dependency on
210             L<HTML::Parser>. L<HTML::Parser> is only used to provide one fairly
211             esoteric feature, which this package doesn't make use of. (It's the
212             C<parse_head> option.)
213              
214             Because of that, I don't especially want HTML::HTML5::Parser to have a
215             dependency on LWP::UserAgent. Hence this module.
216              
217             =head1 SEE ALSO
218              
219             L<HTML::HTML5::Parser>.
220              
221             =head1 AUTHOR
222              
223             Toby Inkster, E<lt>tobyink@cpan.orgE<gt>
224              
225             =head1 COPYRIGHT AND LICENSE
226              
227             Copyright (C) 2012 by Toby Inkster
228              
229             This library is free software; you can redistribute it and/or modify
230             it under the same terms as Perl itself.
231              
232             =head1 DISCLAIMER OF WARRANTIES
233              
234             THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
235             WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
236             MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
237