line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package WWW::XKCD::AsText; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
112917
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
22
|
|
4
|
1
|
|
|
1
|
|
4
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
33
|
|
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
our $VERSION = '0.003'; |
7
|
|
|
|
|
|
|
|
8
|
1
|
|
|
1
|
|
4
|
use Carp; |
|
1
|
|
|
|
|
5
|
|
|
1
|
|
|
|
|
108
|
|
9
|
1
|
|
|
1
|
|
11
|
use URI; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
18
|
|
10
|
1
|
|
|
1
|
|
4
|
use LWP::UserAgent; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
29
|
|
11
|
1
|
|
|
1
|
|
4
|
use HTML::TokeParser::Simple; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
20
|
|
12
|
1
|
|
|
1
|
|
4
|
use HTML::Entities; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
81
|
|
13
|
1
|
|
|
1
|
|
5
|
use base 'Class::Data::Accessor'; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
824
|
|
14
|
|
|
|
|
|
|
__PACKAGE__->mk_classaccessors( qw( |
15
|
|
|
|
|
|
|
ua |
16
|
|
|
|
|
|
|
timeout |
17
|
|
|
|
|
|
|
uri |
18
|
|
|
|
|
|
|
error |
19
|
|
|
|
|
|
|
text |
20
|
|
|
|
|
|
|
)); |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
sub new { |
23
|
1
|
|
|
1
|
1
|
463
|
my $class = shift; |
24
|
1
|
50
|
|
|
|
4
|
croak "Must have even number of arguments to new()" |
25
|
|
|
|
|
|
|
if @_ & 1; |
26
|
|
|
|
|
|
|
|
27
|
1
|
|
|
|
|
6
|
my %args = @_; |
28
|
1
|
|
|
|
|
9
|
$args{ +lc } = delete $args{ $_ } for keys %args; |
29
|
|
|
|
|
|
|
|
30
|
1
|
|
50
|
|
|
4
|
$args{timeout} ||= 30; |
31
|
1
|
|
33
|
|
|
17
|
$args{ua} ||= LWP::UserAgent->new( |
32
|
|
|
|
|
|
|
timeout => $args{timeout}, |
33
|
|
|
|
|
|
|
agent => 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.8.1.12)' |
34
|
|
|
|
|
|
|
.' Gecko/20080207 Ubuntu/7.10 (gutsy) Firefox/2.0.0.12', |
35
|
|
|
|
|
|
|
); |
36
|
|
|
|
|
|
|
|
37
|
1
|
|
|
|
|
3002
|
my $self = bless {}, $class; |
38
|
1
|
|
|
|
|
8
|
$self->timeout( $args{timeout} ); |
39
|
1
|
|
|
|
|
22
|
$self->ua( $args{ua} ); |
40
|
|
|
|
|
|
|
|
41
|
1
|
|
|
|
|
9
|
return $self; |
42
|
|
|
|
|
|
|
} |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
sub retrieve { |
45
|
1
|
|
|
1
|
1
|
1319
|
my ( $self, $id ) = @_; |
46
|
1
|
50
|
|
|
|
6
|
croak "Undefined ID argument to retrieve()" |
47
|
|
|
|
|
|
|
unless defined $id; |
48
|
|
|
|
|
|
|
|
49
|
1
|
|
|
|
|
5
|
$id =~ s/\s+//g; |
50
|
|
|
|
|
|
|
|
51
|
1
|
50
|
|
|
|
4
|
croak "ID number... must be a NUMBER" |
52
|
|
|
|
|
|
|
if $id =~ /\D/; |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
$self->$_(undef) |
55
|
1
|
|
|
|
|
8
|
for qw(uri text error); |
56
|
|
|
|
|
|
|
|
57
|
1
|
|
|
|
|
34
|
my $comic_uri = $self->uri( URI->new("http://xkcd.com/$id/") ); |
58
|
|
|
|
|
|
|
|
59
|
1
|
|
|
|
|
8459
|
my $text_uri = URI->new('http://www.ohnorobot.com/transcribe.pl'); |
60
|
1
|
|
|
|
|
57
|
$text_uri->query_form( |
61
|
|
|
|
|
|
|
comicid => 'apKHvCCc66NMg', |
62
|
|
|
|
|
|
|
url => $comic_uri, |
63
|
|
|
|
|
|
|
); |
64
|
|
|
|
|
|
|
|
65
|
1
|
|
|
|
|
241
|
my $response = $self->ua->get( $text_uri ); |
66
|
1
|
50
|
|
|
|
282612
|
if ( $response->is_success ) { |
67
|
1
|
|
|
|
|
24
|
my $final_text = $self->_parse( $response->content ); |
68
|
1
|
|
|
|
|
52
|
$final_text =~ s/\s*\n\s*/\n\n/g; |
69
|
1
|
|
|
|
|
80
|
return $final_text; |
70
|
|
|
|
|
|
|
} |
71
|
|
|
|
|
|
|
else { |
72
|
0
|
|
|
|
|
0
|
return $self->_set_error('Network error: ' . $response->status_line); |
73
|
|
|
|
|
|
|
} |
74
|
|
|
|
|
|
|
} |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
sub _parse { |
77
|
1
|
|
|
1
|
|
22
|
my ( $self, $content ) = @_; |
78
|
1
|
|
|
|
|
15
|
my $parser = HTML::TokeParser::Simple->new( \$content ); |
79
|
|
|
|
|
|
|
|
80
|
1
|
|
|
|
|
245
|
my %nav = ( |
81
|
|
|
|
|
|
|
get_text_from_textarea => 0, |
82
|
|
|
|
|
|
|
level => 0, |
83
|
|
|
|
|
|
|
look_for_p => 0, |
84
|
|
|
|
|
|
|
is_p => 0, |
85
|
|
|
|
|
|
|
); |
86
|
1
|
|
|
|
|
3
|
my $text = ''; |
87
|
1
|
|
|
|
|
10
|
while ( my $t = $parser->get_token ) { |
88
|
41
|
50
|
33
|
|
|
2856
|
if ( |
|
|
100
|
33
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
100
|
66
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
100
|
100
|
|
|
|
|
|
|
50
|
66
|
|
|
|
|
|
|
50
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
89
|
|
|
|
|
|
|
$t->is_start_tag('textarea') |
90
|
|
|
|
|
|
|
and defined $t->get_attr('name') |
91
|
|
|
|
|
|
|
and $t->get_attr('name') eq 'transcription' |
92
|
|
|
|
|
|
|
) { |
93
|
0
|
|
|
|
|
0
|
@nav{ qw(get_text_from_textarea level) } = ( 1, 1 ); |
94
|
|
|
|
|
|
|
} |
95
|
|
|
|
|
|
|
elsif ( $t->is_start_tag('td') ) { |
96
|
1
|
|
|
|
|
32
|
@nav{ qw(look_for_p level) } = ( 1, 2 ); |
97
|
|
|
|
|
|
|
} |
98
|
|
|
|
|
|
|
elsif ( |
99
|
|
|
|
|
|
|
$nav{look_for_p} == 1 |
100
|
|
|
|
|
|
|
and $t->is_text |
101
|
|
|
|
|
|
|
and $t->as_is =~ /Here's the transcription for this comic/ |
102
|
|
|
|
|
|
|
) { |
103
|
1
|
|
|
|
|
42
|
@nav{ qw(look_for_p level) } = ( 2, 3 ); |
104
|
|
|
|
|
|
|
} |
105
|
|
|
|
|
|
|
elsif ( $nav{look_for_p} == 2 and $t->is_start_tag('p') ) { |
106
|
1
|
|
|
|
|
63
|
@nav{ qw(is_p level) } = ( 1, 4 ); |
107
|
|
|
|
|
|
|
} |
108
|
|
|
|
|
|
|
elsif ( $nav{is_p} and $t->is_text ) { |
109
|
7
|
|
|
|
|
229
|
$text .= $t->as_is; |
110
|
|
|
|
|
|
|
} |
111
|
|
|
|
|
|
|
elsif ( $nav{is_p} and $t->is_start_tag('BR') ) { |
112
|
6
|
|
|
|
|
507
|
$text .= "\n"; |
113
|
|
|
|
|
|
|
} |
114
|
|
|
|
|
|
|
elsif ( $nav{is_p} and $t->is_end_tag('p') ) { |
115
|
1
|
|
|
|
|
100
|
return $self->text( decode_entities($text) ); |
116
|
|
|
|
|
|
|
} |
117
|
|
|
|
|
|
|
elsif ( $nav{get_text_from_textarea} == 1 |
118
|
|
|
|
|
|
|
and $t->is_end_tag('textarea') |
119
|
|
|
|
|
|
|
) { |
120
|
0
|
|
|
|
|
|
return $self->_set_error( |
121
|
|
|
|
|
|
|
q|Doesn't seem to be any text for this comic| |
122
|
|
|
|
|
|
|
); |
123
|
|
|
|
|
|
|
} |
124
|
|
|
|
|
|
|
elsif ( $nav{get_text_from_textarea} == 1 and $t->is_text ) { |
125
|
0
|
|
|
|
|
|
return $self->text( decode_entities($t->as_is) ); |
126
|
|
|
|
|
|
|
} |
127
|
|
|
|
|
|
|
} |
128
|
0
|
|
|
|
|
|
return $self->_set_error(q|Doesn't seem to be any text for this comic|); |
129
|
|
|
|
|
|
|
} |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
sub _set_error { |
132
|
0
|
|
|
0
|
|
|
my ( $self, $error ) = @_; |
133
|
0
|
|
|
|
|
|
$self->error( $error ); |
134
|
0
|
|
|
|
|
|
return; |
135
|
|
|
|
|
|
|
} |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
1; |
139
|
|
|
|
|
|
|
__END__ |