line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
############################################################ |
2
|
|
|
|
|
|
|
# |
3
|
|
|
|
|
|
|
# $Id: Plugin.pm,v 1.7 2006/01/10 15:49:32 nicolaw Exp $ |
4
|
|
|
|
|
|
|
# WWW::Comic::Plugin - Subclassable plugin module for WWW::Comic |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Copyright 2006 Nicola Worthington |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License"); |
9
|
|
|
|
|
|
|
# you may not use this file except in compliance with the License. |
10
|
|
|
|
|
|
|
# You may obtain a copy of the License at |
11
|
|
|
|
|
|
|
# |
12
|
|
|
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0 |
13
|
|
|
|
|
|
|
# |
14
|
|
|
|
|
|
|
# Unless required by applicable law or agreed to in writing, software |
15
|
|
|
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS, |
16
|
|
|
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
17
|
|
|
|
|
|
|
# See the License for the specific language governing permissions and |
18
|
|
|
|
|
|
|
# limitations under the License. |
19
|
|
|
|
|
|
|
# |
20
|
|
|
|
|
|
|
############################################################ |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
package WWW::Comic::Plugin; |
23
|
|
|
|
|
|
|
# vim:ts=4:sw=4:tw=78 |
24
|
|
|
|
|
|
|
|
25
|
3
|
|
|
3
|
|
16
|
use strict; |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
99
|
|
26
|
3
|
|
|
3
|
|
3468
|
use LWP::UserAgent qw(); |
|
3
|
|
|
|
|
176284
|
|
|
3
|
|
|
|
|
80
|
|
27
|
3
|
|
|
3
|
|
29
|
use HTTP::Request qw(); |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
61
|
|
28
|
3
|
|
|
3
|
|
16
|
use Carp qw(carp croak confess); |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
280
|
|
29
|
|
|
|
|
|
|
|
30
|
3
|
50
|
|
3
|
|
19
|
use constant DEBUG => $ENV{DEBUG} ? 1 : 0; |
|
3
|
|
|
|
|
7
|
|
|
3
|
|
|
|
|
216
|
|
31
|
3
|
|
|
3
|
|
19
|
use vars qw($VERSION); |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
3520
|
|
32
|
|
|
|
|
|
|
$VERSION = sprintf('%d.%02d', q$Revision: 1.7 $ =~ /(\d+)/g); |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
################################# |
36
|
|
|
|
|
|
|
# Public methods |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
sub comics { |
39
|
75
|
|
|
75
|
1
|
103
|
my $self = shift; |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
# Look at instance specific first |
42
|
75
|
50
|
|
|
|
211
|
if (exists $self->{comics}) { |
43
|
0
|
0
|
|
|
|
0
|
if (ref($self->{comics}) eq 'ARRAY') { |
|
|
0
|
|
|
|
|
|
44
|
0
|
|
|
|
|
0
|
return @{$self->{comics}}; |
|
0
|
|
|
|
|
0
|
|
45
|
|
|
|
|
|
|
} elsif (ref($self->{comics}) eq 'HASH') { |
46
|
0
|
|
|
|
|
0
|
return keys(%{$self->{comics}}); |
|
0
|
|
|
|
|
0
|
|
47
|
|
|
|
|
|
|
} |
48
|
|
|
|
|
|
|
} |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
# Then look package wide |
51
|
75
|
|
|
|
|
4267
|
my @comics = (); |
52
|
75
|
|
|
|
|
106
|
eval { |
53
|
75
|
|
|
|
|
4122
|
my %comics = eval('%'.ref($self).'::COMICS'); |
54
|
75
|
|
|
|
|
364
|
push @comics, keys(%comics); |
55
|
|
|
|
|
|
|
}; |
56
|
75
|
|
|
|
|
118
|
eval { |
57
|
75
|
|
|
|
|
3822
|
push @comics, eval('@'.ref($self).'::COMICS'); |
58
|
|
|
|
|
|
|
}; |
59
|
|
|
|
|
|
|
|
60
|
75
|
|
|
|
|
498
|
return @comics; |
61
|
|
|
|
|
|
|
} |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
sub strip_url { |
64
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
65
|
0
|
|
|
|
|
0
|
confess "I do not know how to get the URL for this comic"; |
66
|
0
|
|
|
|
|
0
|
return undef; |
67
|
|
|
|
|
|
|
} |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
sub get_strip { |
70
|
18
|
|
|
18
|
1
|
43
|
my $self = shift; |
71
|
18
|
|
|
|
|
74
|
my %param = @_; |
72
|
|
|
|
|
|
|
|
73
|
18
|
|
100
|
|
|
127
|
$param{url} ||= $self->strip_url(%param); |
74
|
18
|
100
|
|
|
|
208
|
return undef unless $param{url} =~ /^https?:\/\/[a-z0-9\-\.]+.*/i; |
75
|
|
|
|
|
|
|
|
76
|
5
|
|
|
|
|
20
|
(my $referer = $param{url}) =~ s/[\?\&]//; |
77
|
5
|
|
|
|
|
43
|
$referer =~ s#/[^/]*$#/#; |
78
|
|
|
|
|
|
|
|
79
|
5
|
|
|
|
|
31
|
my $ua = $self->_new_agent(); |
80
|
5
|
|
|
|
|
47
|
my $req = HTTP::Request->new(GET => $param{url}); |
81
|
5
|
|
|
|
|
847
|
$req->referer($referer); |
82
|
5
|
|
|
|
|
263
|
my $response = $ua->request($req); |
83
|
|
|
|
|
|
|
|
84
|
5
|
50
|
|
|
|
5761732
|
if ($response->is_success) { |
|
|
0
|
|
|
|
|
|
85
|
5
|
50
|
|
|
|
95
|
unless ($self->_image_format($response->content)) { |
86
|
0
|
0
|
|
|
|
0
|
carp('Unrecognised image format') if $^W; |
87
|
0
|
|
|
|
|
0
|
return undef; |
88
|
|
|
|
|
|
|
} |
89
|
5
|
|
|
|
|
25
|
return $response->content; |
90
|
|
|
|
|
|
|
} elsif ($^W) { |
91
|
0
|
|
|
|
|
0
|
carp $response->status_line; |
92
|
|
|
|
|
|
|
} |
93
|
|
|
|
|
|
|
|
94
|
0
|
|
|
|
|
0
|
return undef; |
95
|
|
|
|
|
|
|
} |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
sub mirror_strip { |
98
|
6
|
|
|
6
|
1
|
12
|
my $self = shift; |
99
|
6
|
|
|
|
|
27
|
my %param = @_; |
100
|
|
|
|
|
|
|
|
101
|
6
|
|
66
|
|
|
54
|
$param{url} ||= $self->strip_url(%param); |
102
|
6
|
|
|
|
|
54
|
my $blob = $self->get_strip(%param); |
103
|
6
|
100
|
|
|
|
194
|
return undef if !defined($blob); |
104
|
|
|
|
|
|
|
|
105
|
2
|
100
|
66
|
|
|
36
|
if ((!defined($param{filename}) || !length($param{filename})) |
|
|
|
66
|
|
|
|
|
106
|
|
|
|
|
|
|
&& defined($param{url})) { |
107
|
1
|
|
|
|
|
10
|
($param{filename} = $param{url}) =~ s#.*/##; |
108
|
|
|
|
|
|
|
} |
109
|
2
|
|
|
|
|
9
|
my $ext = $self->_image_format($blob); |
110
|
2
|
|
|
|
|
26
|
$param{filename} =~ s/(\.(jpe?g|gif|png))?$/.$ext/i; |
111
|
|
|
|
|
|
|
|
112
|
2
|
50
|
|
|
|
340
|
open(FH,">$param{filename}") || |
113
|
|
|
|
|
|
|
croak "Unable to open file handle FH for file '$param{filename}': $!"; |
114
|
2
|
|
|
|
|
11
|
binmode FH; |
115
|
2
|
|
|
|
|
246
|
print FH $blob; |
116
|
2
|
50
|
|
|
|
114
|
close(FH) || |
117
|
|
|
|
|
|
|
carp "Unable to close file handle FH for file '$param{filename}': $!"; |
118
|
|
|
|
|
|
|
|
119
|
2
|
|
|
|
|
58
|
return $param{filename}; |
120
|
|
|
|
|
|
|
} |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
################################# |
125
|
|
|
|
|
|
|
# Private methods |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
sub _image_format { |
128
|
7
|
|
|
7
|
|
236
|
my $self = shift; |
129
|
7
|
|
50
|
|
|
34
|
local $_ = shift || ''; |
130
|
7
|
50
|
|
|
|
76
|
return 'gif' if /^GIF8[79]a/; |
131
|
0
|
0
|
|
|
|
0
|
return 'jpg' if /^\xFF\xD8/; |
132
|
0
|
0
|
|
|
|
0
|
return 'png' if /^\x89PNG\x0d\x0a\x1a\x0a/; |
133
|
0
|
|
|
|
|
0
|
return undef; |
134
|
|
|
|
|
|
|
} |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
sub _new_agent { |
137
|
8
|
|
|
8
|
|
17
|
my $self = shift; |
138
|
|
|
|
|
|
|
|
139
|
8
|
|
|
|
|
49
|
my @agents = ( |
140
|
|
|
|
|
|
|
'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1).', |
141
|
|
|
|
|
|
|
'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.7.8) '. |
142
|
|
|
|
|
|
|
'Gecko/20050718 Firefox/1.0.4 (Debian package 1.0.4-2sarge1)', |
143
|
|
|
|
|
|
|
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.7.5) '. |
144
|
|
|
|
|
|
|
'Gecko/20041110 Firefox/1.0', |
145
|
|
|
|
|
|
|
'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en) '. |
146
|
|
|
|
|
|
|
'AppleWebKit/125.5.5 (KHTML, like Gecko) Safari/125.12', |
147
|
|
|
|
|
|
|
'Mozilla/4.0 (compatible; MSIE 6.0; Windows 98)', |
148
|
|
|
|
|
|
|
); |
149
|
|
|
|
|
|
|
|
150
|
8
|
|
|
|
|
102
|
my $ua = LWP::UserAgent->new( |
151
|
|
|
|
|
|
|
agent => $agents[int(rand(@agents))], |
152
|
|
|
|
|
|
|
timeout => 20 |
153
|
|
|
|
|
|
|
); |
154
|
8
|
|
|
|
|
6351
|
$ua->env_proxy; |
155
|
8
|
|
|
|
|
31511
|
$ua->max_size(1024*1024); # 1mb max limit |
156
|
|
|
|
|
|
|
|
157
|
8
|
|
|
|
|
140
|
return $ua; |
158
|
|
|
|
|
|
|
} |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
sub TRACE { |
161
|
0
|
|
|
0
|
0
|
|
return unless DEBUG; |
162
|
0
|
|
|
|
|
|
carp(shift()); |
163
|
|
|
|
|
|
|
} |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
sub DUMP { |
166
|
0
|
|
|
0
|
0
|
|
return unless DEBUG; |
167
|
0
|
|
|
|
|
|
eval { |
168
|
0
|
|
|
|
|
|
require Data::Dumper; |
169
|
0
|
|
|
|
|
|
carp(shift().': '.Data::Dumper::Dumper(shift())); |
170
|
|
|
|
|
|
|
} |
171
|
|
|
|
|
|
|
} |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
1; |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
=pod |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
=head1 NAME |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
WWW::Comic::Plugin - Plugin superclass for WWW::Comic |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
=head1 DESCRIPTION |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
This is a plugin superclass for WWW::Comic from which all plugin modules |
186
|
|
|
|
|
|
|
are subclassed. |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
=head1 METHODS |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
Each plugin module should subclass WWW::Comic::Plugin, and support the |
191
|
|
|
|
|
|
|
following methods explicitly or through inheritance: |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
=head2 new *MANDATORY* |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
This method is mandatory. Your plugin must allow instantiation through |
196
|
|
|
|
|
|
|
this method. |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
=head2 comics |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
This method should return a list of comics which your plugin will |
201
|
|
|
|
|
|
|
support. |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
The default superclassed C method will try to determine |
204
|
|
|
|
|
|
|
what comics your plugin supports by loogking for C<$self->{comics}>, which |
205
|
|
|
|
|
|
|
can be an array of hash of comic names. If it cannot find a suitable |
206
|
|
|
|
|
|
|
list of comics there, it will look for |
207
|
|
|
|
|
|
|
C<@WWW::Comic::Plugin::YourPlugin::COMICS> or |
208
|
|
|
|
|
|
|
C<%WWW::Comic::Plugin::YourPlugin::COMICS>. |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
=head2 strip_url *MANDATORY* |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
This method is mandatory. This method must return a valid comic strip |
213
|
|
|
|
|
|
|
image URL. A predefined L object can be obtained in order |
214
|
|
|
|
|
|
|
to perform this functionality by calling the superclassed C<_new_agent()> |
215
|
|
|
|
|
|
|
method. |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
This method should return an C value upon failure. |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
=head2 get_strip |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
The default superclassed C method will try and download a URL |
222
|
|
|
|
|
|
|
in to a scalar, and if it contains a valid GIF, JPEG or PNG image, it will |
223
|
|
|
|
|
|
|
return. The URL of the comic strip image can be passed as a paramater. If |
224
|
|
|
|
|
|
|
it is not passed, it will ask the C method for a comic strip URL. |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
If you overload the default method, you should return C on failure, |
227
|
|
|
|
|
|
|
or return the binary image data as a scalar if successful. You method should |
228
|
|
|
|
|
|
|
validate the binary image data as a valid GIF, JPEG or PNG image file by |
229
|
|
|
|
|
|
|
using the superclassed C<_image_format()> method. |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
=head2 mirror_strip |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
The default superclassed method will use the C method to download |
234
|
|
|
|
|
|
|
a comic image URL and then write it to disk. If no filename paramater is |
235
|
|
|
|
|
|
|
passed, it will assume a sensible default filename to write to disk based upon |
236
|
|
|
|
|
|
|
the comic strip URL that it is retrieving. It will return the name of the file |
237
|
|
|
|
|
|
|
that it wrote to disk. |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
If you overload the default method, you should return C on failure, |
240
|
|
|
|
|
|
|
or return the name of the file that was written to disk if successful. |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
=head1 PRIVATE METHODS |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
The following private methods existing withing the L |
245
|
|
|
|
|
|
|
module as utility methods. These are not intended to be part of the |
246
|
|
|
|
|
|
|
publically exposed and documented part of your plugin API. |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
=head2 _new_agent |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
This method returns an L object, preconfigured with |
251
|
|
|
|
|
|
|
sensible default paramaters. |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
=head2 _image_format |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
This method accepts a single scalar argument which should contain binary |
256
|
|
|
|
|
|
|
image data. It will return a scalar value of C, C or C to |
257
|
|
|
|
|
|
|
match the format of the image. |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
It will return an C value if it is not a valid GIF, JPEG or PNG |
260
|
|
|
|
|
|
|
image. |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
=head1 EXAMPLES |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
See inside L, L, |
265
|
|
|
|
|
|
|
L, L, |
266
|
|
|
|
|
|
|
L, L |
267
|
|
|
|
|
|
|
and L. |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
A good boiler plate example is L. |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
=head1 VERSION |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
$Id: Plugin.pm,v 1.7 2006/01/10 15:49:32 nicolaw Exp $ |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
=head1 AUTHOR |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
Nicola Worthington |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
L |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
=head1 COPYRIGHT |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
Copyright 2006 Nicola Worthington. |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
This software is licensed under The Apache Software License, Version 2.0. |
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
L |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
=cut |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
|