line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Image::Grab; |
2
|
|
|
|
|
|
|
|
3
|
8
|
|
|
8
|
|
25713
|
use strict; |
|
8
|
|
|
|
|
12
|
|
|
8
|
|
|
|
|
472
|
|
4
|
8
|
|
|
8
|
|
41
|
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK $AUTOLOAD); |
|
8
|
|
|
|
|
12
|
|
|
8
|
|
|
|
|
940
|
|
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
# $Id: Grab.pm,v 1.6 2002/01/19 21:14:01 mah Exp $ |
7
|
|
|
|
|
|
|
$VERSION = '1.4.2'; |
8
|
|
|
|
|
|
|
|
9
|
8
|
|
|
8
|
|
43
|
use Carp; |
|
8
|
|
|
|
|
16
|
|
|
8
|
|
|
|
|
717
|
|
10
|
8
|
|
|
8
|
|
40
|
use Config; |
|
8
|
|
|
|
|
14
|
|
|
8
|
|
|
|
|
679
|
|
11
|
|
|
|
|
|
|
require HTTP::Request; |
12
|
|
|
|
|
|
|
require HTML::TreeBuilder; |
13
|
|
|
|
|
|
|
require URI::URL; |
14
|
|
|
|
|
|
|
require Image::Grab::RequestAgent; |
15
|
8
|
|
|
8
|
|
8340
|
use POSIX qw(strftime); |
|
8
|
|
|
|
|
142871
|
|
|
8
|
|
|
|
|
67
|
|
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
require Exporter; |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
@ISA = qw(Exporter); |
20
|
|
|
|
|
|
|
@EXPORT_OK = qw( |
21
|
|
|
|
|
|
|
&expand_url &grab |
22
|
|
|
|
|
|
|
); |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
# %fields, new, AUTOLOAD are from perltoot |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
my %fields = ( |
27
|
|
|
|
|
|
|
cookiefile => undef, |
28
|
|
|
|
|
|
|
cookiejar => undef, |
29
|
|
|
|
|
|
|
date => undef, |
30
|
|
|
|
|
|
|
image => undef, |
31
|
|
|
|
|
|
|
"index" => undef, |
32
|
|
|
|
|
|
|
md5 => undef, |
33
|
|
|
|
|
|
|
refer => undef, |
34
|
|
|
|
|
|
|
regexp => undef, |
35
|
|
|
|
|
|
|
type => undef, |
36
|
|
|
|
|
|
|
ua => undef, |
37
|
|
|
|
|
|
|
url => undef, |
38
|
|
|
|
|
|
|
search_url => undef, |
39
|
|
|
|
|
|
|
debug => undef, |
40
|
|
|
|
|
|
|
do_posix => ($Config{patchlevel} && $Config{patchlevel} >= 5 and |
41
|
|
|
|
|
|
|
$Config{baserev} && $Config{baserev} >= 5) ? 1 : undef, |
42
|
|
|
|
|
|
|
); |
43
|
|
|
|
|
|
|
|
44
|
0
|
|
|
0
|
|
0
|
sub DESTROY {} |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
sub new { |
47
|
11
|
|
|
11
|
0
|
2042
|
my $that = shift; |
48
|
11
|
|
33
|
|
|
97
|
my $class = ref($that) || $that; |
49
|
11
|
|
|
|
|
240
|
my $self = { |
50
|
|
|
|
|
|
|
_permitted => \%fields, |
51
|
|
|
|
|
|
|
%fields, |
52
|
|
|
|
|
|
|
}; |
53
|
|
|
|
|
|
|
|
54
|
11
|
100
|
|
|
|
72
|
if(@_) { |
55
|
4
|
|
|
|
|
23
|
my %arg = @_; |
56
|
|
|
|
|
|
|
|
57
|
4
|
|
|
|
|
16
|
foreach (keys %arg) { |
58
|
5
|
50
|
|
|
|
32
|
croak "Can't access `$_' field" |
59
|
|
|
|
|
|
|
unless exists $self->{_permitted}->{lc($_)}; |
60
|
5
|
|
|
|
|
21
|
$self->{lc($_)} = $arg{$_}; |
61
|
|
|
|
|
|
|
} |
62
|
|
|
|
|
|
|
} |
63
|
|
|
|
|
|
|
|
64
|
11
|
|
|
|
|
36
|
bless ($self, $class); |
65
|
11
|
|
|
|
|
115
|
$self->ua(new Image::Grab::RequestAgent); |
66
|
11
|
|
|
|
|
18
|
$self->{have_DigestMD5} = eval {require Digest::MD5}; |
|
11
|
|
|
|
|
162
|
|
67
|
11
|
|
|
|
|
23
|
$self->{have_MD5} = eval {require MD5;}; |
|
11
|
|
|
|
|
4055
|
|
68
|
11
|
|
|
|
|
53
|
$self->{have_magick} = eval {require Image::Magick;}; |
|
11
|
|
|
|
|
4115
|
|
69
|
11
|
|
|
|
|
82
|
return $self; |
70
|
|
|
|
|
|
|
} |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
sub AUTOLOAD { |
73
|
274
|
|
|
274
|
|
33874
|
my $self = shift; |
74
|
274
|
50
|
|
|
|
1046
|
my $type = ref($self) |
75
|
|
|
|
|
|
|
or croak "$self is not an object"; |
76
|
|
|
|
|
|
|
|
77
|
274
|
|
|
|
|
547
|
my $name = $AUTOLOAD; |
78
|
274
|
|
|
|
|
1155
|
$name =~ s/.*://; |
79
|
|
|
|
|
|
|
|
80
|
274
|
50
|
|
|
|
871
|
unless (exists $self->{_permitted}->{$name} ) { |
81
|
0
|
|
|
|
|
0
|
croak "Can't access `$name' field in class $type"; |
82
|
|
|
|
|
|
|
} |
83
|
|
|
|
|
|
|
|
84
|
274
|
100
|
|
|
|
1021
|
if(@_) { |
|
|
100
|
|
|
|
|
|
85
|
63
|
|
|
|
|
121
|
my $val = shift; |
86
|
63
|
50
|
|
|
|
313
|
carp "$name: $val" if $self->debug; |
87
|
63
|
|
|
|
|
278
|
return $self->{$name} = $val; |
88
|
|
|
|
|
|
|
} elsif (defined $self->{$name}) { |
89
|
85
|
|
|
|
|
2415
|
return $self->{$name}; |
90
|
|
|
|
|
|
|
} |
91
|
|
|
|
|
|
|
|
92
|
126
|
|
|
|
|
649
|
return undef; |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
} |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
# Accessor functions that we have to write. |
97
|
|
|
|
|
|
|
sub realm { |
98
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
99
|
0
|
0
|
|
|
|
0
|
my $type = ref($self) |
100
|
|
|
|
|
|
|
or croak "$self is not an object"; |
101
|
|
|
|
|
|
|
|
102
|
0
|
0
|
|
|
|
0
|
if($#_ == 2){ |
103
|
0
|
|
|
|
|
0
|
$self->ua->register_realm(shift, shift, shift); |
104
|
0
|
|
|
|
|
0
|
return 1; |
105
|
|
|
|
|
|
|
} |
106
|
|
|
|
|
|
|
|
107
|
0
|
|
|
|
|
0
|
croak "usage: realm(\$realm, \$user, \$pass)"; |
108
|
|
|
|
|
|
|
} |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
sub getAllURLs { |
111
|
3
|
|
|
3
|
1
|
26
|
my $self = shift; |
112
|
3
|
50
|
|
|
|
20
|
my $type = ref($self) |
113
|
|
|
|
|
|
|
or croak "$self is not an object"; |
114
|
3
|
|
100
|
|
|
25
|
my $times = (shift or 10); |
115
|
3
|
|
|
|
|
5
|
my $req; |
116
|
3
|
|
|
|
|
10
|
my $count = 0; |
117
|
3
|
|
|
|
|
9
|
my @link; |
118
|
|
|
|
|
|
|
my @now; |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
# Need to load Cookie Jar? |
121
|
3
|
|
|
|
|
27
|
$self->loadCookieJar; |
122
|
|
|
|
|
|
|
|
123
|
3
|
|
|
|
|
87
|
@now = localtime; |
124
|
3
|
50
|
33
|
|
|
23
|
$self->search_url(strftime $self->search_url, @now) |
125
|
|
|
|
|
|
|
if defined $self->search_url and defined $self->do_posix; |
126
|
3
|
50
|
|
|
|
16
|
croak "Need to specify a search_url!" if !defined $self->search_url; |
127
|
3
|
|
|
|
|
14
|
$req = $self->ua->request(new HTTP::Request 'GET', $self->search_url); |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
# Try $times until successful |
130
|
3
|
|
33
|
|
|
53559
|
while( (!$req->is_success) && $count < $times){ |
131
|
0
|
|
|
|
|
0
|
$req = $self->ua->request(new HTTP::Request 'GET', $self->search_url); |
132
|
0
|
|
|
|
|
0
|
$count = $count + 1; |
133
|
|
|
|
|
|
|
} |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
# return failure if we couldn't connect within $times tries |
136
|
3
|
50
|
33
|
|
|
46
|
if($count == $times && !$req->is_success){ |
137
|
0
|
|
|
|
|
0
|
return undef; |
138
|
|
|
|
|
|
|
} |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
# Get the base url |
141
|
3
|
|
|
|
|
13
|
my $base_url = $req->base; |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
# Get the img tags out of the document. |
144
|
3
|
|
|
|
|
1345
|
my $parser = new HTML::TreeBuilder; |
145
|
3
|
|
|
|
|
849
|
$parser->parse($req->content); |
146
|
3
|
|
|
|
|
6586
|
$parser->eof; |
147
|
3
|
|
|
|
|
562
|
foreach (@{$parser->extract_links(qw(img td body))}) { |
|
3
|
|
|
|
|
37
|
|
148
|
9
|
|
|
|
|
3254
|
push @link, URI::URL::url($$_[0])->abs($base_url)->as_string; |
149
|
|
|
|
|
|
|
} |
150
|
3
|
|
|
|
|
817
|
$parser->delete; |
151
|
|
|
|
|
|
|
|
152
|
3
|
|
|
|
|
508
|
return @link; |
153
|
|
|
|
|
|
|
} |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
sub getRealURL { |
156
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
157
|
0
|
0
|
|
|
|
0
|
my $type = ref($self) |
158
|
|
|
|
|
|
|
or croak "$self is not an object"; |
159
|
0
|
|
0
|
|
|
0
|
my $times = (shift or 10); |
160
|
|
|
|
|
|
|
|
161
|
0
|
|
|
|
|
0
|
carp "getRealURL has been deprecated. Use expand_url."; |
162
|
0
|
|
|
|
|
0
|
$self->expand_url(@_); |
163
|
|
|
|
|
|
|
} |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
sub expand_url { |
166
|
8
|
|
|
8
|
1
|
23
|
my $self = shift; |
167
|
8
|
50
|
|
|
|
40
|
my $type = ref($self) |
168
|
|
|
|
|
|
|
or croak "$self is not an object"; |
169
|
8
|
|
50
|
|
|
47
|
my $times = (shift or 10); |
170
|
8
|
|
|
|
|
22
|
my $req; |
171
|
8
|
|
|
|
|
162
|
my $count = 0; |
172
|
8
|
|
|
|
|
27
|
my @link; |
173
|
|
|
|
|
|
|
my @now; |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
# Expand any POSIX time escapes |
176
|
8
|
|
|
|
|
371
|
@now = localtime; |
177
|
|
|
|
|
|
|
|
178
|
8
|
100
|
|
|
|
125
|
if(defined $self->url) { |
179
|
7
|
50
|
|
|
|
78
|
$self->url(strftime($self->url, @now)) |
180
|
|
|
|
|
|
|
if defined $self->do_posix; |
181
|
7
|
|
|
|
|
41
|
return $self->url; |
182
|
|
|
|
|
|
|
} |
183
|
1
|
50
|
33
|
|
|
5
|
$self->regexp(strftime($self->regexp, @now)) |
184
|
|
|
|
|
|
|
if defined $self->regexp and defined $self->do_posix; |
185
|
|
|
|
|
|
|
|
186
|
1
|
|
|
|
|
5
|
@link = $self->getAllURLs($times); |
187
|
1
|
50
|
|
|
|
4
|
return undef if !@link; |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
# if this is a relative position tag... |
190
|
1
|
50
|
33
|
|
|
6
|
if($self->regexp || $self->index) { |
191
|
1
|
|
|
|
|
1
|
my (@match, $re); |
192
|
|
|
|
|
|
|
|
193
|
1
|
|
|
|
|
7
|
$self->refer($self->search_url); |
194
|
|
|
|
|
|
|
# set index to match first image |
195
|
1
|
50
|
|
|
|
12
|
$self->index(0) if !defined $self->index; |
196
|
1
|
|
50
|
|
|
5
|
$re = $self->regexp || '.'; |
197
|
1
|
50
|
|
|
|
3
|
@match = grep {defined && /$re/} @link; |
|
3
|
|
|
|
|
42
|
|
198
|
|
|
|
|
|
|
# Return the nth |
199
|
1
|
50
|
|
|
|
7
|
return $match[$self->index] |
200
|
|
|
|
|
|
|
if @match; |
201
|
|
|
|
|
|
|
} |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
# only if we fail. |
204
|
0
|
|
|
|
|
0
|
return undef; |
205
|
|
|
|
|
|
|
} |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
sub loadCookieJar { |
208
|
11
|
|
|
11
|
1
|
33
|
my $self = shift; |
209
|
11
|
50
|
|
|
|
72
|
my $type = ref($self) |
210
|
|
|
|
|
|
|
or croak "$self is not an object"; |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
# need to do CookieJar initialization? |
213
|
11
|
50
|
33
|
|
|
146
|
if($self->cookiefile and !-f $self->cookiefile){ |
|
|
50
|
33
|
|
|
|
|
214
|
0
|
|
|
|
|
0
|
carp $self->cookiefile, " is not a file"; |
215
|
|
|
|
|
|
|
} elsif ($self->cookiefile and !defined $self->cookiejar) { |
216
|
8
|
|
|
8
|
|
35310
|
use HTTP::Cookies; |
|
8
|
|
|
|
|
146870
|
|
|
8
|
|
|
|
|
5512
|
|
217
|
|
|
|
|
|
|
|
218
|
0
|
|
|
|
|
0
|
$self->cookiejar( |
219
|
|
|
|
|
|
|
HTTP::Cookies::Netscape->new( File => $self->cookiefile, |
220
|
|
|
|
|
|
|
AutoSave => 0, |
221
|
|
|
|
|
|
|
)); |
222
|
0
|
|
|
|
|
0
|
$self->cookiejar->load(); |
223
|
|
|
|
|
|
|
} |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
} |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
sub grab { |
228
|
8
|
|
|
8
|
1
|
7053
|
my $self = shift; |
229
|
8
|
|
|
|
|
43
|
my $times = 1; |
230
|
|
|
|
|
|
|
|
231
|
8
|
100
|
|
|
|
82
|
if(ref($self)) { |
232
|
5
|
50
|
|
|
|
358
|
if(my $c = shift) { |
233
|
0
|
|
|
|
|
0
|
$times = $c; |
234
|
|
|
|
|
|
|
} |
235
|
|
|
|
|
|
|
} else { |
236
|
3
|
100
|
|
|
|
21
|
if($self eq __PACKAGE__) { |
237
|
1
|
|
|
|
|
22
|
$self = Image::Grab->new(@_); |
238
|
|
|
|
|
|
|
} else { |
239
|
2
|
|
|
|
|
24
|
$self = Image::Grab->new(lc $self, @_); |
240
|
|
|
|
|
|
|
} |
241
|
|
|
|
|
|
|
} |
242
|
8
|
|
|
|
|
60
|
my $req; |
243
|
|
|
|
|
|
|
my $count; |
244
|
0
|
|
|
|
|
0
|
my $rc; |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
# need to do CookieJar initialization? |
247
|
8
|
|
|
|
|
95
|
$self->loadCookieJar; |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
# need to find image on page? |
250
|
8
|
|
|
|
|
57
|
my $url = $self->expand_url($times); |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
# make sure we have a url |
253
|
8
|
50
|
|
|
|
47
|
croak "Couldn't determine an absolute URL!\n" unless defined $url; |
254
|
8
|
50
|
|
|
|
42
|
carp "Fetching URL: ", $url if $self->debug; |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
# Set it up |
257
|
8
|
|
|
|
|
154
|
$req = new HTTP::Request 'GET', $url; |
258
|
8
|
100
|
|
|
|
49473
|
$req->push_header('Referer', $self->refer) if defined $self->refer; |
259
|
8
|
50
|
|
|
|
115
|
if($self->cookiejar){ |
260
|
0
|
|
|
|
|
0
|
$self->cookiejar->add_cookie_header($req); |
261
|
|
|
|
|
|
|
} |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
# Knock it down |
264
|
8
|
|
|
|
|
21
|
$count = 0; |
265
|
8
|
|
33
|
|
|
17
|
do{ |
266
|
8
|
|
|
|
|
23
|
$count++; |
267
|
8
|
|
|
|
|
45
|
$rc = $self->ua->request($req); |
268
|
8
|
50
|
|
|
|
135185
|
carp "Got: ", $rc->content |
269
|
|
|
|
|
|
|
if $self->debug; |
270
|
|
|
|
|
|
|
} while($count <= $times and not $rc->is_success); |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
# Did we fail? |
273
|
8
|
50
|
|
|
|
116
|
return 0 unless $rc->is_success; |
274
|
|
|
|
|
|
|
|
275
|
8
|
50
|
|
|
|
83
|
carp "Message: ", $rc->message if $self->debug; |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
# save what we got |
278
|
8
|
|
|
|
|
56
|
$self->image($rc->content); |
279
|
8
|
|
|
|
|
68
|
$self->date($rc->last_modified); |
280
|
|
|
|
|
|
|
|
281
|
8
|
50
|
|
|
|
44
|
if($self->{have_DigestMD5}) { |
|
|
0
|
|
|
|
|
|
282
|
8
|
|
|
|
|
38
|
$self->md5(Digest::MD5::md5_hex($self->image)); |
283
|
|
|
|
|
|
|
} elsif ($self->{have_MD5}) { |
284
|
0
|
|
|
|
|
0
|
$self->md5(MD5->hexhash($self->image)); |
285
|
|
|
|
|
|
|
} |
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
|
288
|
8
|
|
|
|
|
44
|
$self->type($rc->content_type); |
289
|
|
|
|
|
|
|
|
290
|
8
|
|
|
|
|
34
|
$self->image; |
291
|
|
|
|
|
|
|
} |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
sub grab_new { |
294
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
295
|
0
|
0
|
|
|
|
|
my $type = ref($self) |
296
|
|
|
|
|
|
|
or croak "$self is not an object"; |
297
|
0
|
|
0
|
|
|
|
my $tries = shift || 10; |
298
|
|
|
|
|
|
|
|
299
|
0
|
0
|
0
|
|
|
|
return $self->grab($tries) |
300
|
|
|
|
|
|
|
unless defined $self->date || defined $self->md5; |
301
|
|
|
|
|
|
|
|
302
|
0
|
|
|
|
|
|
my $tmp = $type->new; |
303
|
0
|
|
|
|
|
|
$tmp->url($self->url); |
304
|
0
|
|
|
|
|
|
$tmp->search_url($self->search_url); |
305
|
0
|
|
|
|
|
|
$tmp->index($self->index); |
306
|
0
|
|
|
|
|
|
$tmp->regexp($self->regexp); |
307
|
0
|
|
|
|
|
|
$tmp->grab; |
308
|
|
|
|
|
|
|
|
309
|
0
|
|
|
|
|
|
my $grab_new = 1; |
310
|
|
|
|
|
|
|
|
311
|
0
|
0
|
0
|
|
|
|
$grab_new = 0 |
312
|
|
|
|
|
|
|
if defined $self->date && $self->date >= $tmp->date; |
313
|
0
|
0
|
0
|
|
|
|
$grab_new = 0 |
314
|
|
|
|
|
|
|
if defined $self->md5 && $self->md5 eq $tmp->md5; |
315
|
|
|
|
|
|
|
|
316
|
0
|
0
|
|
|
|
|
return $self->grab($tries) |
317
|
|
|
|
|
|
|
if $grab_new; |
318
|
0
|
|
|
|
|
|
return undef; |
319
|
|
|
|
|
|
|
} |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
1; |
322
|
|
|
|
|
|
|
__END__ |