line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#!/usr/bin/perl -w |
2
|
|
|
|
|
|
|
# |
3
|
|
|
|
|
|
|
# WWW::Automate (c) 2002 Kirrily Robert |
4
|
|
|
|
|
|
|
# This software is distributed under the same licenses as Perl; see |
5
|
|
|
|
|
|
|
# the file COPYING for details. |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# $Id: Automate.pm,v 1.1.1.1 2005/12/01 03:07:33 chezskud Exp $ |
9
|
|
|
|
|
|
|
# |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
package WWW::Automate; |
12
|
|
|
|
|
|
|
|
13
|
1
|
|
|
1
|
|
975
|
use HTTP::Request; |
|
1
|
|
|
|
|
23231
|
|
|
1
|
|
|
|
|
43
|
|
14
|
1
|
|
|
1
|
|
1090
|
use LWP::UserAgent; |
|
1
|
|
|
|
|
22771
|
|
|
1
|
|
|
|
|
34
|
|
15
|
1
|
|
|
1
|
|
1035
|
use HTML::Form; |
|
1
|
|
|
|
|
19006
|
|
|
1
|
|
|
|
|
73
|
|
16
|
1
|
|
|
1
|
|
1018
|
use HTML::TokeParser; |
|
1
|
|
|
|
|
12280
|
|
|
1
|
|
|
|
|
36
|
|
17
|
1
|
|
|
1
|
|
683
|
use Clone qw(clone); |
|
1
|
|
|
|
|
15732
|
|
|
1
|
|
|
|
|
99
|
|
18
|
1
|
|
|
1
|
|
12
|
use Carp; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
1593
|
|
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
our @ISA = qw( LWP::UserAgent ); |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
my $VERSION = $VERSION = "0.21"; |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
my $headers; |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
=pod |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
=head1 NAME |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
WWW::Automate - automate interaction with websites |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
=head1 NOTICE |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
B |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
Please use WWW::Mechanize instead. |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
=head1 SYNOPSIS |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
use WWW::Automate; |
41
|
|
|
|
|
|
|
my $agent = WWW::Automate->new(); |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
$agent->get($url); |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
$agent->follow($link); |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
$agent->form($number); |
48
|
|
|
|
|
|
|
$agent->field($name, $value); |
49
|
|
|
|
|
|
|
$agent->click($button); |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
$agent->back(); |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
$agent->add_header($name => $value); |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
print "OK" if $agent->{content} =~ /$expected/; |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
=head1 DESCRIPTION |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
This module is intended to help you automate interaction with a website. |
60
|
|
|
|
|
|
|
It bears a not-very-remarkable outwards resemblance to WWW::Chat, on |
61
|
|
|
|
|
|
|
which it is based. The main difference between this module and |
62
|
|
|
|
|
|
|
WWW::Chat is that WWW::Chat requires a pre-processing stage before you |
63
|
|
|
|
|
|
|
can run your script, whereas WWW::Automate does not. |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
WWW::Automate is a subclass of LWP::UserAgent, so anything you can do |
66
|
|
|
|
|
|
|
with an LWP::UserAgent, you can also do with this. See |
67
|
|
|
|
|
|
|
L for more information on the possibilities. |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
=head2 new() |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
Creates and returns a new WWW::Automate object, hereafter referred to as |
72
|
|
|
|
|
|
|
the 'agent'. |
73
|
1
|
|
|
|
|
41
|
|
|
1
|
|
|
|
|
5
|
|
74
|
1
|
|
|
1
|
|
2
|
my $agent = WWW::Automate->new() |
|
1
|
|
|
|
|
21327
|
|
|
1
|
|
|
|
|
598
|
|
75
|
1
|
|
|
1
|
|
6
|
|
|
1
|
|
|
|
|
312
|
|
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
22
|
|
76
|
1
|
|
|
1
|
|
128
|
=begin testing |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
2605
|
|
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
BEGIN: { |
79
|
1
|
|
|
|
|
555
|
use lib qw(lib/); |
80
|
1
|
|
|
|
|
333
|
use_ok('WWW::Automate'); |
81
|
1
|
|
|
|
|
451
|
use vars qw($agent); |
82
|
1
|
|
|
|
|
395
|
} |
83
|
1
|
|
|
|
|
516
|
|
84
|
1
|
|
|
|
|
383
|
ok(WWW::Automate->can('new'), "can we call new?"); |
85
|
|
|
|
|
|
|
ok($agent = WWW::Automate->new(), "create agent object"); |
86
|
|
|
|
|
|
|
isa_ok($agent, 'WWW::Automate', "agent is a WWW::Automate"); |
87
|
|
|
|
|
|
|
can_ok($agent, 'request'); # as a subclass of LWP::UserAgent |
88
|
|
|
|
|
|
|
like($agent->agent(), qr/WWW-Automate/, "Set user agent string"); |
89
|
|
|
|
|
|
|
like($agent->agent(), qr/$WWW::Automate::VERSION/, "Set user agent version"); |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
=end testing |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
=cut |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
our $base = "http://localhost/"; |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
sub new { |
98
|
3
|
|
|
3
|
1
|
5
|
shift; |
99
|
3
|
|
|
|
|
20
|
warn "WWW::Automate is no longer maintained. Please use WWW::Mechanize instead.\n"; |
100
|
3
|
|
|
|
|
28
|
my $self = { page_stack => [] }; |
101
|
3
|
|
|
|
|
5
|
bless $self; |
102
|
3
|
|
|
|
|
25
|
$self->agent("WWW-Automate-$VERSION"); |
103
|
3
|
|
|
|
|
988
|
$self->env_proxy(); |
104
|
3
|
|
|
|
|
6906
|
return $self; |
105
|
|
|
|
|
|
|
} |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
=head2 $agent->get($url) |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
Given a URL/URI, fetches it. |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
The results are stored internally in the agent object, as follows: |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
uri The current URI |
114
|
|
|
|
|
|
|
req The current request object [HTTP::Request] |
115
|
|
|
|
|
|
|
res The response received [HTTP::Response] |
116
|
|
|
|
|
|
|
status The status code of the response |
117
|
|
|
|
|
|
|
ct The content type of the response |
118
|
|
|
|
|
|
|
base The base URI for current response |
119
|
|
|
|
|
|
|
content The content of the response |
120
|
|
|
|
|
|
|
forms Array of forms found in content [HTML::Form] |
121
|
|
|
|
|
|
|
form Current form [HTML::Form] |
122
|
1
|
|
|
|
|
384
|
links Array of links found in content |
|
1
|
|
|
|
|
10
|
|
123
|
1
|
|
|
|
|
412
|
|
124
|
1
|
|
|
|
|
422
|
You can get at them with, for example: $agent->{content} |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=begin testing |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
ok($agent->get("http://google.com"), "Get google webpage"); |
129
|
|
|
|
|
|
|
isa_ok($agent->{uri}, "URI", "Set uri"); |
130
|
|
|
|
|
|
|
isa_ok($agent->{req}, 'HTTP::Request', "req should be a HTTP::Request"); |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
=end testing |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=cut |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
sub get { |
137
|
3
|
|
|
3
|
1
|
9
|
my ($self, $uri) = @_; |
138
|
3
|
|
|
|
|
25
|
$self->{uri} = URI->new_abs($uri, $base); |
139
|
3
|
|
|
|
|
10699
|
$self->{req} = HTTP::Request->new(GET => $uri); |
140
|
3
|
|
|
|
|
414
|
$self->do_request(); |
141
|
|
|
|
|
|
|
} |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
=head2 $agent->follow($string|$num) |
144
|
|
|
|
|
|
|
|
145
|
1
|
|
|
|
|
351
|
Follow a link. If you provide a string, the first link whose text |
|
1
|
|
|
|
|
7
|
|
146
|
1
|
|
|
|
|
319
|
matches that string will be followed. If you provide a number, it will |
147
|
1
|
|
|
|
|
665
|
be the nth link on the page. |
148
|
|
|
|
|
|
|
|
149
|
1
|
|
|
|
|
200
|
=begin testing |
150
|
1
|
|
|
|
|
202
|
|
151
|
1
|
|
|
|
|
406
|
ok(! $agent->follow(99999), "Can't follow too-high-numbered link"); |
152
|
|
|
|
|
|
|
ok($agent->follow(1), "Can follow first link"); |
153
|
|
|
|
|
|
|
ok($agent->back(), "Can go back"); |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
ok(! $agent->follow(qr/asdfghjksdfghj/), "Can't follow unlikely named link"); |
156
|
|
|
|
|
|
|
ok($agent->follow("Search"), "Can follow obvious named link"); |
157
|
|
|
|
|
|
|
$agent->back(); |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
=end testing |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
=cut |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
sub follow { |
164
|
4
|
|
|
4
|
1
|
10
|
my ($self, $link) = @_; |
165
|
4
|
|
|
|
|
5
|
my @links = @{$self->{links}}; |
|
4
|
|
|
|
|
11
|
|
166
|
4
|
|
|
|
|
6
|
my $thislink; |
167
|
4
|
100
|
|
|
|
11
|
if (isnumber($link)) { |
168
|
2
|
50
|
|
|
|
7
|
if ($link <= $#links) { |
169
|
0
|
|
|
|
|
0
|
$thislink = $links[$link]; |
170
|
|
|
|
|
|
|
} else { |
171
|
2
|
|
|
|
|
18
|
warn "Link number $link is greater than maximum link $#links ", |
172
|
|
|
|
|
|
|
"on this page ($self->{uri})\n"; |
173
|
2
|
|
|
|
|
51
|
return undef; |
174
|
|
|
|
|
|
|
} |
175
|
|
|
|
|
|
|
} else { # user provided a regexp |
176
|
2
|
|
|
|
|
5
|
LINK: foreach my $l (@links) { |
177
|
2
|
50
|
|
|
|
20
|
if ($l->[1] =~ /$link/) { |
178
|
0
|
|
|
|
|
0
|
$thislink = $l; # grab first match |
179
|
0
|
|
|
|
|
0
|
last LINK; |
180
|
|
|
|
|
|
|
} |
181
|
|
|
|
|
|
|
} |
182
|
2
|
50
|
|
|
|
7
|
unless ($thislink) { |
183
|
2
|
|
|
|
|
9
|
warn "Can't find any link matching $link on this page ", |
184
|
|
|
|
|
|
|
"($self->{uri})\n"; |
185
|
2
|
|
|
|
|
33
|
return undef; |
186
|
|
|
|
|
|
|
} |
187
|
|
|
|
|
|
|
} |
188
|
|
|
|
|
|
|
|
189
|
0
|
|
|
|
|
0
|
$thislink = $thislink->[0]; # we just want the URL, not the text |
190
|
|
|
|
|
|
|
|
191
|
0
|
|
|
|
|
0
|
$self->push_page_stack(); |
192
|
|
|
|
|
|
|
#print STDERR "thislink is $thislink, base is $self->{base}"; |
193
|
0
|
|
|
|
|
0
|
$self->{uri} = URI->new_abs($thislink, $self->{base}); |
194
|
0
|
|
|
|
|
0
|
$self->{req} = HTTP::Request->new(GET => $self->{uri}); |
195
|
0
|
|
|
|
|
0
|
$self->do_request(); |
196
|
|
|
|
|
|
|
|
197
|
0
|
|
|
|
|
0
|
return 1; |
198
|
|
|
|
|
|
|
} |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
=head2 $agent->form($number) |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
Selects the Nth form on the page as the target for subsequent calls to |
203
|
1
|
|
|
|
|
2
|
field() and click(). Emits a warning and returns false if there is no |
|
1
|
|
|
|
|
6
|
|
204
|
1
|
|
|
|
|
10
|
such form. Forms are indexed from 1, that is to say, the first form is |
205
|
1
|
|
|
|
|
14
|
number 1 (not zero). |
206
|
1
|
|
|
|
|
666
|
|
207
|
1
|
|
|
|
|
263
|
=begin testing |
208
|
1
|
|
|
|
|
284
|
|
209
|
|
|
|
|
|
|
my $t = WWW::Automate->new(); |
210
|
|
|
|
|
|
|
$t->get("http://google.com"); |
211
|
|
|
|
|
|
|
ok($t->form(1), "Can select the first form"); |
212
|
|
|
|
|
|
|
is($t->{form}, $t->{forms}->[0], "Set the form attribute"); |
213
|
|
|
|
|
|
|
ok(! $t->form(99), "Can't select the 99th form"); |
214
|
|
|
|
|
|
|
is($t->{form}, $t->{forms}->[0], "Form is still set to 1"); |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
=end testing |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
=cut |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
sub form { |
221
|
2
|
|
|
2
|
1
|
5
|
my ($self, $form) = @_; |
222
|
2
|
50
|
|
|
|
9
|
if ($self->{forms}->[$form-1]) { |
223
|
0
|
|
|
|
|
0
|
$self->{form} = $self->{forms}->[$form-1]; |
224
|
0
|
|
|
|
|
0
|
return 1; |
225
|
|
|
|
|
|
|
} else { |
226
|
2
|
|
|
|
|
480
|
carp "There is no form number $form"; |
227
|
2
|
|
|
|
|
109
|
return 0; |
228
|
|
|
|
|
|
|
} |
229
|
|
|
|
|
|
|
} |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
=head2 $agent->field($name, $value, $number) |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
Given the name of a field, set its value to the value specified. This |
234
|
|
|
|
|
|
|
applies to the current form (as set by the form() method or defaulting |
235
|
|
|
|
|
|
|
to the first form on the page). |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
The optional $number parameter is used to distinguish between two fields |
238
|
|
|
|
|
|
|
with the same name. The fields are numbered from 1. |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
=cut |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
sub field { |
243
|
1
|
|
|
1
|
1
|
3
|
my ($self, $name, $value, $number) = @_; |
244
|
1
|
|
50
|
|
|
10
|
$number ||= 1; |
245
|
1
|
50
|
|
|
|
6
|
if ($number > 1) { |
246
|
0
|
|
|
|
|
0
|
$form->find_input($name, $number)->value($value); |
247
|
|
|
|
|
|
|
} else { |
248
|
1
|
|
|
|
|
315
|
$self->{form}->value($name => $value); |
249
|
|
|
|
|
|
|
} |
250
|
|
|
|
|
|
|
} |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
=head2 $agent->click($button, $x, $y); |
253
|
|
|
|
|
|
|
|
254
|
1
|
|
|
|
|
290
|
Has the effect of clicking a button on a form. This method takes an |
|
1
|
|
|
|
|
7
|
|
255
|
1
|
|
|
|
|
6
|
optional method which is the name of the button to be pressed. If there |
256
|
1
|
|
|
|
|
8
|
is only one button on the form, it simply clicks that one button. |
257
|
0
|
|
|
|
|
0
|
|
258
|
0
|
|
|
|
|
0
|
=begin testing |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
my $t = WWW::Automate->new(); |
261
|
|
|
|
|
|
|
$t->get("http://google.com"); |
262
|
|
|
|
|
|
|
$t->field(q => "foo"); |
263
|
|
|
|
|
|
|
ok($t->click("btnG"), "Can click 'btnG' ('Google Search' button)"); |
264
|
|
|
|
|
|
|
like($t->{content}, qr/foo\s?fighters/i, "Found 'Foo Fighters'"); |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
=end testing |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
=cut |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
sub click { |
271
|
0
|
|
|
0
|
1
|
0
|
my ($self, $button, $x, $y) = @_; |
272
|
0
|
0
|
|
|
|
0
|
for ($x, $y) { $_ = 1 unless defined; } |
|
0
|
|
|
|
|
0
|
|
273
|
0
|
|
|
|
|
0
|
$self->push_page_stack(); |
274
|
0
|
|
|
|
|
0
|
$self->{uri} = $self->{form}->uri; |
275
|
0
|
|
|
|
|
0
|
$self->{req} = $self->{form}->click($name, $x, $y); |
276
|
0
|
|
|
|
|
0
|
$self->do_request(); |
277
|
|
|
|
|
|
|
} |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
=head2 $agent->submit() |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
Shortcut for $a->click("submit") |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
=cut |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
sub submit { |
286
|
0
|
|
|
0
|
1
|
0
|
my ($self) = shift; |
287
|
0
|
|
|
|
|
0
|
$self->click("submit"); |
288
|
|
|
|
|
|
|
} |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
=head2 $agent->back(); |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
The equivalent of hitting the "back" button in a browser. Returns to |
293
|
|
|
|
|
|
|
the previous page. Won't go back past the first page. |
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
=cut |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
sub back { |
298
|
2
|
|
|
2
|
1
|
4
|
my $self = shift; |
299
|
2
|
|
|
|
|
4
|
$self->pop_page_stack; |
300
|
|
|
|
|
|
|
} |
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
=head2 $agent->add_header(name => $value) |
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
Sets a header for the WWW::Automate agent to use every time it gets a |
305
|
|
|
|
|
|
|
webpage. This is *NOT* stored in the agent object (because if it were, |
306
|
|
|
|
|
|
|
it would disappear if you went back() past where you'd set it) but in |
307
|
|
|
|
|
|
|
the hash variable %WWW::Automate::headers, which is a hashref of all headers |
308
|
0
|
|
|
|
|
0
|
to be set. You can manipulate this directly if you want to; the |
|
0
|
|
|
|
|
0
|
|
309
|
0
|
|
|
|
|
0
|
add_header() method is just provided as a convenience function for the most |
310
|
|
|
|
|
|
|
common case of adding a header. |
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
=begin testing |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
$agent->add_header(foo => 'bar'); |
315
|
|
|
|
|
|
|
is($WWW::Automate::headers{'foo'}, 'bar', "set header"); |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
=end testing |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
=cut |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
sub add_header { |
322
|
0
|
|
|
0
|
1
|
0
|
my ($self, $name, $value) = @_; |
323
|
0
|
|
|
|
|
0
|
$WWW::Automate::headers{$name} = $value; |
324
|
|
|
|
|
|
|
} |
325
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
=head1 INTERNAL METHODS |
327
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
These methods are only used internally. You probably don't need to |
329
|
|
|
|
|
|
|
know about them. |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
=head2 push_page_stack() |
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
=head2 pop_page_stack() |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
The agent keeps a stack of visited pages, which it can pop when it needs |
336
|
|
|
|
|
|
|
to go BACK and so on. |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
The current page needs to be pushed onto the stack before we get a new |
339
|
|
|
|
|
|
|
page, and the stack needs to be popped when BACK occurs. |
340
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
341
|
0
|
|
|
|
|
0
|
Neither of these take any arguments, they just operate on the $agent |
342
|
0
|
|
|
|
|
0
|
object. |
|
0
|
|
|
|
|
0
|
|
343
|
0
|
|
|
|
|
0
|
|
344
|
0
|
|
|
|
|
0
|
=begin testing |
|
0
|
|
|
|
|
0
|
|
345
|
0
|
|
|
|
|
0
|
|
346
|
0
|
|
|
|
|
0
|
my $t = WWW::Automate->new(); |
|
0
|
|
|
|
|
0
|
|
347
|
0
|
|
|
|
|
0
|
$t->get("http://www.google.com"); |
348
|
0
|
|
|
|
|
0
|
is(scalar @{$t->{page_stack}}, 0, "Page stack starts empty"); |
|
0
|
|
|
|
|
0
|
|
349
|
0
|
|
|
|
|
0
|
$t->push_page_stack(); |
350
|
0
|
|
|
|
|
0
|
is(scalar @{$t->{page_stack}}, 1, "Pushed item onto page stack"); |
|
0
|
|
|
|
|
0
|
|
351
|
0
|
|
|
|
|
0
|
$t->push_page_stack(); |
352
|
0
|
|
|
|
|
0
|
is(scalar @{$t->{page_stack}}, 2, "Pushed item onto page stack"); |
|
0
|
|
|
|
|
0
|
|
353
|
|
|
|
|
|
|
$t->pop_page_stack(); |
354
|
|
|
|
|
|
|
is(scalar @{$t->{page_stack}}, 1, "Popped item from page stack"); |
355
|
|
|
|
|
|
|
$t->pop_page_stack(); |
356
|
|
|
|
|
|
|
is(scalar @{$t->{page_stack}}, 0, "Popped item from page stack"); |
357
|
|
|
|
|
|
|
$t->pop_page_stack(); |
358
|
|
|
|
|
|
|
is(scalar @{$t->{page_stack}}, 0, "Can't pop beyond end of page stack"); |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
=end testing |
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
=cut |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
sub push_page_stack { |
366
|
0
|
|
|
0
|
1
|
0
|
my $self = shift; |
367
|
0
|
|
|
|
|
0
|
$self->{page_stack} = [ @{$self->{page_stack}}, clone($self)]; |
|
0
|
|
|
|
|
0
|
|
368
|
0
|
|
|
|
|
0
|
return 1; |
369
|
|
|
|
|
|
|
} |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
sub pop_page_stack { |
372
|
2
|
|
|
2
|
1
|
3
|
my $self = shift; |
373
|
2
|
50
|
|
|
|
3
|
if (@{$self->{page_stack}}) { |
|
2
|
|
|
|
|
7
|
|
374
|
0
|
|
|
|
|
0
|
$self = pop @{$self->{page_stack}}; |
|
0
|
|
|
|
|
0
|
|
375
|
0
|
|
|
|
|
0
|
bless $self; |
376
|
|
|
|
|
|
|
} |
377
|
2
|
|
|
|
|
6
|
return 1; |
378
|
|
|
|
|
|
|
} |
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
=head2 extract_links() |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
Extracts HREF links from the content of a webpage. |
383
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
=cut |
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
sub extract_links { |
387
|
3
|
|
|
3
|
1
|
40
|
my $self = shift; |
388
|
3
|
|
|
|
|
18
|
my $p = HTML::TokeParser->new(\$self->{content}); |
389
|
3
|
|
|
|
|
503
|
my @links; |
390
|
|
|
|
|
|
|
|
391
|
3
|
|
|
|
|
14
|
while (my $token = $p->get_tag("a", "frame")) { |
392
|
3
|
50
|
|
|
|
937
|
my $url = $token->[0] eq 'a' ? $token->[1]{href} : $token->[1]{src}; |
393
|
3
|
50
|
|
|
|
13
|
next unless defined $url; # probably just a name link |
394
|
3
|
50
|
|
|
|
19
|
my $text = $token->[0] eq 'a' ? |
395
|
|
|
|
|
|
|
$p->get_trimmed_text("/a"):$token->[1]{name}; |
396
|
3
|
|
|
|
|
216
|
push(@links, [$url => $text]); |
397
|
|
|
|
|
|
|
} |
398
|
3
|
|
|
|
|
282
|
return \@links; |
399
|
|
|
|
|
|
|
} |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
=head2 do_request() |
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
Actually performs a request on the $self->{req} request object, and sets |
404
|
|
|
|
|
|
|
a bunch of attributes on $self. |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
=cut |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
sub do_request { |
409
|
3
|
|
|
3
|
1
|
6
|
my ($self) = @_; |
410
|
3
|
|
|
|
|
14
|
foreach my $h (keys %WWW::Automate::headers) { |
411
|
0
|
|
|
|
|
0
|
$self->{req}->header( $h => $WWW::Automate::headers{$h} ); |
412
|
|
|
|
|
|
|
} |
413
|
3
|
|
|
|
|
22
|
$self->{res} = $self->request($self->{req}); |
414
|
3
|
|
|
|
|
176416
|
$self->{status} = $self->{res}->code; |
415
|
3
|
|
|
|
|
46
|
$self->{base} = $self->{res}->base; |
416
|
3
|
|
50
|
|
|
1707
|
$self->{ct} = $self->{res}->content_type || ""; |
417
|
3
|
|
|
|
|
156
|
$self->{content} = $self->{res}->content; |
418
|
|
|
|
|
|
|
|
419
|
3
|
50
|
|
|
|
59
|
if ($self->{ct} eq 'text/html') { |
420
|
3
|
|
|
|
|
28
|
$self->{forms} = [ HTML::Form->parse($self->{content}, $self->{res}->base) ]; |
421
|
3
|
50
|
|
|
|
3424
|
$self->{form} = $self->{forms}->[0] if @{$self->{forms}}; |
|
3
|
|
|
|
|
16
|
|
422
|
3
|
|
|
|
|
16
|
$self->{links} = $self->extract_links(); |
423
|
|
|
|
|
|
|
} |
424
|
|
|
|
|
|
|
} |
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
sub isnumber { |
427
|
4
|
|
|
4
|
0
|
6
|
my $in = shift; |
428
|
4
|
100
|
|
|
|
17
|
if ($in =~ /^\d+$/) { |
429
|
2
|
|
|
|
|
17
|
return 1; |
430
|
|
|
|
|
|
|
} else { |
431
|
2
|
|
|
|
|
4
|
return 0; |
432
|
|
|
|
|
|
|
} |
433
|
|
|
|
|
|
|
} |
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
=head1 BUGS |
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
Please report any bugs via the system at http://rt.cpan.org/ |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
=head1 AUTHOR |
440
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
Kirrily "Skud" Robert |
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
=cut |
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
1; |