| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | #!/usr/bin/perl -w | 
| 2 |  |  |  |  |  |  | # | 
| 3 |  |  |  |  |  |  | # WWW::Automate (c) 2002 Kirrily Robert | 
| 4 |  |  |  |  |  |  | # This software is distributed under the same licenses as Perl; see | 
| 5 |  |  |  |  |  |  | # the file COPYING for details. | 
| 6 |  |  |  |  |  |  |  | 
| 7 |  |  |  |  |  |  | # | 
| 8 |  |  |  |  |  |  | # $Id: Automate.pm,v 1.1.1.1 2005/12/01 03:07:33 chezskud Exp $ | 
| 9 |  |  |  |  |  |  | # | 
| 10 |  |  |  |  |  |  |  | 
| 11 |  |  |  |  |  |  | package WWW::Automate; | 
| 12 |  |  |  |  |  |  |  | 
| 13 | 1 |  |  | 1 |  | 975 | use HTTP::Request; | 
|  | 1 |  |  |  |  | 23231 |  | 
|  | 1 |  |  |  |  | 43 |  | 
| 14 | 1 |  |  | 1 |  | 1090 | use LWP::UserAgent; | 
|  | 1 |  |  |  |  | 22771 |  | 
|  | 1 |  |  |  |  | 34 |  | 
| 15 | 1 |  |  | 1 |  | 1035 | use HTML::Form; | 
|  | 1 |  |  |  |  | 19006 |  | 
|  | 1 |  |  |  |  | 73 |  | 
| 16 | 1 |  |  | 1 |  | 1018 | use HTML::TokeParser; | 
|  | 1 |  |  |  |  | 12280 |  | 
|  | 1 |  |  |  |  | 36 |  | 
| 17 | 1 |  |  | 1 |  | 683 | use Clone qw(clone); | 
|  | 1 |  |  |  |  | 15732 |  | 
|  | 1 |  |  |  |  | 99 |  | 
| 18 | 1 |  |  | 1 |  | 12 | use Carp; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 1593 |  | 
| 19 |  |  |  |  |  |  |  | 
| 20 |  |  |  |  |  |  | our @ISA = qw( LWP::UserAgent ); | 
| 21 |  |  |  |  |  |  |  | 
| 22 |  |  |  |  |  |  | my $VERSION = $VERSION = "0.21"; | 
| 23 |  |  |  |  |  |  |  | 
| 24 |  |  |  |  |  |  | my $headers; | 
| 25 |  |  |  |  |  |  |  | 
| 26 |  |  |  |  |  |  | =pod | 
| 27 |  |  |  |  |  |  |  | 
| 28 |  |  |  |  |  |  | =head1 NAME | 
| 29 |  |  |  |  |  |  |  | 
| 30 |  |  |  |  |  |  | WWW::Automate - automate interaction with websites | 
| 31 |  |  |  |  |  |  |  | 
| 32 |  |  |  |  |  |  | =head1 NOTICE | 
| 33 |  |  |  |  |  |  |  | 
| 34 |  |  |  |  |  |  | B | 
| 35 |  |  |  |  |  |  |  | 
| 36 |  |  |  |  |  |  | Please use WWW::Mechanize instead. | 
| 37 |  |  |  |  |  |  |  | 
| 38 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 39 |  |  |  |  |  |  |  | 
| 40 |  |  |  |  |  |  | use WWW::Automate; | 
| 41 |  |  |  |  |  |  | my $agent = WWW::Automate->new(); | 
| 42 |  |  |  |  |  |  |  | 
| 43 |  |  |  |  |  |  | $agent->get($url); | 
| 44 |  |  |  |  |  |  |  | 
| 45 |  |  |  |  |  |  | $agent->follow($link); | 
| 46 |  |  |  |  |  |  |  | 
| 47 |  |  |  |  |  |  | $agent->form($number); | 
| 48 |  |  |  |  |  |  | $agent->field($name, $value); | 
| 49 |  |  |  |  |  |  | $agent->click($button); | 
| 50 |  |  |  |  |  |  |  | 
| 51 |  |  |  |  |  |  | $agent->back(); | 
| 52 |  |  |  |  |  |  |  | 
| 53 |  |  |  |  |  |  | $agent->add_header($name => $value); | 
| 54 |  |  |  |  |  |  |  | 
| 55 |  |  |  |  |  |  | print "OK" if $agent->{content} =~ /$expected/; | 
| 56 |  |  |  |  |  |  |  | 
| 57 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 58 |  |  |  |  |  |  |  | 
| 59 |  |  |  |  |  |  | This module is intended to help you automate interaction with a website. | 
| 60 |  |  |  |  |  |  | It bears a not-very-remarkable outwards resemblance to WWW::Chat, on | 
| 61 |  |  |  |  |  |  | which it is based.  The main difference between this module and | 
| 62 |  |  |  |  |  |  | WWW::Chat is that WWW::Chat requires a pre-processing stage before you | 
| 63 |  |  |  |  |  |  | can run your script, whereas WWW::Automate does not. | 
| 64 |  |  |  |  |  |  |  | 
| 65 |  |  |  |  |  |  | WWW::Automate is a subclass of LWP::UserAgent, so anything you can do | 
| 66 |  |  |  |  |  |  | with an LWP::UserAgent, you can also do with this.  See | 
| 67 |  |  |  |  |  |  | L for more information on the possibilities. | 
| 68 |  |  |  |  |  |  |  | 
| 69 |  |  |  |  |  |  | =head2 new() | 
| 70 |  |  |  |  |  |  |  | 
| 71 |  |  |  |  |  |  | Creates and returns a new WWW::Automate object, hereafter referred to as | 
| 72 |  |  |  |  |  |  | the 'agent'. | 
| 73 | 1 |  |  |  |  | 41 |  | 
|  | 1 |  |  |  |  | 5 |  | 
| 74 | 1 |  |  | 1 |  | 2 | my $agent = WWW::Automate->new() | 
|  | 1 |  |  |  |  | 21327 |  | 
|  | 1 |  |  |  |  | 598 |  | 
| 75 | 1 |  |  | 1 |  | 6 |  | 
|  | 1 |  |  |  |  | 312 |  | 
|  | 1 |  |  |  |  | 4 |  | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 22 |  | 
| 76 | 1 |  |  | 1 |  | 128 | =begin testing | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 2605 |  | 
| 77 |  |  |  |  |  |  |  | 
| 78 |  |  |  |  |  |  | BEGIN: { | 
| 79 | 1 |  |  |  |  | 555 | use lib qw(lib/); | 
| 80 | 1 |  |  |  |  | 333 | use_ok('WWW::Automate'); | 
| 81 | 1 |  |  |  |  | 451 | use vars qw($agent); | 
| 82 | 1 |  |  |  |  | 395 | } | 
| 83 | 1 |  |  |  |  | 516 |  | 
| 84 | 1 |  |  |  |  | 383 | ok(WWW::Automate->can('new'), "can we call new?"); | 
| 85 |  |  |  |  |  |  | ok($agent = WWW::Automate->new(), "create agent object"); | 
| 86 |  |  |  |  |  |  | isa_ok($agent, 'WWW::Automate', "agent is a WWW::Automate"); | 
| 87 |  |  |  |  |  |  | can_ok($agent, 'request');  # as a subclass of LWP::UserAgent | 
| 88 |  |  |  |  |  |  | like($agent->agent(), qr/WWW-Automate/, "Set user agent string"); | 
| 89 |  |  |  |  |  |  | like($agent->agent(), qr/$WWW::Automate::VERSION/, "Set user agent version"); | 
| 90 |  |  |  |  |  |  |  | 
| 91 |  |  |  |  |  |  | =end testing | 
| 92 |  |  |  |  |  |  |  | 
| 93 |  |  |  |  |  |  | =cut | 
| 94 |  |  |  |  |  |  |  | 
| 95 |  |  |  |  |  |  | our $base = "http://localhost/"; | 
| 96 |  |  |  |  |  |  |  | 
| 97 |  |  |  |  |  |  | sub new { | 
| 98 | 3 |  |  | 3 | 1 | 5 | shift; | 
| 99 | 3 |  |  |  |  | 20 | warn "WWW::Automate is no longer maintained.  Please use WWW::Mechanize instead.\n"; | 
| 100 | 3 |  |  |  |  | 28 | my $self = { page_stack => [] }; | 
| 101 | 3 |  |  |  |  | 5 | bless $self; | 
| 102 | 3 |  |  |  |  | 25 | $self->agent("WWW-Automate-$VERSION"); | 
| 103 | 3 |  |  |  |  | 988 | $self->env_proxy(); | 
| 104 | 3 |  |  |  |  | 6906 | return $self; | 
| 105 |  |  |  |  |  |  | } | 
| 106 |  |  |  |  |  |  |  | 
| 107 |  |  |  |  |  |  | =head2 $agent->get($url) | 
| 108 |  |  |  |  |  |  |  | 
| 109 |  |  |  |  |  |  | Given a URL/URI, fetches it. | 
| 110 |  |  |  |  |  |  |  | 
| 111 |  |  |  |  |  |  | The results are stored internally in the agent object, as follows: | 
| 112 |  |  |  |  |  |  |  | 
| 113 |  |  |  |  |  |  | uri       The current URI | 
| 114 |  |  |  |  |  |  | req       The current request object        [HTTP::Request] | 
| 115 |  |  |  |  |  |  | res       The response received             [HTTP::Response] | 
| 116 |  |  |  |  |  |  | status    The status code of the response | 
| 117 |  |  |  |  |  |  | ct        The content type of the response | 
| 118 |  |  |  |  |  |  | base      The base URI for current response | 
| 119 |  |  |  |  |  |  | content   The content of the response | 
| 120 |  |  |  |  |  |  | forms     Array of forms found in content   [HTML::Form] | 
| 121 |  |  |  |  |  |  | form      Current form                      [HTML::Form] | 
| 122 | 1 |  |  |  |  | 384 | links     Array of links found in content | 
|  | 1 |  |  |  |  | 10 |  | 
| 123 | 1 |  |  |  |  | 412 |  | 
| 124 | 1 |  |  |  |  | 422 | You can get at them with, for example: $agent->{content} | 
| 125 |  |  |  |  |  |  |  | 
| 126 |  |  |  |  |  |  | =begin testing | 
| 127 |  |  |  |  |  |  |  | 
| 128 |  |  |  |  |  |  | ok($agent->get("http://google.com"), "Get google webpage"); | 
| 129 |  |  |  |  |  |  | isa_ok($agent->{uri}, "URI", "Set uri"); | 
| 130 |  |  |  |  |  |  | isa_ok($agent->{req}, 'HTTP::Request', "req should be a HTTP::Request"); | 
| 131 |  |  |  |  |  |  |  | 
| 132 |  |  |  |  |  |  | =end testing | 
| 133 |  |  |  |  |  |  |  | 
| 134 |  |  |  |  |  |  | =cut | 
| 135 |  |  |  |  |  |  |  | 
| 136 |  |  |  |  |  |  | sub get { | 
| 137 | 3 |  |  | 3 | 1 | 9 | my ($self, $uri) = @_; | 
| 138 | 3 |  |  |  |  | 25 | $self->{uri} = URI->new_abs($uri, $base); | 
| 139 | 3 |  |  |  |  | 10699 | $self->{req} = HTTP::Request->new(GET => $uri); | 
| 140 | 3 |  |  |  |  | 414 | $self->do_request(); | 
| 141 |  |  |  |  |  |  | } | 
| 142 |  |  |  |  |  |  |  | 
| 143 |  |  |  |  |  |  | =head2 $agent->follow($string|$num) | 
| 144 |  |  |  |  |  |  |  | 
| 145 | 1 |  |  |  |  | 351 | Follow a link.  If you provide a string, the first link whose text | 
|  | 1 |  |  |  |  | 7 |  | 
| 146 | 1 |  |  |  |  | 319 | matches that string will be followed.  If you provide a number, it will | 
| 147 | 1 |  |  |  |  | 665 | be the nth link on the page. | 
| 148 |  |  |  |  |  |  |  | 
| 149 | 1 |  |  |  |  | 200 | =begin testing | 
| 150 | 1 |  |  |  |  | 202 |  | 
| 151 | 1 |  |  |  |  | 406 | ok(! $agent->follow(99999), "Can't follow too-high-numbered link"); | 
| 152 |  |  |  |  |  |  | ok($agent->follow(1), "Can follow first link"); | 
| 153 |  |  |  |  |  |  | ok($agent->back(), "Can go back"); | 
| 154 |  |  |  |  |  |  |  | 
| 155 |  |  |  |  |  |  | ok(! $agent->follow(qr/asdfghjksdfghj/), "Can't follow unlikely named link"); | 
| 156 |  |  |  |  |  |  | ok($agent->follow("Search"), "Can follow obvious named link"); | 
| 157 |  |  |  |  |  |  | $agent->back(); | 
| 158 |  |  |  |  |  |  |  | 
| 159 |  |  |  |  |  |  | =end testing | 
| 160 |  |  |  |  |  |  |  | 
| 161 |  |  |  |  |  |  | =cut | 
| 162 |  |  |  |  |  |  |  | 
| 163 |  |  |  |  |  |  | sub follow { | 
| 164 | 4 |  |  | 4 | 1 | 10 | my ($self, $link) = @_; | 
| 165 | 4 |  |  |  |  | 5 | my @links = @{$self->{links}}; | 
|  | 4 |  |  |  |  | 11 |  | 
| 166 | 4 |  |  |  |  | 6 | my $thislink; | 
| 167 | 4 | 100 |  |  |  | 11 | if (isnumber($link)) { | 
| 168 | 2 | 50 |  |  |  | 7 | if ($link <= $#links) { | 
| 169 | 0 |  |  |  |  | 0 | $thislink = $links[$link]; | 
| 170 |  |  |  |  |  |  | } else { | 
| 171 | 2 |  |  |  |  | 18 | warn "Link number $link is greater than maximum link $#links ", | 
| 172 |  |  |  |  |  |  | "on this page ($self->{uri})\n"; | 
| 173 | 2 |  |  |  |  | 51 | return undef; | 
| 174 |  |  |  |  |  |  | } | 
| 175 |  |  |  |  |  |  | } else {                        # user provided a regexp | 
| 176 | 2 |  |  |  |  | 5 | LINK: foreach my $l (@links) { | 
| 177 | 2 | 50 |  |  |  | 20 | if ($l->[1] =~ /$link/) { | 
| 178 | 0 |  |  |  |  | 0 | $thislink = $l;     # grab first match | 
| 179 | 0 |  |  |  |  | 0 | last LINK; | 
| 180 |  |  |  |  |  |  | } | 
| 181 |  |  |  |  |  |  | } | 
| 182 | 2 | 50 |  |  |  | 7 | unless ($thislink) { | 
| 183 | 2 |  |  |  |  | 9 | warn "Can't find any link matching $link on this page ", | 
| 184 |  |  |  |  |  |  | "($self->{uri})\n"; | 
| 185 | 2 |  |  |  |  | 33 | return undef; | 
| 186 |  |  |  |  |  |  | } | 
| 187 |  |  |  |  |  |  | } | 
| 188 |  |  |  |  |  |  |  | 
| 189 | 0 |  |  |  |  | 0 | $thislink = $thislink->[0];     # we just want the URL, not the text | 
| 190 |  |  |  |  |  |  |  | 
| 191 | 0 |  |  |  |  | 0 | $self->push_page_stack(); | 
| 192 |  |  |  |  |  |  | #print STDERR "thislink is $thislink, base is $self->{base}"; | 
| 193 | 0 |  |  |  |  | 0 | $self->{uri} = URI->new_abs($thislink, $self->{base}); | 
| 194 | 0 |  |  |  |  | 0 | $self->{req} = HTTP::Request->new(GET => $self->{uri}); | 
| 195 | 0 |  |  |  |  | 0 | $self->do_request(); | 
| 196 |  |  |  |  |  |  |  | 
| 197 | 0 |  |  |  |  | 0 | return 1; | 
| 198 |  |  |  |  |  |  | } | 
| 199 |  |  |  |  |  |  |  | 
| 200 |  |  |  |  |  |  | =head2 $agent->form($number) | 
| 201 |  |  |  |  |  |  |  | 
| 202 |  |  |  |  |  |  | Selects the Nth form on the page as the target for subsequent calls to | 
| 203 | 1 |  |  |  |  | 2 | field() and click().  Emits a warning and returns false if there is no | 
|  | 1 |  |  |  |  | 6 |  | 
| 204 | 1 |  |  |  |  | 10 | such form.  Forms are indexed from 1, that is to say, the first form is | 
| 205 | 1 |  |  |  |  | 14 | number 1 (not zero). | 
| 206 | 1 |  |  |  |  | 666 |  | 
| 207 | 1 |  |  |  |  | 263 | =begin testing | 
| 208 | 1 |  |  |  |  | 284 |  | 
| 209 |  |  |  |  |  |  | my $t = WWW::Automate->new(); | 
| 210 |  |  |  |  |  |  | $t->get("http://google.com"); | 
| 211 |  |  |  |  |  |  | ok($t->form(1), "Can select the first form"); | 
| 212 |  |  |  |  |  |  | is($t->{form}, $t->{forms}->[0], "Set the form attribute"); | 
| 213 |  |  |  |  |  |  | ok(! $t->form(99), "Can't select the 99th form"); | 
| 214 |  |  |  |  |  |  | is($t->{form}, $t->{forms}->[0], "Form is still set to 1"); | 
| 215 |  |  |  |  |  |  |  | 
| 216 |  |  |  |  |  |  | =end testing | 
| 217 |  |  |  |  |  |  |  | 
| 218 |  |  |  |  |  |  | =cut | 
| 219 |  |  |  |  |  |  |  | 
| 220 |  |  |  |  |  |  | sub form { | 
| 221 | 2 |  |  | 2 | 1 | 5 | my ($self, $form) = @_; | 
| 222 | 2 | 50 |  |  |  | 9 | if ($self->{forms}->[$form-1]) { | 
| 223 | 0 |  |  |  |  | 0 | $self->{form} = $self->{forms}->[$form-1]; | 
| 224 | 0 |  |  |  |  | 0 | return 1; | 
| 225 |  |  |  |  |  |  | } else { | 
| 226 | 2 |  |  |  |  | 480 | carp "There is no form number $form"; | 
| 227 | 2 |  |  |  |  | 109 | return 0; | 
| 228 |  |  |  |  |  |  | } | 
| 229 |  |  |  |  |  |  | } | 
| 230 |  |  |  |  |  |  |  | 
| 231 |  |  |  |  |  |  | =head2 $agent->field($name, $value, $number) | 
| 232 |  |  |  |  |  |  |  | 
| 233 |  |  |  |  |  |  | Given the name of a field, set its value to the value specified.  This | 
| 234 |  |  |  |  |  |  | applies to the current form (as set by the form() method or defaulting | 
| 235 |  |  |  |  |  |  | to the first form on the page). | 
| 236 |  |  |  |  |  |  |  | 
| 237 |  |  |  |  |  |  | The optional $number parameter is used to distinguish between two fields | 
| 238 |  |  |  |  |  |  | with the same name.  The fields are numbered from 1. | 
| 239 |  |  |  |  |  |  |  | 
| 240 |  |  |  |  |  |  | =cut | 
| 241 |  |  |  |  |  |  |  | 
| 242 |  |  |  |  |  |  | sub field { | 
| 243 | 1 |  |  | 1 | 1 | 3 | my ($self, $name, $value, $number) = @_; | 
| 244 | 1 |  | 50 |  |  | 10 | $number ||= 1; | 
| 245 | 1 | 50 |  |  |  | 6 | if ($number > 1) { | 
| 246 | 0 |  |  |  |  | 0 | $form->find_input($name, $number)->value($value); | 
| 247 |  |  |  |  |  |  | } else { | 
| 248 | 1 |  |  |  |  | 315 | $self->{form}->value($name => $value); | 
| 249 |  |  |  |  |  |  | } | 
| 250 |  |  |  |  |  |  | } | 
| 251 |  |  |  |  |  |  |  | 
| 252 |  |  |  |  |  |  | =head2 $agent->click($button, $x, $y); | 
| 253 |  |  |  |  |  |  |  | 
| 254 | 1 |  |  |  |  | 290 | Has the effect of clicking a button on a form.  This method takes an | 
|  | 1 |  |  |  |  | 7 |  | 
| 255 | 1 |  |  |  |  | 6 | optional method which is the name of the button to be pressed.  If there | 
| 256 | 1 |  |  |  |  | 8 | is only one button on the form, it simply clicks that one button. | 
| 257 | 0 |  |  |  |  | 0 |  | 
| 258 | 0 |  |  |  |  | 0 | =begin testing | 
| 259 |  |  |  |  |  |  |  | 
| 260 |  |  |  |  |  |  | my $t = WWW::Automate->new(); | 
| 261 |  |  |  |  |  |  | $t->get("http://google.com"); | 
| 262 |  |  |  |  |  |  | $t->field(q => "foo"); | 
| 263 |  |  |  |  |  |  | ok($t->click("btnG"), "Can click 'btnG' ('Google Search' button)"); | 
| 264 |  |  |  |  |  |  | like($t->{content}, qr/foo\s?fighters/i, "Found 'Foo Fighters'"); | 
| 265 |  |  |  |  |  |  |  | 
| 266 |  |  |  |  |  |  | =end testing | 
| 267 |  |  |  |  |  |  |  | 
| 268 |  |  |  |  |  |  | =cut | 
| 269 |  |  |  |  |  |  |  | 
| 270 |  |  |  |  |  |  | sub click { | 
| 271 | 0 |  |  | 0 | 1 | 0 | my ($self, $button, $x, $y) = @_; | 
| 272 | 0 | 0 |  |  |  | 0 | for ($x, $y) { $_ = 1 unless defined; } | 
|  | 0 |  |  |  |  | 0 |  | 
| 273 | 0 |  |  |  |  | 0 | $self->push_page_stack(); | 
| 274 | 0 |  |  |  |  | 0 | $self->{uri} = $self->{form}->uri; | 
| 275 | 0 |  |  |  |  | 0 | $self->{req} = $self->{form}->click($name, $x, $y); | 
| 276 | 0 |  |  |  |  | 0 | $self->do_request(); | 
| 277 |  |  |  |  |  |  | } | 
| 278 |  |  |  |  |  |  |  | 
| 279 |  |  |  |  |  |  | =head2 $agent->submit() | 
| 280 |  |  |  |  |  |  |  | 
| 281 |  |  |  |  |  |  | Shortcut for $a->click("submit") | 
| 282 |  |  |  |  |  |  |  | 
| 283 |  |  |  |  |  |  | =cut | 
| 284 |  |  |  |  |  |  |  | 
| 285 |  |  |  |  |  |  | sub submit { | 
| 286 | 0 |  |  | 0 | 1 | 0 | my ($self) = shift; | 
| 287 | 0 |  |  |  |  | 0 | $self->click("submit"); | 
| 288 |  |  |  |  |  |  | } | 
| 289 |  |  |  |  |  |  |  | 
| 290 |  |  |  |  |  |  | =head2 $agent->back(); | 
| 291 |  |  |  |  |  |  |  | 
| 292 |  |  |  |  |  |  | The equivalent of hitting the "back" button in a browser.  Returns to | 
| 293 |  |  |  |  |  |  | the previous page.  Won't go back past the first page. | 
| 294 |  |  |  |  |  |  |  | 
| 295 |  |  |  |  |  |  | =cut | 
| 296 |  |  |  |  |  |  |  | 
| 297 |  |  |  |  |  |  | sub back { | 
| 298 | 2 |  |  | 2 | 1 | 4 | my $self = shift; | 
| 299 | 2 |  |  |  |  | 4 | $self->pop_page_stack; | 
| 300 |  |  |  |  |  |  | } | 
| 301 |  |  |  |  |  |  |  | 
| 302 |  |  |  |  |  |  | =head2 $agent->add_header(name => $value) | 
| 303 |  |  |  |  |  |  |  | 
| 304 |  |  |  |  |  |  | Sets a header for the WWW::Automate agent to use every time it gets a | 
| 305 |  |  |  |  |  |  | webpage.  This is *NOT* stored in the agent object (because if it were, | 
| 306 |  |  |  |  |  |  | it would disappear if you went back() past where you'd set it) but in | 
| 307 |  |  |  |  |  |  | the hash variable %WWW::Automate::headers, which is a hashref of all headers | 
| 308 | 0 |  |  |  |  | 0 | to be set.  You can manipulate this directly if you want to; the | 
|  | 0 |  |  |  |  | 0 |  | 
| 309 | 0 |  |  |  |  | 0 | add_header() method is just provided as a convenience function for the most | 
| 310 |  |  |  |  |  |  | common case of adding a header. | 
| 311 |  |  |  |  |  |  |  | 
| 312 |  |  |  |  |  |  | =begin testing | 
| 313 |  |  |  |  |  |  |  | 
| 314 |  |  |  |  |  |  | $agent->add_header(foo => 'bar'); | 
| 315 |  |  |  |  |  |  | is($WWW::Automate::headers{'foo'}, 'bar', "set header"); | 
| 316 |  |  |  |  |  |  |  | 
| 317 |  |  |  |  |  |  | =end testing | 
| 318 |  |  |  |  |  |  |  | 
| 319 |  |  |  |  |  |  | =cut | 
| 320 |  |  |  |  |  |  |  | 
| 321 |  |  |  |  |  |  | sub add_header { | 
| 322 | 0 |  |  | 0 | 1 | 0 | my ($self, $name, $value) = @_; | 
| 323 | 0 |  |  |  |  | 0 | $WWW::Automate::headers{$name} = $value; | 
| 324 |  |  |  |  |  |  | } | 
| 325 |  |  |  |  |  |  |  | 
| 326 |  |  |  |  |  |  | =head1 INTERNAL METHODS | 
| 327 |  |  |  |  |  |  |  | 
| 328 |  |  |  |  |  |  | These methods are only used internally.  You probably don't need to | 
| 329 |  |  |  |  |  |  | know about them. | 
| 330 |  |  |  |  |  |  |  | 
| 331 |  |  |  |  |  |  | =head2 push_page_stack() | 
| 332 |  |  |  |  |  |  |  | 
| 333 |  |  |  |  |  |  | =head2 pop_page_stack() | 
| 334 |  |  |  |  |  |  |  | 
| 335 |  |  |  |  |  |  | The agent keeps a stack of visited pages, which it can pop when it needs | 
| 336 |  |  |  |  |  |  | to go BACK and so on. | 
| 337 |  |  |  |  |  |  |  | 
| 338 |  |  |  |  |  |  | The current page needs to be pushed onto the stack before we get a new | 
| 339 |  |  |  |  |  |  | page, and the stack needs to be popped when BACK occurs. | 
| 340 | 0 |  |  |  |  | 0 |  | 
|  | 0 |  |  |  |  | 0 |  | 
| 341 | 0 |  |  |  |  | 0 | Neither of these take any arguments, they just operate on the $agent | 
| 342 | 0 |  |  |  |  | 0 | object. | 
|  | 0 |  |  |  |  | 0 |  | 
| 343 | 0 |  |  |  |  | 0 |  | 
| 344 | 0 |  |  |  |  | 0 | =begin testing | 
|  | 0 |  |  |  |  | 0 |  | 
| 345 | 0 |  |  |  |  | 0 |  | 
| 346 | 0 |  |  |  |  | 0 | my $t = WWW::Automate->new(); | 
|  | 0 |  |  |  |  | 0 |  | 
| 347 | 0 |  |  |  |  | 0 | $t->get("http://www.google.com"); | 
| 348 | 0 |  |  |  |  | 0 | is(scalar @{$t->{page_stack}}, 0, "Page stack starts empty"); | 
|  | 0 |  |  |  |  | 0 |  | 
| 349 | 0 |  |  |  |  | 0 | $t->push_page_stack(); | 
| 350 | 0 |  |  |  |  | 0 | is(scalar @{$t->{page_stack}}, 1, "Pushed item onto page stack"); | 
|  | 0 |  |  |  |  | 0 |  | 
| 351 | 0 |  |  |  |  | 0 | $t->push_page_stack(); | 
| 352 | 0 |  |  |  |  | 0 | is(scalar @{$t->{page_stack}}, 2, "Pushed item onto page stack"); | 
|  | 0 |  |  |  |  | 0 |  | 
| 353 |  |  |  |  |  |  | $t->pop_page_stack(); | 
| 354 |  |  |  |  |  |  | is(scalar @{$t->{page_stack}}, 1, "Popped item from page stack"); | 
| 355 |  |  |  |  |  |  | $t->pop_page_stack(); | 
| 356 |  |  |  |  |  |  | is(scalar @{$t->{page_stack}}, 0, "Popped item from page stack"); | 
| 357 |  |  |  |  |  |  | $t->pop_page_stack(); | 
| 358 |  |  |  |  |  |  | is(scalar @{$t->{page_stack}}, 0, "Can't pop beyond end of page stack"); | 
| 359 |  |  |  |  |  |  |  | 
| 360 |  |  |  |  |  |  |  | 
| 361 |  |  |  |  |  |  | =end testing | 
| 362 |  |  |  |  |  |  |  | 
| 363 |  |  |  |  |  |  | =cut | 
| 364 |  |  |  |  |  |  |  | 
| 365 |  |  |  |  |  |  | sub push_page_stack { | 
| 366 | 0 |  |  | 0 | 1 | 0 | my $self = shift; | 
| 367 | 0 |  |  |  |  | 0 | $self->{page_stack} = [ @{$self->{page_stack}}, clone($self)]; | 
|  | 0 |  |  |  |  | 0 |  | 
| 368 | 0 |  |  |  |  | 0 | return 1; | 
| 369 |  |  |  |  |  |  | } | 
| 370 |  |  |  |  |  |  |  | 
| 371 |  |  |  |  |  |  | sub pop_page_stack { | 
| 372 | 2 |  |  | 2 | 1 | 3 | my $self = shift; | 
| 373 | 2 | 50 |  |  |  | 3 | if (@{$self->{page_stack}}) { | 
|  | 2 |  |  |  |  | 7 |  | 
| 374 | 0 |  |  |  |  | 0 | $self = pop @{$self->{page_stack}}; | 
|  | 0 |  |  |  |  | 0 |  | 
| 375 | 0 |  |  |  |  | 0 | bless $self; | 
| 376 |  |  |  |  |  |  | } | 
| 377 | 2 |  |  |  |  | 6 | return 1; | 
| 378 |  |  |  |  |  |  | } | 
| 379 |  |  |  |  |  |  |  | 
| 380 |  |  |  |  |  |  | =head2 extract_links() | 
| 381 |  |  |  |  |  |  |  | 
| 382 |  |  |  |  |  |  | Extracts HREF links from the content of a webpage. | 
| 383 |  |  |  |  |  |  |  | 
| 384 |  |  |  |  |  |  | =cut | 
| 385 |  |  |  |  |  |  |  | 
| 386 |  |  |  |  |  |  | sub extract_links { | 
| 387 | 3 |  |  | 3 | 1 | 40 | my $self = shift; | 
| 388 | 3 |  |  |  |  | 18 | my $p = HTML::TokeParser->new(\$self->{content}); | 
| 389 | 3 |  |  |  |  | 503 | my @links; | 
| 390 |  |  |  |  |  |  |  | 
| 391 | 3 |  |  |  |  | 14 | while (my $token = $p->get_tag("a", "frame")) { | 
| 392 | 3 | 50 |  |  |  | 937 | my $url = $token->[0] eq 'a' ? $token->[1]{href} : $token->[1]{src}; | 
| 393 | 3 | 50 |  |  |  | 13 | next unless defined $url;   # probably just a name link | 
| 394 | 3 | 50 |  |  |  | 19 | my $text = $token->[0] eq 'a' ? | 
| 395 |  |  |  |  |  |  | $p->get_trimmed_text("/a"):$token->[1]{name}; | 
| 396 | 3 |  |  |  |  | 216 | push(@links, [$url => $text]); | 
| 397 |  |  |  |  |  |  | } | 
| 398 | 3 |  |  |  |  | 282 | return \@links; | 
| 399 |  |  |  |  |  |  | } | 
| 400 |  |  |  |  |  |  |  | 
| 401 |  |  |  |  |  |  | =head2 do_request() | 
| 402 |  |  |  |  |  |  |  | 
| 403 |  |  |  |  |  |  | Actually performs a request on the $self->{req} request object, and sets | 
| 404 |  |  |  |  |  |  | a bunch of attributes on $self. | 
| 405 |  |  |  |  |  |  |  | 
| 406 |  |  |  |  |  |  | =cut | 
| 407 |  |  |  |  |  |  |  | 
| 408 |  |  |  |  |  |  | sub do_request { | 
| 409 | 3 |  |  | 3 | 1 | 6 | my ($self) = @_; | 
| 410 | 3 |  |  |  |  | 14 | foreach my $h (keys %WWW::Automate::headers) { | 
| 411 | 0 |  |  |  |  | 0 | $self->{req}->header( $h => $WWW::Automate::headers{$h} ); | 
| 412 |  |  |  |  |  |  | } | 
| 413 | 3 |  |  |  |  | 22 | $self->{res}     = $self->request($self->{req}); | 
| 414 | 3 |  |  |  |  | 176416 | $self->{status}  = $self->{res}->code; | 
| 415 | 3 |  |  |  |  | 46 | $self->{base}    = $self->{res}->base; | 
| 416 | 3 |  | 50 |  |  | 1707 | $self->{ct}      = $self->{res}->content_type || ""; | 
| 417 | 3 |  |  |  |  | 156 | $self->{content} = $self->{res}->content; | 
| 418 |  |  |  |  |  |  |  | 
| 419 | 3 | 50 |  |  |  | 59 | if ($self->{ct} eq 'text/html') { | 
| 420 | 3 |  |  |  |  | 28 | $self->{forms} = [ HTML::Form->parse($self->{content}, $self->{res}->base) ]; | 
| 421 | 3 | 50 |  |  |  | 3424 | $self->{form}  = $self->{forms}->[0] if @{$self->{forms}}; | 
|  | 3 |  |  |  |  | 16 |  | 
| 422 | 3 |  |  |  |  | 16 | $self->{links} = $self->extract_links(); | 
| 423 |  |  |  |  |  |  | } | 
| 424 |  |  |  |  |  |  | } | 
| 425 |  |  |  |  |  |  |  | 
| 426 |  |  |  |  |  |  | sub isnumber { | 
| 427 | 4 |  |  | 4 | 0 | 6 | my $in = shift; | 
| 428 | 4 | 100 |  |  |  | 17 | if ($in =~ /^\d+$/) { | 
| 429 | 2 |  |  |  |  | 17 | return 1; | 
| 430 |  |  |  |  |  |  | } else { | 
| 431 | 2 |  |  |  |  | 4 | return 0; | 
| 432 |  |  |  |  |  |  | } | 
| 433 |  |  |  |  |  |  | } | 
| 434 |  |  |  |  |  |  |  | 
| 435 |  |  |  |  |  |  | =head1 BUGS | 
| 436 |  |  |  |  |  |  |  | 
| 437 |  |  |  |  |  |  | Please report any bugs via the system at http://rt.cpan.org/ | 
| 438 |  |  |  |  |  |  |  | 
| 439 |  |  |  |  |  |  | =head1 AUTHOR | 
| 440 |  |  |  |  |  |  |  | 
| 441 |  |  |  |  |  |  | Kirrily "Skud" Robert | 
| 442 |  |  |  |  |  |  |  | 
| 443 |  |  |  |  |  |  | =cut | 
| 444 |  |  |  |  |  |  |  | 
| 445 |  |  |  |  |  |  | 1; |