| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package WWW::Agent; | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 3 |  |  | 3 |  | 231115 | use strict; | 
|  | 3 |  |  |  |  | 8 |  | 
|  | 3 |  |  |  |  | 117 |  | 
| 4 | 3 |  |  | 3 |  | 18 | use warnings; | 
|  | 3 |  |  |  |  | 6 |  | 
|  | 3 |  |  |  |  | 101 |  | 
| 5 | 3 |  |  | 3 |  | 28 | use Data::Dumper; | 
|  | 3 |  |  |  |  | 6 |  | 
|  | 3 |  |  |  |  | 171 |  | 
| 6 | 3 |  |  | 3 |  | 1009 | use POE; | 
|  | 3 |  |  |  |  | 59336 |  | 
|  | 3 |  |  |  |  | 25 |  | 
| 7 |  |  |  |  |  |  |  | 
| 8 |  |  |  |  |  |  | =pod | 
| 9 |  |  |  |  |  |  |  | 
| 10 |  |  |  |  |  |  | =head1 NAME | 
| 11 |  |  |  |  |  |  |  | 
| 12 |  |  |  |  |  |  | WWW::Agent - Abstract web browser | 
| 13 |  |  |  |  |  |  |  | 
| 14 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 15 |  |  |  |  |  |  |  | 
| 16 |  |  |  |  |  |  | use WWW::Agent | 
| 17 |  |  |  |  |  |  | use WWW::Agent::Plugins::Focus; | 
| 18 |  |  |  |  |  |  | ... | 
| 19 |  |  |  |  |  |  | $agent = new WWW::Agent (plugins => [ | 
| 20 |  |  |  |  |  |  | new WWW::Agent::Plugins::Focus, | 
| 21 |  |  |  |  |  |  | ... | 
| 22 |  |  |  |  |  |  | ]); | 
| 23 |  |  |  |  |  |  | $agent->run; | 
| 24 |  |  |  |  |  |  |  | 
| 25 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 26 |  |  |  |  |  |  |  | 
| 27 |  |  |  |  |  |  | The web agent is a minimalistic web browser, in that the only thing it | 
| 28 |  |  |  |  |  |  | supports is to request an object. For this purpose, it maintains a | 
| 29 |  |  |  |  |  |  | concept of I, similar to those found in most modern web | 
| 30 |  |  |  |  |  |  | browsers. A request will be done in the context of a particular tab. | 
| 31 |  |  |  |  |  |  | As a consequence, the agents can handle multiple requests, also | 
| 32 |  |  |  |  |  |  | concurrently. This is achieved by using L underneath. | 
| 33 |  |  |  |  |  |  |  | 
| 34 |  |  |  |  |  |  | As the agent is otherwise dumb, it is up to plugins to do something | 
| 35 |  |  |  |  |  |  | useful. The range of possible features which plugins can add is | 
| 36 |  |  |  |  |  |  | considerable: Plugins can take care off testing websites for correct | 
| 37 |  |  |  |  |  |  | behaviour, scraping web sites and lauching external function, | 
| 38 |  |  |  |  |  |  | spidering sites to analyze pages or link structures, etc. | 
| 39 |  |  |  |  |  |  |  | 
| 40 |  |  |  |  |  |  | =head2 Plugin Events | 
| 41 |  |  |  |  |  |  |  | 
| 42 |  |  |  |  |  |  | See L. | 
| 43 |  |  |  |  |  |  |  | 
| 44 |  |  |  |  |  |  | =head1 INTERFACE | 
| 45 |  |  |  |  |  |  |  | 
| 46 |  |  |  |  |  |  | =head2 Constructor | 
| 47 |  |  |  |  |  |  |  | 
| 48 |  |  |  |  |  |  | The constructor expects a hash with the following key/value pairs: | 
| 49 |  |  |  |  |  |  |  | 
| 50 |  |  |  |  |  |  | =over | 
| 51 |  |  |  |  |  |  |  | 
| 52 |  |  |  |  |  |  | =item C (string): | 
| 53 |  |  |  |  |  |  |  | 
| 54 |  |  |  |  |  |  | This is currently ignored. | 
| 55 |  |  |  |  |  |  |  | 
| 56 |  |  |  |  |  |  | =item C (list reference, optional): | 
| 57 |  |  |  |  |  |  |  | 
| 58 |  |  |  |  |  |  | Any number of plugins can be loaded into an agent. Each plugin must | 
| 59 |  |  |  |  |  |  | be an object (instantiated from the appropriate class). | 
| 60 |  |  |  |  |  |  |  | 
| 61 |  |  |  |  |  |  | The sequence of plugins in the list is significant as two or more | 
| 62 |  |  |  |  |  |  | plugins might register for one and the same event. The execution of | 
| 63 |  |  |  |  |  |  | the individual handler is organized according to the list. | 
| 64 |  |  |  |  |  |  |  | 
| 65 |  |  |  |  |  |  | =item C (L object, optional): | 
| 66 |  |  |  |  |  |  |  | 
| 67 |  |  |  |  |  |  | This object will be used to launch requests. Obviously any subclass | 
| 68 |  |  |  |  |  |  | can be used, say, for providing special headers or one providing | 
| 69 |  |  |  |  |  |  | additional caching. | 
| 70 |  |  |  |  |  |  |  | 
| 71 |  |  |  |  |  |  | If no LWP::UserAgent object is passed in, the a generic one will be | 
| 72 |  |  |  |  |  |  | used. | 
| 73 |  |  |  |  |  |  |  | 
| 74 |  |  |  |  |  |  | =back | 
| 75 |  |  |  |  |  |  |  | 
| 76 |  |  |  |  |  |  | =cut | 
| 77 |  |  |  |  |  |  |  | 
| 78 |  |  |  |  |  |  | sub _filter { | 
| 79 | 8 |  |  | 8 |  | 11 | my $kernel   = shift; | 
| 80 | 8 |  |  |  |  | 12 | my $heap     = shift; | 
| 81 | 8 |  |  |  |  | 14 | my $policies = shift; | 
| 82 | 8 |  |  |  |  | 10 | my $tab      = shift; | 
| 83 | 8 |  |  |  |  | 14 | my $value    = shift; | 
| 84 |  |  |  |  |  |  |  | 
| 85 |  |  |  |  |  |  | #warn "policies ".Dumper $policies; | 
| 86 |  |  |  |  |  |  |  | 
| 87 | 8 |  |  |  |  | 35 | foreach my $p (@$policies) { | 
| 88 |  |  |  |  |  |  | #warn " in _compute before one code value=$value"; | 
| 89 | 4 |  |  |  |  | 10 | $value = &{$p} ($kernel, $heap, $tab, $value, @_); | 
|  | 4 |  |  |  |  | 18 |  | 
| 90 |  |  |  |  |  |  | } | 
| 91 |  |  |  |  |  |  | #warn " in _compute $value"; | 
| 92 | 8 |  |  |  |  | 99 | return $value; | 
| 93 |  |  |  |  |  |  | } | 
| 94 |  |  |  |  |  |  |  | 
| 95 |  |  |  |  |  |  | sub _ok { | 
| 96 | 11 |  |  | 11 |  | 194 | my $kernel   = shift; | 
| 97 | 11 |  |  |  |  | 22 | my $heap     = shift; | 
| 98 | 11 |  |  |  |  | 18 | my $policies = shift; | 
| 99 |  |  |  |  |  |  |  | 
| 100 | 11 | 50 |  |  |  | 39 | return 1 unless $policies; | 
| 101 | 11 |  |  |  |  | 33 | foreach (@$policies) { | 
| 102 | 2 | 50 |  |  |  | 14 | &$_ ($kernel, $heap, @_) or die "not satisfied"; | 
| 103 |  |  |  |  |  |  | } | 
| 104 | 11 |  |  |  |  | 10661 | return 1; | 
| 105 |  |  |  |  |  |  | } | 
| 106 |  |  |  |  |  |  |  | 
| 107 |  |  |  |  |  |  | sub new { | 
| 108 | 3 |  |  | 3 | 0 | 2493 | my $class   = shift; | 
| 109 | 3 |  |  |  |  | 13 | my %options = @_; | 
| 110 | 3 |  |  |  |  | 11 | my $self    = bless {}, $class; | 
| 111 |  |  |  |  |  |  |  | 
| 112 | 3 |  | 50 |  |  | 40 | $self->{name}    = delete $options{name}    || 'agent'; | 
| 113 | 3 |  | 100 |  |  | 20 | $self->{plugins} = delete $options{plugins} || []; | 
| 114 |  |  |  |  |  |  |  | 
| 115 | 3 |  |  | 3 |  | 156169 | use LWP::UserAgent; | 
|  | 3 |  |  |  |  | 69506 |  | 
|  | 3 |  |  |  |  | 3971 |  | 
| 116 | 3 |  | 33 |  |  | 26 | $self->{ua}      = delete $options{ua}      || LWP::UserAgent->new; | 
| 117 |  |  |  |  |  |  |  | 
| 118 |  |  |  |  |  |  | POE::Session->create ( | 
| 119 |  |  |  |  |  |  | inline_states => { | 
| 120 |  |  |  |  |  |  | _start   => sub { | 
| 121 | 3 |  |  | 3 |  | 1344 | my ($kernel, $heap) = @_[KERNEL, HEAP]; | 
| 122 | 3 |  |  |  |  | 14 | my ($plugins)       = $_[ARG0]; | 
| 123 |  |  |  |  |  |  |  | 
| 124 | 3 |  |  |  |  | 15 | $kernel->alias_set ('agent'); | 
| 125 | 3 |  |  |  |  | 169 | $heap->{ua}       = $self->{ua}; | 
| 126 |  |  |  |  |  |  |  | 
| 127 | 3 |  |  |  |  | 29 | $heap->{policies} = { | 
| 128 |  |  |  |  |  |  | init                   => [], | 
| 129 |  |  |  |  |  |  | cycle_consider         => [], | 
| 130 |  |  |  |  |  |  | cycle_prepare_request  => [], | 
| 131 |  |  |  |  |  |  | #				      cycle_initiate_request => [], | 
| 132 |  |  |  |  |  |  | #				      cycle_analyze_response => [], | 
| 133 |  |  |  |  |  |  | cycle_pos_response     => [], | 
| 134 |  |  |  |  |  |  | cycle_neg_response     => [], | 
| 135 |  |  |  |  |  |  | cycle_complete         => [], | 
| 136 |  |  |  |  |  |  | }; | 
| 137 |  |  |  |  |  |  |  | 
| 138 | 3 |  |  |  |  | 9 | foreach my $plugin (@$plugins) { | 
| 139 | 2 |  |  |  |  | 14 | my $ns = $plugin->{namespace};                # reserving namespace | 
| 140 | 2 | 50 |  |  |  | 10 | die "duplicate namespace '$ns'" if $heap->{$ns}; | 
| 141 | 2 |  |  |  |  | 16 | $heap->{$ns} = {}; | 
| 142 |  |  |  |  |  |  |  | 
| 143 | 2 | 50 |  |  |  | 9 | if ($plugin->{depends}) { | 
| 144 | 0 |  |  |  |  | 0 | foreach (@{$plugin->{depends}}) { | 
|  | 0 |  |  |  |  | 0 |  | 
| 145 | 0 | 0 |  |  |  | 0 | die "plugin '$ns' depends on other plugin '$_' but that has not been loaded" unless $heap->{$_}; | 
| 146 |  |  |  |  |  |  | } | 
| 147 |  |  |  |  |  |  | } | 
| 148 |  |  |  |  |  |  |  | 
| 149 | 2 |  |  |  |  | 4 | foreach my $policy_group (keys %{$plugin->{hooks}}) { | 
|  | 2 |  |  |  |  | 8 |  | 
| 150 | 6 |  |  |  |  | 32 | my $policy   = $plugin->{hooks}->{$policy_group}; | 
| 151 |  |  |  |  |  |  |  | 
| 152 | 6 | 100 |  |  |  | 19 | if ($heap->{policies}->{$policy_group}) { # policy group already exists | 
| 153 | 4 |  |  |  |  | 6 | push @{$heap->{policies}->{$policy_group}}, $policy; | 
|  | 4 |  |  |  |  | 14 |  | 
| 154 |  |  |  |  |  |  | } else {                                  # no policy group existed, lets make one | 
| 155 | 2 |  |  |  |  | 20 | $kernel->state ( $policy_group, $policy ); | 
| 156 |  |  |  |  |  |  | } | 
| 157 |  |  |  |  |  |  | } | 
| 158 |  |  |  |  |  |  | } | 
| 159 |  |  |  |  |  |  |  | 
| 160 | 3 |  |  |  |  | 35 | _ok ($kernel, $heap, $heap->{policies}->{init}); | 
| 161 |  |  |  |  |  |  | }, | 
| 162 |  |  |  |  |  |  |  | 
| 163 |  |  |  |  |  |  | cycle_start  => sub { | 
| 164 | 4 |  |  | 4 |  | 1377 | my ($kernel, $heap) = @_[KERNEL, HEAP]; | 
| 165 | 4 |  |  |  |  | 18 | my ($tab, $request) = @_[ARG0, ARG1]; | 
| 166 | 4 |  |  |  |  | 25 | $heap->{tabs}->{$tab}->{request} = $request; | 
| 167 | 4 |  |  |  |  | 21 | $kernel->yield ('cycle_consider', $tab); | 
| 168 |  |  |  |  |  |  | }, | 
| 169 |  |  |  |  |  |  | cycle_consider => sub { | 
| 170 | 4 |  |  | 4 |  | 705 | my ($kernel, $heap) = @_[KERNEL, HEAP]; | 
| 171 | 4 |  |  |  |  | 10 | my ($tab)           = $_[ARG0]; | 
| 172 |  |  |  |  |  |  |  | 
| 173 | 4 | 50 |  |  |  | 26 | if (_ok ($kernel, $heap, $heap->{policies}->{cycle_consider}, $tab, $heap->{tabs}->{$tab}->{request})) { | 
| 174 | 4 |  |  |  |  | 16 | $kernel->yield ('cycle_prepare_request', $tab); | 
| 175 |  |  |  |  |  |  | } else { | 
| 176 | 0 |  |  |  |  | 0 | $kernel->yield ('cycle_complete', $tab); | 
| 177 |  |  |  |  |  |  | } | 
| 178 |  |  |  |  |  |  | }, | 
| 179 |  |  |  |  |  |  | cycle_prepare_request => sub { | 
| 180 | 4 |  |  | 4 |  | 823 | my ($kernel, $heap) = @_[KERNEL, HEAP]; | 
| 181 | 4 |  |  |  |  | 10 | my ($tab)           = $_[ARG0]; | 
| 182 | 4 |  |  |  |  | 10 | my $htab            = $heap->{tabs}->{$tab}; | 
| 183 |  |  |  |  |  |  |  | 
| 184 | 4 |  |  |  |  | 42 | $htab->{request} = _filter ($kernel, $heap, $heap->{policies}->{cycle_prepare_request}, $tab, $htab->{request}); | 
| 185 | 4 |  |  |  |  | 15 | $kernel->yield ('cycle_initiate_request', $tab); | 
| 186 |  |  |  |  |  |  | }, | 
| 187 |  |  |  |  |  |  | cycle_initiate_request => sub { | 
| 188 | 4 |  |  | 4 |  | 831 | my ($kernel, $heap) = @_[KERNEL, HEAP]; | 
| 189 | 4 |  |  |  |  | 11 | my ($tab)           = $_[ARG0]; | 
| 190 | 4 |  |  |  |  | 11 | my $htab            = $heap->{tabs}->{$tab}; | 
| 191 |  |  |  |  |  |  |  | 
| 192 | 4 |  |  |  |  | 59 | $htab->{response}   = $heap->{ua}->request ($htab->{request}); | 
| 193 | 4 |  |  |  |  | 689 | $kernel->yield ('cycle_analyze_response', $tab); | 
| 194 |  |  |  |  |  |  | }, | 
| 195 |  |  |  |  |  |  | cycle_analyze_response => sub { | 
| 196 | 4 |  |  | 4 |  | 747 | my ($kernel, $heap) = @_[KERNEL, HEAP]; | 
| 197 | 4 |  |  |  |  | 10 | my ($tab)           = $_[ARG0]; | 
| 198 | 4 |  |  |  |  | 14 | my $htab            = $heap->{tabs}->{$tab}; | 
| 199 |  |  |  |  |  |  |  | 
| 200 | 4 | 50 |  |  |  | 22 | if ($htab->{response}->is_success) { | 
| 201 |  |  |  |  |  |  | #				      $htab->{current_url} = $htab->{response}->request->uri; | 
| 202 | 4 |  |  |  |  | 47 | $kernel->yield ('cycle_pos_response', $tab); | 
| 203 |  |  |  |  |  |  | } else { | 
| 204 | 0 |  |  |  |  | 0 | $kernel->yield ('cycle_neg_response', $tab); | 
| 205 |  |  |  |  |  |  | } | 
| 206 |  |  |  |  |  |  | }, | 
| 207 |  |  |  |  |  |  | cycle_pos_response => sub { | 
| 208 | 4 |  |  | 4 |  | 1185 | my ($kernel, $heap) = @_[KERNEL, HEAP]; | 
| 209 | 4 |  |  |  |  | 9 | my ($tab)           = $_[ARG0]; | 
| 210 | 4 |  |  |  |  | 12 | my $htab            = $heap->{tabs}->{$tab}; | 
| 211 |  |  |  |  |  |  |  | 
| 212 | 4 |  |  |  |  | 19 | $htab->{response} = _filter ($kernel, $heap, $heap->{policies}->{cycle_pos_response}, $tab, $htab->{response}); | 
| 213 | 4 |  |  |  |  | 15 | $kernel->yield ('cycle_complete', $tab); | 
| 214 |  |  |  |  |  |  | }, | 
| 215 |  |  |  |  |  |  | cycle_neg_response => sub { | 
| 216 | 0 |  |  | 0 |  | 0 | my ($kernel, $heap) = @_[KERNEL, HEAP]; | 
| 217 | 0 |  |  |  |  | 0 | my ($tab)           = $_[ARG0]; | 
| 218 | 0 |  |  |  |  | 0 | my $htab            = $heap->{tabs}->{$tab}; | 
| 219 |  |  |  |  |  |  |  | 
| 220 | 0 |  |  |  |  | 0 | $htab->{response} = _filter ($kernel, $heap, $heap->{policies}->{cycle_neg_response}, $tab, $htab->{response}); | 
| 221 | 0 |  |  |  |  | 0 | $kernel->yield ('cycle_complete', $tab); | 
| 222 |  |  |  |  |  |  | }, | 
| 223 |  |  |  |  |  |  | cycle_complete => sub { | 
| 224 | 4 |  |  | 4 |  | 1263 | my ($kernel, $heap) = @_[KERNEL, HEAP]; | 
| 225 | 4 |  |  |  |  | 9 | my ($tab)           = $_[ARG0]; | 
| 226 |  |  |  |  |  |  |  | 
| 227 | 4 |  |  |  |  | 19 | _ok ($kernel, $heap, $heap->{policies}->{cycle_complete}, $tab); | 
| 228 |  |  |  |  |  |  | }, | 
| 229 |  |  |  |  |  |  | }, | 
| 230 | 3 |  |  |  |  | 144 | args => [ $self->{plugins} ], | 
| 231 |  |  |  |  |  |  | ); | 
| 232 | 3 |  |  |  |  | 480 | return $self; | 
| 233 |  |  |  |  |  |  | } | 
| 234 |  |  |  |  |  |  |  | 
| 235 |  |  |  |  |  |  | =pod | 
| 236 |  |  |  |  |  |  |  | 
| 237 |  |  |  |  |  |  | =head2 Methods | 
| 238 |  |  |  |  |  |  |  | 
| 239 |  |  |  |  |  |  | =over | 
| 240 |  |  |  |  |  |  |  | 
| 241 |  |  |  |  |  |  | =item C (no parameters) | 
| 242 |  |  |  |  |  |  |  | 
| 243 |  |  |  |  |  |  | This method makes the agent run and do whatever it is told to do. If | 
| 244 |  |  |  |  |  |  | you have not posted any requests to C before that, then | 
| 245 |  |  |  |  |  |  | the agent will immediately terminate. | 
| 246 |  |  |  |  |  |  |  | 
| 247 |  |  |  |  |  |  | Consequently it is either your responsibility to task the agent with | 
| 248 |  |  |  |  |  |  | requests, or the responsibility of specific plugins to do that.  One | 
| 249 |  |  |  |  |  |  | other option is to set up another L session which posts events to | 
| 250 |  |  |  |  |  |  | the agent (it's POE name is C, btw). | 
| 251 |  |  |  |  |  |  |  | 
| 252 |  |  |  |  |  |  | Example: | 
| 253 |  |  |  |  |  |  |  | 
| 254 |  |  |  |  |  |  | my $a = new WWW::Agent (....); | 
| 255 |  |  |  |  |  |  | use POE; | 
| 256 |  |  |  |  |  |  | POE::Kernel->post ('agent', 'cycle_start', 'new_tab', 'http://www.example.org/'); | 
| 257 |  |  |  |  |  |  | $a->run;  # fetch it and ... that's it | 
| 258 |  |  |  |  |  |  |  | 
| 259 |  |  |  |  |  |  | =cut | 
| 260 |  |  |  |  |  |  |  | 
| 261 |  |  |  |  |  |  | sub run { | 
| 262 | 3 |  |  | 3 | 1 | 14779 | my $self = shift; | 
| 263 | 3 |  |  |  |  | 20 | POE::Kernel->run(); | 
| 264 |  |  |  |  |  |  | } | 
| 265 |  |  |  |  |  |  |  | 
| 266 |  |  |  |  |  |  | =pod | 
| 267 |  |  |  |  |  |  |  | 
| 268 |  |  |  |  |  |  | =back | 
| 269 |  |  |  |  |  |  |  | 
| 270 |  |  |  |  |  |  | =head1 SEE ALSO | 
| 271 |  |  |  |  |  |  |  | 
| 272 |  |  |  |  |  |  | L, L, L | 
| 273 |  |  |  |  |  |  |  | 
| 274 |  |  |  |  |  |  | =head1 AUTHOR | 
| 275 |  |  |  |  |  |  |  | 
| 276 |  |  |  |  |  |  | Robert Barta, Erho@bigpond.net.auE | 
| 277 |  |  |  |  |  |  |  | 
| 278 |  |  |  |  |  |  | =head1 COPYRIGHT AND LICENSE | 
| 279 |  |  |  |  |  |  |  | 
| 280 |  |  |  |  |  |  | Copyright (C) 2005 by Robert Barta | 
| 281 |  |  |  |  |  |  |  | 
| 282 |  |  |  |  |  |  | This library is free software; you can redistribute it and/or modify | 
| 283 |  |  |  |  |  |  | it under the same terms as Perl itself, either Perl version 5.8.4 or, | 
| 284 |  |  |  |  |  |  | at your option, any later version of Perl 5 you may have available. | 
| 285 |  |  |  |  |  |  |  | 
| 286 |  |  |  |  |  |  | =cut | 
| 287 |  |  |  |  |  |  |  | 
| 288 |  |  |  |  |  |  | our $VERSION  = '0.03'; | 
| 289 |  |  |  |  |  |  | our $REVISION = '$Id: Agent.pm,v 1.3 2005/03/19 10:01:15 rho Exp $'; | 
| 290 |  |  |  |  |  |  |  | 
| 291 |  |  |  |  |  |  | 1; | 
| 292 |  |  |  |  |  |  |  | 
| 293 |  |  |  |  |  |  |  | 
| 294 |  |  |  |  |  |  | __END__ |