File Coverage

blib/lib/WWW/Mechanize/Pliant.pm
Criterion Covered Total %
statement 38 138 27.5
branch 4 38 10.5
condition 1 12 8.3
subroutine 12 27 44.4
pod 7 10 70.0
total 62 225 27.5


line stmt bran cond sub pod time code
1             package WWW::Mechanize::Pliant;
2 3     3   131002 use strict;
  3         7  
  3         134  
3 3     3   16 use warnings FATAL => 'all';
  3         6  
  3         140  
4 3     3   26 use base qw(WWW::Mechanize);
  3         5  
  3         4092  
5 3     3   557282 use HTML::Entities qw(decode_entities);
  3         7  
  3         3009  
6              
7             our $VERSION = 0.12;
8              
9             =head1 ABSTRACT
10              
11             WWW::Mechanize::Pliant - crawl Pliant-based websites
12              
13             =head1 SYNOPSIS
14              
15             Pliant:
16              
17              
18             var Str search
19             input "Find:" search
20             button "Go"
21             #...
22              
23             Or,
24              
25             var Str search
26             input "Find:" search
27             icon "images/go.png" help "Go"
28             #...
29              
30             Mechanize code, for both cases:
31              
32             $mech = WWW::Mechanize::Pliant->new(cookie_jar => {});
33             $mech->get("http://mypliantsite.com");
34             $mech->field("search", "Beads Game");
35             $mech->click("Go");
36              
37             =head1 DETAILS
38              
39             At the moment, three methods of WWW::Mechanize have been customized
40             for Pliant specific operation: get(), field(), and click().
41             Instead of string names, they receive regular expressions as arguments.
42              
43             =cut
44              
45             sub decoded_content {
46 0     0 0 0 my ($self) = @_;
47 0         0 return decode_entities($self->content);
48             }
49              
50             sub postprocess {
51 1     1 0 4 my ($self) = @_;
52 1 50       10 if ($self->content =~ m{You should select this link to get the right page}) {
    50          
53             #print STDERR "following link $1\n";
54 0         0 $self->follow_link(url => $1);
55 0         0 return 1;
56             } elsif ($self->content =~ m{If your browser is not smart enough to switch back automatically when the computation is over, then you'll have to press the Back button (\d+) time}) {
57 0         0 my $num_back = $1;
58 0         0 $self->back() for (1..$num_back);
59 0         0 $self->reload();
60             }
61 1         102 return 0;
62             }
63              
64             sub get {
65 1     1 1 17285 my ($self, @args) = @_;
66 1         10 my $retval = $self->SUPER::get(@args);
67 1 50       31718 return unless $retval;
68 1   33     6 return $self->postprocess || $retval;
69             }
70              
71             sub follow_link {
72 0     0 1 0 my ($self, @args) = @_;
73 0         0 my $retval = $self->SUPER::follow_link(@args);
74 0 0       0 return unless $retval;
75 0   0     0 return $self->postprocess || $retval;
76             }
77              
78             sub submit {
79 0     0 1 0 my ($self, @args) = @_;
80 0         0 my $retval = $self->SUPER::submit(@args);
81 0 0       0 return unless $retval;
82 0   0     0 return $self->postprocess || $retval;
83             }
84              
85             sub do_operation {
86 0     0 0 0 my ($self, $regex, $func, @args) = @_;
87 0         0 my $retval = 0;
88 0 0       0 if (my $name = $self->pliant_form->find_field($regex) ) {
89 0         0 $self->form_name('pliant');
90 0         0 my $f = "SUPER::$func";
91 0         0 $self->$f($name, @args);
92 0         0 $retval = 1;
93             }
94 0         0 return $retval;
95             }
96              
97             =over
98              
99             =item field(pattern, value)
100              
101             This is the method that should be used to set the fields in the form.
102              
103             $form->field('email', 'john@somedomain.com');
104             $form->field(qr{payment_data.*?card_number}, '4444222233331111');
105             ...
106             $form->click("Submit Info");
107              
108             =back
109              
110             =cut
111              
112             sub field {
113 0     0 1 0 my ($self, $name, $value) = @_;
114 0         0 return $self->do_operation($name, "field", $value);
115             }
116              
117              
118             =over
119              
120             =item click(PATTERN)
121              
122             This will click on an image button or on a button. It will try to find
123             the button using these two regular expressions against the content,
124              
125             try1: qr{title="PATTERN"\s+onClick="button_pressed\('(.*?)'\)"}
126             try2: qr{name="(button.*?)"\s+value="PATTERN"}
127              
128             The first attempt is to find an image button with PATTERN in the title field.
129             The second attempt is to find a plain button with PATTERN in its caption.
130              
131             $form->click('Next');
132             $form->click('Buy now');
133              
134             Since PATTERN is a regular expression, if the name of the button has parenthesis,
135             you need to escape them:
136              
137             $form->click(qr{delete Greeting Card \(New Baby\)});
138            
139             =back
140              
141             =cut
142              
143             sub click {
144 0     0 1 0 my ($self, $regex) = @_;
145 0         0 my $retval = 0;
146 0         0 my $content = decode_entities($self->content);
147 0 0       0 if ($content =~ m{title="$regex"\s+onClick="button_pressed\('(.*?)'\)"}) {
    0          
148 0         0 $retval = $self->pliant_click($1);
149 0         0 $self->pliant_form->reinit;
150             } elsif ($content =~ m{name="(button.*?)"\s+value="$regex"}) {
151 0         0 $retval = $self->pliant_click($1);
152 0         0 $self->pliant_form->reinit;
153             }
154 0 0       0 return unless $retval;
155 0   0     0 return $self->postprocess || $retval;
156             }
157              
158             =head2 LOW LEVEL METHODS
159              
160             =over
161              
162             =item pliant_click(context)
163              
164             This is a low-level method, that you will not need to use directly.
165              
166             Context argument is something like "button*0*0..." which is usually an argument
167             to onClick event for image buttons or names of plain buttons. For example,
168             consider this pliant code:
169              
170             icon "images/next.png" help "Next"
171             ...
172              
173             To click on it, do this
174              
175             if ($html =~ m{title="Next"\s+onClick="button_pressed\('(.*?)'\)"}) {
176             $retval = $self->{mech}->pliant_click($1);
177             }
178              
179             =back
180              
181             =cut
182              
183             sub pliant_click {
184 0     0 1 0 my ($self, $context) = @_;
185 0         0 my $form = $self->form_name('pliant');
186 0         0 my $request = $form->click;
187 0         0 my $content = $request->content;
188 0         0 $content =~ s/_=&//;
189 0         0 my @data = split '&', $content;
190 0         0 my $found_button;
191 0         0 foreach (@data) {
192 0 0       0 if (/button/) {
    0          
    0          
193 0         0 $found_button++;
194 0         0 $_ = "$context=";
195             } elsif ( /_pliant_x/ ) {
196 0         0 $_ = "_pliant_x=0";
197             } elsif ( /_pliant_y/ ) {
198 0         0 $_ = "_pliant_y=0";
199             }
200             }
201 0 0       0 push @data, $context.'=' unless $found_button;
202 0         0 $content = join '&', @data;
203 0         0 $content =~ s{&%2F}{&data%2F}g;
204             #print "request content: $content\n";
205 0         0 $request->header('Content-Length', length($content));
206 0         0 $request->content($content);
207 0         0 return $self->request($request);
208             }
209              
210             =over
211              
212             =item pliant_form()
213              
214             Low-level method. Don't use.
215             Fetches WWW::Mechanize::Pliant::Form object associated with current page.
216              
217             =cut
218              
219             sub pliant_form {
220 1     1 1 7 my ($self) = @_;
221 1 50       5 if (!$self->{pliant_form}) {
222 1         10 $self->{pliant_form} = WWW::Mechanize::Pliant::Form->new($self);
223             }
224 0         0 $self->{pliant_form}->reinit;
225 0         0 return $self->{pliant_form};
226             }
227              
228             =back
229              
230             =head2 WWW::Mechanize::Pliant::Form
231              
232             This helper class does some of the dirty work of locating pliant
233             fields on the pliant page. You shouldn't use it, and its documented
234             here for backward compatibility and completeness.
235              
236             =cut
237              
238             package WWW::Mechanize::Pliant::Form;
239 3     3   19 use strict;
  3         6  
  3         135  
240 3     3   14 use warnings FATAL => 'all';
  3         7  
  3         139  
241 3     3   14 use HTML::Entities qw(decode_entities);
  3         6  
  3         2382  
242              
243             =over
244              
245             =item new(mech)
246              
247             The Form object works hand in hand with corresponding mechanize object.
248              
249             =cut
250              
251             sub new {
252 1     1   3 my ($class, $mech) = @_;
253 1         3 my $self = {};
254 1         3 $self->{mech} = $mech;
255 1         3 bless $self, $class;
256 1         5 $self->reinit;
257 0         0 return $self;
258             }
259              
260             =item reinit()
261              
262             This method should be called if the page in the associated mechanize object
263             has changed. It is automatically called at the end of click() routine,
264             so you will most likely never need to call this directly.
265              
266             =cut
267              
268             sub reinit {
269 1     1   3 my ($self) = @_;
270 1         274 $self->{fields} = [ $self->{mech}->form('pliant')->param ];
271             }
272              
273             =item find_field(pattern)
274              
275             Tries to find a field in the form object, given a regex.
276             This doesn't include search over image buttons or standard buttons.
277             If found returns full name of the field (with all the pliant mangling),
278             or undef if not found.
279              
280             =cut
281              
282             sub find_field {
283 0     0     my ($self, $regex) = @_;
284 0           my @inputs = $self->{mech}->form('pliant')->find_input($regex);
285 0           my @retval;
286 0 0         if ( @inputs ) {
287 0           @retval = map { $_->name } @inputs;
  0            
288             } else {
289 0           @retval = grep { /$regex/ } @{$self->{fields}};
  0            
  0            
290             }
291 0 0         return wantarray ? @retval : $retval[0];
292             }
293              
294             sub do_operation {
295 0     0     my ($self, $regex, $func, @args) = @_;
296 0           my $retval = 0;
297 0 0         if (my $name = $self->find_field($regex) ) {
298 0           $self->{mech}->form_name('pliant');
299 0           $self->{mech}->$func($name, @args);
300 0           $retval = 1;
301             }
302 0           return $retval;
303             }
304              
305             =item set_field(pattern, value)
306              
307             See WWW::Mechanize::Pliant::field(), usage is the same.
308            
309             =cut
310              
311             sub set_field {
312 0     0     my ($self, $regex, $value) = @_;
313 0           return $self->{mech}->field($regex, $value);
314             }
315              
316             sub find_checkbox_hidden_field {
317 0     0     my ($self, $regex) = @_;
318 0           foreach my $checkbox_name ( grep { ! /^dummy_/ } $self->find_field($regex) ) {
  0            
319 0 0         if ($self->find_field("dummy_$checkbox_name")) {
320 0           return $checkbox_name;
321             }
322             }
323 0           return undef;
324             }
325              
326             sub tick {
327 0     0     my ($self, $regex) = @_;
328 0           my $hidden_field = $self->find_checkbox_hidden_field($regex);
329 0           $self->{mech}->form_name('pliant');
330 0           $self->{mech}->tick("dummy_".$hidden_field, "on");
331 0           $self->{mech}->field($hidden_field, "true");
332 0           return 1;
333             }
334              
335             sub untick {
336 0     0     my ($self, $regex) = @_;
337 0           my $hidden_field = $self->find_checkbox_hidden_field($regex);
338 0           $self->{mech}->form_name('pliant');
339 0           $self->{mech}->untick("dummy_".$hidden_field, "on");
340 0           $self->{mech}->field($hidden_field, "false");
341 0           return 1;
342             }
343              
344             sub is_ticked {
345 0     0     my ($self, $regex) = @_;
346 0 0         if (my $name = $self->find_checkbox_hidden_field($regex) ) {
347 0           return $self->{mech}->form_name('pliant')->find_input($name)->value eq 'true';
348             }
349 0           return 0;
350             }
351              
352             =item click(PATTERN)
353              
354             See WWW::Mechanize::Pliant::click(), usage is the same.
355            
356             =cut
357              
358             sub click {
359 0     0     my ($self, $regex) = @_;
360 0           return $self->{mech}->click($regex);
361             }
362              
363             =pod
364              
365             =head1 AUTHOR
366              
367             Boris Reitman
368              
369             =head1 SEE ALSO
370              
371             WWW::Mechanize,
372             http://en.wikipedia.org/wiki/Pliant
373              
374             =cut
375              
376             1;