line
stmt
bran
cond
sub
pod
time
code
1
package WWW::Mechanize::Firefox;
2
80
80
4556055
use 5.006; #weaken
80
981
3
80
80
517
use strict;
80
190
80
2454
4
80
80
48150
use Time::HiRes qw(sleep); # hires sleep()
80
115410
80
346
5
6
80
80
55401
use URI ();
80
532048
80
2193
7
80
80
560
use File::Basename qw(dirname);
80
164
80
7437
8
80
80
39927
use HTTP::Response ();
80
1744599
80
2757
9
80
80
40155
use HTML::Selector::XPath 'selector_to_xpath';
80
213333
80
5677
10
80
80
37059
use MIME::Base64 'decode_base64';
80
49597
80
6561
11
80
80
34951
use WWW::Mechanize::Link;
80
32564
80
2528
12
80
80
37458
use Firefox::Application;
80
282
80
2666
13
80
80
577
use MozRepl::RemoteObject ();
80
182
80
1205
14
80
80
404
use MozRepl::RemoteObject::Methods ();
80
175
80
1153
15
80
80
38315
use HTTP::Cookies::MozRepl ();
80
269
80
1772
16
80
80
38356
use HTTP::Request::Common ();
80
295853
80
2174
17
80
80
567
use Scalar::Util qw'blessed weaken';
80
222
80
4983
18
80
80
525
use Encode qw(encode decode);
80
188
80
3875
19
80
80
526
use Carp qw(carp croak );
80
201
80
72513
20
21
our $VERSION = '0.80';
22
our @CARP_NOT = ('MozRepl::RemoteObject',
23
'MozRepl::AnyEvent',
24
'MozRepl::RemoteObject::Instance'
25
); # we trust these blindly
26
27
=head1 NAME
28
29
WWW::Mechanize::Firefox - use Firefox as if it were WWW::Mechanize
30
31
=head1 SYNOPSIS
32
33
use WWW::Mechanize::Firefox;
34
my $mech = WWW::Mechanize::Firefox->new();
35
$mech->get('http://google.com');
36
37
$mech->eval_in_page('alert("Hello Firefox")');
38
my $png = $mech->content_as_png();
39
40
This module will let you automate Firefox through the
41
Mozrepl plugin. You need to have installed
42
that plugin in your Firefox.
43
44
For more examples see L.
45
46
=head1 IMPORTANT NOTICE
47
48
The Mozrepl plugin that this module uses no longer works due to key technologies
49
it depends on being retired from the Mozilla platform in November 2017.
50
51
According the github repo L, the last known compatible version is Firefox 54.
52
53
Therefore this module cannot be used on Firefox versions greather than 54.
54
55
=head1 CONSTRUCTOR and CONFIGURATION
56
57
=head2 C<< $mech->new( %args ) >>
58
59
use WWW::Mechanize::Firefox;
60
my $mech = WWW::Mechanize::Firefox->new();
61
62
Creates a new instance and connects it to Firefox.
63
64
Note that Firefox must have the C
65
extension installed and enabled.
66
67
The following options are recognized:
68
69
=over 4
70
71
=item *
72
73
C - regex for the title of the tab to reuse. If no matching tab is
74
found, the constructor dies.
75
76
If you pass in the string C, the currently
77
active tab will be used instead.
78
79
If you pass in a L instance, this will be used
80
as the new tab. This is convenient if you have an existing tab
81
in Firefox as object already, for example created through
82
LC<< ->addTab() >>.
83
84
=item *
85
86
C - will create a new tab if no existing tab matching
87
the criteria given in C can be found.
88
89
=item *
90
91
C - make the tab the active tab
92
93
=item *
94
95
C - name of the program to launch if we can't connect to it on
96
the first try.
97
98
=item *
99
100
C - an array reference of ids of subframes to include when
101
searching for elements on a page.
102
103
If you want to always search through all frames, just pass C<1>. This
104
is the default.
105
106
To prevent searching through frames, pass
107
108
frames => 0
109
110
To whitelist frames to be searched, pass the list
111
of frame selectors:
112
113
frames => ['#content_frame']
114
115
=item *
116
117
C - whether web failures converted are fatal Perl errors. See
118
the C accessor. True by default to make error checking easier.
119
120
To make errors non-fatal, pass
121
122
autodie => 0
123
124
in the constructor.
125
126
=item *
127
128
C - the name of the User Agent to use. This overrides
129
how Firefox identifies itself.
130
131
=item *
132
133
C - array reference to log levels, passed through to L
134
135
=item *
136
137
C - L buffer size, if the default of 1MB is not enough
138
139
=item *
140
141
C - the set of default Javascript events to listen for while
142
waiting for a reply. In fact, WWW::Mechanize::Firefox will almost always
143
wait until a 'DOMContentLoaded' or 'load' event. 'pagehide' events
144
will tell it for what frames to wait.
145
146
The default set is
147
148
'DOMContentLoaded','load',
149
'pageshow',
150
'pagehide',
151
'error','abort','stop',
152
153
=item *
154
155
C - a premade L
156
157
=item *
158
159
C - a premade L instance or a connection string
160
suitable for initializing one
161
162
=item *
163
164
C - whether to use the command queueing of L.
165
Default is 1.
166
167
=item *
168
169
C - whether to use native JSON encoder of Firefox
170
171
js_JSON => 'native', # force using the native JSON encoder
172
173
The default is to autodetect whether a native JSON encoder is available and
174
whether the transport is UTF-8 safe.
175
176
=item *
177
178
C - the events that are sent to an input field before its
179
value is changed. By default this is C<[focus]>.
180
181
=item *
182
183
C - the events that are sent to an input field after its
184
value is changed. By default this is C<[blur, change]>.
185
186
=back
187
188
=cut
189
190
sub new {
191
61
61
1
117241
my ($class, %args) = @_;
192
193
61
50
363
if (! ref $args{ app }) {
194
61
355
my @passthrough = qw(launch repl bufsize log use_queue js_JSON);
195
61
100
213
my %options = map { exists $args{ $_ } ? ($_ => delete $args{ $_ }) : () }
366
977
196
@passthrough;
197
61
577
$args{ app } = Firefox::Application->new(
198
%options
199
);
200
};
201
202
0
0
if (my $tabname = delete $args{ tab }) {
203
0
0
if (! ref $tabname) {
0
204
0
0
if ($tabname eq 'current') {
205
0
$args{ tab } = $args{ app }->selectedTab();
206
} else {
207
0
croak "Don't know what to do with tab '$tabname'. Did you mean qr{$tabname}?";
208
};
209
} elsif ('MozRepl::RemoteObject::Instance' eq ref $tabname) {
210
# Nothing to do - we already got a tab passed in
211
# Just put it back in place
212
0
$args{ tab } = $tabname;
213
} else {
214
0
($args{ tab }) = grep { $_->{title} =~ /$tabname/ }
215
0
$args{ app }->openTabs();
216
0
0
if (! $args{ tab }) {
217
0
0
if (! delete $args{ create }) {
218
0
croak "Couldn't find a tab matching /$tabname/";
219
} else {
220
# fall through into tab creation
221
};
222
} else {
223
0
$args{ tab } = $args{ tab }->{tab};
224
};
225
};
226
};
227
0
0
if (! $args{ tab }) {
228
0
0
my @autoclose = exists $args{ autoclose } ? (autoclose => $args{ autoclose }) : ();
229
0
$args{ tab } = $args{ app }->addTab( @autoclose );
230
0
my $body = $args{ tab }->MozRepl::RemoteObject::Methods::dive(qw[ linkedBrowser contentWindow document body ]);
231
0
$body->{innerHTML} = __PACKAGE__;
232
};
233
234
0
0
if (delete $args{ autoclose }) {
235
0
$args{ app }->autoclose_tab($args{ tab });
236
};
237
0
0
if (! exists $args{ autodie }) { $args{ autodie } = 1 };
0
238
239
$args{ events } ||= [
240
0
0
'DOMContentLoaded','load',
241
'pageshow', # Navigation from cache will use "pageshow"
242
#'pagehide',
243
'error','abort','stop',
244
];
245
0
0
$args{ on_event } ||= undef;
246
0
0
$args{ pre_value } ||= ['focus'];
247
0
0
$args{ post_value } ||= ['change','blur'];
248
0
0
if( ! exists $args{ frames }) {
249
0
0
$args{ frames } ||= 1; # we default to searching frames
250
};
251
252
die "No tab found"
253
0
0
unless $args{tab};
254
255
0
0
if (delete $args{ activate }) {
256
0
$args{ app }->activateTab( $args{ tab });
257
};
258
259
0
0
$args{ response } ||= undef;
260
0
0
$args{ current_form } ||= undef;
261
262
0
0
$args{ event_log } ||= [];
263
264
0
my $agent = delete $args{ agent };
265
266
0
my $self= bless \%args, $class;
267
268
0
$self->_initXpathResultTypes;
269
270
0
0
if( defined $agent ) {
271
0
$self->agent( $agent );
272
};
273
274
0
$self
275
};
276
277
sub DESTROY {
278
0
0
my ($self) = @_;
279
0
local $@;
280
0
0
if (my $app = delete $self->{ app }) {
281
0
%$self = (); # wipe out all references we keep
282
# but keep $app alive until we can dispose of it
283
# as the last thing, now:
284
0
$app = undef;
285
};
286
}
287
288
=head2 C<< $mech->agent( $product_id ); >>
289
290
$mech->agent('wonderbot/JS 1.0');
291
292
Set the product token that is used to identify the user agent on the network.
293
The agent value is sent as the "User-Agent" header in the requests. The default
294
is whatever Firefox uses.
295
296
To reset the user agent to the Firefox default, pass an empty string:
297
298
$mech->agent('');
299
300
=cut
301
302
sub agent {
303
0
0
1
my ($self,$name) = @_;
304
0
0
if( defined $name ) {
0
305
0
$self->add_header('User-Agent',$name);
306
} elsif( $name eq '' ) {
307
0
$self->delete_header('User-Agent');
308
};
309
};
310
311
=head2 C<< $mech->autodie( [$state] ) >>
312
313
$mech->autodie(0);
314
315
Accessor to get/set whether warnings become fatal.
316
317
=cut
318
319
0
0
0
1
sub autodie { $_[0]->{autodie} = $_[1] if @_ == 2; $_[0]->{autodie} }
0
320
321
=head2 C<< $mech->events() >>
322
323
$mech->events( ['load'] );
324
325
Sets or gets the set of Javascript events that WWW::Mechanize::Firefox
326
will wait for after requesting a new page. Returns an array reference.
327
328
Changing the set of events will most likely make WWW::Mechanize::Firefox
329
stall while waiting for a response.
330
331
This method is special to WWW::Mechanize::Firefox.
332
333
=cut
334
335
0
0
0
1
sub events { $_[0]->{events} = $_[1] if (@_ > 1); $_[0]->{events} };
0
336
337
=head2 C<< $mech->on_event() >>
338
339
$mech->on_event(1); # prints every page load event
340
341
# or give it a callback
342
$mech->on_event(sub { warn "Page loaded with $ev->{name} event" });
343
344
Gets/sets the notification handler for the Javascript event
345
that finished a page load. Set it to C<1> to output via C,
346
or a code reference to call it with the event.
347
348
This method is special to WWW::Mechanize::Firefox.
349
350
=cut
351
352
0
0
0
1
sub on_event { $_[0]->{on_event} = $_[1] if (@_ > 1); $_[0]->{on_event} };
0
353
354
=head2 C<< $mech->cookies() >>
355
356
my $cookie_jar = $mech->cookies();
357
358
Returns a L object that was initialized
359
from the live Firefox instance.
360
361
B C<< ->set_cookie >> is not yet implemented,
362
as is saving the cookie jar.
363
364
=cut
365
366
sub cookies {
367
0
0
1
return HTTP::Cookies::MozRepl->new(
368
repl => $_[0]->repl
369
)
370
}
371
372
=head1 JAVASCRIPT METHODS
373
374
=head2 C<< $mech->allow( %options ) >>
375
376
Enables or disables browser features for the current tab.
377
The following options are recognized:
378
379
=over 4
380
381
=item *
382
383
C - Whether to allow plugin execution.
384
385
=item *
386
387
C - Whether to allow Javascript execution.
388
389
=item *
390
391
C - Attribute stating if refresh based redirects can be allowed.
392
393
=item *
394
395
C, C - Attribute stating if it should allow subframes (framesets/iframes) or not.
396
397
=item *
398
399
C - Attribute stating whether or not images should be loaded.
400
401
=back
402
403
Options not listed remain unchanged.
404
405
=head3 Disable Javascript
406
407
$mech->allow( javascript => 0 );
408
409
=cut
410
411
80
80
728
use vars '%known_options';
80
213
80
433887
412
%known_options = (
413
'javascript' => 'allowJavascript',
414
'plugins' => 'allowPlugins',
415
'metaredirects' => 'allowMetaRedirects',
416
'subframes' => 'allowSubframes',
417
'frames' => 'allowSubframes',
418
'images' => 'allowImages',
419
);
420
421
sub allow {
422
0
0
1
my ($self,%options) = @_;
423
0
my $shell = $self->docshell;
424
0
for my $opt (sort keys %options) {
425
0
0
if (my $opt_js = $known_options{ $opt }) {
426
0
$shell->{$opt_js} = $options{ $opt };
427
} else {
428
0
carp "Unknown option '$opt_js' (ignored)";
429
};
430
};
431
};
432
433
=head2 C<< $mech->js_errors() >>
434
435
print $_->{message}
436
for $mech->js_errors();
437
438
An interface to the Javascript Error Console
439
440
Returns the list of errors in the JEC
441
442
Maybe this should be called C or
443
C instead.
444
445
=cut
446
447
sub js_console {
448
0
0
0
my ($self) = @_;
449
0
my $getConsoleService = $self->repl->declare(<<'JS');
450
function() {
451
return Components.classes["@mozilla.org/consoleservice;1"]
452
.getService(Components.interfaces.nsIConsoleService);
453
}
454
JS
455
0
$getConsoleService->()
456
}
457
458
sub js_errors {
459
0
0
1
my ($self,$page) = @_;
460
0
my $console = $self->js_console;
461
0
my $getErrorMessages = $self->repl->declare(<<'JS', 'list');
462
function (consoleService) {
463
var out = {};
464
consoleService.getMessageArray(out, {});
465
return out.value || []
466
};
467
JS
468
0
$getErrorMessages->($console);
469
}
470
471
=head2 C<< $mech->clear_js_errors() >>
472
473
$mech->clear_js_errors();
474
475
Clears all Javascript messages from the console
476
477
=cut
478
479
sub clear_js_errors {
480
0
0
1
my ($self,$page) = @_;
481
0
$self->js_console->reset;
482
483
};
484
485
=head2 C<< $mech->eval_in_page( $str [, $env [, $document]] ) >>
486
487
=head2 C<< $mech->eval( $str [, $env [, $document]] ) >>
488
489
my ($value, $type) = $mech->eval( '2+2' );
490
491
Evaluates the given Javascript fragment in the
492
context of the web page.
493
Returns a pair of value and Javascript type.
494
495
This allows access to variables and functions declared
496
"globally" on the web page.
497
498
The returned result needs to be treated with
499
extreme care because
500
it might lead to Javascript execution in the context of
501
your application instead of the context of the webpage.
502
This should be evident for functions and complex data
503
structures like objects. When working with results from
504
untrusted sources, you can only safely use simple
505
types like C.
506
507
If you want to modify the environment the code is run under,
508
pass in a hash reference as the second parameter. All keys
509
will be inserted into the C object as well as
510
C. Also, complex data structures are only
511
supported if they contain no objects.
512
If you need finer control, you'll have to
513
write the Javascript yourself.
514
515
This method is special to WWW::Mechanize::Firefox.
516
517
Also, using this method opens a potential B as
518
the returned values can be objects and using these objects
519
can execute malicious code in the context of the Firefox application.
520
521
=cut
522
523
sub eval_in_page {
524
0
0
1
my ($self,$str,$env,$doc,$window) = @_;
525
0
0
$env ||= {};
526
0
my $js_env = {};
527
0
0
$doc ||= $self->document;
528
529
# do a manual transfer of keys, to circumvent our stupid
530
# transformation routine:
531
0
0
if (keys %$env) {
532
0
$js_env = $self->repl->declare(<<'JS')->();
533
function () { return new Object }
534
JS
535
0
for my $k (keys %$env) {
536
0
$js_env->{$k} = $env->{$k};
537
};
538
};
539
540
0
my $eval_in_sandbox = $self->repl->declare(<<'JS', 'list');
541
function (w,d,str,env,caller,line) {
542
var unsafeWin = w.wrappedJSObject;
543
var safeWin = XPCNativeWrapper(unsafeWin);
544
var sandbox = Components.utils.Sandbox(safeWin);
545
sandbox.window = safeWin;
546
sandbox.document = d;
547
// Transfer the environment
548
for (var e in env) {
549
sandbox[e] = env[e]
550
sandbox.window[e] = env[e]
551
}
552
sandbox.__proto__ = unsafeWin;
553
554
var res = Components.utils.evalInSandbox(str, sandbox, "1.8",caller,line);
555
return [res,typeof(res)];
556
};
557
JS
558
0
0
$window ||= $self->tab->{linkedBrowser}->{contentWindow};
559
# Report errors from scope of caller
560
# This feels weirdly backwards here, but oh well:
561
#local @CARP_NOT = (ref $self->repl); # we trust this
562
563
0
my ($caller,$line) = (caller)[1,2];
564
565
0
$eval_in_sandbox->($window,$doc,$str,$js_env,$caller,$line);
566
};
567
*eval = \&eval_in_page;
568
569
=head2 C<< $mech->unsafe_page_property_access( ELEMENT ) >>
570
571
Allows you unsafe access to properties of the current page. Using
572
such properties is an incredibly bad idea.
573
574
This is why the function Cs. If you really want to use
575
this function, edit the source code.
576
577
=cut
578
579
sub unsafe_page_property_access {
580
0
0
1
my ($mech,$element) = @_;
581
0
die;
582
0
my $window = $mech->tab->{linkedBrowser}->{contentWindow};
583
0
my $unsafe = $window->{wrappedJSObject};
584
0
$unsafe->{$element}
585
};
586
587
=head1 UI METHODS
588
589
See also L for how to add more than one tab
590
and how to manipulate windows and tabs.
591
592
=head2 C<< $mech->application() >>
593
594
my $ff = $mech->application();
595
596
Returns the L object for manipulating
597
more parts of the Firefox UI and application.
598
599
=cut
600
601
0
0
1
sub application { $_[0]->{app} };
602
603
=head2 C<< $mech->autoclose_tab >>
604
605
$mech->autoclose_tab( 0 ); # keep tab open after program end
606
607
Set whether to close the tab associated with the instance.
608
609
=cut
610
611
sub autoclose_tab {
612
0
0
1
my $self = shift;
613
0
$self->application->autoclose_tab($self->tab, @_);
614
};
615
616
=head2 C<< $mech->tab() >>
617
618
Gets the object that represents the Firefox tab used by WWW::Mechanize::Firefox.
619
620
This method is special to WWW::Mechanize::Firefox.
621
622
=cut
623
624
0
0
1
sub tab { $_[0]->{tab} };
625
626
=head2 C<< $mech->make_progress_listener( %callbacks ) >>
627
628
my $eventlistener = $mech->progress_listener(
629
onStateChange => \&onStateChange,
630
);
631
632
Creates an unconnected C<< nsIWebProgressListener >> interface
633
which calls the Perl subroutines you pass in.
634
635
Returns a handle. Once the handle gets released, all callbacks will
636
get stopped. Also, all Perl callbacks will get deregistered from the
637
Javascript bridge, so make sure not to use the same callback
638
in different progress listeners at the same time.
639
The sender may still call your callbacks.
640
641
=cut
642
643
sub make_progress_listener {
644
0
0
1
my ($mech,%handlers) = @_;
645
0
my $NOTIFY_STATE = $mech->repl->constant('Components.interfaces.nsIWebProgress.NOTIFY_STATE_ALL')
646
+ $mech->repl->constant('Components.interfaces.nsIWebProgress.NOTIFY_STATUS')
647
;
648
0
my ($obj) = $mech->repl->expr('new Object');
649
0
for my $key (keys %handlers) {
650
0
$obj->{$key} = $handlers{$key};
651
};
652
#warn "Listener created";
653
654
0
my $mk_nsIWebProgressListener = $mech->repl->declare(<<'JS');
655
function (myListener) {
656
var callbacks = ["onStateChange",
657
"onLocationChange",
658
"onProgressChange",
659
"onStatusChange",
660
"onSecurityChange"
661
// ,"onProgressChange64"
662
// ,"onRefreshAttempted"
663
];
664
for (var h in callbacks) {
665
var e = callbacks[h];
666
if (! myListener[e]) {
667
myListener[e] = function(){}
668
} else {
669
// alert("Setting callback for " + e);
670
};
671
};
672
myListener.QueryInterface = function(aIID) {
673
if (aIID.equals(Components.interfaces.nsIWebProgressListener) ||
674
// aIID.equals(Components.interfaces.nsIWebProgressListener2) ||
675
aIID.equals(Components.interfaces.nsISupportsWeakReference) ||
676
aIID.equals(Components.interfaces.nsISupports))
677
return this;
678
throw Components.results.NS_NOINTERFACE;
679
};
680
return myListener
681
}
682
JS
683
684
# Declare it here so we don't close over $lsn!
685
my $release = sub {
686
0
0
0
$_[0]->bridge->remove_callback(values %handlers)
687
if $_[0]->bridge;
688
0
};
689
0
my $lsn = $mk_nsIWebProgressListener->($obj);
690
0
$lsn->__on_destroy($release);
691
0
$lsn
692
};
693
694
695
=head2 C<< $mech->progress_listener( $source, %callbacks ) >>
696
697
my $eventlistener = progress_listener(
698
$browser,
699
onLocationChange => \&onLocationChange,
700
);
701
702
Sets up the callbacks for the C<< nsIWebProgressListener >> interface
703
to be the Perl subroutines you pass in.
704
705
C< $source > needs to support C<.addProgressListener> and C<.removeProgressListener>.
706
707
Returns a handle. Once the handle gets released, all callbacks will
708
get stopped. Also, all Perl callbacks will get deregistered from the
709
Javascript bridge, so make sure not to use the same callback
710
in different progress listeners at the same time.
711
712
=cut
713
714
sub progress_listener {
715
0
0
1
my ($self,$source,%handlers) = @_;
716
717
0
my $lsn = $self->make_progress_listener(%handlers);
718
0
$lsn->{source} = $source;
719
720
0
$lsn->__release_action('if(self.source)try{self.source.removeProgressListener(self)}catch(e){}');
721
0
my $NOTIFY_STATE = $self->repl->constant('Components.interfaces.nsIWebProgress.NOTIFY_STATE_ALL')
722
+ $self->repl->constant('Components.interfaces.nsIWebProgress.NOTIFY_LOCATION')
723
+ $self->repl->constant('Components.interfaces.nsIWebProgress.NOTIFY_STATUS');
724
0
$source->addProgressListener($lsn,$NOTIFY_STATE);
725
0
$lsn
726
};
727
728
=head2 C<< $mech->repl() >>
729
730
my ($value,$type) = $mech->repl->expr('2+2');
731
732
Gets the L instance that is used.
733
734
This method is special to WWW::Mechanize::Firefox.
735
736
=cut
737
738
0
0
1
sub repl { $_[0]->application->repl };
739
740
=head2 C<< $mech->highlight_node( @nodes ) >>
741
742
my @links = $mech->selector('a');
743
$mech->highlight_node(@links);
744
745
Convenience method that marks all nodes in the arguments
746
with
747
748
background: red;
749
border: solid black 1px;
750
display: block; /* if the element was display: none before */
751
752
This is convenient if you need visual verification that you've
753
got the right nodes.
754
755
There currently is no way to restore the nodes to their original
756
visual state except reloading the page.
757
758
=cut
759
760
sub highlight_node {
761
0
0
1
my ($self,@nodes) = @_;
762
0
for (@nodes) {
763
0
my $style = $_->{style};
764
$style->{display} = 'block'
765
0
0
if $style->{display} eq 'none';
766
0
$style->{background} = 'red';
767
0
$style->{border} = 'solid black 1px;';
768
};
769
};
770
771
=head1 NAVIGATION METHODS
772
773
=head2 C<< $mech->get( $url, %options ) >>
774
775
$mech->get( $url, ':content_file' => $tempfile );
776
777
Retrieves the URL C into the tab.
778
779
It returns a faked L object for interface compatibility
780
with L.
781
782
Recognized options:
783
784
=over 4
785
786
=item *
787
788
C<< :content_file >> - filename to store the data in
789
790
=item *
791
792
C<< no_cache >> - if true, bypass the browser cache
793
794
=item *
795
796
C<< synchronize >> - wait until all elements have loaded
797
798
The default is to wait until all elements have loaded. You can switch
799
this off by passing
800
801
synchronize => 0
802
803
for example if you want to manually poll for an element that appears fairly
804
early during the load of a complex page.
805
806
=back
807
808
=cut
809
810
sub get {
811
0
0
1
my ($self,$url, %options) = @_;
812
0
my $b = $self->tab->{linkedBrowser};
813
0
$self->clear_current_form;
814
815
0
my $flags = 0;
816
0
0
if ($options{ no_cache }) {
817
0
$flags = $self->repl->constant('nsIWebNavigation.LOAD_FLAGS_BYPASS_CACHE');
818
};
819
0
0
if (! exists $options{ synchronize }) {
820
0
$options{ synchronize } = $self->events;
821
};
822
0
0
if( !ref $options{ synchronize }) {
823
$options{ synchronize } = $options{ synchronize }
824
0
0
? $self->events
825
: []
826
};
827
828
$self->_sync_call( $options{ synchronize }, sub {
829
0
0
0
if (my $target = delete $options{":content_file"}) {
830
0
$self->save_url($url => ''.$target, %options);
831
} else {
832
0
$b->loadURIWithFlags(''.$url,$flags);
833
};
834
0
});
835
};
836
837
=head2 C<< $mech->get_local( $filename , %options ) >>
838
839
$mech->get_local('test.html');
840
841
Shorthand method to construct the appropriate
842
C<< file:// >> URI and load it into Firefox. Relative
843
paths will be interpreted as relative to C<$0>.
844
845
This method accepts the same options as C<< ->get() >>.
846
847
This method is special to WWW::Mechanize::Firefox but could
848
also exist in WWW::Mechanize through a plugin.
849
850
Options:
851
852
=over 4
853
854
=item *
855
856
B - a reference directory to use instead of C< dirname($0) >
857
858
=back
859
860
=cut
861
862
sub get_local {
863
0
0
1
my ($self, $htmlfile, %options) = @_;
864
0
require Cwd;
865
0
require File::Spec;
866
867
0
my $fn = $htmlfile;
868
0
0
if( ! File::Spec->file_name_is_absolute( $fn )) {
869
0
0
$options{ basedir } ||= dirname($0);
870
$fn = File::Spec->rel2abs(
871
0
File::Spec->catfile($options{basedir},$htmlfile),
872
Cwd::getcwd(),
873
);
874
};
875
0
$fn =~ s!\\!/!g; # fakey "make file:// URL"
876
877
0
$self->get("file://$fn", %options);
878
}
879
880
=head2 C<< $mech->post( $url, %options ) >>
881
882
$mech->post( 'http://example.com',
883
params => { param => "Hello World" },
884
headers => {
885
"Content-Type" => 'application/x-www-form-urlencoded',
886
},
887
charset => 'utf-8',
888
);
889
890
Sends a POST request to C<$url>.
891
892
A C header will be automatically calculated if
893
it is not given.
894
895
The following options are recognized:
896
897
=over 4
898
899
=item *
900
901
C - a hash of HTTP headers to send. If not given,
902
the content type will be generated automatically.
903
904
=item *
905
906
C - the raw data to send, if you've encoded it already.
907
908
=back
909
910
=cut
911
912
sub post {
913
0
0
1
my ($self, $url, %options) = @_;
914
0
my $b = $self->tab->{linkedBrowser};
915
0
$self->clear_current_form;
916
917
0
my $flags = 0;
918
0
0
if ($options{no_cache}) {
919
0
$flags = $self->repl->constant('nsIWebNavigation.LOAD_FLAGS_BYPASS_CACHE');
920
};
921
0
0
if (! exists $options{synchronize}) {
922
0
$options{synchronize} = $self->events;
923
};
924
0
0
if( !ref $options{synchronize}) {
925
$options{synchronize} = $options{synchronize}
926
0
0
? $self->events
927
: []
928
};
929
930
# If we don't have data, encode the parameters:
931
0
0
if( !$options{ data }) {
932
0
my $req= HTTP::Request::Common::POST( $url, $options{params} );
933
0
$options{ data } = $req->content;
934
};
935
936
0
0
$options{ charset } ||= 'utf-8';
937
0
0
$options{ headers } ||= {};
938
0
0
$options{ headers }->{"Content-Type"} ||= "application/x-www-form-urlencoded";
939
0
0
if( $options{ charset }) {
940
0
$options{ headers }->{"Content-Type"} .= "; charset=$options{ charset }";
941
};
942
943
0
my $streamPostData = $self->repl->declare(<<'JS');
944
function(headers, dataString) {
945
// POST method requests must wrap the encoded text in a MIME stream
946
const Cc = Components.classes;
947
const Ci = Components.interfaces;
948
var stringStream = Cc["@mozilla.org/io/string-input-stream;1"].
949
createInstance(Ci.nsIStringInputStream);
950
if ("data" in stringStream) // Gecko 1.9 or newer
951
stringStream.data = dataString;
952
else // 1.8 or older
953
stringStream.setData(dataString, dataString.length);
954
955
var postData = Cc["@mozilla.org/network/mime-input-stream;1"].
956
createInstance(Ci.nsIMIMEInputStream);
957
for( h in headers ) {
958
postData.addHeader( h, headers[h] );
959
};
960
postData.addContentLength = true;
961
postData.setData(stringStream);
962
963
return postData;
964
}
965
JS
966
967
$self->_sync_call($options{synchronize}, sub {
968
0
0
my $postData = $streamPostData->($options{headers}, $options{data});
969
0
$b->loadURIWithFlags(''.$url, $flags, undef, $options{charset}, $postData);
970
0
});
971
}
972
973
=head2 C<< $mech->add_header( $name => $value, ... ) >>
974
975
$mech->add_header(
976
'X-WWW-Mechanize-Firefox' => "I'm using it",
977
Encoding => 'text/klingon',
978
);
979
980
This method sets up custom headers that will be sent with B HTTP(S)
981
request that Firefox makes.
982
983
Using multiple instances of WWW::Mechanize::Firefox objects with the same
984
application together with changed request headers will most likely have weird
985
effects. So don't do that.
986
987
Note that currently, we only support one value per header.
988
989
Some versions of Firefox don't work with the method that is used to set
990
the custom headers. Please see C for the exact
991
versions where the implemented mechanism doesn't work. Roughly, this is
992
for versions 17 to 24 of Firefox.
993
994
=cut
995
996
# This subroutine creates the custom header observer. It has a hashref
997
# of headers that it will add to EACH request that Firefox sends out.
998
# It removes itself when the Perl object gets destroyed.
999
sub _custom_header_observer {
1000
0
0
my ($self, @headers) = @_;
1001
1002
# This routine was taken from http://d.hatena.ne.jp/oppara/20090410/p1
1003
0
my $on_modify_request = $self->repl->declare(<<'JS');
1004
function() { // headers passed via arguments
1005
const Cc= Components.classes;
1006
const Ci= Components.interfaces;
1007
const observerService= Cc['@mozilla.org/observer-service;1'].getService(Ci.nsIObserverService);
1008
var h= [].slice.call(arguments);
1009
var hr= {};
1010
for( var i=0; i
1011
var k= h[i];
1012
var v= h[i+1];
1013
hr[k]= v;
1014
};
1015
1016
var myObserver= {
1017
headers: hr,
1018
observe: function(subject,topic,data) {
1019
if(topic != 'http-on-modify-request') return;
1020
1021
var http = subject.QueryInterface(Ci.nsIHttpChannel);
1022
for( var k in this.headers) {
1023
var v= this.headers[k];
1024
http.setRequestHeader(k,v, false);
1025
1026
if (k== 'Referer' && http.referrer) {
1027
http.referrer.spec = v;
1028
};
1029
};
1030
}
1031
}
1032
observerService.addObserver(myObserver,'http-on-modify-request',false);
1033
return myObserver;
1034
};
1035
JS
1036
0
my $obs = $on_modify_request->(@headers);
1037
1038
# Clean up after ourselves
1039
0
$obs->__release_action(<<'JS');
1040
const Cc= Components.classes;
1041
const Ci= Components.interfaces;
1042
const observerService= Cc['@mozilla.org/observer-service;1'].getService(Ci.nsIObserverService);
1043
try {
1044
observerService.removeObserver(self,'http-on-modify-request',false);
1045
} catch (e) {}
1046
JS
1047
0
return $obs;
1048
};
1049
1050
sub add_header {
1051
0
0
1
my ($self, @headers) = @_;
1052
0
0
$self->{custom_header_observer} ||= $self->_custom_header_observer;
1053
1054
# This is slooow, but we only do it when changing the headers...
1055
0
my $h = $self->{custom_header_observer}->{headers};
1056
0
while( my ($k,$v) = splice @headers, 0, 2 ) {
1057
0
$h->{$k} = $v;
1058
};
1059
};
1060
1061
=head2 C<< $mech->delete_header( $name , $name2... ) >>
1062
1063
$mech->delete_header( 'User-Agent' );
1064
1065
Removes HTTP headers from the agent's list of special headers. Note
1066
that Firefox may still send a header with its default value.
1067
1068
=cut
1069
1070
sub delete_header {
1071
0
0
1
my ($self, @headers) = @_;
1072
1073
0
0
0
if( $self->{custom_header_observer} and @headers ) {
1074
# This is slooow, but we only do it when changing the headers...
1075
0
my $h = $self->{custom_header_observer}->{headers};
1076
1077
delete $h->{$_}
1078
0
for( @headers );
1079
};
1080
};
1081
1082
=head2 C<< $mech->reset_headers >>
1083
1084
$mech->reset_headers();
1085
1086
Removes all custom headers and makes Firefox send its defaults again.
1087
1088
=cut
1089
1090
sub reset_headers {
1091
0
0
1
my ($self) = @_;
1092
0
delete $self->{custom_header_observer};
1093
};
1094
1095
sub _addLoadEventListener {
1096
0
0
my ($self,%options) = @_;
1097
1098
0
0
$options{ tab } ||= $self->tab;
1099
0
0
$options{ window } ||= $self->application->getMostRecentWindow;
1100
0
0
$options{ events } ||= $self->events;
1101
0
my $add_load_listener = $self->repl->declare(<<'JS');
1102
function( mainWindow, tab, waitForLoad, events ) {
1103
var browser= mainWindow.gBrowser.getBrowserForTab( tab );
1104
1105
var lock= {
1106
"busy": 1,
1107
"log":[],
1108
"events": events,
1109
"browser": browser,
1110
"cb": undefined,
1111
"release": function() {
1112
for(var i=0; i
1113
this.browser.removeEventListener(this.events[i], this.cb, true);
1114
};
1115
}
1116
};
1117
var unloadedFrames= [];
1118
1119
lock.cb= function (e) {
1120
var t= e.target;
1121
var toplevel= (t == browser.contentDocument);
1122
lock.log.push("Event "+e.type);
1123
var reloadedFrame= false;
1124
lock.log.push( "" + unloadedFrames.length + " frames.");
1125
1126
if( "FRAME" == t.tagName
1127
|| "IFRAME" == t.tagName ) {
1128
loc= t.src;
1129
} else if( !t.tagName ) {
1130
// Document
1131
loc= t.URL;
1132
} else { // ignore
1133
lock.log.push("Ignoring " + e.type + " on " + t.tagName);
1134
};
1135
try {
1136
if( t instanceof HTMLDocument ) {
1137
// We are only interested in HTML pages here
1138
var container= t.defaultView.frameElement || browser.contentWindow;
1139
for( var i=0; i < unloadedFrames.length; i++ ) {
1140
try {
1141
// lock.log.push( "" + i + " " + unloadedFrames[i].id + " - " + unloadedFrames[i].src );
1142
reloadedFrame= reloadedFrame
1143
|| unloadedFrames[i] === container;
1144
} catch (e) {
1145
// alert("Some frame element has gone away already...");
1146
};
1147
// alert("Caught " + e.type + " on remembered element. Great - " + reloadedFrame);
1148
};
1149
1150
if ("pagehide" == e.type && container ) {
1151
// alert("pagehide on container /lock"+lock.id);
1152
// A frame or window gets reloaded.
1153
// A frame gets reloaded. We remember it so we can
1154
// tell when it has completed. We won't get a separate
1155
// completion event on the parent document :-(
1156
lock.log.push("Remembering frame parent, for 'load' event");
1157
unloadedFrames.push( container );
1158
// Maybe we should just attach all events here?!
1159
};
1160
};
1161
} catch (e) { alert("Error while looking: " + e.message+" " + e.line) };
1162
1163
// if (! toplevel && !reloadedFrame ) { return ; };
1164
lock.log.push("<> " + e.type + " on " + loc);
1165
1166
if( (reloadedFrame)
1167
// && !waitForLoad
1168
&& "DOMContentLoaded" == e.type
1169
) {
1170
// We loaded a document
1171
// See if it contains (i)frames
1172
// and wait for "load" to fire if so
1173
// alert("Reloaded a container /lock:" + lock.id);
1174
lock.log.push("DOMContentLoaded for toplevel");
1175
var q= "//IFRAME|//FRAME";
1176
var frames= t.evaluate(q,t,null,XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null ).snapshotLength;
1177
lock.log.push("Found " + frames + " frames");
1178
if( frames ) {
1179
lock.log.push("Waiting for 'load' because we found frames");
1180
waitForLoad= true;
1181
} else if( /^about:neterror\?/.test( loc ) || !waitForLoad ) {
1182
lock.log.push("Early out on DOMContentLoaded");
1183
lock.busy= 0;
1184
};
1185
1186
} else if( (reloadedFrame)
1187
&& ( "load" == e.type
1188
|| "pageshow" == e.type
1189
)) { // We always are done on "load" on toplevel
1190
lock.log.push("'" + e.type + "' on top level, old state was " + lock.busy);
1191
lock.busy= 0;
1192
1193
} else if( (toplevel || reloadedFrame)
1194
&& ("error" == e.type || "stop" == e.type)) { // We always are done on "load" on toplevel
1195
lock.log.push("'" + e.type + "' on top level, old state was " + lock.busy);
1196
lock.busy= 0;
1197
};
1198
1199
};
1200
1201
for(var i=0; i
1202
browser.addEventListener(events[i], lock.cb, true);
1203
};
1204
lock.log.push("Listening");
1205
1206
return lock
1207
}
1208
JS
1209
0
return $add_load_listener->($options{ window }, $options{ tab }, 1, $options{ events } );
1210
}
1211
1212
sub _addEventListener {
1213
0
0
my ($self,@args) = @_;
1214
0
0
0
if (@args <= 2 and ref($args[0]) eq 'MozRepl::RemoteObject::Instance') {
1215
0
@args = [@args];
1216
};
1217
0
for (@args) {
1218
0
0
$_->[1] ||= $self->events;
1219
0
0
$_->[1] = [$_->[1]]
1220
unless ref $_->[1];
1221
};
1222
# Now, flatten the arg list again...
1223
0
@args = map { @$_ } @args;
0
1224
1225
# This registers multiple events for a one-shot event
1226
0
my $make_semaphore = $self->repl->declare(<<'JS');
1227
function() {
1228
var lock = { "busy": 0, "event" : null };
1229
var listeners = [];
1230
var pairs = arguments;
1231
for( var k = 0; k < pairs.length ; k++) {
1232
var b = pairs[k];
1233
k++;
1234
var events = pairs[k];
1235
1236
for( var i = 0; i < events.length; i++) {
1237
var evname = events[i];
1238
var callback = (function(listeners,evname){
1239
return function(e) {
1240
if (! lock.busy) {
1241
lock.busy++;
1242
lock.event = e.type;
1243
lock.js_event = {};
1244
lock.js_event.target = e.originalTarget;
1245
lock.js_event.type = e.type;
1246
//alert("Caught first event " + e.type + " " + e.message);
1247
} else {
1248
//alert("Caught duplicate event " + e.type + " " + e.message);
1249
};
1250
for( var j = 0; j < listeners.length; j++) {
1251
listeners[j][0].removeEventListener(listeners[j][1],listeners[j][2],true);
1252
};
1253
};
1254
})(listeners,evname);
1255
listeners.push([b,evname,callback]);
1256
b.addEventListener(evname,callback,true);
1257
};
1258
};
1259
return lock
1260
}
1261
JS
1262
# $browser,$events
1263
0
return $make_semaphore->(@args);
1264
};
1265
1266
sub _wait_while_busy {
1267
0
0
my ($self,@elements) = @_;
1268
# Now do the busy-wait
1269
# Should this also include a ->poll()
1270
# and a callback?
1271
0
my $i=0;
1272
0
while (1) {
1273
0
$i++;
1274
0
0
last if($i == 30 );
1275
0
for my $element (@elements) {
1276
0
0
0
if ((my $s = $element->{busy} || 0) < 1) {
1277
0
for my $element (@elements) {
1278
0
push @{ $self->{event_log} },
1279
0
join "\n", @{ $element->{log}};
0
1280
};
1281
0
return $element;
1282
};
1283
};
1284
0
sleep 0.1;
1285
1286
# if (time-$timer > 4) {
1287
# $timer= time;
1288
# for my $element (@elements) {
1289
# for (@{ $element->{log}}) {
1290
# print $_,"\n";
1291
# };
1292
# print "---\n";
1293
# };
1294
# };
1295
};
1296
}
1297
1298
=head2 C<< $mech->synchronize( $event, $callback ) >>
1299
1300
Wraps a synchronization semaphore around the callback
1301
and waits until the event C<$event> fires on the browser.
1302
If you want to wait for one of multiple events to occur,
1303
pass an array reference as the first parameter.
1304
1305
Usually, you want to use it like this:
1306
1307
my $l = $mech->xpath('//a[@onclick]', single => 1);
1308
$mech->synchronize('DOMFrameContentLoaded', sub {
1309
$mech->click( $l );
1310
});
1311
1312
It is necessary to synchronize with the browser whenever
1313
a click performs an action that takes longer and
1314
fires an event on the browser object.
1315
1316
The C event is fired by Firefox when
1317
the whole DOM and all C
1318
If your document doesn't have frames, use the C
1319
event instead.
1320
1321
If you leave out C<$event>, the value of C<< ->events() >> will
1322
be used instead.
1323
1324
=cut
1325
1326
sub _install_response_header_listener {
1327
0
0
my ($self) = @_;
1328
1329
0
weaken $self;
1330
1331
# Pre-Filter the progress on the JS side of things so we
1332
# don't get that much traffic back and forth between Perl and JS
1333
0
my $make_state_change_filter = $self->repl->declare(<<'JS');
1334
function (cb,console) {
1335
const STATE_START = Components.interfaces.nsIWebProgressListener.STATE_START;
1336
const STATE_STOP = Components.interfaces.nsIWebProgressListener.STATE_STOP;
1337
const STATE_TRANSFERRING = Components.interfaces.nsIWebProgressListener.STATE_TRANSFERRING;
1338
const STATE_IS_DOCUMENT = Components.interfaces.nsIWebProgressListener.STATE_IS_DOCUMENT;
1339
const STATE_IS_WINDOW = Components.interfaces.nsIWebProgressListener.STATE_IS_WINDOW;
1340
1341
return function (progress,request,flags,status) {
1342
if( 0 && console ) {
1343
const nsIChannel = Components.interfaces.nsIChannel;
1344
var ch = request.QueryInterface(nsIChannel);
1345
1346
console.log("STATE: "
1347
+ (flags & STATE_START ? "s" : "-")
1348
+ (flags & STATE_STOP ? "S" : "-")
1349
+ (flags & STATE_TRANSFERRING ? "T" : "-")
1350
+ (flags & STATE_IS_DOCUMENT ? "D" : "-")
1351
+ (flags & STATE_IS_WINDOW ? "W" : "-")
1352
+ " " + status
1353
+ " " + ch.originalURI.spec
1354
+ " -> " + ch.URI.spec
1355
);
1356
};
1357
// if (flags & (STATE_STOP|STATE_IS_WINDOW) == (STATE_STOP|STATE_IS_WINDOW)) {
1358
if (flags & (STATE_STOP|STATE_IS_DOCUMENT) == (STATE_STOP|STATE_IS_DOCUMENT)) {
1359
cb(progress,request,flags,status);
1360
} else if ((flags & STATE_STOP) == STATE_STOP) {
1361
cb(progress,request,flags,status);
1362
}
1363
}
1364
}
1365
JS
1366
1367
# These should be cached and optimized into one hash query
1368
0
my $STATE_STOP = $self->repl->constant('Components.interfaces.nsIWebProgressListener.STATE_STOP');
1369
0
my $STATE_IS_DOCUMENT = $self->repl->constant('Components.interfaces.nsIWebProgressListener.STATE_IS_DOCUMENT');
1370
0
my $STATE_IS_WINDOW = $self->repl->constant('Components.interfaces.nsIWebProgressListener.STATE_IS_WINDOW');
1371
1372
my $state_change = $make_state_change_filter->(sub {
1373
0
0
my ($progress,$request,$flags,$status) = @_;
1374
#warn sprintf "State : %032b %08x\n", $flags, $status;
1375
#warn sprintf " %032b\n", $STATE_STOP | $STATE_IS_DOCUMENT | $STATE_IS_WINDOW ;
1376
1377
0
0
0
if ( $STATE_STOP == $flags # some error
1378
or ($flags & ($STATE_STOP | $STATE_IS_DOCUMENT)) == ($STATE_STOP | $STATE_IS_DOCUMENT)) {
1379
0
0
if ($status == 0 ) {
1380
#warn "Storing request to response";
1381
#warn "URI ".$request->{URI}->{asciiSpec};
1382
0
0
$self->{ response } ||= $request;
1383
} else {
1384
#warn "Erasing response";
1385
0
undef $self->{ response };
1386
};
1387
};
1388
#}, $self->tab->{linkedBrowser}->{contentWindow}->{console}, $lock);
1389
0
}, $self->tab->{linkedBrowser}->{contentWindow}->{console});
1390
1391
0
my $browser = $self->tab->{linkedBrowser};
1392
1393
# These should mimick the LWP::UserAgent events maybe?
1394
0
return $self->progress_listener(
1395
$browser,
1396
onStateChange => $state_change,
1397
#onProgressChange => sub { print "Progress : @_\n" },
1398
#onLocationChange => sub { printf "Location : %s\n", $_[2]->{spec} },
1399
#onStatusChange => sub { print "Status : @_\n"; },
1400
);
1401
};
1402
1403
sub synchronize {
1404
0
0
1
my ($self,$events,$callback) = @_;
1405
0
0
0
if (ref $events and ref $events eq 'CODE') {
1406
0
$callback = $events;
1407
0
$events = $self->events;
1408
};
1409
1410
0
0
$events = [ $events ]
1411
unless ref $events;
1412
1413
0
undef $self->{response};
1414
1415
0
my $need_response = defined wantarray;
1416
0
my $response_catcher = $self->_install_response_header_listener();
1417
1418
0
my $load_lock = $self->_addLoadEventListener( tab => $self->tab, events => $events );
1419
0
$callback->();
1420
1421
0
my $ev = $self->_wait_while_busy($load_lock);
1422
0
0
if (my $h = $self->{on_event}) {
1423
0
0
if (ref $h eq 'CODE') {
1424
0
$h->($ev)
1425
} else {
1426
0
warn "Received $ev->{event}";
1427
#warn "$ev->{event}->{text}"";
1428
};
1429
};
1430
1431
# Clean up our event listener
1432
0
$load_lock->release;
1433
1434
0
undef $response_catcher;
1435
# Response catcher gets released here
1436
1437
0
$self->signal_http_status;
1438
0
0
if ($need_response) {
1439
0
return $self->response
1440
};
1441
};
1442
1443
=head2 C<< $mech->res() >> / C<< $mech->response(%options) >>
1444
1445
my $response = $mech->response(headers => 0);
1446
1447
Returns the current response as a L object.
1448
1449
The C option tells the module whether to fetch the headers
1450
from Firefox or not. This is mainly an internal optimization hack.
1451
1452
=cut
1453
1454
sub _headerVisitor {
1455
0
0
my ($self,$cb) = @_;
1456
0
my $obj = $self->repl->expr('new Object');
1457
0
$obj->{visitHeader} = $cb;
1458
0
$obj
1459
};
1460
1461
sub _extract_response {
1462
0
0
my ($self,$request,%options) = @_;
1463
1464
0
my $nsIHttpChannel = $self->repl->constant('Components.interfaces.nsIHttpChannel');
1465
0
my $httpChannel = $request->QueryInterface($nsIHttpChannel);
1466
1467
0
my @headers;
1468
0
0
if( $options{ headers }) {
1469
0
0
my $v = $self->_headerVisitor(sub{push @headers, @_});
0
1470
1471
# If this fails, we're calling it too early :-(
1472
0
$httpChannel->visitResponseHeaders($v);
1473
};
1474
1475
my $res = HTTP::Response->new(
1476
$httpChannel->{responseStatus},
1477
$httpChannel->{responseStatusText},
1478
0
\@headers,
1479
undef, # no body so far
1480
);
1481
0
return $res;
1482
};
1483
1484
sub response {
1485
0
0
1
my ($self, %options) = @_;
1486
1487
0
0
if( ! exists $options{ headers }) {
1488
0
$options{ headers } = 1;
1489
};
1490
1491
# If we still have a valid JS response,
1492
# create a HTTP::Response from that
1493
0
0
if (my $js_res = $self->{ response }) {
1494
#my $ouri = $js_res->{originalURI};
1495
0
my $ouri = $js_res->{URI};
1496
0
my $scheme = '';
1497
#warn "Reading response for ".$js_res->{URI}->{asciiSpec};
1498
#warn " original ".$js_res->{originalURI}->{asciiSpec};
1499
0
0
if ($ouri) {
1500
0
$scheme = $ouri->{scheme};
1501
};
1502
1503
0
0
0
if ($scheme and $scheme =~ /^https?/) {
0
0
1504
# We can only extract from a HTTP Response
1505
0
return $self->_extract_response( $js_res, %options );
1506
} elsif ($scheme and $scheme =~ /^(file|data|about)\b/) {
1507
# We're cool!
1508
0
return HTTP::Response->new( 200, '', ['Content-Encoding','UTF-8'], encode 'UTF-8' => $self->content);
1509
} else {
1510
# We'll make up a response, below
1511
#my $url = $self->document->{documentURI};
1512
#carp "Making up a response for unknown URL scheme '$scheme' (from '$url')";
1513
};
1514
};
1515
1516
# Otherwise, make up a reason:
1517
0
my $eff_url = $self->document->{documentURI};
1518
#warn $eff_url;
1519
0
0
if ($eff_url =~ /^about:neterror/) {
1520
# this is an error
1521
0
return HTTP::Response->new(500)
1522
};
1523
1524
# We're cool, except we don't know what we're doing here:
1525
0
return HTTP::Response->new( 200, '', ['Content-Encoding','UTF-8'], encode 'UTF-8' => $self->content);
1526
}
1527
*res = \&response;
1528
1529
=head2 C<< $mech->success() >>
1530
1531
$mech->get('http://google.com');
1532
print "Yay"
1533
if $mech->success();
1534
1535
Returns a boolean telling whether the last request was successful.
1536
If there hasn't been an operation yet, returns false.
1537
1538
This is a convenience function that wraps C<< $mech->res->is_success >>.
1539
1540
=cut
1541
1542
sub success {
1543
0
0
1
my $res = $_[0]->response( headers => 0 );
1544
0
0
$res and $res->is_success
1545
}
1546
1547
=head2 C<< $mech->status() >>
1548
1549
$mech->get('http://google.com');
1550
print $mech->status();
1551
# 200
1552
1553
Returns the HTTP status code of the response.
1554
This is a 3-digit number like 200 for OK, 404 for not found, and so on.
1555
1556
=cut
1557
1558
sub status {
1559
0
0
1
my ($self) = @_;
1560
0
return $self->response( headers => 0 )->code
1561
};
1562
1563
=head2 C<< $mech->reload( [$bypass_cache] ) >>
1564
1565
$mech->reload();
1566
1567
Reloads the current page. If C<$bypass_cache>
1568
is a true value, the browser is not allowed to
1569
use a cached page. This is the difference between
1570
pressing C (cached) and C (uncached).
1571
1572
Returns the (new) response.
1573
1574
=cut
1575
1576
sub reload {
1577
0
0
1
my ($self, $bypass_cache) = @_;
1578
0
0
$bypass_cache ||= 0;
1579
0
0
if ($bypass_cache) {
1580
0
$bypass_cache = $self->repl->constant('nsIWebNavigation.LOAD_FLAGS_BYPASS_CACHE');
1581
};
1582
$self->synchronize( sub {
1583
0
0
$self->tab->{linkedBrowser}->reloadWithFlags($bypass_cache);
1584
0
});
1585
}
1586
1587
# Internal convenience method for dipatching a call either synchronized
1588
# or not
1589
sub _sync_call {
1590
0
0
my ($self, $events, $cb) = @_;
1591
1592
0
0
if (@$events) {
1593
0
$self->synchronize( $events, $cb );
1594
} else {
1595
0
$cb->();
1596
};
1597
};
1598
1599
=head2 C<< $mech->back( [$synchronize] ) >>
1600
1601
$mech->back();
1602
1603
Goes one page back in the page history.
1604
1605
Returns the (new) response.
1606
1607
=cut
1608
1609
sub back {
1610
0
0
1
my ($self, $synchronize) = @_;
1611
0
0
$synchronize ||= (@_ != 2);
1612
0
0
if( !ref $synchronize ) {
1613
0
0
$synchronize = $synchronize
1614
? $self->events
1615
: []
1616
};
1617
1618
$self->_sync_call($synchronize, sub {
1619
0
0
$self->tab->{linkedBrowser}->goBack;
1620
0
});
1621
}
1622
1623
=head2 C<< $mech->forward( [$synchronize] ) >>
1624
1625
$mech->forward();
1626
1627
Goes one page forward in the page history.
1628
1629
Returns the (new) response.
1630
1631
=cut
1632
1633
sub forward {
1634
0
0
1
my ($self, $synchronize) = @_;
1635
0
0
$synchronize ||= (@_ != 2);
1636
0
0
if( !ref $synchronize ) {
1637
0
0
$synchronize = $synchronize
1638
? $self->events
1639
: []
1640
};
1641
1642
$self->_sync_call($synchronize, sub {
1643
0
0
$self->tab->{linkedBrowser}->goForward;
1644
0
});
1645
}
1646
1647
=head2 C<< $mech->uri() >>
1648
1649
print "We are at " . $mech->uri;
1650
1651
Returns the current document URI.
1652
1653
=cut
1654
1655
sub uri {
1656
0
0
1
my ($self) = @_;
1657
0
my $loc = $self->tab->MozRepl::RemoteObject::Methods::dive(qw[
1658
linkedBrowser
1659
currentURI
1660
asciiSpec ]);
1661
0
return URI->new( $loc );
1662
};
1663
1664
=head1 CONTENT METHODS
1665
1666
=head2 C<< $mech->document() >>
1667
1668
Returns the DOM document object.
1669
1670
This is WWW::Mechanize::Firefox specific.
1671
1672
=cut
1673
1674
sub document {
1675
0
0
1
my ($self) = @_;
1676
#$self->tab->MozRepl::RemoteObject::Methods::dive(qw[linkedBrowser contentWindow document]);
1677
0
$self->tab->MozRepl::RemoteObject::Methods::dive(qw[linkedBrowser contentDocument]);
1678
}
1679
1680
=head2 C<< $mech->docshell() >>
1681
1682
my $ds = $mech->docshell;
1683
1684
Returns the C Javascript object associated with the tab.
1685
1686
This is WWW::Mechanize::Firefox specific.
1687
1688
=cut
1689
1690
sub docshell {
1691
0
0
1
my ($self) = @_;
1692
0
$self->tab->MozRepl::RemoteObject::Methods::dive(qw[linkedBrowser docShell]);
1693
}
1694
1695
=head2 C<< $mech->content( %options ) >>
1696
1697
print $mech->content;
1698
print $mech->content( format => 'html' ); # default
1699
print $mech->content( format => 'text' ); # identical to ->text
1700
1701
This always returns the content as a Unicode string. It tries
1702
to decode the raw content according to its input encoding.
1703
This currently only works for HTML pages, not for images etc.
1704
1705
Recognized options:
1706
1707
=over 4
1708
1709
=item *
1710
1711
C - the document to use.
1712
1713
Default is C<< $self->document >>.
1714
1715
=item *
1716
1717
C - the stuff to return
1718
1719
The allowed values are C and C. The default is C.
1720
1721
=back
1722
1723
=cut
1724
1725
sub content {
1726
0
0
1
my ($self, %options) = @_;
1727
0
0
$options{ format } ||= 'html';
1728
1729
0
0
my $d = delete $options{ document } || $self->document; # keep a reference to it!
1730
0
0
my $format = delete $options{ format } || 'html';
1731
0
my $content;
1732
1733
0
0
if( $format eq 'html' ) {
0
1734
0
my $html = $self->repl->declare(<<'JS', 'list');
1735
function(d){
1736
var e = d.createElement("div");
1737
e.appendChild(d.documentElement.cloneNode(true));
1738
return [e.innerHTML,d.inputEncoding];
1739
}
1740
JS
1741
# We return the raw bytes here.
1742
0
($content,my $encoding) = $html->($d);
1743
0
0
if (! utf8::is_utf8($content)) {
1744
#warn "Switching on UTF-8 (from $encoding)";
1745
# Switch on UTF-8 flag
1746
# This should never happen, as JSON::XS (and JSON) should always
1747
# already return proper UTF-8
1748
# But it does happen.
1749
0
$content = Encode::decode($encoding, $content);
1750
};
1751
} elsif ( $format eq 'text' ) {
1752
0
$content = $self->text;
1753
}
1754
else {
1755
0
$self->die( qq{Unknown "format" parameter "$format"} );
1756
}
1757
1758
0
return $content
1759
};
1760
1761
=head2 C<< $mech->text() >>
1762
1763
Returns the text of the current HTML content. If the content isn't
1764
HTML, $mech will die.
1765
1766
=cut
1767
1768
sub text {
1769
0
0
1
my $self = shift;
1770
1771
# Waugh - this is highly inefficient but conveniently short to write
1772
# Maybe this should skip SCRIPT nodes...
1773
0
join '', map { $_->{nodeValue} } $self->xpath('//*/text()');
0
1774
}
1775
1776
1777
=head2 C<< $mech->content_encoding() >>
1778
1779
print "The content is encoded as ", $mech->content_encoding;
1780
1781
Returns the encoding that the content is in. This can be used
1782
to convert the content from UTF-8 back to its native encoding.
1783
1784
=cut
1785
1786
sub content_encoding {
1787
0
0
1
my ($self, $d) = @_;
1788
0
0
$d ||= $self->document; # keep a reference to it!
1789
0
return $d->{inputEncoding};
1790
};
1791
1792
=head2 C<< $mech->update_html( $html ) >>
1793
1794
$mech->update_html($html);
1795
1796
Writes C<$html> into the current document. This is mostly
1797
implemented as a convenience method for L.
1798
1799
=cut
1800
1801
sub update_html {
1802
0
0
1
my ($self,$content) = @_;
1803
0
my $url = URI->new('data:');
1804
0
$url->media_type("text/html");
1805
0
$url->data($content);
1806
$self->synchronize($self->events, sub {
1807
0
0
$self->tab->{linkedBrowser}->loadURI("$url");
1808
0
});
1809
return
1810
0
};
1811
1812
=head2 C<< $mech->save_content( $localname [, $resource_directory] [, %options ] ) >>
1813
1814
$mech->get('http://google.com');
1815
$mech->save_content('google search page','google search page files');
1816
1817
Saves the given URL to the given filename. The URL will be
1818
fetched from the cache if possible, avoiding unnecessary network
1819
traffic.
1820
1821
If C<$resource_directory> is given, the whole page will be saved.
1822
All CSS, subframes and images
1823
will be saved into that directory, while the page HTML itself will
1824
still be saved in the file pointed to by C<$localname>.
1825
1826
Returns a C object through which you can cancel the
1827
download by calling its C<< ->cancelSave >> method. Also, you can poll
1828
the download status through the C<< ->{currentState} >> property.
1829
1830
If you need to set persist flags pass the unsigned long value in the
1831
C option.
1832
1833
$mech->get('http://zombisoft.com');
1834
$mech->save_content('Zombisoft','zombisoft-resource-files', "persist" => 512 | 2048);
1835
1836
A list of flags and their values can be found at
1837
L.
1838
1839
If you are interested in the intermediate download progress, create
1840
a ProgressListener through C<< $mech->progress_listener >>
1841
and pass it in the C option.
1842
1843
The download will
1844
continue in the background. It will not show up in the
1845
Download Manager.
1846
1847
=cut
1848
1849
sub save_content {
1850
0
0
1
my ($self,$localname,$resource_directory,%options) = @_;
1851
1852
0
$localname = File::Spec->rel2abs($localname, '.');
1853
# Touch the file
1854
0
0
if (! -f $localname) {
1855
0
0
open my $fh, '>', $localname
1856
or die "Couldn't create '$localname': $!";
1857
};
1858
1859
0
0
if ($resource_directory) {
1860
0
$resource_directory = File::Spec->rel2abs($resource_directory, '.');
1861
1862
# Create the directory
1863
0
0
if (! -d $resource_directory) {
1864
0
0
mkdir $resource_directory
1865
or die "Couldn't create '$resource_directory': $!";
1866
};
1867
};
1868
1869
0
my $transfer_file = $self->repl->declare(<<'JS');
1870
function (document,filetarget,rscdir,progress,persistflags) {
1871
//new file object
1872
var obj_target;
1873
if (filetarget) {
1874
obj_target = Components.classes["@mozilla.org/file/local;1"]
1875
.createInstance(Components.interfaces.nsILocalFile);
1876
};
1877
1878
//set file with path
1879
obj_target.initWithPath(filetarget);
1880
1881
var obj_rscdir;
1882
if (rscdir) {
1883
obj_rscdir = Components.classes["@mozilla.org/file/local;1"]
1884
.createInstance(Components.interfaces.nsILocalFile);
1885
obj_rscdir.initWithPath(rscdir);
1886
};
1887
1888
var obj_Persist = Components.classes["@mozilla.org/embedding/browser/nsWebBrowserPersist;1"]
1889
.createInstance(Components.interfaces.nsIWebBrowserPersist);
1890
1891
// with persist flags if desired
1892
const nsIWBP = Components.interfaces.nsIWebBrowserPersist;
1893
const flags = nsIWBP.PERSIST_FLAGS_REPLACE_EXISTING_FILES;
1894
obj_Persist.persistFlags = flags | nsIWBP.PERSIST_FLAGS_FROM_CACHE
1895
| nsIWBP["PERSIST_FLAGS_FORCE_ALLOW_COOKIES"]
1896
| persistflags
1897
;
1898
1899
obj_Persist.progressListener = progress;
1900
1901
//save file to target
1902
obj_Persist.saveDocument(document,obj_target, obj_rscdir, null,0,0);
1903
return obj_Persist
1904
};
1905
JS
1906
#warn "=> $localname / $resource_directory";
1907
$transfer_file->(
1908
$self->document,
1909
$localname,
1910
$resource_directory,
1911
$options{progress},
1912
$options{persist}
1913
0
);
1914
}
1915
1916
=head2 C<< $mech->save_url( $url, $localname, [%options] ) >>
1917
1918
$mech->save_url('http://google.com','google_index.html');
1919
1920
Saves the given URL to the given filename. The URL will be
1921
fetched from the cache if possible, avoiding unnecessary network
1922
traffic.
1923
1924
If you are interested in the intermediate download progress, create
1925
a ProgressListener through C<< $mech->progress_listener >>
1926
and pass it in the C option.
1927
The download will
1928
continue in the background. It will also not show up in the
1929
Download Manager.
1930
1931
If the C option is not passed in, C< ->save_url >
1932
will only return after the download has finished.
1933
1934
Returns a C object through which you can cancel the
1935
download by calling its C<< ->cancelSave >> method. Also, you can poll
1936
the download status through the C<< ->{currentState} >> property.
1937
1938
=cut
1939
1940
sub save_url {
1941
0
0
1
my ($self,$url,$localname,%options) = @_;
1942
1943
0
$localname = File::Spec->rel2abs($localname, '.');
1944
1945
0
0
if (! -f $localname) {
1946
0
0
open my $fh, '>', $localname
1947
or die "Couldn't create '$localname': $!";
1948
};
1949
1950
0
my $res;
1951
0
0
if( ! $options{ progress }) {
1952
0
$options{ wait } = 1;
1953
# We will do a synchronous download
1954
0
my $STATE_FINISHED = $self->repl->constant('Components.interfaces.nsIWebBrowserPersist.PERSIST_STATE_FINISHED');
1955
$options{ progress }= $self->make_progress_listener(onStateChange => sub {
1956
0
0
my ($webprogress,$request,$flags,$status) = @_;
1957
0
0
if( $res->{currentState} == $STATE_FINISHED) {
1958
0
$options{ wait }= 0;
1959
};
1960
},
1961
# onProgressChange => sub {
1962
# my ($aWebProgress, $aRequest, $aCurSelfProgress, $aMaxSelfProgress, $aCurTotalProgress, $aMaxTotalProgress)= @_;
1963
#diag sprintf "%03.2f", $aCurTotalProgress / ($aMaxTotalProgress||1) * 100;
1964
#}
1965
0
);
1966
};
1967
1968
0
my $transfer_file = $self->repl->declare(<<'JS');
1969
function (source,filetarget,progress,tab) {
1970
//new obj_URI object
1971
var ios = Components.classes["@mozilla.org/network/io-service;1"]
1972
.getService(Components.interfaces.nsIIOService)
1973
var obj_URI = ios.newURI(source, null, null);
1974
1975
//new file object
1976
var obj_target;
1977
if (filetarget) {
1978
obj_target = Components.classes["@mozilla.org/file/local;1"]
1979
.createInstance(Components.interfaces.nsILocalFile);
1980
};
1981
1982
//set file with path
1983
obj_target.initWithPath(filetarget);
1984
1985
//new persistence object
1986
var obj_Persist = Components.classes["@mozilla.org/embedding/browser/nsWebBrowserPersist;1"]
1987
.createInstance(Components.interfaces.nsIWebBrowserPersist);
1988
1989
// with persist flags if desired
1990
const nsIWBP = Components.interfaces.nsIWebBrowserPersist;
1991
const flags = nsIWBP.PERSIST_FLAGS_REPLACE_EXISTING_FILES;
1992
// Also make it send the proper cookies
1993
// If we are on a 3.0 Firefox, PERSIST_FLAGS_FORCE_ALLOW_COOKIES does
1994
// not exist, so we need to get creative:
1995
1996
obj_Persist.persistFlags = flags | nsIWBP.PERSIST_FLAGS_FROM_CACHE
1997
| nsIWBP["PERSIST_FLAGS_FORCE_ALLOW_COOKIES"]
1998
;
1999
obj_Persist.progressListener = progress;
2000
/* {
2001
"onStateChange": function() {
2002
var myargs= Array.slice(arguments);
2003
alert("onStateChange (" + myargs.join(",")+")");
2004
try {
2005
progress.onStateChange.apply(null,arguments);
2006
} catch(e) {
2007
alert(e.message);
2008
};
2009
},
2010
"onProgressChange": function() {
2011
var myargs= Array.slice(arguments);
2012
alert("onProgressChange (" + myargs.join(",")+")");
2013
try {
2014
progress.onProgressChange.apply(null,arguments);
2015
} catch(e) {
2016
alert(e.message);
2017
};
2018
}
2019
};
2020
*/
2021
2022
// Since Firefox 18, we need to provide a proper privacyContext
2023
// This is cobbled together from half-documented parts in various places
2024
// of the Mozilla documentation. The changes file does not list the
2025
// necessary steps :-(
2026
// https://developer.mozilla.org/en-US/docs/Supporting_per-window_private_browsing
2027
// The documentation is even wrong. It recommends to import("chrome://gre/modules/PrivateBrowsingUtils.jsm")
2028
// but the correct URL is "resource://gre/modules/PrivateBrowsingUtils.jsm".
2029
// Also, the method is not named "getPrivacyContextFromWindow" but "privacyContextFromWindow".
2030
var privacyContext;
2031
var version = Components.classes["@mozilla.org/xre/app-info;1"]
2032
.getService(Components.interfaces.nsIXULAppInfo).version;
2033
if( version >= 18.0 ) {
2034
Components.utils.import("resource://gre/modules/PrivateBrowsingUtils.jsm");
2035
privacyContext = PrivateBrowsingUtils.privacyContextFromWindow(tab.linkedBrowser.contentDocument.defaultView);
2036
};
2037
2038
//save file to target
2039
if( version < 36.0 ) {
2040
obj_Persist.saveURI(obj_URI,null,null,null,null,obj_target,privacyContext);
2041
} else {
2042
obj_Persist.saveURI(obj_URI,null,null, ios.referrerPolicy, null,null,obj_target,privacyContext);
2043
}
2044
return obj_Persist
2045
};
2046
JS
2047
2048
0
$res= $transfer_file->("$url" => $localname, $options{progress}, $self->tab);
2049
0
while( $options{ wait }) {
2050
0
$self->repl->poll;
2051
sleep 1
2052
0
0
if $options{ wait };
2053
};
2054
0
$res
2055
}
2056
2057
=head2 C<< $mech->base() >>
2058
2059
print $mech->base;
2060
2061
Returns the URL base for the current page.
2062
2063
The base is either specified through a C
2064
tag or is the current URL.
2065
2066
This method is specific to WWW::Mechanize::Firefox
2067
2068
=cut
2069
2070
sub base {
2071
0
0
1
my ($self) = @_;
2072
0
(my $base) = $self->selector('base');
2073
$base = $base->{href}
2074
0
0
if $base;
2075
0
0
$base ||= $self->uri;
2076
};
2077
2078
=head2 C<< $mech->content_type() >>
2079
2080
=head2 C<< $mech->ct() >>
2081
2082
print $mech->content_type;
2083
2084
Returns the content type of the currently loaded document
2085
2086
=cut
2087
2088
sub content_type {
2089
0
0
1
my ($self) = @_;
2090
0
return $self->document->{contentType};
2091
};
2092
2093
*ct = \&content_type;
2094
2095
=head2 C<< $mech->is_html() >>
2096
2097
print $mech->is_html();
2098
2099
Returns true/false on whether our content is HTML, according to the
2100
HTTP headers.
2101
2102
=cut
2103
2104
sub is_html {
2105
0
0
1
my $self = shift;
2106
0
0
return defined $self->ct && ($self->ct eq 'text/html');
2107
}
2108
2109
=head2 C<< $mech->title() >>
2110
2111
print "We are on page " . $mech->title;
2112
2113
Returns the current document title.
2114
2115
=cut
2116
2117
sub title {
2118
0
0
1
my ($self) = @_;
2119
0
return $self->document->{title};
2120
};
2121
2122
=head1 EXTRACTION METHODS
2123
2124
=head2 C<< $mech->links() >>
2125
2126
print $_->text . " -> " . $_->url . "\n"
2127
for $mech->links;
2128
2129
Returns all links in the document as L objects.
2130
2131
Currently accepts no parameters. See C<< ->xpath >>
2132
or C<< ->selector >> when you want more control.
2133
2134
=cut
2135
2136
our %link_spec = (
2137
a => { url => 'href', },
2138
area => { url => 'href', },
2139
frame => { url => 'src', },
2140
iframe => { url => 'src', },
2141
link => { url => 'href', },
2142
meta => { url => 'content', xpath => (join '',
2143
q{translate(@http-equiv,'ABCDEFGHIJKLMNOPQRSTUVWXYZ',},
2144
q{'abcdefghijklmnopqrstuvwxyz')="refresh"}), },
2145
);
2146
2147
# taken from WWW::Mechanize. This should possibly just be reused there
2148
sub make_link {
2149
0
0
0
my ($self,$node,$base) = @_;
2150
0
my $tag = lc $node->{tagName};
2151
2152
0
0
if (! exists $link_spec{ $tag }) {
2153
0
warn "Unknown tag '$tag'";
2154
};
2155
0
my $url = $node->{ $link_spec{ $tag }->{url} };
2156
2157
0
0
if ($tag eq 'meta') {
2158
0
my $content = $url;
2159
0
0
if ( $content =~ /^\d+\s*;\s*url\s*=\s*(\S+)/i ) {
2160
0
$url = $1;
2161
0
0
$url =~ s/^"(.+)"$/$1/ or $url =~ s/^'(.+)'$/$1/;
2162
}
2163
else {
2164
0
undef $url;
2165
}
2166
};
2167
2168
0
0
if (defined $url) {
2169
my $res = WWW::Mechanize::Link->new({
2170
tag => $tag,
2171
name => $node->{name},
2172
base => $base,
2173
url => $url,
2174
text => $node->{innerHTML},
2175
0
attrs => {},
2176
});
2177
2178
0
$res
2179
} else {
2180
()
2181
0
};
2182
}
2183
2184
sub links {
2185
0
0
1
my ($self) = @_;
2186
0
my @links = $self->selector( join ",", sort keys %link_spec);
2187
0
my $base = $self->base;
2188
return map {
2189
0
$self->make_link($_,$base)
0
2190
} @links;
2191
};
2192
2193
# Call croak or carp, depending on the C< autodie > setting
2194
sub signal_condition {
2195
0
0
0
my ($self,$msg) = @_;
2196
0
0
if ($self->{autodie}) {
2197
0
croak $msg
2198
} else {
2199
0
carp $msg
2200
}
2201
};
2202
2203
# Call croak on the C< autodie > setting if we have a non-200 status
2204
sub signal_http_status {
2205
0
0
0
my ($self) = @_;
2206
0
0
if ($self->{autodie}) {
2207
0
0
if ($self->status !~ /^2/) {
2208
# there was an error
2209
0
0
croak ($self->response(headers => 0)->message || sprintf "Got status code %d", $self->status );
2210
};
2211
} else {
2212
# silent
2213
}
2214
};
2215
2216
=head2 C<< $mech->find_link_dom( %options ) >>
2217
2218
print $_->{innerHTML} . "\n"
2219
for $mech->find_link_dom( text_contains => 'CPAN' );
2220
2221
A method to find links, like L's
2222
C<< ->find_links >> method. This method returns DOM objects from
2223
Firefox instead of WWW::Mechanize::Link objects.
2224
2225
Note that Firefox
2226
might have reordered the links or frame links in the document
2227
so the absolute numbers passed via C
2228
might not be the same between
2229
L and L.
2230
2231
Returns the DOM object as L::Instance.
2232
2233
The supported options are:
2234
2235
=over 4
2236
2237
=item *
2238
2239
C<< text >> and C<< text_contains >> and C<< text_regex >>
2240
2241
Match the text of the link as a complete string, substring or regular expression.
2242
2243
Matching as a complete string or substring is a bit faster, as it is
2244
done in the XPath engine of Firefox.
2245
2246
=item *
2247
2248
C<< id >> and C<< id_contains >> and C<< id_regex >>
2249
2250
Matches the C attribute of the link completely or as part
2251
2252
=item *
2253
2254
C<< name >> and C<< name_contains >> and C<< name_regex >>
2255
2256
Matches the C attribute of the link
2257
2258
=item *
2259
2260
C<< url >> and C<< url_regex >>
2261
2262
Matches the URL attribute of the link (C, C or C).
2263
2264
=item *
2265
2266
C<< class >> - the C attribute of the link
2267
2268
=item *
2269
2270
C<< n >> - the (1-based) index. Defaults to returning the first link.
2271
2272
=item *
2273
2274
C<< single >> - If true, ensure that only one element is found. Otherwise croak
2275
or carp, depending on the C parameter.
2276
2277
=item *
2278
2279
C<< one >> - If true, ensure that at least one element is found. Otherwise croak
2280
or carp, depending on the C parameter.
2281
2282
The method Cs if no link is found. If the C option is true,
2283
it also Cs when more than one link is found.
2284
2285
=back
2286
2287
=cut
2288
2289
80
80
795
use vars '%xpath_quote';
80
209
80
503566
2290
%xpath_quote = (
2291
'"' => '\"',
2292
#"'" => "\\'",
2293
#'[' => '[',
2294
#']' => ']',
2295
#'[' => '[\[]',
2296
#'[' => '\[',
2297
#']' => '[\]]',
2298
);
2299
2300
# Return the default limiter if no other limiting option is set:
2301
sub _default_limiter {
2302
0
0
my ($default, $options) = @_;
2303
0
0
if (! grep { exists $options->{ $_ } } qw(single one maybe all any)) {
0
2304
0
$options->{ $default } = 1;
2305
};
2306
return ()
2307
0
};
2308
2309
sub quote_xpath($) {
2310
0
0
0
local $_ = $_[0];
2311
0
0
s/(['"\[\]])/$xpath_quote{$1} || $1/ge;
0
2312
0
$_
2313
};
2314
2315
#sub perl_regex_to_xpath($) {
2316
# my ($re) = @_;
2317
# my $flags = '';
2318
# warn $re;
2319
# $re =~ s!^\(\?([a-z]*)\-[a-z]*:(.*)\)$!$2!
2320
# and $flags = $1;
2321
# warn qq{=> XPATH: "$re" , "$flags"};
2322
# ($re, $flags)
2323
#};
2324
2325
sub find_link_dom {
2326
0
0
1
my ($self,%opts) = @_;
2327
0
my %xpath_options;
2328
2329
0
for (qw(node document frames)) {
2330
# Copy over XPath options that were passed in
2331
0
0
if (exists $opts{ $_ }) {
2332
0
$xpath_options{ $_ } = delete $opts{ $_ };
2333
};
2334
};
2335
2336
0
my $single = delete $opts{ single };
2337
0
0
my $one = delete $opts{ one } || $single;
2338
0
0
0
if ($single and exists $opts{ n }) {
2339
0
croak "It doesn't make sense to use 'single' and 'n' option together"
2340
};
2341
0
0
my $n = (delete $opts{ n } || 1);
2342
0
0
$n--
2343
if ($n ne 'all'); # 1-based indexing
2344
0
my @spec;
2345
2346
# Decode text and text_contains into XPath
2347
0
for my $lvalue (qw( text id name class )) {
2348
0
my %lefthand = (
2349
text => 'text()',
2350
);
2351
0
my %match_op = (
2352
'' => q{%s="%s"},
2353
'contains' => q{contains(%s,"%s")},
2354
# Ideally we would also handle *_regex here, but Firefox XPath
2355
# does not support fn:matches() :-(
2356
#'regex' => q{matches(%s,"%s","%s")},
2357
);
2358
0
0
my $lhs = $lefthand{ $lvalue } || '@'.$lvalue;
2359
0
for my $op (keys %match_op) {
2360
0
my $v = $match_op{ $op };
2361
0
0
$op = '_'.$op if length($op);
2362
0
my $key = "${lvalue}$op";
2363
2364
0
0
if (exists $opts{ $key }) {
2365
0
my $p = delete $opts{ $key };
2366
0
push @spec, sprintf $v, $lhs, $p;
2367
};
2368
};
2369
};
2370
2371
0
0
if (my $p = delete $opts{ url }) {
2372
0
push @spec, sprintf '@href = "%s" or @src="%s"', quote_xpath $p, quote_xpath $p;
2373
}
2374
0
my @tags = (sort keys %link_spec);
2375
0
0
if (my $p = delete $opts{ tag }) {
2376
0
@tags = $p;
2377
};
2378
0
0
if (my $p = delete $opts{ tag_regex }) {
2379
0
@tags = grep /$p/, @tags;
2380
};
2381
2382
my $q = join '|',
2383
map {
2384
0
my @full = map {qq{($_)}} grep {defined} (@spec, $link_spec{$_}->{xpath});
0
0
0
2385
0
0
if (@full) {
2386
0
sprintf "//%s[%s]", $_, join " and ", @full;
2387
} else {
2388
0
sprintf "//%s", $_
2389
};
2390
} (@tags);
2391
#warn $q;
2392
2393
0
my @res = $self->xpath($q, %xpath_options );
2394
2395
0
0
if (keys %opts) {
2396
# post-filter the remaining links through WWW::Mechanize
2397
# for all the options we don't support with XPath
2398
2399
0
my $base = $self->base;
2400
0
require WWW::Mechanize;
2401
@res = grep {
2402
0
WWW::Mechanize::_match_any_link_parms($self->make_link($_,$base),\%opts)
0
2403
} @res;
2404
};
2405
2406
0
0
if ($one) {
2407
0
0
if (0 == @res) { $self->signal_condition( "No link found matching '$q'" )};
0
2408
0
0
if ($single) {
2409
0
0
if (1 < @res) {
2410
0
$self->highlight_node(@res);
2411
0
$self->signal_condition(
2412
sprintf "%d elements found found matching '%s'", scalar @res, $q
2413
);
2414
};
2415
};
2416
};
2417
2418
0
0
if ($n eq 'all') {
2419
return @res
2420
0
};
2421
0
$res[$n]
2422
}
2423
2424
=head2 C<< $mech->find_link( %options ) >>
2425
2426
print $_->text . "\n"
2427
for $mech->find_link( text_contains => 'CPAN' );
2428
2429
A method quite similar to L's method.
2430
The options are documented in C<< ->find_link_dom >>.
2431
2432
Returns a L object.
2433
2434
This defaults to not look through child frames.
2435
2436
=cut
2437
2438
sub find_link {
2439
0
0
1
my ($self,%opts) = @_;
2440
0
my $base = $self->base;
2441
croak "Option 'all' not available for ->find_link. Did you mean to call ->find_all_links()?"
2442
0
0
0
if 'all' eq ($opts{n} || '');
2443
0
0
if (my $link = $self->find_link_dom(frames => 0, %opts)) {
2444
0
return $self->make_link($link, $base)
2445
} else {
2446
return
2447
0
};
2448
};
2449
2450
=head2 C<< $mech->find_all_links( %options ) >>
2451
2452
print $_->text . "\n"
2453
for $mech->find_all_links( text_regex => qr/google/i );
2454
2455
Finds all links in the document.
2456
The options are documented in C<< ->find_link_dom >>.
2457
2458
Returns them as list or an array reference, depending
2459
on context.
2460
2461
This defaults to not look through child frames.
2462
2463
=cut
2464
2465
sub find_all_links {
2466
0
0
1
my ($self, %opts) = @_;
2467
0
$opts{ n } = 'all';
2468
0
my $base = $self->base;
2469
my @matches = map {
2470
0
$self->make_link($_, $base);
0
2471
} $self->find_all_links_dom( frames => 0, %opts );
2472
0
0
return @matches if wantarray;
2473
0
return \@matches;
2474
};
2475
2476
=head2 C<< $mech->find_all_links_dom %options >>
2477
2478
print $_->{innerHTML} . "\n"
2479
for $mech->find_all_links_dom( text_regex => qr/google/i );
2480
2481
Finds all matching linky DOM nodes in the document.
2482
The options are documented in C<< ->find_link_dom >>.
2483
2484
Returns them as list or an array reference, depending
2485
on context.
2486
2487
This defaults to not look through child frames.
2488
2489
=cut
2490
2491
sub find_all_links_dom {
2492
0
0
1
my ($self,%opts) = @_;
2493
0
$opts{ n } = 'all';
2494
0
my @matches = $self->find_link_dom( frames => 0, %opts );
2495
0
0
return @matches if wantarray;
2496
0
return \@matches;
2497
};
2498
2499
=head2 C<< $mech->follow_link( $link ) >>
2500
2501
=head2 C<< $mech->follow_link( %options ) >>
2502
2503
$mech->follow_link( xpath => '//a[text() = "Click here!"]' );
2504
2505
Follows the given link. Takes the same parameters that C
2506
uses. In addition, C can be passed to (not) force
2507
waiting for a new page to be loaded.
2508
2509
Note that C<< ->follow_link >> will only try to follow link-like
2510
things like C tags.
2511
2512
=cut
2513
2514
sub follow_link {
2515
0
0
1
my ($self,$link,%opts);
2516
0
0
if (@_ == 2) { # assume only a link parameter
2517
0
($self,$link) = @_;
2518
0
$self->click($link);
2519
} else {
2520
0
($self,%opts) = @_;
2521
0
_default_limiter( one => \%opts );
2522
0
$link = $self->find_link_dom(%opts);
2523
0
$self->click({ dom => $link, %opts });
2524
}
2525
}
2526
2527
=head2 C<< $mech->xpath( $query, %options ) >>
2528
2529
my $link = $mech->xpath('//a[@id="clickme"]', one => 1);
2530
# croaks if there is no link or more than one link found
2531
2532
my @para = $mech->xpath('//p');
2533
# Collects all paragraphs
2534
2535
my @para_text = $mech->xpath('//p/text()', type => $mech->xpathResult('STRING_TYPE'));
2536
# Collects all paragraphs as text
2537
2538
Runs an XPath query in Firefox against the current document.
2539
2540
If you need more information about the returned results,
2541
use the C<< ->xpathEx() >> function.
2542
2543
The options allow the following keys:
2544
2545
=over 4
2546
2547
=item *
2548
2549
C<< document >> - document in which the query is to be executed. Use this to
2550
search a node within a specific subframe of C<< $mech->document >>.
2551
2552
=item *
2553
2554
C<< frames >> - if true, search all documents in all frames and iframes.
2555
This may or may not conflict with C. This will default to the
2556
C setting of the WWW::Mechanize::Firefox object.
2557
2558
=item *
2559
2560
C<< node >> - node relative to which the query is to be executed. Note
2561
that you will have to use a relative XPath expression as well. Use
2562
2563
.//foo
2564
2565
instead of
2566
2567
//foo
2568
2569
=item *
2570
2571
C<< single >> - If true, ensure that only one element is found. Otherwise croak
2572
or carp, depending on the C parameter.
2573
2574
=item *
2575
2576
C<< one >> - If true, ensure that at least one element is found. Otherwise croak
2577
or carp, depending on the C parameter.
2578
2579
=item *
2580
2581
C<< maybe >> - If true, ensure that at most one element is found. Otherwise
2582
croak or carp, depending on the C parameter.
2583
2584
=item *
2585
2586
C<< all >> - If true, return all elements found. This is the default.
2587
You can use this option if you want to use C<< ->xpath >> in scalar context
2588
to count the number of matched elements, as it will otherwise emit a warning
2589
for each usage in scalar context without any of the above restricting options.
2590
2591
=item *
2592
2593
C<< any >> - no error is raised, no matter if an item is found or not.
2594
2595
=item *
2596
2597
C<< type >> - force the return type of the query.
2598
2599
type => $mech->xpathResult('ORDERED_NODE_SNAPSHOT_TYPE'),
2600
2601
WWW::Mechanize::Firefox tries a best effort in giving you the appropriate
2602
result of your query, be it a DOM node or a string or a number. In the case
2603
you need to restrict the return type, you can pass this in.
2604
2605
The allowed strings are documented in the MDN. Interesting types are
2606
2607
ANY_TYPE (default, uses whatever things the query returns)
2608
STRING_TYPE
2609
NUMBER_TYPE
2610
ORDERED_NODE_SNAPSHOT_TYPE
2611
2612
=back
2613
2614
Returns the matched results.
2615
2616
You can pass in a list of queries as an array reference for the first parameter.
2617
The result will then be the list of all elements matching any of the queries.
2618
2619
This is a method that is not implemented in WWW::Mechanize.
2620
2621
In the long run, this should go into a general plugin for
2622
L.
2623
2624
=cut
2625
2626
sub xpath {
2627
0
0
1
my ($self,$query,%options) = @_;
2628
2629
0
my $single = $options{ single };
2630
0
my $first = $options{ one };
2631
0
my $maybe = $options{ maybe };
2632
0
my $any = $options{ any };
2633
0
0
my $return_first_element = ($single or $first or $maybe or $any );
2634
2635
# Construct some helper variables
2636
0
0
my $zero_allowed = not ($single or $first);
2637
0
0
my $two_allowed = not( $single or $maybe);
2638
2639
# Sanity check for the common error of
2640
# my $item = $mech->xpath("//foo");
2641
0
0
0
if (! exists $options{ all } and not ($return_first_element)) {
2642
0
0
0
$self->signal_condition(join "\n",
2643
"You asked for many elements but seem to only want a single item.",
2644
"Did you forget to pass the 'single' option with a true value?",
2645
"Pass 'all => 1' to suppress this message and receive the count of items.",
2646
) if defined wantarray and !wantarray;
2647
};
2648
2649
# How can we return here a set of strings
2650
# if we don't return an array in .result?!
2651
my @res= map {
2652
!defined $_->{resultType}
2653
? ()
2654
: $_->{ resultType } == $self->{ XpathResultTypes }->{ORDERED_NODE_SNAPSHOT_TYPE }
2655
|| $_->{ resultType } == $self->{ XpathResultTypes }->{UNORDERED_NODE_SNAPSHOT_TYPE }
2656
|| $_->{ resultType } == $self->{ XpathResultTypes }->{ORDERED_NODE_ITERATOR_TYPE }
2657
|| $_->{ resultType } == $self->{ XpathResultTypes }->{UNORDERED_NODE_ITERATOR_TYPE }
2658
0
? @{ $_->{result} }
2659
: $_->{ result }
2660
0
0
0
} $self->xpathEx(
0
2661
$query,
2662
#type => $self->{XpathResultTypes}->{ORDERED_NODE_SNAPSHOT_TYPE},
2663
type => $self->{XpathResultTypes}->{ANY_TYPE},
2664
0
return_first => $return_first_element,
2665
%options
2666
);
2667
2668
0
0
0
if (! $zero_allowed and @res == 0) {
2669
0
0
$options{ user_info } ||= $query;
2670
0
$self->signal_condition( "No elements found for $options{ user_info }" );
2671
};
2672
2673
0
0
0
if (! $two_allowed and @res > 1) {
2674
0
0
$options{ user_info } ||= $query;
2675
0
$self->highlight_node(@res);
2676
0
$self->signal_condition( (scalar @res) . " elements found for $options{ user_info }" );
2677
};
2678
2679
0
0
$return_first_element ? $res[0] : @res
2680
};
2681
2682
sub _initXpathResultTypes {
2683
0
0
my( $self )= @_;
2684
$self->{XpathResultTypes} ||= {
2685
0
0
ANY_TYPE => $self->repl->constant('XPathResult.ANY_TYPE'),
2686
NUMBER_TYPE => $self->repl->constant('XPathResult.NUMBER_TYPE'),
2687
STRING_TYPE => $self->repl->constant('XPathResult.STRING_TYPE'),
2688
BOOLEAN_TYPE => $self->repl->constant('XPathResult.BOOLEAN_TYPE'),
2689
UNORDERED_NODE_ITERATOR_TYPE => $self->repl->constant('XPathResult.UNORDERED_NODE_ITERATOR_TYPE'),
2690
ORDERED_NODE_ITERATOR_TYPE => $self->repl->constant('XPathResult.ORDERED_NODE_ITERATOR_TYPE'),
2691
UNORDERED_NODE_SNAPSHOT_TYPE => $self->repl->constant('XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE'),
2692
ORDERED_NODE_SNAPSHOT_TYPE => $self->repl->constant('XPathResult.ORDERED_NODE_SNAPSHOT_TYPE'),
2693
ANY_UNORDERED_TYPE => $self->repl->constant('XPathResult.ANY_UNORDERED_NODE_TYPE'),
2694
FIRST_ORDERED_NODE_TYPE => $self->repl->constant('XPathResult.FIRST_ORDERED_NODE_TYPE'),
2695
};
2696
2697
0
$self->{XpathResultTypenames} = +{ reverse %{ $self->{XpathResultTypes} } };
0
2698
};
2699
2700
0
0
0
sub xpathResultType { $_[0]->{ XpathResultTypenames }->{ $_[1] } };
2701
0
0
0
sub xpathResult { $_[0]->{XpathResultTypes}{$_[1]}; }
2702
2703
=head2 C<< $mech->xpathEx( $query, %options ) >>
2704
2705
my @links = $mech->xpathEx('//a[@id="clickme"]');
2706
2707
Runs an XPath query in Firefox against a document. Returns a list
2708
of found elements. Each element in the result has the following properties:
2709
2710
=over 4
2711
2712
=item *
2713
2714
C<< resultType >> - the type of the result. The numerical value of C<< $mech->xpathResult() >>.
2715
2716
=item *
2717
2718
C<< resultSize >> - the number of elements in this result. This is 1 for atomic results like
2719
strings or numbers, and the number of elements for nodesets.
2720
2721
=item *
2722
2723
C<< result >> - the best result available. This is the nodeset
2724
or the text or number, depending on the query.
2725
2726
=back
2727
2728
=cut
2729
2730
sub xpathEx {
2731
# Returns verbose information about how things matched
2732
0
0
1
my ($self, $query, %options) = @_;
2733
2734
0
0
0
if ('ARRAY' ne (ref $query||'')) {
2735
0
$query = [$query];
2736
};
2737
2738
0
0
if ($options{ node }) {
2739
0
0
$options{ document } ||= $options{ node }->{ownerDocument};
2740
#warn "Have node, searching below node";
2741
} else {
2742
0
0
$options{ document } ||= $self->document;
2743
#warn "Searching below given document";
2744
#$options{node} = $options{document};
2745
};
2746
2747
0
0
$options{type} ||= $self->{XpathResult}->{ANY_TYPE};
2748
2749
0
0
$options{ user_info } ||= join " or ", map {qq{'$_'}} @$query;
0
2750
2751
# Sanity check for the common error of
2752
# my $item = $mech->xpathEx("//foo");
2753
0
0
if (! wantarray) {
2754
0
$self->signal_condition(join "\n",
2755
"->xpathEx needs to be called in list context.",
2756
);
2757
};
2758
2759
0
0
if (not exists $options{ frames }) {
2760
0
$options{frames} = $self->{frames};
2761
};
2762
2763
0
my $query_xpath = $self->repl->declare(<<'JS');
2764
function(doc, q, ref, type) {
2765
var xpr = doc.evaluate(q, ref, null, type, null);
2766
var r = { resultType: xpr.resultType, resultSize: 0, result: null };
2767
switch(xpr.resultType) {
2768
case XPathResult.NUMBER_TYPE:
2769
r.result= r.numberValue = xpr.numberValue;
2770
r.resultSize= 1;
2771
break;
2772
case XPathResult.STRING_TYPE:
2773
r.result= r.stringValue = xpr.stringValue;
2774
r.resultSize= 1;
2775
break;
2776
case XPathResult.BOOLEAN_TYPE:
2777
r.result= r.booleanValue = xpr.booleanValue;
2778
r.resultSize= 1;
2779
break;
2780
case XPathResult.UNORDERED_NODE_ITERATOR_TYPE:
2781
case XPathResult.ORDERED_NODE_ITERATOR_TYPE:
2782
r.result= r.nodeSet = [];
2783
var n;
2784
while (n = xpr.iterateNext()) {
2785
r.nodeSet.push(n);
2786
r.resultSize++;
2787
}
2788
break;
2789
case XPathResult.UNORDERED_NODE_SNAPSHOT_TYPE:
2790
case XPathResult.ORDERED_NODE_SNAPSHOT_TYPE:
2791
r.result= r.nodeSet = [];
2792
r.resultSize= xpr.snapshotLength;
2793
for (var i = 0 ; i < xpr.snapshotLength; i++ ) {
2794
r.nodeSet[i] = xpr.snapshotItem(i);
2795
}
2796
break;
2797
case XPathResult.ANY_UNORDERED_NODE_TYPE:
2798
case XPathResult.FIRST_ORDERED_NODE_TYPE:
2799
r.result= r.singleNodeValue = xpr.singleNodeValue;
2800
r.resultSize= 1;
2801
break;
2802
default:
2803
break;
2804
}
2805
return r;
2806
}
2807
JS
2808
2809
0
my @res;
2810
2811
DOCUMENTS: {
2812
0
my @documents = $options{ document };
0
2813
#warn "Invalid root document" unless $options{ document };
2814
2815
# recursively join the results of sub(i)frames if wanted
2816
# This should maybe go into the loop to expand every frame as we descend
2817
# into the available subframes
2818
2819
0
while (@documents) {
2820
0
my $doc = shift @documents;
2821
#warn "Invalid document" unless $doc;
2822
2823
0
0
my $n = $options{ node } || $doc;
2824
#warn ">Searching @$query in $doc->{title}";
2825
# Munge the multiple @$queries into one:
2826
0
my $q = join "|", @$query;
2827
#warn $q;
2828
0
my @found = $query_xpath->($doc, $q, $n, $options{type});
2829
0
push @res, @found;
2830
2831
# A small optimization to return if we already have enough elements
2832
# We can't do this on $return_first as there might be more elements
2833
0
0
0
if( @res and $options{ return_first } and grep { $_->{resultSize} } @res ) {
0
0
2834
0
@res= grep { $_->{resultSize} } @res;
0
2835
0
last DOCUMENTS;
2836
};
2837
2838
0
0
0
if ($options{ frames } and not $options{ node }) {
2839
#warn ">Expanding below " . $doc->{title};
2840
#local $nesting .= "--";
2841
0
my @d = $self->expand_frames( $options{ frames }, $doc );
2842
#warn "Found $_->{title}" for @d;
2843
0
push @documents, @d;
2844
};
2845
};
2846
};
2847
2848
@res
2849
0
}
2850
2851
=head2 C<< $mech->selector( $css_selector, %options ) >>
2852
2853
my @text = $mech->selector('p.content');
2854
2855
Returns all nodes matching the given CSS selector. If
2856
C<$css_selector> is an array reference, it returns
2857
all nodes matched by any of the CSS selectors in the array.
2858
2859
This takes the same options that C<< ->xpath >> does.
2860
2861
In the long run, this should go into a general plugin for
2862
L.
2863
2864
=cut
2865
2866
sub selector {
2867
0
0
1
my ($self,$query,%options) = @_;
2868
0
0
$options{ user_info } ||= "CSS selector '$query'";
2869
0
0
0
if ('ARRAY' ne (ref $query || '')) {
2870
0
$query = [$query];
2871
};
2872
0
0
my $root = $options{ node } ? './' : '';
2873
0
my @q = map { selector_to_xpath($_, root => $root) } @$query;
0
2874
0
$self->xpath(\@q, %options);
2875
};
2876
2877
=head2 C<< $mech->by_id( $id, %options ) >>
2878
2879
my @text = $mech->by_id('_foo:bar');
2880
2881
Returns all nodes matching the given ids. If
2882
C<$id> is an array reference, it returns
2883
all nodes matched by any of the ids in the array.
2884
2885
This method is equivalent to calling C<< ->xpath >> :
2886
2887
$self->xpath(qq{//*[\@id="$_"], %options)
2888
2889
It is convenient when your element ids get mistaken for
2890
CSS selectors.
2891
2892
=cut
2893
2894
sub by_id {
2895
0
0
1
my ($self,$query,%options) = @_;
2896
0
0
0
if ('ARRAY' ne (ref $query||'')) {
2897
0
$query = [$query];
2898
};
2899
$options{ user_info } ||= "id "
2900
0
0
. join(" or ", map {qq{'$_'}} @$query)
0
2901
. " found";
2902
0
$query = [map { qq{.//*[\@id="$_"]} } @$query];
0
2903
0
$self->xpath($query, %options)
2904
}
2905
2906
=head2 C<< $mech->click( $name [,$x ,$y] ) >>
2907
2908
$mech->click( 'go' );
2909
$mech->click({ xpath => '//button[@name="go"]' });
2910
2911
Has the effect of clicking a button (or other element) on the current form. The
2912
first argument is the C of the button to be clicked. The second and third
2913
arguments (optional) allow you to specify the (x,y) coordinates of the click.
2914
2915
If there is only one button on the form, C<< $mech->click() >> with
2916
no arguments simply clicks that one button.
2917
2918
If you pass in a hash reference instead of a name,
2919
the following keys are recognized:
2920
2921
=over 4
2922
2923
=item *
2924
2925
C - Find the element to click by the CSS selector
2926
2927
=item *
2928
2929
C - Find the element to click by the XPath query
2930
2931
=item *
2932
2933
C - Click on the passed DOM element
2934
2935
You can use this to click on arbitrary page elements. There is no convenient
2936
way to pass x/y co-ordinates with this method.
2937
2938
=item *
2939
2940
C - Click on the element with the given id
2941
2942
This is useful if your document ids contain characters that
2943
do look like CSS selectors. It is equivalent to
2944
2945
xpath => qq{//*[\@id="$id"]}
2946
2947
=item *
2948
2949
C - Synchronize the click (default is 1)
2950
2951
Synchronizing means that WWW::Mechanize::Firefox will wait until
2952
one of the events listed in C is fired. You want to switch
2953
it off when there will be no HTTP response or DOM event fired, for
2954
example for clicks that only modify the DOM.
2955
2956
You can pass in a scalar that is a false value to not wait for
2957
any kind of event.
2958
2959
Passing in an array reference will use the array elements as
2960
Javascript events to wait for.
2961
2962
Passing in any other true value will use the value of C<< ->events >>
2963
as the list of events to wait for.
2964
2965
=back
2966
2967
Returns a L object.
2968
2969
As a deviation from the WWW::Mechanize API, you can also pass a
2970
hash reference as the first parameter. In it, you can specify
2971
the parameters to search much like for the C calls.
2972
2973
Note: Currently, clicking on images with the C attribute
2974
does not trigger the move to the new URL. A workaround is to program
2975
the new URL into your script.
2976
2977
=cut
2978
2979
sub click {
2980
0
0
1
my ($self,$name,$x,$y) = @_;
2981
0
my %options;
2982
my @buttons;
2983
0
0
0
if (! defined $name) {
0
0
0
2984
0
croak("->click called with undef link");
2985
} elsif (ref $name and blessed($name) and $name->can('__click')) {
2986
0
$options{ dom } = $name;
2987
} elsif (ref $name eq 'HASH') { # options
2988
0
%options = %$name;
2989
} else {
2990
0
$options{ name } = $name;
2991
};
2992
0
0
if (exists $options{ name }) {
2993
0
0
$name = quotemeta($options{ name }|| '');
2994
$options{ xpath } = [
2995
0
sprintf( q{//*[(translate(local-name(.), "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")="button" and @name="%s") or (translate(local-name(.), "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")="input" and (@type="button" or @type="submit" or @type="image") and @name="%s")]}, $name, $name),
2996
];
2997
0
0
if ($options{ name } eq '') {
2998
0
push @{ $options{ xpath }},
0
2999
q{//*[(translate(local-name(.), "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz") = "button" or translate(local-name(.), "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")="input") and @type="button" or @type="submit" or @type="image"]},
3000
;
3001
};
3002
0
$options{ user_info } = "Button with name '$name'";
3003
};
3004
3005
0
0
if (! exists $options{ synchronize }) {
0
3006
0
$options{ synchronize } = $self->events;
3007
} elsif( ! ref $options{ synchronize }) {
3008
$options{ synchronize } = $options{ synchronize }
3009
0
0
? $self->events
3010
: [],
3011
};
3012
3013
0
0
if ($options{ dom }) {
3014
0
@buttons = $options{ dom };
3015
} else {
3016
0
@buttons = $self->_option_query(%options);
3017
};
3018
3019
$self->_sync_call(
3020
$options{ synchronize }, sub { # ,'abort'
3021
0
0
$buttons[0]->__click($x,$y);
3022
}
3023
0
);
3024
3025
0
0
if (defined wantarray) {
3026
0
return $self->response
3027
};
3028
}
3029
3030
=head2 C<< $mech->click_button( ... ) >>
3031
3032
$mech->click_button( name => 'go' );
3033
$mech->click_button( input => $mybutton );
3034
3035
Has the effect of clicking a button on the current form by specifying its
3036
name, value, or index. Its arguments are a list of key/value pairs. Only
3037
one of name, number, input or value must be specified in the keys.
3038
3039
=over 4
3040
3041
=item *
3042
3043
C - name of the button
3044
3045
=item *
3046
3047
C - value of the button
3048
3049
=item *
3050
3051
C - DOM node
3052
3053
=item *
3054
3055
C - id of the button
3056
3057
=item *
3058
3059
C - number of the button
3060
3061
=back
3062
3063
If you find yourself wanting to specify a button through its
3064
C or C, consider using C<< ->click >> instead.
3065
3066
=cut
3067
3068
sub click_button {
3069
0
0
1
my ($self,%options) = @_;
3070
0
my $node;
3071
my $xpath;
3072
0
my $user_message;
3073
0
0
if (exists $options{ input }) {
0
0
0
0
3074
0
$node = delete $options{ input };
3075
} elsif (exists $options{ name }) {
3076
0
my $v = delete $options{ name };
3077
0
$xpath = sprintf( '//*[(translate(local-name(.), "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")="button" and @name="%s") or (translate(local-name(.), "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")="input" and (@type="button" or @type="submit") and @name="%s")]', $v, $v);
3078
0
$user_message = "Button name '$v' unknown";
3079
} elsif (exists $options{ value }) {
3080
0
my $v = delete $options{ value };
3081
0
$xpath = sprintf( '//*[(translate(local-name(.), "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz") = "button" and @value="%s") or (translate(local-name(.), "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")="input" and (@type="button" or @type="submit") and @value="%s")]', $v, $v);
3082
0
$user_message = "Button value '$v' unknown";
3083
} elsif (exists $options{ id }) {
3084
0
my $v = delete $options{ id };
3085
0
$xpath = sprintf '//*[@id="%s"]', $v;
3086
0
$user_message = "Button name '$v' unknown";
3087
} elsif (exists $options{ number }) {
3088
0
my $v = delete $options{ number };
3089
0
$xpath = sprintf '//*[translate(local-name(.), "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz") = "button" or (translate(local-name(.), "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz") = "input" and @type="submit")][%s]', $v;
3090
0
$user_message = "Button number '$v' out of range";
3091
};
3092
0
0
$node ||= $self->xpath( $xpath,
3093
node => $self->current_form,
3094
single => 1,
3095
user_message => $user_message,
3096
);
3097
0
0
if ($node) {
3098
0
$self->click({ dom => $node, %options });
3099
} else {
3100
3101
0
$self->signal_condition($user_message);
3102
};
3103
3104
}
3105
3106
=head1 FORM METHODS
3107
3108
=head2 C<< $mech->current_form() >>
3109
3110
print $mech->current_form->{name};
3111
3112
Returns the current form.
3113
3114
This method is incompatible with L.
3115
It returns the DOM C<<
3116
a L instance.
3117
3118
Note that WWW::Mechanize::Firefox has little way to know
3119
that the current form is not displayed in the browser
3120
anymore, so it often holds on to the last value. If
3121
you want to make sure that a fresh or no form is used,
3122
remove it:
3123
3124
$mech->clear_current_form;
3125
3126
The current form will be reset by WWW::Mechanize::Firefox
3127
on calls to C<< ->get() >> and C<< ->get_local() >>,
3128
and on calls to C<< ->submit() >> and C<< ->submit_with_fields >>.
3129
3130
=cut
3131
3132
sub current_form {
3133
$_[0]->{current_form}
3134
0
0
1
};
3135
sub clear_current_form {
3136
0
0
0
undef $_[0]->{current_form};
3137
};
3138
3139
=head2 C<< $mech->form_name( $name [, %options] ) >>
3140
3141
$mech->form_name( 'search' );
3142
3143
Selects the current form by its name. The options
3144
are identical to those accepted by the L<< /$mech->xpath >> method.
3145
3146
=cut
3147
3148
sub form_name {
3149
0
0
1
my ($self,$name,%options) = @_;
3150
0
$name = quote_xpath $name;
3151
0
_default_limiter( single => \%options );
3152
0
$self->{current_form} = $self->selector("form[name='$name']",
3153
user_info => "form name '$name'",
3154
%options
3155
);
3156
};
3157
3158
=head2 C<< $mech->form_id( $id [, %options] ) >>
3159
3160
$mech->form_id( 'login' );
3161
3162
Selects the current form by its C attribute.
3163
The options
3164
are identical to those accepted by the L<< /$mech->xpath >> method.
3165
3166
This is equivalent to calling
3167
3168
$mech->by_id($id,single => 1,%options)
3169
3170
=cut
3171
3172
sub form_id {
3173
0
0
1
my ($self,$name,%options) = @_;
3174
3175
0
_default_limiter( single => \%options );
3176
0
$self->{current_form} = $self->by_id($name,
3177
user_info => "form with id '$name'",
3178
%options
3179
);
3180
};
3181
3182
=head2 C<< $mech->form_number( $number [, %options] ) >>
3183
3184
$mech->form_number( 2 );
3185
3186
Selects the Ith form.
3187
The options
3188
are identical to those accepted by the L<< /$mech->xpath >> method.
3189
3190
=cut
3191
3192
sub form_number {
3193
0
0
1
my ($self,$number,%options) = @_;
3194
3195
0
_default_limiter( single => \%options );
3196
0
$self->{current_form} = $self->xpath("(//form)[$number]",
3197
user_info => "form number $number",
3198
%options
3199
);
3200
};
3201
3202
=head2 C<< $mech->form_with_fields( [$options], @fields ) >>
3203
3204
$mech->form_with_fields(
3205
'user', 'password'
3206
);
3207
3208
Find the form which has the listed fields.
3209
3210
If the first argument is a hash reference, it's taken
3211
as options to C<< ->xpath >>.
3212
3213
See also L<< /$mech->submit_form >>.
3214
3215
=cut
3216
3217
sub form_with_fields {
3218
0
0
1
my ($self,@fields) = @_;
3219
0
my $options = {};
3220
0
0
if (ref $fields[0] eq 'HASH') {
3221
0
$options = shift @fields;
3222
};
3223
0
my @clauses = map { $self->application->element_query([qw[input select textarea]], { 'name' => $_ })} @fields;
0
3224
3225
3226
0
my $q = "//form[" . join( " and ", @clauses)."]";
3227
#warn $q;
3228
0
_default_limiter( single => $options );
3229
0
$self->{current_form} = $self->xpath($q,
3230
user_info => "form with fields [@fields]",
3231
%$options
3232
);
3233
};
3234
3235
=head2 C<< $mech->forms( %options ) >>
3236
3237
my @forms = $mech->forms();
3238
3239
When called in a list context, returns a list
3240
of the forms found in the last fetched page.
3241
In a scalar context, returns a reference to
3242
an array with those forms.
3243
3244
The options
3245
are identical to those accepted by the L<< /$mech->selector >> method.
3246
3247
The returned elements are the DOM C<<
3248
3249
=cut
3250
3251
sub forms {
3252
0
0
1
my ($self, %options) = @_;
3253
0
my @res = $self->selector('form', %options);
3254
return wantarray ? @res
3255
0
0
: \@res
3256
};
3257
3258
=head2 C<< $mech->field( $selector, $value, [,\@pre_events [,\@post_events]] ) >>
3259
3260
$mech->field( user => 'joe' );
3261
$mech->field( not_empty => '', [], [] ); # bypass JS validation
3262
3263
Sets the field with the name given in C<$selector> to the given value.
3264
Returns the value.
3265
3266
The method understands very basic CSS selectors in the value for C<$selector>,
3267
like the L find_input() method.
3268
3269
A selector prefixed with '#' must match the id attribute of the input.
3270
A selector prefixed with '.' matches the class attribute. A selector
3271
prefixed with '^' or with no prefix matches the name attribute.
3272
3273
By passing the array reference C<@pre_events>, you can indicate which
3274
Javascript events you want to be triggered before setting the value.
3275
C<@post_events> contains the events you want to be triggered
3276
after setting the value.
3277
3278
By default, the events set in the
3279
constructor for C and C
3280
are triggered.
3281
3282
=cut
3283
3284
sub field {
3285
0
0
1
my ($self,$name,$value,$pre,$post) = @_;
3286
0
$self->get_set_value(
3287
name => $name,
3288
value => $value,
3289
pre => $pre,
3290
post => $post,
3291
node => $self->current_form,
3292
);
3293
}
3294
3295
=head2 C<< $mech->value( $selector_or_element, [%options] ) >>
3296
3297
print $mech->value( 'user' );
3298
3299
Returns the value of the field given by C<$selector_or_name> or of the
3300
DOM element passed in.
3301
3302
The legacy form of
3303
3304
$mech->value( name => value );
3305
3306
is also still supported but will likely be deprecated
3307
in favour of the C<< ->field >> method.
3308
3309
For fields that can have multiple values, like a C field,
3310
the method is context sensitive and returns the first selected
3311
value in scalar context and all values in list context.
3312
3313
=cut
3314
3315
sub value {
3316
0
0
0
1
if (@_ == 3) {
3317
0
my ($self,$name,$value) = @_;
3318
0
return $self->field($name => $value);
3319
} else {
3320
0
my ($self,$name,%options) = @_;
3321
0
return $self->get_set_value(
3322
node => $self->current_form,
3323
%options,
3324
name => $name,
3325
);
3326
};
3327
};
3328
3329
=head2 C<< $mech->get_set_value( %options ) >>
3330
3331
Allows fine-grained access to getting/setting a value
3332
with a different API. Supported keys are:
3333
3334
pre
3335
post
3336
name
3337
node
3338
value
3339
3340
in addition to all keys that C<< $mech->xpath >> supports.
3341
3342
=cut
3343
3344
sub _field_by_name {
3345
0
0
my ($self,%options) = @_;
3346
0
my @fields;
3347
0
my $name = delete $options{ name };
3348
0
my $attr = 'name';
3349
0
0
if ($name =~ s/^\^//) { # if it starts with ^, it's supposed to be a name
0
0
3350
0
$attr = 'name'
3351
} elsif ($name =~ s/^#//) {
3352
0
$attr = 'id'
3353
} elsif ($name =~ s/^\.//) {
3354
0
$attr = 'class'
3355
};
3356
0
0
if (blessed $name) {
3357
0
@fields = $name;
3358
} else {
3359
0
_default_limiter( single => \%options );
3360
0
my $query = $self->application->element_query([qw[input select textarea]], { $attr => $name });
3361
#warn $query;
3362
0
@fields = $self->xpath($query,%options);
3363
};
3364
@fields
3365
0
}
3366
3367
sub get_set_value {
3368
0
0
1
my ($self,%options) = @_;
3369
0
my $set_value = exists $options{ value };
3370
0
my $value = delete $options{ value };
3371
0
0
my $pre = delete $options{pre} || $self->{pre_value};
3372
0
0
my $post = delete $options{post} || $self->{post_value};
3373
0
my $name = delete $options{ name };
3374
0
my @fields = $self->_field_by_name(
3375
name => $name,
3376
user_info => "input with name '$name'",
3377
%options );
3378
0
0
$pre = [$pre]
3379
if (! ref $pre);
3380
0
0
$post = [$post]
3381
if (! ref $post);
3382
3383
0
0
if ($fields[0]) {
3384
0
my $tag = $fields[0]->{tagName};
3385
0
0
if ($set_value) {
3386
0
for my $ev (@$pre) {
3387
0
$fields[0]->__event($ev);
3388
};
3389
3390
0
0
if ('select' eq $tag) {
3391
0
$self->select($fields[0], $value);
3392
} else {
3393
0
$fields[0]->{value} = $value;
3394
};
3395
3396
0
for my $ev (@$post) {
3397
0
$fields[0]->__event($ev);
3398
};
3399
};
3400
# What about 'checkbox'es/radioboxes?
3401
3402
# Don't bother to fetch the field's value if it's not wanted
3403
0
0
return unless defined wantarray;
3404
3405
# We could save some work here for the simple case of single-select
3406
# dropdowns by not enumerating all options
3407
0
0
if ('SELECT' eq uc $tag) {
3408
0
my @options = $self->xpath('.//option', node => $fields[0] );
3409
0
my @values = map { $_->{value} } grep { $_->{selected} } @options;
0
0
3410
0
0
if (wantarray) {
3411
return @values
3412
0
} else {
3413
0
return $values[0];
3414
}
3415
} else {
3416
return $fields[0]->{value}
3417
0
};
3418
} else {
3419
return
3420
0
}
3421
}
3422
3423
=head2 C<< $mech->select( $name, $value ) >>
3424
3425
=head2 C<< $mech->select( $name, \@values ) >>
3426
3427
Given the name of a C field, set its value to the value
3428
specified. If the field is not C<< >> and the
3429
C<$value> is an array, only the B value will be set.
3430
Passing C<$value> as a hash with
3431
an C key selects an item by number (e.g.
3432
C<< {n => 3} >> or C<< {n => [2,4]} >>).
3433
The numbering starts at 1. This applies to the current form.
3434
3435
If you have a field with C<< >> and you pass a single
3436
C<$value>, then C<$value> will be added to the list of fields selected,
3437
without clearing the others. However, if you pass an array reference,
3438
then all previously selected values will be cleared.
3439
3440
Returns true on successfully setting the value. On failure, returns
3441
false and calls C<< $self>warn() >> with an error message.
3442
3443
=cut
3444
3445
sub select {
3446
0
0
1
my ($self, $name, $value) = @_;
3447
0
my ($field) = $self->_field_by_name(
3448
node => $self->current_form,
3449
name => $name,
3450
#%options,
3451
);
3452
3453
0
0
if (! $field) {
3454
return
3455
0
};
3456
3457
0
my @options = $self->xpath( './/option', node => $field);
3458
0
my @by_index;
3459
my @by_value;
3460
0
my $single = $field->{type} eq "select-one";
3461
0
my $deselect;
3462
3463
0
0
0
if ('HASH' eq ref $value||'') {
0
0
3464
0
for (keys %$value) {
3465
0
0
$self->warn(qq{Unknown select value parameter "$_"})
3466
unless $_ eq 'n';
3467
}
3468
3469
0
$deselect = ref $value->{n};
3470
0
0
@by_index = ref $value->{n} ? @{ $value->{n} } : $value->{n};
0
3471
} elsif ('ARRAY' eq ref $value||'') {
3472
# clear all preselected values
3473
0
$deselect = 1;
3474
0
@by_value = @{ $value };
0
3475
} else {
3476
0
@by_value = $value;
3477
};
3478
3479
0
0
if ($deselect) {
3480
0
for my $o (@options) {
3481
0
$o->{selected} = 0;
3482
}
3483
};
3484
3485
0
0
if ($single) {
3486
# Only use the first element for single-element boxes
3487
0
0
$#by_index = 0+@by_index ? 0 : -1;
3488
0
0
$#by_value = 0+@by_value ? 0 : -1;
3489
};
3490
3491
# Select the items, either by index or by value
3492
0
for my $idx (@by_index) {
3493
0
$options[$idx-1]->{selected} = 1;
3494
};
3495
3496
0
for my $v (@by_value) {
3497
0
my $option = $self->xpath( sprintf( './/option[@value="%s"]', quote_xpath $v) , node => $field, single => 1 );
3498
0
$option->{selected} = 1;
3499
};
3500
3501
0
return @by_index + @by_value > 0;
3502
}
3503
3504
=head2 C<< $mech->tick( $name, $value [, $set ] ) >>
3505
3506
$mech->tick("confirmation_box", 'yes');
3507
3508
"Ticks" the first checkbox that has both the name and value associated with it
3509
on the current form. Dies if there is no named check box for that value.
3510
Passing in a false value as the third optional argument will cause the
3511
checkbox to be unticked.
3512
3513
(Un)ticking the checkbox is done by sending a click event to it if needed.
3514
If C<$value> is C, the first checkbox matching C<$name> will
3515
be (un)ticked.
3516
3517
If C<$name> is a reference to a hash, that hash will be used
3518
as the options to C<< ->find_link_dom >> to find the element.
3519
3520
=cut
3521
3522
sub tick {
3523
0
0
1
my ($self, $name, $value, $set) = @_;
3524
0
0
$set = 1
3525
if (@_ < 4);
3526
0
my %options;
3527
my @boxes;
3528
3529
0
0
0
if (! defined $name) {
0
0
0
3530
0
croak("->tick called with undef name");
3531
} elsif (ref $name and blessed($name) and $name->can('__click')) {
3532
0
$options{ dom } = $name;
3533
} elsif (ref $name eq 'HASH') { # options
3534
0
%options = %$name;
3535
} else {
3536
0
$options{ name } = $name;
3537
};
3538
3539
0
0
if (exists $options{ name }) {
3540
0
my $attr = 'name';
3541
0
0
if ($name =~ s/^\^//) { # if it starts with ^, it's supposed to be a name
0
0
3542
0
$attr = 'name'
3543
} elsif ($name =~ s/^#//) {
3544
0
$attr = 'id'
3545
} elsif ($name =~ s/^\.//) {
3546
0
$attr = 'class'
3547
};
3548
0
$name = quotemeta($name);
3549
0
0
$value = quotemeta($value) if $value;
3550
3551
0
_default_limiter( one => \%options );
3552
$options{ xpath } = [
3553
0
0
defined $value
3554
? sprintf( q{//input[@type="checkbox" and @%s="%s" and @value="%s"]}, $attr, $name, $value)
3555
: sprintf( q{//input[@type="checkbox" and @%s="%s"]}, $attr, $name)
3556
];
3557
0
0
$options{ user_info } = defined $value
3558
? "Checkbox with name '$name' and value '$value'"
3559
: "Checkbox with name '$name'";
3560
};
3561
3562
0
0
if ($options{ dom }) {
3563
0
@boxes = $options{ dom };
3564
} else {
3565
0
@boxes = $self->_option_query(%options);
3566
};
3567
3568
0
my $target = $boxes[0];
3569
0
my $is_set = $self->application->bool_ff_to_perl( $target->{checked} );
3570
0
0
0
if ($set xor $is_set) {
3571
0
0
if ($set) {
3572
0
$target->{checked}= 'checked';
3573
} else {
3574
0
$target->{checked} = 0;
3575
};
3576
};
3577
};
3578
3579
=head2 C<< $mech->untick( $name, $value ) >>
3580
3581
$mech->untick('spam_confirm','yes',undef)
3582
3583
Causes the checkbox to be unticked. Shorthand for
3584
3585
$mech->tick($name,$value,undef)
3586
3587
=cut
3588
3589
sub untick {
3590
0
0
1
my ($self, $name, $value) = @_;
3591
0
$self->tick( $name, $value, undef );
3592
};
3593
3594
=head2 C<< $mech->submit( $form ) >>
3595
3596
$mech->submit;
3597
3598
Submits the form. Note that this does B fire the C
3599
event and thus also does not fire eventual Javascript handlers.
3600
Maybe you want to use C<< $mech->click >> instead.
3601
3602
The default is to submit the current form as returned
3603
by C<< $mech->current_form >>.
3604
3605
=cut
3606
3607
sub submit {
3608
0
0
1
my ($self,$dom_form) = @_;
3609
0
0
$dom_form ||= $self->current_form;
3610
0
0
if ($dom_form) {
3611
0
$dom_form->submit(); # why don't we ->synchronize here??
3612
0
$self->signal_http_status;
3613
3614
0
$self->clear_current_form;
3615
0
1;
3616
} else {
3617
0
croak "I don't know which form to submit, sorry.";
3618
}
3619
};
3620
3621
=head2 C<< $mech->submit_form( %options ) >>
3622
3623
$mech->submit_form(
3624
with_fields => {
3625
user => 'me',
3626
pass => 'secret',
3627
}
3628
);
3629
3630
This method lets you select a form from the previously fetched page,
3631
fill in its fields, and submit it. It combines the form_number/form_name,
3632
set_fields and click methods into one higher level call. Its arguments are
3633
a list of key/value pairs, all of which are optional.
3634
3635
=over 4
3636
3637
=item *
3638
3639
C<< form => $mech->current_form() >>
3640
3641
Specifies the form to be filled and submitted. Defaults to the current form.
3642
3643
=item *
3644
3645
C<< fields => \%fields >>
3646
3647
Specifies the fields to be filled in the current form
3648
3649
=item *
3650
3651
C<< with_fields => \%fields >>
3652
3653
Probably all you need for the common case. It combines a smart form selector
3654
and data setting in one operation. It selects the first form that contains
3655
all fields mentioned in \%fields. This is nice because you don't need to
3656
know the name or number of the form to do this.
3657
3658
(calls L<< /$mech->form_with_fields() >> and L<< /$mech->set_fields() >>).
3659
3660
If you choose this, the form_number, form_name, form_id and fields options
3661
will be ignored.
3662
3663
=back
3664
3665
=cut
3666
3667
sub submit_form {
3668
0
0
1
my ($self,%options) = @_;
3669
3670
0
my $form = delete $options{ form };
3671
0
my $fields;
3672
0
0
if (! $form) {
3673
0
0
if ($fields = delete $options{ with_fields }) {
3674
0
my @names = keys %$fields;
3675
0
$form = $self->form_with_fields( \%options, @names );
3676
0
0
if (! $form) {
3677
0
$self->signal_condition("Couldn't find a matching form for @names.");
3678
return
3679
0
};
3680
} else {
3681
0
0
$fields = delete $options{ fields } || {};
3682
0
$form = $self->current_form;
3683
};
3684
};
3685
3686
0
0
if (! $form) {
3687
0
$self->signal_condition("No form found to submit.");
3688
return
3689
0
};
3690
0
$self->do_set_fields( form => $form, fields => $fields );
3691
0
$self->submit($form);
3692
}
3693
3694
=head2 C<< $mech->set_fields( $name => $value, ... ) >>
3695
3696
$mech->set_fields(
3697
user => 'me',
3698
pass => 'secret',
3699
);
3700
3701
This method sets multiple fields of the current form. It takes a list of
3702
field name and value pairs. If there is more than one field with the same
3703
name, the first one found is set. If you want to select which of the
3704
duplicate field to set, use a value which is an anonymous array which
3705
has the field value and its number as the 2 elements.
3706
3707
=cut
3708
3709
sub set_fields {
3710
0
0
1
my ($self, %fields) = @_;
3711
0
my $f = $self->current_form;
3712
0
0
if (! $f) {
3713
0
croak "Can't set fields: No current form set.";
3714
};
3715
0
$self->do_set_fields(form => $f, fields => \%fields);
3716
};
3717
3718
sub do_set_fields {
3719
0
0
0
my ($self, %options) = @_;
3720
0
my $form = delete $options{ form };
3721
0
my $fields = delete $options{ fields };
3722
3723
0
while (my($n,$v) = each %$fields) {
3724
0
0
if (ref $v) {
3725
0
($v,my $num) = @$v;
3726
0
0
warn "Index larger than 1 not supported, ignoring"
3727
unless $num == 1;
3728
};
3729
3730
0
$self->get_set_value( node => $form, name => $n, value => $v, %options );
3731
}
3732
};
3733
3734
=head2 C<< $mech->set_visible( @values ) >>
3735
3736
$mech->set_visible( $username, $password );
3737
3738
This method sets fields of the current form without having to know their
3739
names. So if you have a login screen that wants a username and password,
3740
you do not have to fetch the form and inspect the source (or use the
3741
C utility, installed with L) to see what
3742
the field names are; you can just say
3743
3744
$mech->set_visible( $username, $password );
3745
3746
and the first and second fields will be set accordingly. The method is
3747
called set_visible because it acts only on visible fields;
3748
hidden form inputs are not considered. It also respects
3749
the respective return value of C<< ->is_visible() >> for each
3750
field, so hiding of fields through CSS affects this too.
3751
3752
The specifiers that are possible in L are not yet supported.
3753
3754
=cut
3755
3756
sub set_visible {
3757
0
0
1
my ($self,@values) = @_;
3758
0
my $form = $self->current_form;
3759
0
my @form;
3760
0
0
if ($form) { @form = (node => $form) };
0
3761
0
my @visible_fields = $self->xpath( q{//input[not(@type) or }
3762
. q{(@type!= "hidden" and }
3763
. q{ @type!= "button" and }
3764
. q{ @type!= "submit" and }
3765
. q{ @type!= "image")]},
3766
@form
3767
);
3768
3769
0
@visible_fields = grep { $self->is_visible( $_ ) } @visible_fields;
0
3770
3771
0
0
if (@values > @visible_fields) {
3772
0
$self->signal_condition( "Not enough fields on page" );
3773
} else {
3774
0
for my $idx (0..$#values) {
3775
0
$self->field( $visible_fields[ $idx ] => $values[ $idx ]);
3776
};
3777
}
3778
}
3779
3780
=head2 C<< $mech->is_visible( $element ) >>
3781
3782
=head2 C<< $mech->is_visible( %options ) >>
3783
3784
if ($mech->is_visible( selector => '#login' )) {
3785
print "You can log in now.";
3786
};
3787
3788
Returns true if the element is visible, that is, it is
3789
a member of the DOM and neither it nor its ancestors have
3790
a CSS C attribute of C or
3791
a C attribute of C.
3792
3793
You can either pass in a DOM element or a set of key/value
3794
pairs to search the document for the element you want.
3795
3796
=over 4
3797
3798
=item *
3799
3800
C - the XPath query
3801
3802
=item *
3803
3804
C - the CSS selector
3805
3806
=item *
3807
3808
C - a DOM node
3809
3810
=back
3811
3812
The remaining options are passed through to either the
3813
L<< /$mech->xpath|xpath >> or L<< /$mech->selector|selector >> method.
3814
3815
=cut
3816
3817
sub is_visible {
3818
0
0
1
my ($self,%options);
3819
0
0
if (2 == @_) {
3820
0
($self,$options{dom}) = @_;
3821
} else {
3822
0
($self,%options) = @_;
3823
};
3824
0
_default_limiter( 'maybe', \%options );
3825
0
0
if (! $options{dom}) {
3826
0
$options{dom} = $self->_option_query(%options);
3827
};
3828
# No element means not visible
3829
return
3830
0
0
unless $options{ dom };
3831
0
0
$options{ window } ||= $self->tab->{linkedBrowser}->{contentWindow};
3832
3833
0
my $_is_visible = $self->repl->declare(<<'JS');
3834
function (obj,window)
3835
{
3836
while (obj) {
3837
// No object
3838
if (!obj) return false;
3839
3840
try {
3841
if( obj["parentNode"] ) 1;
3842
} catch (e) {
3843
// Dead object
3844
return false
3845
};
3846
// Descends from document, so we're done
3847
if (obj.parentNode === obj.ownerDocument) {
3848
return true;
3849
};
3850
// Not in the DOM
3851
if (!obj.parentNode) {
3852
return false;
3853
};
3854
// Direct style check
3855
if (obj.style) {
3856
if (obj.style.display == 'none') return false;
3857
if (obj.style.visibility == 'hidden') return false;
3858
};
3859
3860
if (window.getComputedStyle) {
3861
var style = window.getComputedStyle(obj, null);
3862
if (style.display == 'none') {
3863
return false; }
3864
if (style.visibility == 'hidden') {
3865
return false;
3866
};
3867
};
3868
obj = obj.parentNode;
3869
};
3870
// The object does not live in the DOM at all
3871
return false
3872
}
3873
JS
3874
0
!!$_is_visible->($options{dom}, $options{window});
3875
};
3876
3877
=head2 C<< $mech->wait_until_invisible( $element ) >>
3878
3879
=head2 C<< $mech->wait_until_invisible( %options ) >>
3880
3881
$mech->wait_until_invisible( $please_wait );
3882
3883
Waits until an element is not visible anymore.
3884
3885
Takes the same options as L<< $mech->is_visible/->is_visible >>.
3886
3887
In addition, the following options are accepted:
3888
3889
=over 4
3890
3891
=item *
3892
3893
C - the timeout after which the function will C. To catch
3894
the condition and handle it in your calling program, use an L block.
3895
A timeout of C<0> means to never time out.
3896
3897
=item *
3898
3899
C - the interval in seconds used to L. Subsecond
3900
intervals are possible.
3901
3902
=back
3903
3904
Note that when passing in a selector, that selector is requeried
3905
on every poll instance. So the following query will work as expected:
3906
3907
xpath => '//*[contains(text(),"stand by")]'
3908
3909
This also means that if your selector query relies on finding
3910
a changing text, you need to pass the node explicitly instead of
3911
passing the selector.
3912
3913
=cut
3914
3915
sub wait_until_invisible {
3916
0
0
1
my ($self,%options);
3917
0
0
if (2 == @_) {
3918
0
($self,$options{dom}) = @_;
3919
} else {
3920
0
($self,%options) = @_;
3921
};
3922
0
0
my $sleep = delete $options{ sleep } || 0.3;
3923
0
0
my $timeout = delete $options{ timeout } || 0;
3924
3925
0
_default_limiter( 'maybe', \%options );
3926
3927
3928
0
my $timeout_after;
3929
0
0
if ($timeout) {
3930
0
$timeout_after = time + $timeout;
3931
};
3932
0
my $v;
3933
my $node;
3934
0
0
do {
0
3935
0
$node = $options{ dom };
3936
0
0
if (! $node) {
3937
0
$node = $self->_option_query(%options);
3938
};
3939
return
3940
0
0
unless $node;
3941
0
sleep $sleep;
3942
} while ( $v = $self->is_visible($node)
3943
and (!$timeout_after or time < $timeout_after ));
3944
0
0
0
if ($node and time >= $timeout_after) {
3945
0
croak "Timeout of $timeout seconds reached while waiting for element to become invisible";
3946
};
3947
};
3948
3949
# Internal method to run either an XPath, CSS or id query against the DOM
3950
# Returns the element(s) found
3951
my %rename = (
3952
xpath => 'xpath',
3953
selector => 'selector',
3954
id => 'by_id',
3955
by_id => 'by_id',
3956
);
3957
3958
sub _option_query {
3959
0
0
my ($self,%options) = @_;
3960
0
my ($method,$q);
3961
0
for my $meth (keys %rename) {
3962
0
0
if (exists $options{ $meth }) {
3963
0
$q = delete $options{ $meth };
3964
0
0
$method = $rename{ $meth } || $meth;
3965
}
3966
};
3967
0
_default_limiter( 'one' => \%options );
3968
0
0
croak "Need either a name, a selector or an xpath key!"
3969
if not $method;
3970
0
return $self->$method( $q, %options );
3971
};
3972
3973
=head2 C<< $mech->clickables() >>
3974
3975
print "You could click on\n";
3976
for my $el ($mech->clickables) {
3977
print $el->{innerHTML}, "\n";
3978
};
3979
3980
Returns all clickable elements, that is, all elements
3981
with an C attribute.
3982
3983
=cut
3984
3985
sub clickables {
3986
0
0
1
my ($self, %options) = @_;
3987
0
$self->xpath('//*[@onclick]', %options);
3988
};
3989
3990
=head2 C<< $mech->expand_frames( $spec ) >>
3991
3992
my @frames = $mech->expand_frames();
3993
3994
Expands the frame selectors (or C<1> to match all frames)
3995
into their respective DOM document nodes according to the current
3996
document. All frames will be visited in breadth first order.
3997
3998
This is mostly an internal method.
3999
4000
=cut
4001
4002
sub expand_frames {
4003
0
0
1
my ($self, $spec, $document) = @_;
4004
0
0
$spec ||= $self->{frames};
4005
0
0
my @spec = ref $spec ? @$spec : $spec;
4006
0
0
$document ||= $self->document;
4007
4008
0
0
0
if (! ref $spec and $spec !~ /\D/ and $spec == 1) {
0
4009
# All frames
4010
0
@spec = qw( frame iframe );
4011
};
4012
4013
# Optimize the default case of only names in @spec
4014
0
my @res;
4015
0
0
if (! grep {ref} @spec) {
0
4016
0
@res = map { $_->{contentDocument} }
0
4017
$self->selector(
4018
\@spec,
4019
document => $document,
4020
frames => 0, # otherwise we'll recurse :)
4021
);
4022
} else {
4023
@res =
4024
map { #warn "Expanding $_";
4025
0
0
ref $_
0
4026
? $_
4027
# Just recurse into the above code path
4028
: $self->expand_frames( $_, $document );
4029
} @spec;
4030
}
4031
};
4032
4033
=head1 IMAGE METHODS
4034
4035
=head2 C<< $mech->content_as_png( [$tab, \%coordinates, \%target_size ] ) >>
4036
4037
my $png_data = $mech->content_as_png();
4038
4039
# Create scaled-down 480px wide preview
4040
my $png_data = $mech->content_as_png(undef, undef, { width => 480 });
4041
4042
Returns the given tab or the current page rendered as PNG image.
4043
4044
All parameters are optional.
4045
4046
=over 4
4047
4048
=item *
4049
4050
C<$tab> defaults to the current tab.
4051
4052
=item *
4053
4054
If the coordinates are given, that rectangle will be cut out.
4055
The coordinates should be a hash with the four usual entries,
4056
C,C,C,C.
4057
4058
=item *
4059
4060
The target size of the image can also be given. It defaults to
4061
the size of the image. The allowed parameters in the hash are
4062
4063
C, C - for specifying the scale, default is 1.0 in each direction.
4064
4065
C, C - for specifying the target size
4066
4067
If you want the resulting image to be 480 pixels wide, specify
4068
4069
{ width => 480 }
4070
4071
The height will then be calculated from the ratio of original width to
4072
original height.
4073
4074
=back
4075
4076
This method is specific to WWW::Mechanize::Firefox.
4077
4078
Currently, the data transfer between Firefox and Perl
4079
is done Base64-encoded. It would be beneficial to find what's
4080
necessary to make JSON handle binary data more gracefully.
4081
4082
=cut
4083
4084
sub content_as_png {
4085
0
0
1
my ($self, $tab, $rect, $target_rect) = @_;
4086
0
0
$tab ||= $self->tab;
4087
0
0
$rect ||= {};
4088
0
0
$target_rect ||= {};
4089
4090
# Mostly taken from
4091
# http://wiki.github.com/bard/mozrepl/interactor-screenshot-server
4092
# Except for the addition of a target image size
4093
0
my $screenshot = $self->repl->declare(<<'JS');
4094
function (tab,rect,target_rect) {
4095
var browser = tab.linkedBrowser;
4096
var browserWindow = Components.classes['@mozilla.org/appshell/window-mediator;1']
4097
.getService(Components.interfaces.nsIWindowMediator)
4098
.getMostRecentWindow('navigator:browser');
4099
var win = browser.contentWindow;
4100
var body = win.document.body;
4101
if(!body) {
4102
return;
4103
};
4104
var canvas = browserWindow
4105
.document
4106
.createElementNS('http://www.w3.org/1999/xhtml', 'canvas');
4107
var left = rect.left || 0;
4108
var top = rect.top || 0;
4109
var width = rect.width || body.clientWidth;
4110
var height = rect.height || body.clientHeight;
4111
4112
if( isNaN( target_rect.scalex * target_rect.scaley ) || target_rect.scalex * target_rect.scaley == 0) {
4113
// No scale was given
4114
// Was a fixed target width / height given?
4115
if( target_rect.width ) {
4116
target_rect.scalex = target_rect.width / width;
4117
};
4118
if( target_rect.height ) {
4119
target_rect.scaley = target_rect.height / height
4120
};
4121
4122
// If only one of scalex / scaley is given, force the other
4123
// to be the same, default to 1.0
4124
target_rect.scalex = target_rect.scalex || target_rect.scaley || (target_rect.width / width) || 1.0;
4125
target_rect.scaley = target_rect.scaley || target_rect.scalex || (target_rect.height / height) || 1.0;
4126
} else {
4127
//alert("scales fixed");
4128
};
4129
// Calculate the target width/height if missing:
4130
target_rect.height = target_rect.height || height * target_rect.scaley;
4131
target_rect.width = target_rect.width || width * target_rect.scalex;
4132
4133
canvas.width = target_rect.width;
4134
canvas.height = target_rect.height;
4135
var ctx = canvas.getContext('2d');
4136
ctx.clearRect(0, 0, target_rect.width, target_rect.height);
4137
ctx.save();
4138
ctx.scale(target_rect.scalex, target_rect.scaley);
4139
ctx.drawWindow(win, left, top, width, height, 'rgb(255,255,255)');
4140
ctx.restore();
4141
4142
//return atob(
4143
return canvas
4144
.toDataURL('image/png', '')
4145
.split(',')[1]
4146
// );
4147
}
4148
JS
4149
0
my $scr = $screenshot->($tab, $rect, $target_rect);
4150
0
0
return $scr ? decode_base64($scr) : undef
4151
};
4152
4153
=head2 C<< $mech->element_as_png( $element ) >>
4154
4155
my $shiny = $mech->selector('#shiny', single => 1);
4156
my $i_want_this = $mech->element_as_png($shiny);
4157
4158
Returns PNG image data for a single element
4159
4160
=cut
4161
4162
sub element_as_png {
4163
0
0
1
my ($self, $element) = @_;
4164
0
my $tab = $self->tab;
4165
4166
0
my $pos = $self->element_coordinates($element);
4167
0
return $self->content_as_png($tab, $pos);
4168
};
4169
4170
=head2 C<< $mech->element_coordinates( $element ) >>
4171
4172
my $shiny = $mech->selector('#shiny', single => 1);
4173
my ($pos) = $mech->element_coordinates($shiny);
4174
print $pos->{left},',', $pos->{top};
4175
4176
Returns the page-coordinates of the C<$element>
4177
in pixels as a hash with four entries, C, C, C and C.
4178
4179
This function might get moved into another module more geared
4180
towards rendering HTML.
4181
4182
=cut
4183
4184
sub element_coordinates {
4185
0
0
1
my ($self, $element) = @_;
4186
4187
# Mostly taken from
4188
# http://www.quirksmode.org/js/findpos.html
4189
0
my $findPos = $self->repl->declare(<<'JS');
4190
function (obj) {
4191
var res = {
4192
left: 0,
4193
top: 0,
4194
width: obj.scrollWidth,
4195
height: obj.scrollHeight
4196
};
4197
if (obj.offsetParent) {
4198
do {
4199
res.left += obj.offsetLeft;
4200
res.top += obj.offsetTop;
4201
} while (obj = obj.offsetParent);
4202
}
4203
return res;
4204
}
4205
JS
4206
0
$findPos->($element);
4207
};
4208
4209
1;
4210
4211
__END__