line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package WWW::Mechanize::Plugin::AutoWrite; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
=head1 NAME |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
WWW::Mechanize::Plugin::AutoWrite - WWW::Mechanize plugin that writes the fetched pages to the disk. |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 SYNOPSIS |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
use WWW::Mechanize; |
10
|
|
|
|
|
|
|
use WWW::Mechanize::Plugin::AutoWrite; |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
my $mech = WWW::Mechanize->new(); |
13
|
|
|
|
|
|
|
$mech->autowrite->file('/tmp/mech.html'); |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
$mech->get('http://search.cpan.org/'); |
16
|
|
|
|
|
|
|
# now the contents of the page is written to /tmp/mech.html |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
or: |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
my $mech = WWW::Mechanize->new(); |
21
|
|
|
|
|
|
|
$mech->autowrite->dir('/tmp/mech/'); |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
$mech->get('http://search.cpan.org/'); |
24
|
|
|
|
|
|
|
# now the contents of the page are written to /tmp/mech/001.html and the HTTP |
25
|
|
|
|
|
|
|
# transaction is logged into /tmp/mech/001.http |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
$mech->submit_form( |
28
|
|
|
|
|
|
|
'form_name' => 'f', |
29
|
|
|
|
|
|
|
'fields' => { |
30
|
|
|
|
|
|
|
'query' => 'WWW::Mechanize::Plugin::AutoWrite', |
31
|
|
|
|
|
|
|
'mode' => 'module', |
32
|
|
|
|
|
|
|
}, |
33
|
|
|
|
|
|
|
); |
34
|
|
|
|
|
|
|
# Now the pages are saved into /tmp/mech/002.html and /tmp/mech/002.http |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
or: |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
my $mech = WWW::Mechanize->new(); |
40
|
|
|
|
|
|
|
$mech->autowrite->dir('/tmp/mech/'); # Save the whole session |
41
|
|
|
|
|
|
|
$mech->autowrite->file('/tmp/mech/last.html'); # Save the last page in a file |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
$mech->get('http://www.wikipedia.com/'); |
44
|
|
|
|
|
|
|
# now the contents of the page are written both to /tmp/mech/001.html and |
45
|
|
|
|
|
|
|
# /tmp/mech/last.html |
46
|
|
|
|
|
|
|
$mech->follow_link(text => 'Galego'); |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
=head1 DESCRIPTION |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
L overrides the method |
51
|
|
|
|
|
|
|
L with a custom version that records all HTTP |
52
|
|
|
|
|
|
|
transactions into the disk. This has for effect that every time that a new HTTP |
53
|
|
|
|
|
|
|
request is made (GET, POST, etc) the contents of the page returned by the server |
54
|
|
|
|
|
|
|
and the HTTP transaction (the request and the response) are stored into local |
55
|
|
|
|
|
|
|
file(s) on disk. |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
If no destination file/folder is provided then this module will act as a noop |
58
|
|
|
|
|
|
|
and nothing will be written to the disk. It's also possible to provide both a |
59
|
|
|
|
|
|
|
file and a folder in order to have the HTTP session and the last page saved |
60
|
|
|
|
|
|
|
simultaneously. |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
=head1 RATIONALE |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
The idea is to have the static page loaded into a web browser and to reload the |
65
|
|
|
|
|
|
|
page as needed. A better idea is to use a browser that has a builtin mechanism |
66
|
|
|
|
|
|
|
for monitoring changes to local files. The I web browser does this |
67
|
|
|
|
|
|
|
automatically once a page is loaded through the procotol C |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
Another reason for the existence of this module is to be able to trace an HTTP |
70
|
|
|
|
|
|
|
transaction in order to debug better I operations performed on |
71
|
|
|
|
|
|
|
complex web sites. |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=head1 ATTRIBUTES |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
This module can be configured through the attributes enumerated here. |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=head2 file |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
Get/set the name of the file where the last page downloaded (the content's of |
80
|
|
|
|
|
|
|
the HTTP response) will be saved. |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
Set this attribute to a false value in order to disable the saving of the last |
83
|
|
|
|
|
|
|
page downloaded. |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
=head2 dir |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
Get/set the name of the folder where the HTTP session (the content's of the HTTP |
88
|
|
|
|
|
|
|
response as well as the HTTP headers) will be saved. |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
Set this attribute to a false value in order to disable the saving of the HTTP |
91
|
|
|
|
|
|
|
session. |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
=head2 counter |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
Get/set the counter used no name each file with a unique name when saving the |
96
|
|
|
|
|
|
|
HTTP session (the counter is used only when L is set). |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
It can be useful to reset the counter when multiple sessions need to be saved |
100
|
|
|
|
|
|
|
into different folders. |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
foreach my $entry (@entries) { |
103
|
|
|
|
|
|
|
$mech->autowrite->dir("/tmp/$entry/"); |
104
|
|
|
|
|
|
|
$mech->autowrite->counter(0); |
105
|
|
|
|
|
|
|
# Complex mechanize |
106
|
|
|
|
|
|
|
mechanize_process($mech, $entry); |
107
|
|
|
|
|
|
|
} |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
=cut |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
|
112
|
1
|
|
|
1
|
|
396477
|
use 5.006; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
43
|
|
113
|
1
|
|
|
1
|
|
6
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
34
|
|
114
|
1
|
|
|
1
|
|
8
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
56
|
|
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
our $VERSION = '0.06'; |
117
|
|
|
|
|
|
|
|
118
|
1
|
|
|
1
|
|
6
|
use File::Slurp qw{ write_file }; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
71
|
|
119
|
1
|
|
|
1
|
|
8
|
use File::Path qw{ mkpath }; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
53
|
|
120
|
1
|
|
|
1
|
|
5
|
use File::Spec; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
34
|
|
121
|
1
|
|
|
1
|
|
6
|
use File::Basename qw{ fileparse }; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
196
|
|
122
|
|
|
|
|
|
|
|
123
|
1
|
|
|
1
|
|
841
|
use MIME::Types; |
|
1
|
|
|
|
|
6541
|
|
|
1
|
|
|
|
|
49
|
|
124
|
|
|
|
|
|
|
|
125
|
1
|
|
|
1
|
|
11
|
use base qw(Class::Accessor::Fast); |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
1069
|
|
126
|
|
|
|
|
|
|
__PACKAGE__->mk_accessors( |
127
|
|
|
|
|
|
|
qw( |
128
|
|
|
|
|
|
|
file |
129
|
|
|
|
|
|
|
dir |
130
|
|
|
|
|
|
|
counter |
131
|
|
|
|
|
|
|
) |
132
|
|
|
|
|
|
|
); |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
# We need a reference to the original method used by mechanize for the requests. |
136
|
|
|
|
|
|
|
my $REQUEST_SUB; |
137
|
|
|
|
|
|
|
BEGIN { |
138
|
1
|
|
|
1
|
|
4702
|
$REQUEST_SUB = \&WWW::Mechanize::request; |
139
|
|
|
|
|
|
|
} |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
# MIME types lookup |
143
|
|
|
|
|
|
|
my $MIME_TYPES = MIME::Types->new(); |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
=head1 METHODS |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
This module offers the following public methods: |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
=cut |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
# |
154
|
|
|
|
|
|
|
# Returns the next iteration of the counter. This method initializes the counter |
155
|
|
|
|
|
|
|
# the first time it's invoked. |
156
|
|
|
|
|
|
|
# |
157
|
|
|
|
|
|
|
sub _inc_counter { |
158
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
159
|
|
|
|
|
|
|
|
160
|
0
|
0
|
|
|
|
0
|
if (! defined $self->{'counter'}) { |
161
|
0
|
|
|
|
|
0
|
$self->{'counter'} = 0; |
162
|
|
|
|
|
|
|
} |
163
|
|
|
|
|
|
|
|
164
|
0
|
|
|
|
|
0
|
return ++$self->{'counter'}; |
165
|
|
|
|
|
|
|
} |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
=head2 write_to_file |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
This method writes the HTTP requests into a file and/or a folder. It's called |
171
|
|
|
|
|
|
|
automatically by mechanize once the plugin is loaded and configured. |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
=cut |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
sub write_to_file { |
176
|
3
|
|
|
3
|
1
|
7
|
my $self = shift; |
177
|
3
|
|
|
|
|
6
|
my ($mech, $request, $response) = @_; |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
# write to a single file if autowrite is set |
181
|
3
|
50
|
|
|
|
16
|
if (my $filename = $self->file) { |
182
|
|
|
|
|
|
|
# Make sure that the path to the file exists |
183
|
3
|
|
|
|
|
97
|
my (undef, $folder) = fileparse($filename); |
184
|
3
|
|
|
|
|
190
|
mkpath($folder); |
185
|
|
|
|
|
|
|
|
186
|
3
|
|
|
|
|
16
|
write_file($filename, $mech->content); |
187
|
|
|
|
|
|
|
} |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
# write to multiple files in a folder if autowritedir is set |
191
|
3
|
50
|
|
|
|
1155
|
if (my $foldername = $self->dir) { |
192
|
0
|
|
|
|
|
0
|
mkpath($foldername); # works fine with already existing folders |
193
|
|
|
|
|
|
|
# my $encoding = $response->content_encoding; |
194
|
|
|
|
|
|
|
|
195
|
0
|
|
|
|
|
0
|
my $counter = $self->_inc_counter(); |
196
|
0
|
|
|
|
|
0
|
my $file; |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
# Get the extension of the file based on the mime-type |
199
|
0
|
|
0
|
|
|
0
|
my $mime = $MIME_TYPES->type($response->content_type || 'text/plain'); |
200
|
0
|
0
|
|
|
|
0
|
my ($extension) = defined $mime ? $mime->extensions : ('txt'); |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
# Write the contents of the page |
205
|
0
|
|
|
|
|
0
|
$file = File::Spec->catfile( |
206
|
|
|
|
|
|
|
$foldername, |
207
|
|
|
|
|
|
|
sprintf "%03d.%s", $counter, $extension |
208
|
|
|
|
|
|
|
); |
209
|
0
|
|
|
|
|
0
|
write_file($file, $mech->content); |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
# Remember that the response has the document body which we don't want at |
213
|
|
|
|
|
|
|
# this point. So let's clone the response and get rid of the request's body. |
214
|
0
|
|
|
|
|
0
|
$response = $response->clone(); |
215
|
0
|
|
|
|
|
0
|
$response->content(undef); |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
# Write the HTTP transaction (request "\n" response) |
218
|
0
|
|
|
|
|
0
|
$file = File::Spec->catfile( |
219
|
|
|
|
|
|
|
$foldername, |
220
|
|
|
|
|
|
|
sprintf "%03d.http", $counter |
221
|
|
|
|
|
|
|
); |
222
|
|
|
|
|
|
|
|
223
|
0
|
|
|
|
|
0
|
write_file($file, $request->as_string, "\n", $response->as_string); |
224
|
|
|
|
|
|
|
} |
225
|
|
|
|
|
|
|
} |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
# |
229
|
|
|
|
|
|
|
# NOTE: We are injecting methods into WWW::Mechanize it's evil but it's the only |
230
|
|
|
|
|
|
|
# way for a plugin to work. |
231
|
|
|
|
|
|
|
# |
232
|
|
|
|
|
|
|
package WWW::Mechanize; |
233
|
|
|
|
|
|
|
|
234
|
1
|
|
|
1
|
|
11
|
use Scalar::Util qw{ blessed }; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
62
|
|
235
|
1
|
|
|
1
|
|
6
|
use Carp; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
60
|
|
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
=head1 WWW::Mechanize::request |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
The method L is overriden by a custom version that will |
240
|
|
|
|
|
|
|
invoke the original L and then record the request. |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
=cut |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
{ |
245
|
|
|
|
|
|
|
|
246
|
1
|
|
|
1
|
|
6
|
no warnings qw{ redefine }; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
322
|
|
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
sub request { |
249
|
3
|
|
|
3
|
1
|
5596
|
my $self = shift; |
250
|
3
|
|
|
|
|
7
|
my ($request, @args) = @_; |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
# Perform the actual HTTP request |
253
|
3
|
|
|
|
|
13
|
my $response = $REQUEST_SUB->($self, $request, @args); |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
# Write the request, response and contents |
257
|
3
|
50
|
|
|
|
891265
|
if (exists $self->{autowrite}) { |
258
|
|
|
|
|
|
|
|
259
|
3
|
|
|
|
|
10
|
my $autowrite = $self->{autowrite}; |
260
|
3
|
50
|
|
|
|
11
|
if (_is_an_autowrite($autowrite)) { |
261
|
3
|
|
|
|
|
15
|
$autowrite->write_to_file($self, $request, $response); |
262
|
|
|
|
|
|
|
} |
263
|
|
|
|
|
|
|
else { |
264
|
0
|
|
0
|
|
|
0
|
croak "Wrong type for member 'autowrite', got ", ref($autowrite) || 'a scalar'; |
265
|
|
|
|
|
|
|
} |
266
|
|
|
|
|
|
|
} |
267
|
|
|
|
|
|
|
|
268
|
3
|
|
|
|
|
41
|
return $response; |
269
|
|
|
|
|
|
|
} |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
} |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
=head1 WWW::Mechanize::autowrite |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
This accessor returns the autowrite instance associated with this mechanize |
277
|
|
|
|
|
|
|
instance. The first time that this method is invoked it will create |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
=cut |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
sub autowrite { |
282
|
5
|
|
|
5
|
0
|
4212
|
my $self = shift; |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
# The first time that this accessor is invoked |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
# set |
287
|
5
|
50
|
|
|
|
14
|
if (@_) { |
288
|
0
|
|
|
|
|
0
|
my $autowrite = shift; |
289
|
|
|
|
|
|
|
|
290
|
0
|
0
|
|
|
|
0
|
if (! _is_an_autowrite($autowrite)) { |
291
|
0
|
|
|
|
|
0
|
croak "Parameter must be an instance of WWW::Mechanize::Plugin::AutoWrite"; |
292
|
|
|
|
|
|
|
} |
293
|
|
|
|
|
|
|
|
294
|
0
|
|
|
|
|
0
|
$self->{'autowrite'} = $autowrite; |
295
|
|
|
|
|
|
|
} |
296
|
|
|
|
|
|
|
# get |
297
|
|
|
|
|
|
|
else { |
298
|
5
|
100
|
|
|
|
29
|
if (! exists $self->{'autowrite'}) { |
299
|
|
|
|
|
|
|
# Create an autowrite instance on the fly |
300
|
2
|
|
|
|
|
20
|
$self->{'autowrite'} = WWW::Mechanize::Plugin::AutoWrite->new(); |
301
|
|
|
|
|
|
|
} |
302
|
|
|
|
|
|
|
} |
303
|
|
|
|
|
|
|
|
304
|
5
|
|
|
|
|
76
|
return $self->{'autowrite'}; |
305
|
|
|
|
|
|
|
} |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
# |
309
|
|
|
|
|
|
|
# Returns true if the parameter is an instance of |
310
|
|
|
|
|
|
|
# WWW::Mechanize::Plugin::AutoWrite. |
311
|
|
|
|
|
|
|
# |
312
|
|
|
|
|
|
|
sub _is_an_autowrite { |
313
|
3
|
|
|
3
|
|
27
|
my ($autowrite) = @_; |
314
|
|
|
|
|
|
|
return |
315
|
3
|
|
33
|
|
|
61
|
blessed($autowrite) |
316
|
|
|
|
|
|
|
&& $autowrite->isa('WWW::Mechanize::Plugin::AutoWrite') |
317
|
|
|
|
|
|
|
; |
318
|
|
|
|
|
|
|
} |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
1; |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
=head1 COMPATIBILITY |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
The version 0.04 has a different API and is not backward compatible. This |
326
|
|
|
|
|
|
|
affects only the configuration of the plugin. The behaviour should be the same. |
327
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
=head1 SEE ALSO |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
L |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
=head1 AUTHOR |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
Jozef Kutej, Ejkutej@cpan.orgE, |
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
Emmanuel Rodriguez, Epotyl@cpan.orgE |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
Copyright (C) 2007 by Jozef Kutej |
341
|
|
|
|
|
|
|
Copyright (C) 2008 by Jozef Kutej, Emmanuel Rodriguez |
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
344
|
|
|
|
|
|
|
it under the same terms as Perl itself, either Perl version 5.8.8 or, |
345
|
|
|
|
|
|
|
at your option, any later version of Perl 5 you may have available. |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
=cut |