line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package HTTP::Recorder; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
our $VERSION = "0.07"; |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=head1 NAME |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
HTTP::Recorder - record interaction with websites |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=head1 SYNOPSIS |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
=head2 This module is deprecated |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
It works by tagging links in a page, and then when a link is clicked |
14
|
|
|
|
|
|
|
looking on the submitted tag to see which link was clicked |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
It can not handle Javascript-created links or JS manipulation of the page |
17
|
|
|
|
|
|
|
so it works only for fairly static websites |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
For better options check out Selenium |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
Patchs are welcome, and I'll fix bugs as much as I can, but please don't |
22
|
|
|
|
|
|
|
expect me to implement new features |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=head2 Using HTTP::Recorder as a Web Proxy |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
Set HTTP::Recorder as the user agent for a proxy, and it rewrites HTTP |
27
|
|
|
|
|
|
|
responses so that additional requests can be recorded. |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
=head3 The Proxy Script |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
For quick start, run the httprecorder script |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
httprecorder |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
This will open a local proxy on port 8080, and will dump the recorded traffic |
36
|
|
|
|
|
|
|
to a file named http_traffic in the current directory. use the -help parameter |
37
|
|
|
|
|
|
|
for usage info |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
Start the proxy script, then change the settings in your web browser |
40
|
|
|
|
|
|
|
so that it will use this proxy for web requests. For more information |
41
|
|
|
|
|
|
|
about proxy settings and the default port, see L. |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
The script will be recorded in the specified file, and can be viewed |
44
|
|
|
|
|
|
|
and modified via the control panel. |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
For better control, use this example: |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
#!/usr/bin/perl |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
use HTTP::Proxy; |
51
|
|
|
|
|
|
|
use HTTP::Recorder; |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
my $proxy = HTTP::Proxy->new(); |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
# create a new HTTP::Recorder object |
56
|
|
|
|
|
|
|
my $agent = new HTTP::Recorder; |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
# set the log file (optional) |
59
|
|
|
|
|
|
|
$agent->file("/tmp/myfile"); |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
# set HTTP::Recorder as the agent for the proxy |
62
|
|
|
|
|
|
|
$proxy->agent( $agent ); |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
# start the proxy |
65
|
|
|
|
|
|
|
$proxy->start(); |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=head3 Start Recording |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
Now you can use your browser as your normally would, and your actions |
70
|
|
|
|
|
|
|
will be recorded in the file you specified. Alternatively, you can |
71
|
|
|
|
|
|
|
start recording from the Control Panel. |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=head3 Using the Control Panel |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
If you have Javascript enabled in your browser, go to the |
76
|
|
|
|
|
|
|
L control URL (http://http-recorder by default), |
77
|
|
|
|
|
|
|
optionally type a URL into the "Goto page" field, and click "Go". |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
In the new window, interact with web sites as you normally do, |
80
|
|
|
|
|
|
|
including typing a new address into the address field. The Control |
81
|
|
|
|
|
|
|
Panel will be updated after each recorded action. |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
The Control Panel allows you to modify, delete, or save your script. |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
=head2 SSL sessions |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
As of version 0.03, L can record SSL sessions. |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
To begin recording an SSL session, go to the control URL |
90
|
|
|
|
|
|
|
(http://http-recorder/ by default), and enter the initial URL. |
91
|
|
|
|
|
|
|
Then, interact with the web site as usual. |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
=head2 Script output |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
By default, L outputs L scripts. |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
However, you can override HTTP::Recorder::Logger to output other types |
98
|
|
|
|
|
|
|
of scripts. |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=cut |
101
|
|
|
|
|
|
|
|
102
|
2
|
|
|
2
|
|
36491
|
use strict; |
|
2
|
|
|
|
|
8
|
|
|
2
|
|
|
|
|
143
|
|
103
|
2
|
|
|
2
|
|
17
|
use warnings; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
124
|
|
104
|
2
|
|
|
2
|
|
3285
|
use LWP::UserAgent; |
|
2
|
|
|
|
|
112695
|
|
|
2
|
|
|
|
|
112
|
|
105
|
2
|
|
|
2
|
|
1798
|
use HTML::TokeParser; |
|
2
|
|
|
|
|
25084
|
|
|
2
|
|
|
|
|
76
|
|
106
|
2
|
|
|
2
|
|
1298
|
use HTTP::Recorder::Logger; |
|
2
|
|
|
|
|
15
|
|
|
2
|
|
|
|
|
65
|
|
107
|
2
|
|
|
2
|
|
13
|
use URI::Escape qw(uri_escape uri_unescape); |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
130
|
|
108
|
2
|
|
|
2
|
|
2124
|
use URI::QueryParam; |
|
2
|
|
|
|
|
1510
|
|
|
2
|
|
|
|
|
54
|
|
109
|
2
|
|
|
2
|
|
1619
|
use HTTP::Request::Params; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
our @ISA = qw( LWP::UserAgent ); |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
=head1 Functions |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=head2 new |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
Creates and returns a new L object, referred to as the 'agent'. |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
=cut |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
sub new { |
122
|
|
|
|
|
|
|
my $class = shift; |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
my %args = ( @_ ); |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
my $self = $class->SUPER::new( %args ); |
127
|
|
|
|
|
|
|
bless $self, $class; |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
$self->{prefix} = $args{prefix} || "rec"; |
130
|
|
|
|
|
|
|
$self->{control} = $args{control} || "http-recorder"; |
131
|
|
|
|
|
|
|
$self->{logger} = $args{logger} || |
132
|
|
|
|
|
|
|
new HTTP::Recorder::Logger(file => $args{file}); |
133
|
|
|
|
|
|
|
$self->{ignore_favicon} = $args{ignore_favicon} || 1; |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
return $self; |
136
|
|
|
|
|
|
|
} |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
=head2 $agent->prefix([$value]) |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
Get or set the prefix string that L uses for rewriting |
141
|
|
|
|
|
|
|
responses. |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
=cut |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
sub prefix { shift->_elem('prefix', @_); } |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=head2 $agent->control([$value]) |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
Get or set the URL of the control panel. By default, the control URL |
150
|
|
|
|
|
|
|
is 'http-recorder'. |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
The control URL will display a control panel which will allow you to |
153
|
|
|
|
|
|
|
view and edit the current script. |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
=cut |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
sub control { shift->_elem('control', @_); } |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
=head2 $agent->logger([$value]) |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
Get or set the logger object. The default logger is a |
162
|
|
|
|
|
|
|
L, which generates L scripts. |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
=cut |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
sub logger { |
167
|
|
|
|
|
|
|
my $self = shift; |
168
|
|
|
|
|
|
|
$self->_elem('logger', @_); |
169
|
|
|
|
|
|
|
} |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
=head2 $agent->ignore_favicon([0|1]) |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
Get or set ignore_favicon flag that causes L to skip |
174
|
|
|
|
|
|
|
logging requests favicon.ico files. The value is 1 by default. |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
=cut |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
sub ignore_favicon { shift->_elem('ignore_favicon', @_); } |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
=head2 $agent->file([$value]) |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
Get or set the filename for generated scripts. The default is |
183
|
|
|
|
|
|
|
'/tmp/scriptfile'. |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
=cut |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
sub file { |
188
|
|
|
|
|
|
|
my $self = shift; |
189
|
|
|
|
|
|
|
my $file = shift; |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
$self->{logger}->file($file) if $file; |
192
|
|
|
|
|
|
|
} |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
sub send_request { |
195
|
|
|
|
|
|
|
my $self = shift; |
196
|
|
|
|
|
|
|
my $request = shift; |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
my $response; |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
# special handling if the URL is the control URL |
201
|
|
|
|
|
|
|
if ($request->uri->host eq $self->{control}) { |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
# get the arguments passed from the form |
204
|
|
|
|
|
|
|
my $arghash; |
205
|
|
|
|
|
|
|
$arghash = extract_values($request); |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
# there may be an action we need to perform |
208
|
|
|
|
|
|
|
if (exists $arghash->{updatescript}) { |
209
|
|
|
|
|
|
|
my $script = $arghash->{ScriptContent}; |
210
|
|
|
|
|
|
|
$self->{logger}->SetScript($script || ''); |
211
|
|
|
|
|
|
|
} elsif (exists $arghash->{clearscript}) { |
212
|
|
|
|
|
|
|
$self->{logger}->SetScript(""); |
213
|
|
|
|
|
|
|
} |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
my ($h, $content); |
216
|
|
|
|
|
|
|
if (exists $arghash->{goto}) { |
217
|
|
|
|
|
|
|
my $url = $arghash->{url}; |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
if ($url) { |
220
|
|
|
|
|
|
|
my $r = new HTTP::Request("GET", $url); |
221
|
|
|
|
|
|
|
my $response = $self->send_request( $r ); |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
return $response; |
224
|
|
|
|
|
|
|
} else { |
225
|
|
|
|
|
|
|
$h = HTTP::Headers->new(Content_Type => 'text/html'); |
226
|
|
|
|
|
|
|
$content = $self->get_start_page(); |
227
|
|
|
|
|
|
|
} |
228
|
|
|
|
|
|
|
} elsif (exists $arghash->{savescript}) { |
229
|
|
|
|
|
|
|
$h = HTTP::Headers->new(Content_Type => 'text/plain', |
230
|
|
|
|
|
|
|
Content_Disposition => 'attachment; filename=recorder-script.pl'); |
231
|
|
|
|
|
|
|
my @script = $self->{logger}->GetScript(); |
232
|
|
|
|
|
|
|
$content = join('', @script); |
233
|
|
|
|
|
|
|
} else { |
234
|
|
|
|
|
|
|
$h = HTTP::Headers->new(Content_Type => 'text/html'); |
235
|
|
|
|
|
|
|
$content = $self->get_recorder_content(); |
236
|
|
|
|
|
|
|
} |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
$response = HTTP::Response->new(200, |
239
|
|
|
|
|
|
|
"", |
240
|
|
|
|
|
|
|
$h, |
241
|
|
|
|
|
|
|
$content, |
242
|
|
|
|
|
|
|
); |
243
|
|
|
|
|
|
|
} else { |
244
|
|
|
|
|
|
|
$request = $self->modify_request ($request) |
245
|
|
|
|
|
|
|
unless $self->{ignore_favicon} |
246
|
|
|
|
|
|
|
&& $request->uri->path =~ /favicon\.ico$/i; |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
$response = $self->SUPER::send_request( $request ); |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
my $content_type = $response->headers->header('Content-type') || ""; |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
# don't try to modify the content unless it's text/ |
253
|
|
|
|
|
|
|
if ($content_type =~ m#^text/#i) { |
254
|
|
|
|
|
|
|
$self->modify_response($response); |
255
|
|
|
|
|
|
|
} |
256
|
|
|
|
|
|
|
} |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
return $response; |
259
|
|
|
|
|
|
|
} |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
sub modify_request { |
262
|
|
|
|
|
|
|
my $self = shift; |
263
|
|
|
|
|
|
|
my $request = shift; |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
my $values = extract_values($request); |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
# log the actions |
268
|
|
|
|
|
|
|
my $action = $values->{"$self->{prefix}-action"}; |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
my $referer = $request->headers->referer; |
271
|
|
|
|
|
|
|
if (!$action) { |
272
|
|
|
|
|
|
|
if (!$referer) { |
273
|
|
|
|
|
|
|
my $uri = $self->unmodify($request->uri);; |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
# log a blank line to give the code a little breathing room |
276
|
|
|
|
|
|
|
$self->{logger}->LogLine(); |
277
|
|
|
|
|
|
|
$self->{logger}->GotoPage(url => $uri); |
278
|
|
|
|
|
|
|
} |
279
|
|
|
|
|
|
|
} elsif ($action eq "follow") { |
280
|
|
|
|
|
|
|
$self->{logger}->FollowLink(text => $values->{"$self->{prefix}-text"} || "", |
281
|
|
|
|
|
|
|
index => $values->{"$self->{prefix}-index"} || "", |
282
|
|
|
|
|
|
|
url => $values->{"$self->{prefix}-url"}); |
283
|
|
|
|
|
|
|
} elsif ($action eq "submitform") { |
284
|
|
|
|
|
|
|
my %fields; |
285
|
|
|
|
|
|
|
my ($btn_name, $btn_value, $btn_number); |
286
|
|
|
|
|
|
|
foreach my $param (keys %$values) { |
287
|
|
|
|
|
|
|
my %fieldhash; |
288
|
|
|
|
|
|
|
my ($fieldtype, $fieldname); |
289
|
|
|
|
|
|
|
if ($param =~ /^$self->{prefix}-form(\d+)-(\w+)-(.*)$/) { |
290
|
|
|
|
|
|
|
$fieldtype = $2; |
291
|
|
|
|
|
|
|
$fieldname = $3; |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
if ($fieldtype eq 'submit') { |
294
|
|
|
|
|
|
|
next unless $values->{$fieldname}; |
295
|
|
|
|
|
|
|
$btn_name = $fieldname; |
296
|
|
|
|
|
|
|
$btn_value = $values->{$fieldname}; |
297
|
|
|
|
|
|
|
} else { |
298
|
|
|
|
|
|
|
next if ($fieldtype eq 'hidden'); |
299
|
|
|
|
|
|
|
next unless $fieldname && exists $values->{$fieldname}; |
300
|
|
|
|
|
|
|
$fieldhash{'name'} = $fieldname; |
301
|
|
|
|
|
|
|
$fieldhash{'type'} = $fieldtype; |
302
|
|
|
|
|
|
|
if (ref($values->{$fieldname}) eq 'ARRAY') { |
303
|
|
|
|
|
|
|
my @tempvalues = @{$values->{$fieldname}}; |
304
|
|
|
|
|
|
|
for (my $i = 0 ; $i < scalar @tempvalues ; $i++) { |
305
|
|
|
|
|
|
|
$fieldhash{'value'} = $tempvalues[$i]; |
306
|
|
|
|
|
|
|
my %temphash = %fieldhash; |
307
|
|
|
|
|
|
|
$fields{"$fieldname-$i"} = \%temphash; |
308
|
|
|
|
|
|
|
} |
309
|
|
|
|
|
|
|
} else { |
310
|
|
|
|
|
|
|
$fieldhash{'value'} = $values->{$fieldname}; |
311
|
|
|
|
|
|
|
$fields{$fieldname} = \%fieldhash; |
312
|
|
|
|
|
|
|
} |
313
|
|
|
|
|
|
|
} |
314
|
|
|
|
|
|
|
} |
315
|
|
|
|
|
|
|
} |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
$self->{logger}->SetFieldsAndSubmit(name => $values->{"$self->{prefix}-formname"}, |
318
|
|
|
|
|
|
|
number => $values->{"$self->{prefix}-formnumber"}, |
319
|
|
|
|
|
|
|
fields => \%fields, |
320
|
|
|
|
|
|
|
button_name => $btn_name, |
321
|
|
|
|
|
|
|
button_value => $btn_value); |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
# log a blank line to give the code a little breathing room |
324
|
|
|
|
|
|
|
$self->{logger}->LogLine(); |
325
|
|
|
|
|
|
|
} |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
# undo what we've done |
328
|
|
|
|
|
|
|
$request->uri($self->unmodify($request->uri)); |
329
|
|
|
|
|
|
|
$request->content($self->unmodify($request->content)); |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
# reset the Content-Length (if needed) to prevent warnings from |
332
|
|
|
|
|
|
|
# HTTP::Protocol |
333
|
|
|
|
|
|
|
if ($action && ($action eq "submitform")) { |
334
|
|
|
|
|
|
|
$request->headers->header('Content-Length' => length($request->content()) ); |
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
} |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
my $https = $values->{"$self->{prefix}-https"}; |
339
|
|
|
|
|
|
|
if ( $https && $https == 1) { |
340
|
|
|
|
|
|
|
my $uri = $request->uri; |
341
|
|
|
|
|
|
|
$uri->scheme('https') if $uri->scheme eq 'http'; |
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
$request = new HTTP::Request($request->method, |
344
|
|
|
|
|
|
|
$uri, |
345
|
|
|
|
|
|
|
$request->headers, |
346
|
|
|
|
|
|
|
$request->content); |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
} |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
return $request; |
351
|
|
|
|
|
|
|
} |
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
sub unmodify { |
354
|
|
|
|
|
|
|
my $self = shift; |
355
|
|
|
|
|
|
|
my $content = shift; |
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
return $content unless $content; |
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
# get rid of the arguments we added |
360
|
|
|
|
|
|
|
my $prefix = $self->{prefix}; |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
# workaround: the content can be a simple string |
363
|
|
|
|
|
|
|
if (not ref $content) { |
364
|
|
|
|
|
|
|
$content =~ s/(?:^|(?<=\&))\Q$prefix\E-[^=]+=[^\&]*(\&|$)//g; |
365
|
|
|
|
|
|
|
return $content; |
366
|
|
|
|
|
|
|
} |
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
for my $key ($content->query_param) { |
369
|
|
|
|
|
|
|
if ($key =~ /^$prefix-/) { |
370
|
|
|
|
|
|
|
$content->query_param_delete($key); |
371
|
|
|
|
|
|
|
} |
372
|
|
|
|
|
|
|
} |
373
|
|
|
|
|
|
|
return $content; |
374
|
|
|
|
|
|
|
} |
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
sub extract_values { |
377
|
|
|
|
|
|
|
my $request = shift; |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
my $parser = HTTP::Request::Params->new({ |
380
|
|
|
|
|
|
|
req => $request, |
381
|
|
|
|
|
|
|
}); |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
# un-escape all params |
384
|
|
|
|
|
|
|
for my $key (keys %{$parser->params}) { |
385
|
|
|
|
|
|
|
$parser->params->{$key} = uri_unescape($parser->params->{$key}); |
386
|
|
|
|
|
|
|
} |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
return $parser->params; |
389
|
|
|
|
|
|
|
} |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
sub modify_response { |
392
|
|
|
|
|
|
|
my $self = shift; |
393
|
|
|
|
|
|
|
my $response = shift; |
394
|
|
|
|
|
|
|
my $formcount = 0; |
395
|
|
|
|
|
|
|
my $formnumber = 0; |
396
|
|
|
|
|
|
|
my $linknumber = 1; |
397
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
$response->headers->push_header('Cache-Control', 'no-store, no-cache'); |
399
|
|
|
|
|
|
|
$response->headers->push_header('Pragma', 'no-cache'); |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
my $content = $response->content(); |
402
|
|
|
|
|
|
|
my $p = HTML::TokeParser->new(\$content); |
403
|
|
|
|
|
|
|
my $newcontent = ""; |
404
|
|
|
|
|
|
|
my %links; |
405
|
|
|
|
|
|
|
my $formname; |
406
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
my $js_href = 0; |
408
|
|
|
|
|
|
|
my $in_head = 0; |
409
|
|
|
|
|
|
|
my $basehref; |
410
|
|
|
|
|
|
|
while (my $token = $p->get_token()) { |
411
|
|
|
|
|
|
|
if (@$token[0] eq 'S') { |
412
|
|
|
|
|
|
|
my $tagname = @$token[1]; |
413
|
|
|
|
|
|
|
my $attrs = @$token[2]; |
414
|
|
|
|
|
|
|
my $oldaction; |
415
|
|
|
|
|
|
|
my $text; |
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
if ($tagname eq 'head') { |
418
|
|
|
|
|
|
|
$in_head = 1; |
419
|
|
|
|
|
|
|
} elsif ($in_head && $tagname eq 'base') { |
420
|
|
|
|
|
|
|
$basehref = new URI($attrs->{'base'}); |
421
|
|
|
|
|
|
|
} elsif (($tagname eq 'a' || $tagname eq 'link') && $attrs->{'href'}) { |
422
|
|
|
|
|
|
|
my $t = $p->get_token(); |
423
|
|
|
|
|
|
|
if (@$t[0] eq 'T') { |
424
|
|
|
|
|
|
|
$text = @$t[1]; |
425
|
|
|
|
|
|
|
} else { |
426
|
|
|
|
|
|
|
undef $text; |
427
|
|
|
|
|
|
|
} |
428
|
|
|
|
|
|
|
$p->unget_token($t); |
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
# up the counter for links with the same text |
431
|
|
|
|
|
|
|
my $index; |
432
|
|
|
|
|
|
|
if (defined $text) { |
433
|
|
|
|
|
|
|
$links{$text} = 0 if !(exists $links{$text}); |
434
|
|
|
|
|
|
|
$links{$text}++; |
435
|
|
|
|
|
|
|
$index = $links{$text}; |
436
|
|
|
|
|
|
|
} else { |
437
|
|
|
|
|
|
|
$index = $linknumber; |
438
|
|
|
|
|
|
|
} |
439
|
|
|
|
|
|
|
if ($attrs->{'href'} =~ m/^javascript:/i) { |
440
|
|
|
|
|
|
|
$js_href = 1; |
441
|
|
|
|
|
|
|
} else { |
442
|
|
|
|
|
|
|
if ($tagname eq 'a') { |
443
|
|
|
|
|
|
|
$attrs->{'href'} = |
444
|
|
|
|
|
|
|
$self->rewrite_href($attrs->{'href'}, |
445
|
|
|
|
|
|
|
$text, |
446
|
|
|
|
|
|
|
$index, |
447
|
|
|
|
|
|
|
$response->base); |
448
|
|
|
|
|
|
|
} elsif ($tagname eq 'link') { |
449
|
|
|
|
|
|
|
$attrs->{'href'} = |
450
|
|
|
|
|
|
|
$self->rewrite_linkhref($attrs->{'href'}, |
451
|
|
|
|
|
|
|
$response->base); |
452
|
|
|
|
|
|
|
} |
453
|
|
|
|
|
|
|
} |
454
|
|
|
|
|
|
|
$linknumber++; |
455
|
|
|
|
|
|
|
} elsif ($tagname eq 'form') { |
456
|
|
|
|
|
|
|
$formcount++; |
457
|
|
|
|
|
|
|
$formnumber++; |
458
|
|
|
|
|
|
|
} |
459
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
# put the hidden field before the real field |
461
|
|
|
|
|
|
|
# so that it won't be inside |
462
|
|
|
|
|
|
|
if (!$js_href && $tagname ne 'form' && ($formcount == 1)) { |
463
|
|
|
|
|
|
|
my ($formfield, $fieldprefix, $fieldtype, $fieldname); |
464
|
|
|
|
|
|
|
$fieldprefix = "$self->{prefix}-form" . $formnumber; |
465
|
|
|
|
|
|
|
$fieldtype = lc($attrs->{type} || 'unknown'); |
466
|
|
|
|
|
|
|
if ($attrs->{name}) { |
467
|
|
|
|
|
|
|
$fieldname = $attrs->{name}; |
468
|
|
|
|
|
|
|
$formfield = ($fieldprefix . '-' . |
469
|
|
|
|
|
|
|
$fieldtype . '-' . $fieldname); |
470
|
|
|
|
|
|
|
$newcontent .= "\n"; |
471
|
|
|
|
|
|
|
} |
472
|
|
|
|
|
|
|
} |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
$newcontent .= ("<".$tagname); |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
# keep the attributes in their original order |
477
|
|
|
|
|
|
|
my $attrlist = @$token[3]; |
478
|
|
|
|
|
|
|
foreach my $attr (@$attrlist) { |
479
|
|
|
|
|
|
|
# only rewrite if |
480
|
|
|
|
|
|
|
# - it's not part of a javascript link |
481
|
|
|
|
|
|
|
# - it's not a hidden field |
482
|
|
|
|
|
|
|
$newcontent .= (" ".$attr."=\"".$attrs->{$attr}."\""); |
483
|
|
|
|
|
|
|
} |
484
|
|
|
|
|
|
|
$newcontent .= (">\n"); |
485
|
|
|
|
|
|
|
if ($tagname eq 'head') { |
486
|
|
|
|
|
|
|
# add the javascript to update the script, right after the head opening tag |
487
|
|
|
|
|
|
|
$newcontent .= $self->script_update(); |
488
|
|
|
|
|
|
|
} |
489
|
|
|
|
|
|
|
if ($tagname eq 'form') { |
490
|
|
|
|
|
|
|
if ($formcount == 1) { |
491
|
|
|
|
|
|
|
$newcontent .= $self->rewrite_form_content($attrs->{name} || "", $formnumber, $response->base); |
492
|
|
|
|
|
|
|
} |
493
|
|
|
|
|
|
|
} |
494
|
|
|
|
|
|
|
} elsif (@$token[0] eq 'E') { |
495
|
|
|
|
|
|
|
my $tagname = @$token[1]; |
496
|
|
|
|
|
|
|
if ($tagname eq 'head') { |
497
|
|
|
|
|
|
|
if (!$basehref) { |
498
|
|
|
|
|
|
|
$basehref = $response->base; |
499
|
|
|
|
|
|
|
$basehref->scheme('http') if $basehref->scheme eq 'https'; |
500
|
|
|
|
|
|
|
$newcontent .= "\n"; |
501
|
|
|
|
|
|
|
} |
502
|
|
|
|
|
|
|
$basehref = ""; |
503
|
|
|
|
|
|
|
$in_head = 0; |
504
|
|
|
|
|
|
|
} |
505
|
|
|
|
|
|
|
$newcontent .= (""); |
506
|
|
|
|
|
|
|
$newcontent .= ($tagname.">\n"); |
507
|
|
|
|
|
|
|
if ($tagname eq 'form') { |
508
|
|
|
|
|
|
|
$formcount--; |
509
|
|
|
|
|
|
|
} elsif ($tagname eq 'a' || $tagname eq 'link') { |
510
|
|
|
|
|
|
|
$js_href = 0; |
511
|
|
|
|
|
|
|
} |
512
|
|
|
|
|
|
|
} elsif (@$token[0] eq 'PI') { |
513
|
|
|
|
|
|
|
$newcontent .= (@$token[2]); |
514
|
|
|
|
|
|
|
} else { |
515
|
|
|
|
|
|
|
$newcontent .= (@$token[1]); |
516
|
|
|
|
|
|
|
} |
517
|
|
|
|
|
|
|
} |
518
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
$response->content($newcontent); |
520
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
return; |
522
|
|
|
|
|
|
|
} |
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
sub rewrite_href { |
525
|
|
|
|
|
|
|
my $self = shift; |
526
|
|
|
|
|
|
|
my $href = shift || ""; |
527
|
|
|
|
|
|
|
my $text = shift || ""; |
528
|
|
|
|
|
|
|
my $index = shift || 1; |
529
|
|
|
|
|
|
|
my $base = shift; |
530
|
|
|
|
|
|
|
|
531
|
|
|
|
|
|
|
my $newhref = new URI($href); |
532
|
|
|
|
|
|
|
my $prefix = $self->{prefix}; |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
if ($base->scheme eq 'https') { |
535
|
|
|
|
|
|
|
$newhref->query_param_append( "$prefix-https", 1); |
536
|
|
|
|
|
|
|
$newhref->scheme('http'); |
537
|
|
|
|
|
|
|
} |
538
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
# the original URL |
540
|
|
|
|
|
|
|
$newhref->query_param_append( "$prefix-url", uri_escape($href)); |
541
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
# the action (i.e. follow link) |
543
|
|
|
|
|
|
|
$newhref->query_param_append( "$prefix-action", 'follow'); |
544
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
# the link information |
546
|
|
|
|
|
|
|
$text = uri_escape($text); # might have special characters |
547
|
|
|
|
|
|
|
$newhref->query_param_append( "$prefix-text", $text); |
548
|
|
|
|
|
|
|
$newhref->query_param_append( "$prefix-index", $index); |
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
return $newhref; |
551
|
|
|
|
|
|
|
} |
552
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
sub rewrite_linkhref { |
554
|
|
|
|
|
|
|
my $self = shift; |
555
|
|
|
|
|
|
|
my $href = shift || ""; |
556
|
|
|
|
|
|
|
my $base = shift; |
557
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
my $newhref = new URI($href); |
559
|
|
|
|
|
|
|
my $prefix = $self->{prefix}; |
560
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
$newhref->query_param_append( "$prefix-https", 1) |
562
|
|
|
|
|
|
|
if $base->scheme eq 'https'; |
563
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
# the original URL |
565
|
|
|
|
|
|
|
$newhref->query_param_append( "$prefix-url", uri_escape($href)); |
566
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
# the action (i.e. don't record) |
568
|
|
|
|
|
|
|
$newhref->query_param_append( "$prefix-action", 'norecord'); |
569
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
return $newhref; |
571
|
|
|
|
|
|
|
} |
572
|
|
|
|
|
|
|
|
573
|
|
|
|
|
|
|
sub rewrite_form_content { |
574
|
|
|
|
|
|
|
my $self = shift; |
575
|
|
|
|
|
|
|
my $name = shift || ""; |
576
|
|
|
|
|
|
|
my $number = shift; |
577
|
|
|
|
|
|
|
my $url = shift; |
578
|
|
|
|
|
|
|
my $fields; |
579
|
|
|
|
|
|
|
|
580
|
|
|
|
|
|
|
my $https = 1 if ($url->scheme eq 'https'); |
581
|
|
|
|
|
|
|
|
582
|
|
|
|
|
|
|
$fields .= ("{prefix}-action\" value=\"submitform\">\n"); |
583
|
|
|
|
|
|
|
$fields .= ("{prefix}-formname\" value=\"$name\">\n"); |
584
|
|
|
|
|
|
|
$fields .= ("{prefix}-formnumber\" value=\"$number\">\n"); |
585
|
|
|
|
|
|
|
if ($https) { |
586
|
|
|
|
|
|
|
$fields .= ("{prefix}-https\" value=\"$https\">\n"); |
587
|
|
|
|
|
|
|
} |
588
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
return $fields; |
590
|
|
|
|
|
|
|
} |
591
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
sub get_start_page { |
593
|
|
|
|
|
|
|
my $self = shift; |
594
|
|
|
|
|
|
|
|
595
|
|
|
|
|
|
|
my $content = <
|
596
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
HTTP::Recorder Start Page |
599
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
|
606
|
|
|
|
|
|
|
Start Recording |
607
|
|
|
|
|
|
|
Type a url into the browser's adddress field to begin recording. |
608
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
EOF |
610
|
|
|
|
|
|
|
|
611
|
|
|
|
|
|
|
return $content; |
612
|
|
|
|
|
|
|
} |
613
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
sub get_recorder_content { |
615
|
|
|
|
|
|
|
my $self = shift; |
616
|
|
|
|
|
|
|
|
617
|
|
|
|
|
|
|
my @script = $self->{logger}->GetScript(); |
618
|
|
|
|
|
|
|
my $script = ""; |
619
|
|
|
|
|
|
|
foreach my $line (@script) { |
620
|
|
|
|
|
|
|
next unless $line; |
621
|
|
|
|
|
|
|
$line =~ s/\n//g; |
622
|
|
|
|
|
|
|
$script .= "$line\n"; |
623
|
|
|
|
|
|
|
} |
624
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
my $content = <
|
626
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
|
638
|
|
|
|
|
|
|
HTTP::Recorder Control Panel |
639
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
onLoad="javascript:scrollScriptAreaToEnd()" |
646
|
|
|
|
|
|
|
> |
647
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
|
691
|
|
|
|
|
|
|
EOF |
692
|
|
|
|
|
|
|
|
693
|
|
|
|
|
|
|
return $content; |
694
|
|
|
|
|
|
|
} |
695
|
|
|
|
|
|
|
|
696
|
|
|
|
|
|
|
sub script_update { |
697
|
|
|
|
|
|
|
my $self = shift; |
698
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
my $url = "http://" . $self->control . "/"; |
700
|
|
|
|
|
|
|
my $js = <
|
701
|
|
|
|
|
|
|
// find the top-level opener window |
702
|
|
|
|
|
|
|
var opwindow = window.opener; |
703
|
|
|
|
|
|
|
while (opwindow.opener) { |
704
|
|
|
|
|
|
|
opwindow = opwindow.opener; |
705
|
|
|
|
|
|
|
} |
706
|
|
|
|
|
|
|
// update it with HTTP::Recorder's control panel |
707
|
|
|
|
|
|
|
if (opwindow) { |
708
|
|
|
|
|
|
|
opwindow.location = "http://http-recorder/"; |
709
|
|
|
|
|
|
|
} |
710
|
|
|
|
|
|
|
EOF |
711
|
|
|
|
|
|
|
|
712
|
|
|
|
|
|
|
return <
|
713
|
|
|
|
|
|
|
|
718
|
|
|
|
|
|
|
EOF |
719
|
|
|
|
|
|
|
} |
720
|
|
|
|
|
|
|
|
721
|
|
|
|
|
|
|
=head1 Bugs, Missing Features, and other Oddities |
722
|
|
|
|
|
|
|
|
723
|
|
|
|
|
|
|
=head2 Javascript |
724
|
|
|
|
|
|
|
|
725
|
|
|
|
|
|
|
L can't play back Javascript actions, and |
726
|
|
|
|
|
|
|
L doesn't record them. |
727
|
|
|
|
|
|
|
|
728
|
|
|
|
|
|
|
=head2 Why are my images corrupted? |
729
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
HTTP::Recorder only tries to rewrite responses that are of type |
731
|
|
|
|
|
|
|
text/*, which it determines by reading the Content-Type header of the |
732
|
|
|
|
|
|
|
HTTP::Response object. However, if the received image gives the wrong |
733
|
|
|
|
|
|
|
Content-Type header, it may be corrupted by the recorder. While this |
734
|
|
|
|
|
|
|
may not be pleasant to look at, it shouldn't have an effect on your |
735
|
|
|
|
|
|
|
recording session. |
736
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
=head1 See Also |
738
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
See also L, L, L. |
740
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
=head1 Requests & Bugs |
742
|
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
Please submit any feature requests, suggestions, bugs, or patches at |
744
|
|
|
|
|
|
|
http://rt.cpan.org/, or email to bug-HTTP-Recorder@rt.cpan.org. |
745
|
|
|
|
|
|
|
|
746
|
|
|
|
|
|
|
If you're submitting a bug of the type "X doesn't record correctly," |
747
|
|
|
|
|
|
|
be sure to include a (preferably short and simple) HTML page that |
748
|
|
|
|
|
|
|
demonstrates the problem, and a clear explanation of a) what it does |
749
|
|
|
|
|
|
|
that it shouldn't, and b) what it should do instead. |
750
|
|
|
|
|
|
|
|
751
|
|
|
|
|
|
|
=head1 Author |
752
|
|
|
|
|
|
|
|
753
|
|
|
|
|
|
|
Copyright 2003-2005 by Linda Julien |
754
|
|
|
|
|
|
|
|
755
|
|
|
|
|
|
|
Maintained by Shmuel Fomberg |
756
|
|
|
|
|
|
|
|
757
|
|
|
|
|
|
|
Released under the GNU Public License. |
758
|
|
|
|
|
|
|
|
759
|
|
|
|
|
|
|
=cut |
760
|
|
|
|
|
|
|
|
761
|
|
|
|
|
|
|
1; |