line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package HTTP::Recorder; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
our $VERSION = "0.03_03"; |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=head1 NAME |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
HTTP::Recorder - record interaction with websites |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=head1 VERSION |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
Version <0.03_03> |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
=head1 SYNOPSIS |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
Set HTTP::Recorder as the user agent for a proxy, and it rewrites HTTP |
16
|
|
|
|
|
|
|
responses so that additional requests can be recorded. |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
Set it up like this: |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
#!/usr/bin/perl |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
use HTTP::Proxy; |
23
|
|
|
|
|
|
|
use HTTP::Recorder; |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
my $proxy = HTTP::Proxy->new(); |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
# create a new HTTP::Recorder object |
28
|
|
|
|
|
|
|
my $agent = new HTTP::Recorder; |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
# set the log file (optional) |
31
|
|
|
|
|
|
|
$agent->file("/tmp/myfile"); |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
# set HTTP::Recorder as the agent for the proxy |
34
|
|
|
|
|
|
|
$proxy->agent( $agent ); |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
# start the proxy |
37
|
|
|
|
|
|
|
$proxy->start(); |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
1; |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
Then, tell your web browser to use this proxy, and the script will be |
42
|
|
|
|
|
|
|
recorded in the specified file. |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
=head2 SSL sessions |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
As of version 0.03, L can record SSL sessions. |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
To begin recording an SSL session, go to the control URL |
49
|
|
|
|
|
|
|
(http://http-recorder/ by default), and enter the initial URL. |
50
|
|
|
|
|
|
|
Then, interact with the web site as usual. |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
=head2 Script output |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
By default, L outputs L scripts. |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
However, you can override HTTP::Recorder::Logger to output other types |
57
|
|
|
|
|
|
|
of scripts. |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=cut |
60
|
|
|
|
|
|
|
|
61
|
2
|
|
|
2
|
|
14250
|
use strict; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
53
|
|
62
|
2
|
|
|
2
|
|
7
|
use warnings; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
55
|
|
63
|
2
|
|
|
2
|
|
1150
|
use LWP::UserAgent; |
|
2
|
|
|
|
|
74462
|
|
|
2
|
|
|
|
|
57
|
|
64
|
2
|
|
|
2
|
|
892
|
use HTML::TokeParser; |
|
2
|
|
|
|
|
14637
|
|
|
2
|
|
|
|
|
66
|
|
65
|
2
|
|
|
2
|
|
784
|
use HTTP::Recorder::Logger; |
|
2
|
|
|
|
|
2
|
|
|
2
|
|
|
|
|
56
|
|
66
|
2
|
|
|
2
|
|
9
|
use URI::Escape qw(uri_escape uri_unescape); |
|
2
|
|
|
|
|
8
|
|
|
2
|
|
|
|
|
5058
|
|
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
our @ISA = qw( LWP::UserAgent ); |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
=head1 Functions |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
=head2 new |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
Creates and returns a new L object, referred to as the 'agent'. |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
=cut |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
sub new { |
79
|
0
|
|
|
0
|
1
|
|
my $class = shift; |
80
|
|
|
|
|
|
|
|
81
|
0
|
|
|
|
|
|
my %args = ( @_ ); |
82
|
|
|
|
|
|
|
|
83
|
0
|
|
|
|
|
|
my $self = $class->SUPER::new( %args ); |
84
|
0
|
|
|
|
|
|
bless $self, $class; |
85
|
|
|
|
|
|
|
|
86
|
0
|
|
0
|
|
|
|
$self->{prefix} = $args{prefix} || "rec"; |
87
|
0
|
|
0
|
|
|
|
$self->{showwindow} = $args{showwindow} || 0; |
88
|
0
|
|
0
|
|
|
|
$self->{control} = $args{control} || "http-recorder"; |
89
|
|
|
|
|
|
|
$self->{logger} = $args{logger} || |
90
|
0
|
|
0
|
|
|
|
new HTTP::Recorder::Logger(file => $args{file}); |
91
|
0
|
|
0
|
|
|
|
$self->{ignore_favicon} = $args{ignore_favicon} || 1; |
92
|
|
|
|
|
|
|
|
93
|
0
|
|
|
|
|
|
return $self; |
94
|
|
|
|
|
|
|
} |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
=head2 $agent->prefix([$value]) |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
Get or set the prefix string that L uses for rewriting |
99
|
|
|
|
|
|
|
responses. |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
=cut |
102
|
|
|
|
|
|
|
|
103
|
0
|
|
|
0
|
1
|
|
sub prefix { shift->_elem('prefix', @_); } |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
=head2 $agent->showwindow([0|1]) |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
Get or set whether L opens a JavaScript popup window, |
108
|
|
|
|
|
|
|
displaying the recorder's control panel. |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=cut |
111
|
|
|
|
|
|
|
|
112
|
0
|
|
|
0
|
1
|
|
sub showwindow { shift->_elem('showwindow', @_); } |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=head2 $agent->control([$value]) |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
Get or set the URL of L's control panel. By default, |
117
|
|
|
|
|
|
|
the control URL is 'http-recorder'. |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
The control URL will display a control panel which will allow you to |
120
|
|
|
|
|
|
|
view and edit the current script. |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=cut |
123
|
|
|
|
|
|
|
|
124
|
0
|
|
|
0
|
1
|
|
sub control { shift->_elem('control', @_); } |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=head2 $agent->logger([$value]) |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
Get or set the logger object. The default logger is a |
129
|
|
|
|
|
|
|
L, which generates L scripts. |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
=cut |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
sub logger { |
134
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
135
|
0
|
|
|
|
|
|
$self->_elem('logger', @_); |
136
|
|
|
|
|
|
|
} |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
=head2 B<$agent->ignore_favicon([0|1])> |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
Get or set ignore_favicon flag that causes L to skip |
141
|
|
|
|
|
|
|
logging requests which match /favicon\.ico$/. |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
=cut |
144
|
|
|
|
|
|
|
|
145
|
0
|
|
|
0
|
1
|
|
sub ignore_favicon { shift->_elem('ignore_favicon', @_); } |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=head2 $agent->file([$value]) |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
Get or set the filename for generated scripts. The default is |
150
|
|
|
|
|
|
|
'/tmp/scriptfile'. |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
=cut |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
sub file { |
155
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
156
|
0
|
|
|
|
|
|
my $file = shift; |
157
|
|
|
|
|
|
|
|
158
|
0
|
0
|
|
|
|
|
$self->{logger}->file($file) if $file; |
159
|
|
|
|
|
|
|
} |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
sub send_request { |
162
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
163
|
0
|
|
|
|
|
|
my $request = shift; |
164
|
|
|
|
|
|
|
|
165
|
0
|
|
|
|
|
|
my $response; |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
# special handling if the URL is the control URL |
168
|
0
|
0
|
|
|
|
|
if ($request->uri->host eq $self->{control}) { |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
# get the arguments passed from the form |
171
|
0
|
|
|
|
|
|
my $arghash; |
172
|
0
|
|
|
|
|
|
$arghash = extract_values($request); |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
# there may be an action we need to perform |
175
|
0
|
0
|
|
|
|
|
if (exists $arghash->{updatescript}) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
176
|
0
|
|
|
|
|
|
my $script = uri_unescape(@{$arghash->{ScriptContent}}[0]); |
|
0
|
|
|
|
|
|
|
177
|
0
|
|
0
|
|
|
|
$self->{logger}->SetScript($script || ''); |
178
|
|
|
|
|
|
|
} elsif (exists $arghash->{clearscript}) { |
179
|
0
|
|
|
|
|
|
$self->{logger}->SetScript(""); |
180
|
|
|
|
|
|
|
} elsif (exists $arghash->{goto}) { |
181
|
0
|
|
|
|
|
|
my $url = uri_unescape(@{$arghash->{url}}[0]); |
|
0
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
|
183
|
0
|
|
|
|
|
|
my $r = new HTTP::Request("GET", $url); |
184
|
0
|
|
|
|
|
|
my $response = $self->send_request( $r ); |
185
|
|
|
|
|
|
|
|
186
|
0
|
|
|
|
|
|
return $response; |
187
|
|
|
|
|
|
|
} |
188
|
|
|
|
|
|
|
|
189
|
0
|
|
|
|
|
|
my ($h, $content); |
190
|
0
|
0
|
|
|
|
|
if (exists $arghash->{savescript}) { |
191
|
0
|
|
|
|
|
|
$h = HTTP::Headers->new(Content_Type => 'text/plain'); |
192
|
0
|
|
|
|
|
|
my @script = $self->{logger}->GetScript(); |
193
|
0
|
|
|
|
|
|
$content = join('', @script); |
194
|
|
|
|
|
|
|
} else { |
195
|
0
|
|
|
|
|
|
$h = HTTP::Headers->new(Content_Type => 'text/html'); |
196
|
0
|
|
|
|
|
|
$content = $self->get_recorder_content(); |
197
|
|
|
|
|
|
|
} |
198
|
|
|
|
|
|
|
|
199
|
0
|
|
|
|
|
|
$response = HTTP::Response->new(200, |
200
|
|
|
|
|
|
|
"", |
201
|
|
|
|
|
|
|
$h, |
202
|
|
|
|
|
|
|
$content, |
203
|
|
|
|
|
|
|
); |
204
|
|
|
|
|
|
|
} else { |
205
|
|
|
|
|
|
|
$request = $self->modify_request ($request) |
206
|
|
|
|
|
|
|
unless $self->{ignore_favicon} |
207
|
0
|
0
|
0
|
|
|
|
&& $request->uri->path =~ /favicon\.ico$/i; |
208
|
|
|
|
|
|
|
|
209
|
0
|
|
|
|
|
|
$response = $self->SUPER::send_request( $request ); |
210
|
|
|
|
|
|
|
|
211
|
0
|
|
0
|
|
|
|
my $content_type = $response->headers->header('Content-type') || ""; |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
# don't try to modify the content unless it's text/ |
214
|
0
|
0
|
|
|
|
|
if ($content_type =~ m#^text/#i) { |
215
|
0
|
|
|
|
|
|
$self->modify_response($response); |
216
|
|
|
|
|
|
|
} |
217
|
|
|
|
|
|
|
} |
218
|
|
|
|
|
|
|
|
219
|
0
|
|
|
|
|
|
return $response; |
220
|
|
|
|
|
|
|
} |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
sub modify_request { |
223
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
224
|
0
|
|
|
|
|
|
my $request = shift; |
225
|
|
|
|
|
|
|
|
226
|
0
|
|
|
|
|
|
my $values = extract_values($request); |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
# log the actions |
229
|
0
|
|
|
|
|
|
my $action = @{$values->{"$self->{prefix}-action"}}[0]; |
|
0
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
|
231
|
0
|
|
|
|
|
|
my $referer = $request->headers->referer; |
232
|
0
|
0
|
|
|
|
|
if (!$action) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
233
|
0
|
0
|
|
|
|
|
if (!$referer) { |
234
|
0
|
|
|
|
|
|
my $uri = $request->uri; |
235
|
0
|
|
|
|
|
|
$self->unmodify(\$uri); |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
# log a blank line to give the code a little breathing room |
238
|
0
|
|
|
|
|
|
$self->{logger}->LogLine(); |
239
|
0
|
|
|
|
|
|
$self->{logger}->GotoPage(url => $uri); |
240
|
|
|
|
|
|
|
} |
241
|
|
|
|
|
|
|
} elsif ($action eq "follow") { |
242
|
|
|
|
|
|
|
$self->{logger}->FollowLink(text => @{$values->{"$self->{prefix}-text"}}[0] || "", |
243
|
|
|
|
|
|
|
index => @{$values->{"$self->{prefix}-index"}}[0] || "", |
244
|
0
|
|
0
|
|
|
|
url => @{$values->{"$self->{prefix}-url"}}[0]); |
|
0
|
|
0
|
|
|
|
|
245
|
|
|
|
|
|
|
} elsif ($action eq "submitform") { |
246
|
0
|
|
|
|
|
|
my %fields; |
247
|
0
|
|
|
|
|
|
my ($btn_name, $btn_value, $btn_number); |
248
|
0
|
|
|
|
|
|
foreach my $param (keys %$values) { |
249
|
0
|
|
|
|
|
|
my %fieldhash; |
250
|
0
|
|
|
|
|
|
my ($fieldtype, $fieldname); |
251
|
0
|
0
|
|
|
|
|
if ($param =~ /^$self->{prefix}-form(\d+)-(\w+)-(.*)$/) { |
252
|
0
|
|
|
|
|
|
$fieldtype = $2; |
253
|
0
|
|
|
|
|
|
$fieldname = $3; |
254
|
|
|
|
|
|
|
|
255
|
0
|
0
|
|
|
|
|
if ($fieldtype eq 'submit') { |
256
|
0
|
0
|
|
|
|
|
next unless $values->{$fieldname}; |
257
|
0
|
|
|
|
|
|
$btn_name = $fieldname; |
258
|
0
|
|
|
|
|
|
$btn_value = $values->{$fieldname}; |
259
|
|
|
|
|
|
|
} else { |
260
|
0
|
0
|
|
|
|
|
next if ($fieldtype eq 'hidden'); |
261
|
0
|
0
|
0
|
|
|
|
next unless $fieldname && exists $values->{$fieldname}[0]; |
262
|
0
|
|
|
|
|
|
$fieldhash{'name'} = $fieldname; |
263
|
0
|
|
|
|
|
|
$fieldhash{'type'} = $fieldtype; |
264
|
0
|
|
|
|
|
|
my @tempvalues = @{$values->{$fieldname}}; |
|
0
|
|
|
|
|
|
|
265
|
0
|
0
|
|
|
|
|
if ($fieldtype eq 'checkbox') { |
266
|
0
|
|
|
|
|
|
for (my $i = 0 ; $i < scalar @tempvalues ; $i++) { |
267
|
0
|
|
|
|
|
|
$fieldhash{'value'} = $tempvalues[$i]; |
268
|
0
|
|
|
|
|
|
$fields{"$fieldname-$i"} = \%fieldhash; |
269
|
|
|
|
|
|
|
} |
270
|
|
|
|
|
|
|
} else { |
271
|
0
|
|
|
|
|
|
$fieldhash{'value'} = $tempvalues[0]; |
272
|
0
|
|
|
|
|
|
$fields{$fieldname} = \%fieldhash; |
273
|
|
|
|
|
|
|
} |
274
|
|
|
|
|
|
|
} |
275
|
|
|
|
|
|
|
} |
276
|
|
|
|
|
|
|
} |
277
|
|
|
|
|
|
|
|
278
|
0
|
|
|
|
|
|
$self->{logger}->SetFieldsAndSubmit(name => @{$values->{"$self->{prefix}-formname"}}[0], |
279
|
0
|
|
|
|
|
|
number => @{$values->{"$self->{prefix}-formnumber"}}[0], |
|
0
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
fields => \%fields, |
281
|
|
|
|
|
|
|
button_name => $btn_name, |
282
|
|
|
|
|
|
|
button_value => $btn_value); |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
# log a blank line to give the code a little breathing room |
285
|
0
|
|
|
|
|
|
$self->{logger}->LogLine(); |
286
|
|
|
|
|
|
|
} |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
# undo what we've done |
289
|
0
|
|
|
|
|
|
$request->uri($self->unmodify($request->uri)); |
290
|
0
|
|
|
|
|
|
$request->content($self->unmodify($request->content)); |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
# reset the Content-Length (if needed) to prevent warnings from |
293
|
|
|
|
|
|
|
# HTTP::Protocol |
294
|
0
|
0
|
0
|
|
|
|
if ($action && ($action eq "submitform")) { |
295
|
0
|
|
|
|
|
|
$request->headers->header('Content-Length' => length($request->content()) ); |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
} |
298
|
|
|
|
|
|
|
|
299
|
0
|
|
|
|
|
|
my $https = $values->{"$self->{prefix}-https"}; |
300
|
0
|
0
|
0
|
|
|
|
if ( $https && $https == 1) { |
301
|
0
|
|
|
|
|
|
my $uri = $request->uri; |
302
|
0
|
|
|
|
|
|
$uri =~ s/^http:/https:/i; |
303
|
|
|
|
|
|
|
|
304
|
0
|
|
|
|
|
|
$request = new HTTP::Request($request->method, |
305
|
|
|
|
|
|
|
$uri, |
306
|
|
|
|
|
|
|
$request->headers, |
307
|
|
|
|
|
|
|
$request->content); |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
} |
310
|
|
|
|
|
|
|
|
311
|
0
|
|
|
|
|
|
return $request; |
312
|
|
|
|
|
|
|
} |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
sub unmodify { |
315
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
316
|
0
|
|
|
|
|
|
my $content = shift; |
317
|
|
|
|
|
|
|
|
318
|
0
|
0
|
|
|
|
|
return $content unless $content; |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
# get rid of the stuff we added |
321
|
0
|
|
|
|
|
|
my $prefix = $self->{prefix}; |
322
|
|
|
|
|
|
|
|
323
|
0
|
|
|
|
|
|
$content =~ s/$prefix-(.*?)\?(.*?)&//g; |
324
|
0
|
|
|
|
|
|
$content =~ s/$prefix-(.*?)&//g; |
325
|
0
|
|
|
|
|
|
$content =~ s/$prefix-(.*?)$//g; |
326
|
0
|
|
|
|
|
|
$content =~ s/&$//g; |
327
|
0
|
|
|
|
|
|
$content =~ s/\?$//g; |
328
|
|
|
|
|
|
|
|
329
|
0
|
|
|
|
|
|
return $content; |
330
|
|
|
|
|
|
|
} |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
sub extract_values { |
333
|
0
|
|
|
0
|
0
|
|
my $request = shift; |
334
|
|
|
|
|
|
|
|
335
|
0
|
|
|
|
|
|
my $values = {}; |
336
|
|
|
|
|
|
|
|
337
|
0
|
0
|
|
|
|
|
if ($request->headers->content_type eq 'multipart/form-data') { |
338
|
0
|
|
|
|
|
|
my $content = $request->content; |
339
|
0
|
|
|
|
|
|
my @segments = split(/--+/, $content); |
340
|
0
|
|
|
|
|
|
foreach (@segments) { |
341
|
0
|
0
|
|
|
|
|
next unless $_; |
342
|
0
|
|
|
|
|
|
$_ =~ s/.*Content-Disposition: //s; |
343
|
0
|
|
|
|
|
|
$_ =~ s/\r+/\n/sg; |
344
|
0
|
|
|
|
|
|
$_ =~ s/\n+/; /sg; |
345
|
0
|
|
|
|
|
|
my @fields = split(/; /, $_); |
346
|
0
|
0
|
|
|
|
|
next unless $fields[1]; |
347
|
0
|
|
|
|
|
|
$fields[1] =~ s/name="(.*)"/$1/g; |
348
|
0
|
0
|
|
|
|
|
next unless exists $fields[2]; |
349
|
0
|
0
|
|
|
|
|
if ($fields[2] =~ m/^filename/) { |
350
|
0
|
|
|
|
|
|
$fields[2] = "file here!!"; |
351
|
|
|
|
|
|
|
} else { |
352
|
0
|
|
|
|
|
|
$fields[2] =~ s/\n//sg; |
353
|
|
|
|
|
|
|
} |
354
|
0
|
|
|
|
|
|
push (@{$values->{$fields[1]}}, $fields[2]); |
|
0
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
} |
357
|
|
|
|
|
|
|
} |
358
|
|
|
|
|
|
|
|
359
|
0
|
|
|
|
|
|
my $content; |
360
|
0
|
0
|
|
|
|
|
if ($request->method eq "POST") { |
361
|
0
|
|
|
|
|
|
$content = $request->content; |
362
|
|
|
|
|
|
|
} else { |
363
|
0
|
|
|
|
|
|
my @foo = split(/\?/,$request->uri); |
364
|
0
|
|
|
|
|
|
$content = $foo[1]; |
365
|
|
|
|
|
|
|
} |
366
|
|
|
|
|
|
|
|
367
|
0
|
0
|
|
|
|
|
return () unless defined $content; |
368
|
|
|
|
|
|
|
|
369
|
0
|
|
|
|
|
|
my(@parts, $key, $val); |
370
|
|
|
|
|
|
|
|
371
|
0
|
0
|
0
|
|
|
|
if ($content =~ m/=/ or $content =~ m/&/) { |
372
|
|
|
|
|
|
|
|
373
|
0
|
|
|
|
|
|
$content =~ tr/+/ /; # RFC1630 |
374
|
0
|
|
|
|
|
|
@parts = split(/&/, $content); |
375
|
|
|
|
|
|
|
|
376
|
0
|
|
|
|
|
|
foreach (@parts) { # Extract into key and value. |
377
|
0
|
|
|
|
|
|
($key, $val) = m/^(.*?)=(.*)/; |
378
|
0
|
0
|
|
|
|
|
$val = (defined $val) ? uri_unescape($val) : ''; |
379
|
0
|
|
|
|
|
|
$key = uri_unescape($key); |
380
|
|
|
|
|
|
|
|
381
|
0
|
0
|
|
|
|
|
push (@{$values->{$key}}, $val) if defined $val; |
|
0
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
} |
383
|
|
|
|
|
|
|
} |
384
|
|
|
|
|
|
|
|
385
|
0
|
|
|
|
|
|
return $values; |
386
|
|
|
|
|
|
|
} |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
sub modify_response { |
389
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
390
|
0
|
|
|
|
|
|
my $response = shift; |
391
|
0
|
|
|
|
|
|
my $formcount = 0; |
392
|
0
|
|
|
|
|
|
my $formnumber = 0; |
393
|
0
|
|
|
|
|
|
my $linknumber = 1; |
394
|
|
|
|
|
|
|
|
395
|
0
|
|
|
|
|
|
$response->headers->push_header('Cache-Control', 'no-store, no-cache'); |
396
|
0
|
|
|
|
|
|
$response->headers->push_header('Pragma', 'no-cache'); |
397
|
|
|
|
|
|
|
|
398
|
0
|
|
|
|
|
|
my $content = $response->content(); |
399
|
0
|
|
|
|
|
|
my $p = HTML::TokeParser->new(\$content); |
400
|
0
|
|
|
|
|
|
my $newcontent = ""; |
401
|
0
|
|
|
|
|
|
my %links; |
402
|
|
|
|
|
|
|
my $formname; |
403
|
|
|
|
|
|
|
|
404
|
0
|
|
|
|
|
|
my $js_href = 0; |
405
|
0
|
|
|
|
|
|
my $in_head = 0; |
406
|
0
|
|
|
|
|
|
my $basehref; |
407
|
0
|
|
|
|
|
|
while (my $token = $p->get_token()) { |
408
|
0
|
0
|
|
|
|
|
if (@$token[0] eq 'S') { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
409
|
0
|
|
|
|
|
|
my $tagname = @$token[1]; |
410
|
0
|
|
|
|
|
|
my $attrs = @$token[2]; |
411
|
0
|
|
|
|
|
|
my $oldaction; |
412
|
|
|
|
|
|
|
my $text; |
413
|
|
|
|
|
|
|
|
414
|
0
|
0
|
0
|
|
|
|
if ($tagname eq 'head') { |
|
|
0
|
0
|
|
|
|
|
|
|
0
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
415
|
0
|
|
|
|
|
|
$in_head = 1; |
416
|
|
|
|
|
|
|
} elsif ($in_head && $tagname eq 'base') { |
417
|
0
|
|
|
|
|
|
$basehref = new URI($attrs->{'base'}); |
418
|
|
|
|
|
|
|
} elsif ($tagname eq 'html') { |
419
|
0
|
0
|
|
|
|
|
if ($self->{showwindow}) { |
420
|
0
|
|
|
|
|
|
$newcontent .= $self->script_popup(); |
421
|
|
|
|
|
|
|
} |
422
|
|
|
|
|
|
|
} elsif (($tagname eq 'a' || $tagname eq 'link') && |
423
|
|
|
|
|
|
|
$attrs->{'href'}) { |
424
|
0
|
|
|
|
|
|
my $t = $p->get_token(); |
425
|
0
|
0
|
|
|
|
|
if (@$t[0] eq 'T') { |
426
|
0
|
|
|
|
|
|
$text = @$t[1]; |
427
|
|
|
|
|
|
|
} else { |
428
|
0
|
|
|
|
|
|
undef $text; |
429
|
|
|
|
|
|
|
} |
430
|
0
|
|
|
|
|
|
$p->unget_token($t); |
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
# up the counter for links with the same text |
433
|
0
|
|
|
|
|
|
my $index; |
434
|
0
|
0
|
|
|
|
|
if (defined $text) { |
435
|
0
|
0
|
|
|
|
|
$links{$text} = 0 if !(exists $links{$text}); |
436
|
0
|
|
|
|
|
|
$links{$text}++; |
437
|
0
|
|
|
|
|
|
$index = $links{$text}; |
438
|
|
|
|
|
|
|
} else { |
439
|
0
|
|
|
|
|
|
$index = $linknumber; |
440
|
|
|
|
|
|
|
} |
441
|
0
|
0
|
|
|
|
|
if ($attrs->{'href'} =~ m/^javascript:/i) { |
442
|
0
|
|
|
|
|
|
$js_href = 1; |
443
|
|
|
|
|
|
|
} else { |
444
|
0
|
0
|
|
|
|
|
if ($tagname eq 'a') { |
|
|
0
|
|
|
|
|
|
445
|
|
|
|
|
|
|
$attrs->{'href'} = |
446
|
0
|
|
|
|
|
|
$self->rewrite_href($attrs->{'href'}, |
447
|
|
|
|
|
|
|
$text, |
448
|
|
|
|
|
|
|
$index, |
449
|
|
|
|
|
|
|
$response->base); |
450
|
|
|
|
|
|
|
} elsif ($tagname eq 'link') { |
451
|
|
|
|
|
|
|
$attrs->{'href'} = |
452
|
0
|
|
|
|
|
|
$self->rewrite_linkhref($attrs->{'href'}, |
453
|
|
|
|
|
|
|
$response->base); |
454
|
|
|
|
|
|
|
} |
455
|
|
|
|
|
|
|
} |
456
|
0
|
|
|
|
|
|
$linknumber++; |
457
|
|
|
|
|
|
|
} elsif ($tagname eq 'form') { |
458
|
0
|
|
|
|
|
|
$formcount++; |
459
|
0
|
|
|
|
|
|
$formnumber++; |
460
|
|
|
|
|
|
|
} |
461
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
# put the hidden field before the real field |
463
|
|
|
|
|
|
|
# so that it won't be inside |
464
|
0
|
0
|
0
|
|
|
|
if (!$js_href && |
|
|
|
0
|
|
|
|
|
465
|
|
|
|
|
|
|
$tagname ne 'form' && ($formcount == 1)) { |
466
|
0
|
|
|
|
|
|
my ($formfield, $fieldprefix, $fieldtype, $fieldname); |
467
|
0
|
|
|
|
|
|
$fieldprefix = "$self->{prefix}-form" . $formnumber; |
468
|
0
|
|
0
|
|
|
|
$fieldtype = lc($attrs->{type}) || 'unknown'; |
469
|
0
|
0
|
|
|
|
|
if ($attrs->{name}) { |
470
|
0
|
|
|
|
|
|
$fieldname = $attrs->{name}; |
471
|
0
|
|
|
|
|
|
$formfield = ($fieldprefix . '-' . |
472
|
|
|
|
|
|
|
$fieldtype . '-' . $fieldname); |
473
|
0
|
|
|
|
|
|
$newcontent .= "\n"; |
474
|
|
|
|
|
|
|
} |
475
|
|
|
|
|
|
|
} |
476
|
|
|
|
|
|
|
|
477
|
0
|
|
|
|
|
|
$newcontent .= ("<".$tagname); |
478
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
# keep the attributes in their original order |
480
|
0
|
|
|
|
|
|
my $attrlist = @$token[3]; |
481
|
0
|
|
|
|
|
|
foreach my $attr (@$attrlist) { |
482
|
|
|
|
|
|
|
# only rewrite if |
483
|
|
|
|
|
|
|
# - it's not part of a javascript link |
484
|
|
|
|
|
|
|
# - it's not a hidden field |
485
|
0
|
|
|
|
|
|
$newcontent .= (" ".$attr."=\"".$attrs->{$attr}."\""); |
486
|
|
|
|
|
|
|
} |
487
|
0
|
|
|
|
|
|
$newcontent .= (">\n"); |
488
|
0
|
0
|
|
|
|
|
if ($tagname eq 'form') { |
489
|
0
|
0
|
|
|
|
|
if ($formcount == 1) { |
490
|
0
|
|
0
|
|
|
|
$newcontent .= $self->rewrite_form_content($attrs->{name} || "", |
491
|
|
|
|
|
|
|
$formnumber, |
492
|
|
|
|
|
|
|
$response->base); |
493
|
|
|
|
|
|
|
} |
494
|
|
|
|
|
|
|
} |
495
|
|
|
|
|
|
|
} elsif (@$token[0] eq 'E') { |
496
|
0
|
|
|
|
|
|
my $tagname = @$token[1]; |
497
|
0
|
0
|
|
|
|
|
if ($tagname eq 'head') { |
498
|
0
|
0
|
|
|
|
|
if (!$basehref) { |
499
|
0
|
|
|
|
|
|
$basehref = $response->base; |
500
|
0
|
0
|
|
|
|
|
$basehref->scheme('http') if $basehref->scheme eq 'https'; |
501
|
0
|
|
|
|
|
|
$newcontent .= "\n"; |
502
|
|
|
|
|
|
|
} |
503
|
0
|
|
|
|
|
|
$basehref = ""; |
504
|
0
|
|
|
|
|
|
$in_head = 0; |
505
|
|
|
|
|
|
|
} |
506
|
0
|
|
|
|
|
|
$newcontent .= (""); |
507
|
0
|
|
|
|
|
|
$newcontent .= ($tagname.">\n"); |
508
|
0
|
0
|
0
|
|
|
|
if ($tagname eq 'form') { |
|
|
0
|
|
|
|
|
|
509
|
0
|
|
|
|
|
|
$formcount--; |
510
|
|
|
|
|
|
|
} elsif ($tagname eq 'a' || $tagname eq 'link') { |
511
|
0
|
|
|
|
|
|
$js_href = 0; |
512
|
|
|
|
|
|
|
} |
513
|
|
|
|
|
|
|
} elsif (@$token[0] eq 'PI') { |
514
|
0
|
|
|
|
|
|
$newcontent .= (@$token[2]); |
515
|
|
|
|
|
|
|
} else { |
516
|
0
|
|
|
|
|
|
$newcontent .= (@$token[1]); |
517
|
|
|
|
|
|
|
} |
518
|
|
|
|
|
|
|
} |
519
|
|
|
|
|
|
|
|
520
|
0
|
|
|
|
|
|
$response->content($newcontent); |
521
|
|
|
|
|
|
|
|
522
|
0
|
|
|
|
|
|
return; |
523
|
|
|
|
|
|
|
} |
524
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
sub rewrite_href { |
526
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
527
|
0
|
|
0
|
|
|
|
my $href = shift || ""; |
528
|
0
|
|
0
|
|
|
|
my $text = shift || ""; |
529
|
0
|
|
0
|
|
|
|
my $index = shift || 1; |
530
|
0
|
|
|
|
|
|
my $url = shift; |
531
|
|
|
|
|
|
|
|
532
|
0
|
|
|
|
|
|
my @parts = split(/\?/, $href); |
533
|
0
|
|
|
|
|
|
my $realhref = uri_escape($href); |
534
|
0
|
|
0
|
|
|
|
my $realargs = $parts[1] || ""; |
535
|
0
|
|
|
|
|
|
my $base = $parts[0]; |
536
|
|
|
|
|
|
|
|
537
|
0
|
|
|
|
|
|
my $https = 0; |
538
|
0
|
0
|
|
|
|
|
$https = 1 if $url->scheme eq 'https'; |
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
# the link text might have special characters in it |
541
|
0
|
|
|
|
|
|
$text = uri_escape($text); |
542
|
|
|
|
|
|
|
|
543
|
|
|
|
|
|
|
# figure out if the link is an anchor on the same page |
544
|
0
|
|
|
|
|
|
my $anchor; |
545
|
0
|
0
|
|
|
|
|
if ($href =~ m/^#/) { |
546
|
0
|
|
|
|
|
|
$anchor = $href; |
547
|
0
|
|
|
|
|
|
$base = ""; |
548
|
|
|
|
|
|
|
} |
549
|
|
|
|
|
|
|
|
550
|
0
|
|
|
|
|
|
$href = "$base?$self->{prefix}-url=$realhref"; |
551
|
0
|
0
|
|
|
|
|
$href .= "&$self->{prefix}-https=$https" if $https; |
552
|
0
|
0
|
|
|
|
|
$href .= "&$realargs" if $realargs; |
553
|
0
|
|
|
|
|
|
$href .= "&$self->{prefix}-action=follow"; |
554
|
0
|
|
|
|
|
|
$href .= "&$self->{prefix}-text=$text"; |
555
|
0
|
|
|
|
|
|
$href .= "&$self->{prefix}-index=$index"; |
556
|
0
|
0
|
|
|
|
|
$href .= $anchor if $anchor; |
557
|
|
|
|
|
|
|
|
558
|
0
|
|
|
|
|
|
return $href; |
559
|
|
|
|
|
|
|
} |
560
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
sub rewrite_linkhref { |
562
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
563
|
0
|
|
0
|
|
|
|
my $href = shift || ""; |
564
|
0
|
|
|
|
|
|
my $url = shift; |
565
|
|
|
|
|
|
|
|
566
|
0
|
|
|
|
|
|
my @parts = split(/\?/, $href); |
567
|
0
|
|
|
|
|
|
my $realhref = uri_escape($href); |
568
|
0
|
|
0
|
|
|
|
my $realargs = $parts[1] || ""; |
569
|
|
|
|
|
|
|
|
570
|
0
|
|
|
|
|
|
my $https = 0; |
571
|
0
|
0
|
|
|
|
|
$https = 1 if $url->scheme eq 'https'; |
572
|
0
|
|
|
|
|
|
my $base = $parts[0]; |
573
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
# figure out if the link is an anchor on the same page |
575
|
0
|
|
|
|
|
|
my $anchor; |
576
|
0
|
0
|
|
|
|
|
if ($href =~ m/^#/) { |
577
|
0
|
|
|
|
|
|
$anchor = $href; |
578
|
0
|
|
|
|
|
|
$base = ""; |
579
|
|
|
|
|
|
|
} |
580
|
|
|
|
|
|
|
|
581
|
0
|
|
|
|
|
|
$href = "$base?$self->{prefix}-url=$realhref"; |
582
|
0
|
0
|
|
|
|
|
$href .= "&$self->{prefix}-https=$https" if $https; |
583
|
0
|
0
|
|
|
|
|
$href .= "&$realargs" if $realargs; |
584
|
0
|
|
|
|
|
|
$href .= "&$self->{prefix}-action=norecord"; |
585
|
0
|
0
|
|
|
|
|
$href .= $anchor if $anchor; |
586
|
|
|
|
|
|
|
|
587
|
0
|
|
|
|
|
|
return $href; |
588
|
|
|
|
|
|
|
} |
589
|
|
|
|
|
|
|
|
590
|
|
|
|
|
|
|
sub rewrite_form_content { |
591
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
592
|
0
|
|
0
|
|
|
|
my $name = shift || ""; |
593
|
0
|
|
|
|
|
|
my $number = shift; |
594
|
0
|
|
|
|
|
|
my $fields; |
595
|
0
|
|
|
|
|
|
my $url = shift; |
596
|
|
|
|
|
|
|
|
597
|
0
|
0
|
|
|
|
|
my $https = 1 if ($url =~ m/^https/i); |
598
|
|
|
|
|
|
|
|
599
|
0
|
|
|
|
|
|
$fields .= ("{prefix}-action\" value=\"submitform\">\n"); |
600
|
0
|
|
|
|
|
|
$fields .= ("{prefix}-formname\" value=\"$name\">\n"); |
601
|
0
|
|
|
|
|
|
$fields .= ("{prefix}-formnumber\" value=\"$number\">\n"); |
602
|
0
|
0
|
|
|
|
|
if ($https) { |
603
|
0
|
|
|
|
|
|
$fields .= ("{prefix}-https\" value=\"$https\">\n"); |
604
|
|
|
|
|
|
|
} |
605
|
|
|
|
|
|
|
|
606
|
0
|
|
|
|
|
|
return $fields; |
607
|
|
|
|
|
|
|
} |
608
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
sub get_recorder_content { |
610
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
611
|
|
|
|
|
|
|
|
612
|
0
|
|
|
|
|
|
my @script = $self->{logger}->GetScript(); |
613
|
0
|
|
|
|
|
|
my $script = ""; |
614
|
0
|
|
|
|
|
|
foreach my $line (@script) { |
615
|
0
|
0
|
|
|
|
|
next unless $line; |
616
|
0
|
|
|
|
|
|
$line =~ s/\n//g; |
617
|
0
|
|
|
|
|
|
$script .= "$line\n"; |
618
|
|
|
|
|
|
|
} |
619
|
|
|
|
|
|
|
|
620
|
0
|
|
|
|
|
|
my $content = <
|
621
|
|
|
|
|
|
|
|
630
|
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
|
632
|
|
|
|
|
|
|
|
633
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
|
669
|
|
|
|
|
|
|
|
670
|
|
|
|
|
|
|
EOF |
671
|
|
|
|
|
|
|
|
672
|
0
|
|
|
|
|
|
return $content; |
673
|
|
|
|
|
|
|
} |
674
|
|
|
|
|
|
|
|
675
|
|
|
|
|
|
|
sub script_popup { |
676
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
677
|
|
|
|
|
|
|
|
678
|
0
|
|
|
|
|
|
my $url = "http://" . $self->control . "/"; |
679
|
0
|
|
|
|
|
|
my $js = <
|
680
|
|
|
|
|
|
|
mywin = window.open("$url", "script", "width=400,height=400,toolbar=no,scrollbars=yes,resizable=yes"); |
681
|
|
|
|
|
|
|
EOF |
682
|
|
|
|
|
|
|
|
683
|
0
|
|
|
|
|
|
return <
|
684
|
|
|
|
|
|
|
|
689
|
|
|
|
|
|
|
EOF |
690
|
|
|
|
|
|
|
} |
691
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
=head1 Bugs, Missing Features, and other Oddities |
693
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
=head2 Javascript |
695
|
|
|
|
|
|
|
|
696
|
|
|
|
|
|
|
L won't record Javascript actions. |
697
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
=head2 Why are my images corrupted? |
699
|
|
|
|
|
|
|
|
700
|
|
|
|
|
|
|
HTTP::Recorder only tries to rewrite responses that are of type |
701
|
|
|
|
|
|
|
text/*, which it determines by reading the Content-Type header of the |
702
|
|
|
|
|
|
|
HTTP::Response object. However, if the received image gives the |
703
|
|
|
|
|
|
|
wrong Content-Type header, it may be corrupted by the recorder. While |
704
|
|
|
|
|
|
|
this may not be pleasant to look at, it shouldn't have an effect on |
705
|
|
|
|
|
|
|
your recording session. |
706
|
|
|
|
|
|
|
|
707
|
|
|
|
|
|
|
=head1 See Also |
708
|
|
|
|
|
|
|
|
709
|
|
|
|
|
|
|
See also L, L, L. |
710
|
|
|
|
|
|
|
|
711
|
|
|
|
|
|
|
=head1 Requests & Bugs |
712
|
|
|
|
|
|
|
|
713
|
|
|
|
|
|
|
Please submit any feature requests, suggestions, bugs, or patches at |
714
|
|
|
|
|
|
|
http://rt.cpan.org/, or email to bug-HTTP-Recorder@rt.cpan.org. |
715
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
=head1 Mailing List |
717
|
|
|
|
|
|
|
|
718
|
|
|
|
|
|
|
There's a mailing list for users and developers of HTTP::Recorder. |
719
|
|
|
|
|
|
|
You can subscribe at |
720
|
|
|
|
|
|
|
http://lists.fsck.com/mailman/listinfo/http-recorder, or by sending |
721
|
|
|
|
|
|
|
email to http-recorder-request@lists.fsck.com with the subject |
722
|
|
|
|
|
|
|
"subscribe". |
723
|
|
|
|
|
|
|
|
724
|
|
|
|
|
|
|
The archives can be found at |
725
|
|
|
|
|
|
|
http://lists.fsck.com/pipermail/http-recorder. |
726
|
|
|
|
|
|
|
|
727
|
|
|
|
|
|
|
=head1 Author |
728
|
|
|
|
|
|
|
|
729
|
|
|
|
|
|
|
Copyright 2003-2005 by Linda Julien |
730
|
|
|
|
|
|
|
|
731
|
|
|
|
|
|
|
Released under the GNU Public License. |
732
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
=cut |
734
|
|
|
|
|
|
|
|
735
|
|
|
|
|
|
|
1; |