line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package WWW::Sitebase::Navigator; |
2
|
|
|
|
|
|
|
|
3
|
2
|
|
|
2
|
|
21155
|
use warnings; |
|
2
|
|
|
|
|
6
|
|
|
2
|
|
|
|
|
85
|
|
4
|
2
|
|
|
2
|
|
12
|
use strict; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
63
|
|
5
|
2
|
|
|
2
|
|
12
|
use WWW::Sitebase -Base; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
22
|
|
6
|
2
|
|
|
2
|
|
14021
|
use Carp; |
|
2
|
|
|
2
|
|
5
|
|
|
2
|
|
|
2
|
|
86
|
|
|
2
|
|
|
|
|
13
|
|
|
2
|
|
|
|
|
12
|
|
|
2
|
|
|
|
|
67
|
|
|
2
|
|
|
|
|
13
|
|
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
155
|
|
7
|
2
|
|
|
2
|
|
2650
|
use WWW::Mechanize; |
|
2
|
|
|
|
|
387441
|
|
|
2
|
|
|
|
|
91
|
|
8
|
2
|
|
|
2
|
|
21
|
use File::Spec::Functions; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
259
|
|
9
|
2
|
|
|
2
|
|
1049
|
use Term::ReadKey; # For password prompt |
|
2
|
|
|
|
|
5756
|
|
|
2
|
|
|
|
|
7098
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
=head1 NAME |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
WWW::Sitebase::Navigator - Base class for modules that navigate web sites |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 VERSION |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
Version 0.09 |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=cut |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
our $VERSION = '0.09'; |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
=head1 SYNOPSIS |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
This module is a base class for modules that navigate web sites |
26
|
|
|
|
|
|
|
like Myspace or Bebo. It provides basic methods like |
27
|
|
|
|
|
|
|
get_page and submit_form that are more robust than their counterparts |
28
|
|
|
|
|
|
|
in WWW::Mechanize. It also provides some core methods like "site_login". |
29
|
|
|
|
|
|
|
If you subclass this module and override the "site_info" method, |
30
|
|
|
|
|
|
|
you'll have a module that can log into your web site. Ta Da. |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
Note that this module is a subclass of "Spiffy" using "use Spiffy -Base". |
33
|
|
|
|
|
|
|
perldoc Spiffy for more info or look it up on CPAN. |
34
|
|
|
|
|
|
|
Most importantly this means we use Spiffy's "field" method to create |
35
|
|
|
|
|
|
|
accessor methods, you don't need to include "my $self = shift" |
36
|
|
|
|
|
|
|
in your methods, and you can use "super" to call the base class's |
37
|
|
|
|
|
|
|
version of an overridden method. |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
use WWW::Sitebase::Navigator -Base; |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
field site_info => { |
42
|
|
|
|
|
|
|
home_page => 'http://www.myspace.com', # URL of site's homepage |
43
|
|
|
|
|
|
|
account_field => 'email', # Fieldname from the login form |
44
|
|
|
|
|
|
|
password_field => 'password', # Password fieldname |
45
|
|
|
|
|
|
|
cache_dir => '.www-MYSITE', |
46
|
|
|
|
|
|
|
login_form_name => 'login', # The name of the login form. OR |
47
|
|
|
|
|
|
|
login_form_no => 1, # The number of the login form (defaults to 1). |
48
|
|
|
|
|
|
|
# 1 is the first form on the page. |
49
|
|
|
|
|
|
|
login_verify_re => 'Welcome.*view my profile', # (optional) |
50
|
|
|
|
|
|
|
# Non-case-sensitive RE we should see once we're logged in |
51
|
|
|
|
|
|
|
not_logged_in_re => 'Sign In<\/title>', |
52
|
|
|
|
|
|
|
# If we log in and it fails (bad password, account suddenly |
53
|
|
|
|
|
|
|
# gets logged out), the page will have this RE on it. |
54
|
|
|
|
|
|
|
# Case insensitive. |
55
|
|
|
|
|
|
|
home_uri_re => '\?fuseaction=user&', |
56
|
|
|
|
|
|
|
# _go_home uses this and the next two items to load |
57
|
|
|
|
|
|
|
# the home page. You can provide these options or |
58
|
|
|
|
|
|
|
# just override the method. |
59
|
|
|
|
|
|
|
# First, this is matched against the current URL to see if we're |
60
|
|
|
|
|
|
|
# already on the home page. |
61
|
|
|
|
|
|
|
home_link_re => 'fuseaction=user', |
62
|
|
|
|
|
|
|
# If we're not on the home page, this RE is |
63
|
|
|
|
|
|
|
# used to find a link to the "Home" button on the current |
64
|
|
|
|
|
|
|
# page. |
65
|
|
|
|
|
|
|
home_url => 'http://www.myspace.com?fuseaction=user', |
66
|
|
|
|
|
|
|
# If the "Home" button link isn't found, this URL is |
67
|
|
|
|
|
|
|
# retrieved. |
68
|
|
|
|
|
|
|
error_regexs => [ |
69
|
|
|
|
|
|
|
'An unexpected error has occurred', |
70
|
|
|
|
|
|
|
'Site is temporarily down', |
71
|
|
|
|
|
|
|
'We hired monkeys to program our site, please wait '. |
72
|
|
|
|
|
|
|
'while they throw bananas at each other.' |
73
|
|
|
|
|
|
|
], |
74
|
|
|
|
|
|
|
# error_regexs is optional. If the site you're navigating |
75
|
|
|
|
|
|
|
# displays error pages that do not return proper HTTP Status |
76
|
|
|
|
|
|
|
# codes (i.e. returns a 200 but displays an error), you can enter |
77
|
|
|
|
|
|
|
# REs here and any page that matches will be retried. |
78
|
|
|
|
|
|
|
# This is meant for IIS and ColdFusion-based sites that |
79
|
|
|
|
|
|
|
# periodically spew error messages that go away when tried again. |
80
|
|
|
|
|
|
|
}; |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
IMPORTANT: If the site your module navigates uses ANY SSL, you'll |
83
|
|
|
|
|
|
|
need to add "Crypt::SSLeay" to your list of prerequisite modules. Otherwise |
84
|
|
|
|
|
|
|
your methods will die if they hit an SSL-encrypted page. |
85
|
|
|
|
|
|
|
WWW::Sitebase::Navigator doesn't require this for you to prevent unnecessary |
86
|
|
|
|
|
|
|
overhead for sites that don't need it. |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
=cut |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
# Where should we store files? (cookies, cache dir). We use, and untaint, |
91
|
|
|
|
|
|
|
# the user's home dir for the default. |
92
|
0
|
|
|
0
|
|
|
sub _home_dir {
    # Returns the directory under which cookies and cache data are stored:
    # the value of $ENV{HOME}, untainted by matching it against a whitelist
    # of path characters.  Returns "" when HOME is not defined and croaks
    # when HOME contains a character outside the whitelist.
    my $raw_home = $ENV{'HOME'};

    # No HOME in the environment: fall back to the empty string.
    return "" unless defined $raw_home;

    # A list assignment in scalar context yields the number of captures,
    # so a failed match (empty list) falls through to croak.
    my ( $clean_home ) = $raw_home =~ /^([\-A-Za-z0-9_ \/\.@\+\\:]*)$/
        or croak "Invalid characters in $ENV{HOME}.";

    return $clean_home;
}
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
=head1 OPTIONS |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
=head2 default_options |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
Override this method to allow additional options to be passed to |
112
|
|
|
|
|
|
|
"new". You should also provide accessor methods for them. |
113
|
|
|
|
|
|
|
These are parsed by Params::Validate. In brief, setting an |
114
|
|
|
|
|
|
|
option to "0" means it's optional, "1" means it's required. |
115
|
|
|
|
|
|
|
See Params::Validate for more info. |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
sub default_options { |
118
|
|
|
|
|
|
|
$self->{default_options}={ |
119
|
|
|
|
|
|
|
account_name => 0, |
120
|
|
|
|
|
|
|
password => 0, |
121
|
|
|
|
|
|
|
cache_dir => 0, # Default set by site_info field method |
122
|
|
|
|
|
|
|
cache_file => 0, # Default set by field method below |
123
|
|
|
|
|
|
|
auto_login => 0, # Default set by field method below |
124
|
|
|
|
|
|
|
human => 0, # Default set by field method below |
125
|
|
|
|
|
|
|
config_file => 0 |
126
|
|
|
|
|
|
|
}; |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
return $self->{default_options}; |
129
|
|
|
|
|
|
|
} |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
# So to add a "questions" option that's mandatory: |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
sub default_options { |
134
|
|
|
|
|
|
|
super; |
135
|
|
|
|
|
|
|
$self->{default_options}->{questions}=1; |
136
|
|
|
|
|
|
|
return $self->{default_options}; |
137
|
|
|
|
|
|
|
} |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
=cut |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
# Options they can pass via hash or hashref. |
142
|
0
|
|
|
0
|
1
|
|
sub default_options {
    # Builds the option specification accepted by "new", stores it in
    # $self->{default_options} and returns that hashref.  Every option is
    # mapped to 0 (the POD above describes 0 as "optional").
    #
    #   cache_dir               - default comes from the site_info field
    #   cache_file, auto_login,
    #   human                   - defaults come from the field declarations
    #                             below
    my @optional = qw(
        account_name
        password
        cache_dir
        cache_file
        auto_login
        human
        config_file
    );

    my %spec = map { $_ => 0 } @optional;
    $self->{default_options} = \%spec;

    return $self->{default_options};
}
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
=head2 positional_parameters |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
You can also allow your users to provide information to the "new" |
159
|
|
|
|
|
|
|
method via positional parameters. If the first argument passed |
160
|
|
|
|
|
|
|
to "new" is not a known valid option, positional parameters |
161
|
|
|
|
|
|
|
are used instead. |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
These default to: |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
const positional_parameters => [ 'account_name', 'password' ]; |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
You can override this method to provide your own list if you like: |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
const positional_parameters => [ 'account_name', 'password', 'shoe_size' ]; |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
=cut |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
# Options they can pass by position. |
176
|
|
|
|
|
|
|
# Just "new( 'joe@bebo.com', 'mypass' )". |
177
|
|
|
|
|
|
|
# Options accepted by position, in this order.
const 'positional_parameters', [ qw( account_name password ) ];

# Accessors for the options; the second argument, when present, is the
# default value.
field 'account_name';
field 'password';
field 'cache_file', 'login_cache';
field 'auto_login', 0;
field 'human',      1;

# site_info is declared as a stub here; the SYNOPSIS shows subclasses
# supplying it.
stub 'site_info';
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
=head1 OPTION ACCESSORS |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
These methods can be used to set/retrieve the respective option's value. |
190
|
|
|
|
|
|
|
They're also up top here to document the option, which can be passed |
191
|
|
|
|
|
|
|
directly to the "new" method. |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
=head2 account_name |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
Sets or returns the account name (email address) under which you're logged in. |
196
|
|
|
|
|
|
|
Note that the account name is retrieved from the user or from your program |
197
|
|
|
|
|
|
|
depending on how you called the "new" method. You'll probably only use this |
198
|
|
|
|
|
|
|
accessor method to get account_name. |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
EXAMPLE |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
The following would prompt the user for their login information, then print |
203
|
|
|
|
|
|
|
out the account name: |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
use WWW::Bebo; |
206
|
|
|
|
|
|
|
my $site = new WWW::Bebo; |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
print $site->account_name; |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
$site->account_name( 'other_account@bebo.com' ); |
211
|
|
|
|
|
|
|
$site->password( 'other_accounts_password' ); |
212
|
|
|
|
|
|
|
$site->site_login; |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
WARNING: If you do change account_name, make sure you change password and |
215
|
|
|
|
|
|
|
call site_login. Changing account_name doesn't (currently) log you |
216
|
|
|
|
|
|
|
out, nor does it clear "password". If you change this and don't log in |
217
|
|
|
|
|
|
|
under the new account, it'll just have the wrong value, which will probably |
218
|
|
|
|
|
|
|
be ignored, but who knows. |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
=cut |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
=head2 password |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
Sets or returns the password you used, or will use, to log in. See the |
226
|
|
|
|
|
|
|
warning under "account_name" above - same applies here. |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
=cut |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
=head2 cache_dir |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
WWW::Sitebase::Navigator stores the last account/password used in a |
234
|
|
|
|
|
|
|
cache file for convenience if the user's entering it. Other modules |
235
|
|
|
|
|
|
|
store other cache data as well. |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
cache_dir sets or returns the directory in which we should store cache |
238
|
|
|
|
|
|
|
data. Defaults to $self->site_info->{cache_dir}. |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
If using this from a CGI script, you will need to provide the |
241
|
|
|
|
|
|
|
account and password in the "new" method call, or call "new" with |
242
|
|
|
|
|
|
|
"auto_login => 0" so cache_dir will not be used. |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
=cut |
245
|
|
|
|
|
|
|
|
246
|
0
|
|
|
0
|
1
|
|
sub cache_dir {
    # Returns the cache directory path: the site's 'cache_dir' entry from
    # site_info, placed under the directory returned by _home_dir.
    #
    # NOTE(review): the POD says cache_dir "sets or returns" the directory
    # and default_options accepts a cache_dir option, but this method never
    # consults a stored value -- confirm whether that is intended.
    my $home_dir   = $self->_home_dir;
    my $site_cache = $self->site_info->{'cache_dir'};

    return catfile( $home_dir, $site_cache );
}
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
=head2 cache_file |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
Sets or returns the name of the file into which the login |
252
|
|
|
|
|
|
|
cache data is stored. Defaults to login_cache. |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
If using this from a CGI script, you will need to provide the |
255
|
|
|
|
|
|
|
account and password in the "new" method call, so cache_file will |
256
|
|
|
|
|
|
|
not be used. |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
=cut |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
=head2 auto_login |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
Really only useful as an option passed to the "new" method when |
264
|
|
|
|
|
|
|
creating a new object. |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
# Create a new object and prompt the user to log in. |
267
|
|
|
|
|
|
|
my $site = new WWW::MySite( auto_login => 1 ); |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
=cut |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
=head2 human |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
When set to a true value (which is the default), adds delays to |
275
|
|
|
|
|
|
|
make the module act more like a human. This is both to offset |
276
|
|
|
|
|
|
|
"faux security" measures, and to conserve bandwidth. If you're |
277
|
|
|
|
|
|
|
trying to use multiple accounts to spam users who don't |
278
|
|
|
|
|
|
|
want to hear what you have to say, you should turn this off |
279
|
|
|
|
|
|
|
because it'll make your spamming go faster. |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
=cut |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
=head1 FUNCTIONS |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
=head2 new( $account, $password ) |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
=head2 new( ) |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
If called without the optional account and password, the new method |
291
|
|
|
|
|
|
|
looks in a user-specific preferences file in the user's home |
292
|
|
|
|
|
|
|
directory for the last-used account and password. It prompts |
293
|
|
|
|
|
|
|
for the username and password with which to log in, providing |
294
|
|
|
|
|
|
|
the last-used data (from the preferences file) as defaults. |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
Once the account and password have been retrieved, the new method |
297
|
|
|
|
|
|
|
automatically invokes the "site_login" method and returns a new |
298
|
|
|
|
|
|
|
object reference. The new object already contains the |
299
|
|
|
|
|
|
|
content of the user's "home" page, the user's friend ID, and |
300
|
|
|
|
|
|
|
a WWW::Mechanize object used internally as the "browser" that is used |
301
|
|
|
|
|
|
|
by all methods in the class. |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
If account_name and password are specified, the "new" method will |
304
|
|
|
|
|
|
|
set auto_login to 1 and call the "site_login" method. This just means |
305
|
|
|
|
|
|
|
that if you pass an account_name and password when creating the object, |
306
|
|
|
|
|
|
|
it'll log you in unless you explicitly state "auto_login => 0". |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
WWW::Sitebase::Navigator is a subclass of WWW::Sitebase, which |
309
|
|
|
|
|
|
|
basically just means people can call your "new" method in many ways: |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
EXAMPLES |
312
|
|
|
|
|
|
|
use WWW::YourSiteModule; |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
# Just create the object |
315
|
|
|
|
|
|
|
my $site = new WWW::YourSiteModule; |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
# Prompt for username and password |
318
|
|
|
|
|
|
|
my $site = new WWW::YourSiteModule( auto_login => 1 ); |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
# Pass just username and password (logs you in) |
321
|
|
|
|
|
|
|
my $site = new WWW::YourSiteModule( 'my@email.com', 'mypass' ); |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
# Pass options as a hashref |
324
|
|
|
|
|
|
|
my $site = new WWW::YourSiteModule( { |
325
|
|
|
|
|
|
|
account_name => 'my@email.com', |
326
|
|
|
|
|
|
|
password => 'mypass', |
327
|
|
|
|
|
|
|
cache_file => 'passcache', |
328
|
|
|
|
|
|
|
} ); |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
# Pass options as a hash |
331
|
|
|
|
|
|
|
my $site = new WWW::YourSiteModule( |
332
|
|
|
|
|
|
|
account_name => 'my@email.com', |
333
|
|
|
|
|
|
|
password => 'mypass', |
334
|
|
|
|
|
|
|
cache_file => 'passcache', |
335
|
|
|
|
|
|
|
auto_login => 0, # Don't log in, just create the object |
336
|
|
|
|
|
|
|
); |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
=cut |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
sub new() {
    # Constructor.  Delegates object creation to the base class, then
    # either logs in or just prepares a fresh browser object.
    #
    # Login rules (as documented in the POD for "new"):
    #   * auto_login given explicitly  -> that value is honoured;
    #   * auto_login not given, but both account_name and password are
    #     set                           -> auto_login is switched on;
    #   * otherwise                     -> the field default applies.
    #
    # The "()" after the sub name is kept deliberately: presumably it tells
    # the Spiffy source filter not to inject "my $self = shift" here, since
    # $self is declared explicitly below -- confirm against Spiffy's docs.

    # Call the Base new method.
    my $self = super;

    # Only imply auto_login when the caller did not pass the option.
    # Previously an explicit "auto_login => 0" was overridden whenever an
    # account and password were supplied, contradicting the documentation.
    # Assumes options passed to "new" are stored under $self->{<option>},
    # as _get_acct in this file does for account_name/password -- verify
    # against WWW::Sitebase.
    my $auto_login_given = defined $self->{auto_login};
    if ( !$auto_login_given && $self->account_name && $self->password ) {
        $self->auto_login(1);
    }

    if ( $self->auto_login ) {

        # Prompt for username/password if we don't have them yet.
        $self->_get_acct unless $self->account_name;

        $self->site_login;

    } else {

        # Not logging in: logout resets the login state and creates the
        # WWW::Mechanize object.
        $self->logout;

    }

    return $self;
}
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
=head2 site_login |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
Logs into the account identified by the "account_name" and |
366
|
|
|
|
|
|
|
"password" options. |
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
If you don't call the new method with "auto_login => 1", you'll need to |
369
|
|
|
|
|
|
|
call this method if you want to log in. |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
If the login gets a "you must be logged-in" page when you first try to |
372
|
|
|
|
|
|
|
log in, $site->error will be set to an error message that says to |
373
|
|
|
|
|
|
|
check the username and password. |
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
Once login is successful for a given username/password combination, |
376
|
|
|
|
|
|
|
the object "remembers" that the username/password |
377
|
|
|
|
|
|
|
is valid, and if it encounters a "you must be logged-in" page, it will |
378
|
|
|
|
|
|
|
try up to 20 times to re-login. |
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
=cut |
381
|
|
|
|
|
|
|
|
382
|
0
|
|
|
0
|
1
|
|
sub site_login {
    # Logs in with the current account_name and password.
    #
    # Returns 1 on success and undef on failure; on failure $self->error
    # holds a message.  Croaks if account_name or password is not set.

    # Optional pattern that must appear on the page after login; stays
    # undef when site_info has no 'login_verify_re'.
    my $verify_re = $self->site_info->{'login_verify_re'};

    # Reset everything (this also creates a new browser object).
    $self->logout;

    $self->account_name
        or croak "site_login called but account_name isn't set";
    $self->password
        or croak "site_login called but password isn't set";

    # Now log in.
    $self->_try_login;
    return undef if $self->error;

    # Verify we're logged in: with no verification pattern the login is
    # taken on trust, otherwise the current page must match it.
    my $verified = ( !$verify_re )
        || ( $self->current_page->decoded_content =~ /$verify_re/si );

    if ( $verified ) {
        $self->logged_in( 1 );
        return 1;
    }

    $self->logged_in( 0 );
    $self->error( "Login Failed. Couldn't verify load of home page." )
        unless $self->error;

    return undef;
}
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
# _try_login |
422
|
|
|
|
|
|
|
# You call this as $self->_try_login. Attempts to log in using |
423
|
|
|
|
|
|
|
# the set account_name and password. It gets and submits the login form, |
424
|
|
|
|
|
|
|
# then checks for a valid submission and for a "you must be logged-in" |
425
|
|
|
|
|
|
|
# page. |
426
|
|
|
|
|
|
|
# If called with a number as an argument, tries that many times to |
427
|
|
|
|
|
|
|
# submit the form. It calls itself recursively. |
428
|
0
|
|
|
0
|
|
|
sub _try_login {
    # Submits the login form and checks the result, retrying recursively
    # when the account/password are already known to be good.
    #
    # Argument: optional remaining-tries counter.  When omitted, the
    # counter starts at 20.  When given, it is decremented first and the
    # attempt is abandoned once it drops below 1.
    #
    # Returns 1 on success.  Returns a false value on failure, with
    # $self->error set when retries were exhausted or the "not logged in"
    # page was shown for an unverified account.
    my ( $tries_left ) = @_;

    if ( defined $tries_left ) {
        $tries_left--;
        if ( $tries_left < 1 ) {
            # Out of retries.  Record an error so callers that only look
            # at $self->error (as site_login does) see the failure.
            $self->error( "Login Failed. Maximum number of login attempts reached." )
                unless $self->error;
            return;
        }
    } else {
        $tries_left = 20;
    }

    # Default the login form to form#1 for backward compatibility.
    $self->site_info->{'login_form_no'} = 1
        unless ( $self->site_info->{'login_form_no'}
              || $self->site_info->{'login_form_name'} );

    # Submit the login form.
    my $submitted = $self->_submit_login;
    return undef unless $submitted;

    # A clean page means we're in.
    return 1 if $self->_check_login;

    # We got the "not logged in" page.  If this account/password pair has
    # worked before, keep trying until the counter runs out, and report
    # the outcome of that retry rather than unconditional success.
    if ( $self->_account_verified ) {
        return $self->_try_login( $tries_left );
    }

    $self->error( "Login Failed. Got 'You Must Be Logged-In' page ".
        "when logging in.\nCheck username and password." );
    return undef;
}
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
=head2 _submit_login |
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
This method just calls submit_form with the values specified in site_info. |
470
|
|
|
|
|
|
|
It's been separated out just in case you have a sticky login form and you |
471
|
|
|
|
|
|
|
want to override this method to do something fancy. The other option was to |
472
|
|
|
|
|
|
|
give a lot more options in site_info, but to really give the amount of control |
473
|
|
|
|
|
|
|
you might need, it just makes more sense to set up site_info for the usual cases, |
474
|
|
|
|
|
|
|
and override this method if you need to get fancy. |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
You must return 1 for success, 0 for failure. All you really need to do is |
477
|
|
|
|
|
|
|
this: |
478
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
# Submit the login form |
480
|
|
|
|
|
|
|
my $submitted = $self->submit_form( |
481
|
|
|
|
|
|
|
page => $self->site_info->{'home_page'}, |
482
|
|
|
|
|
|
|
form_name => $self->site_info->{'login_form_name'}, |
483
|
|
|
|
|
|
|
form_no => $self->site_info->{'login_form_no'}, |
484
|
|
|
|
|
|
|
fields_ref => { |
485
|
|
|
|
|
|
|
$self->site_info->{'account_field'} => $self->account_name, |
486
|
|
|
|
|
|
|
$self->site_info->{'password_field'} => $self->password |
487
|
|
|
|
|
|
|
} |
488
|
|
|
|
|
|
|
); |
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
return $submitted; |
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
And fill in your special values instead. Again, only do this if your login |
493
|
|
|
|
|
|
|
doesn't work with the stuff you set up in site_info. |
494
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
=cut |
496
|
|
|
|
|
|
|
|
497
|
0
|
|
|
0
|
|
|
sub _submit_login {
    # Submits the login form described by site_info, filling the account
    # and password fields from the current account_name and password.
    # Returns whatever submit_form returns.
    my $info = $self->site_info;

    # Form field name => value to enter.
    my %credentials = (
        $info->{'account_field'}  => $self->account_name,
        $info->{'password_field'} => $self->password,
    );

    return $self->submit_form(
        page       => $info->{'home_page'},
        form_name  => $info->{'login_form_name'},
        form_no    => $info->{'login_form_no'},
        fields_ref => \%credentials,
    );
}
510
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
=head2 _check_login |
512
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
Checks for "You must be logged in to do that". If found, tries to log |
514
|
|
|
|
|
|
|
in again and returns 0, otherwise returns 1. |
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
=cut |
517
|
|
|
|
|
|
|
|
518
|
0
|
|
|
0
|
|
|
sub _check_login {
    # Checks a response for signs that we are not logged in.
    #
    # Argument: optional response object; defaults to the current page.
    # It is used through code/is_error/status_line/decoded_content, i.e.
    # it is presumably an HTTP::Response -- confirm against current_page.
    #
    # Returns 1 when the page looks fine.  Returns 0 when the response is
    # a 403 or its content matches site_info's 'not_logged_in_re'; in that
    # case a warning is emitted and, if we were logged in, site_login is
    # called to log back in.
    my ( $res ) = @_;

    # Check the current page by default.
    $res = $self->current_page unless $res;

    my $re = $self->site_info->{'not_logged_in_re'};

    # is_error is a boolean, so the status must be read from code.
    my $forbidden = ( $res->code == 403 );

    # Skip the content test when no pattern is configured: an empty
    # pattern would make Perl reuse the last successfully matched regex.
    my $not_logged_in_page = 0;
    if ( defined $re && length $re ) {
        my $content = $res->decoded_content;
        $content = "" unless defined $content;
        $not_logged_in_page = ( $content =~ /$re/is ) ? 1 : 0;
    }

    return 1 unless ( $forbidden || $not_logged_in_page );

    if ( $res->is_error ) {
        warn "Error: " . $res->status_line . "\n";
    } else {
        warn "Got \"not logged in\" page\n";
    }

    # If we already logged in, try to log us back in.
    if ( $self->logged_in ) { $self->site_login }

    # Return 0 so they'll try again.
    return 0;
}
543
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
# _account_verified |
545
|
|
|
|
|
|
|
# Returns true if we've verified that the current account and password |
546
|
|
|
|
|
|
|
# are valid (by successfully logging in with them) |
547
|
0
|
|
|
0
|
|
|
sub _account_verified {
    # Returns true when the current account_name has a password recorded
    # in $self->{_account_verified} and that password equals the current
    # one; returns false otherwise.
    my $account  = $self->account_name;
    my $password = $self->password;
    return 0 unless ( defined $account && defined $password );

    # Read through a copy of the reference so a lookup never creates the
    # _account_verified hash as a side effect.
    my $verified_for    = $self->{_account_verified} || {};
    my $known_password  = $verified_for->{$account};
    return 0 unless $known_password;

    # Compare (the original assigned to the accessor call with "=",
    # which is a runtime error for a non-lvalue method).
    return ( $password eq $known_password ) ? 1 : 0;
}
554
|
|
|
|
|
|
|
|
555
|
|
|
|
|
|
|
# _init_account |
556
|
|
|
|
|
|
|
# Initialize basic account/login-specific settings after login |
557
|
0
|
|
|
0
|
|
|
sub _init_account {
    # Initializes account-specific settings from the current page after a
    # login, then records the account/password pair as verified.
    # If no friend ID can be determined, marks the session as not logged
    # in and returns early.

    # Get our friend ID from the current page.
    $self->_get_friend_id( $self->current_page );

    # If for some reason we couldn't set this, fail login.
    if ( !$self->my_friend_id ) {
        $self->logged_in(0);
        return;
    }

    # Set the user_name and friend_count fields.  current_page is read
    # again for each call, exactly as before.
    for my $setter ( 'user_name', 'friend_count' ) {
        $self->$setter( $self->current_page );
    }

    # Cache whether or not we're a band.
    $self->is_band;

    # Note that we've verified this account/password.
    $self->{_account_verified}->{ $self->account_name } = $self->password;
}
577
|
|
|
|
|
|
|
|
578
|
0
|
|
|
0
|
|
|
sub _new_mech {
    # Creates a fresh WWW::Mechanize object, stores it via the mech
    # accessor, and allows POST requests to follow redirects.

    # We'll say we're Safari running on MacOS 10.9.1
    my $user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1)'
        . ' AppleWebKit/537.73.11 (KHTML, like Gecko) Version/7.0.1'
        . ' Safari/537.73.11';

    my @mech_options = (
        onerror     => undef,
        agent       => $user_agent,
        stack_depth => 1,
        quiet       => 1,
    );

    # Set up our web browser.
    $self->mech( WWW::Mechanize->new( @mech_options ) );

    # We need to follow redirects for POST too.
    push @{ $self->mech->requests_redirectable }, 'POST';
}
595
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
597
|
|
|
|
|
|
|
# _get_acct() |
598
|
|
|
|
|
|
|
# Get and store the login and password. We check the user's preference |
599
|
|
|
|
|
|
|
# file for defaults, then prompt them. |
600
|
|
|
|
|
|
|
|
601
|
0
|
|
|
0
|
|
|
sub _get_acct {
    # Prompts the user for the account (email) and password, offering the
    # values saved in the login cache file as defaults, then writes the
    # chosen values back to the cache file and stores them in the object.
    #
    # The cache file lives at cache_dir/cache_file and holds one
    # "name:value" pair per line ("email:..." and "password:...").
    # Croaks if the cache file cannot be opened for writing or closed.
    my %prefs = ();
    my $cache_filepath = catfile( $self->cache_dir, $self->cache_file );

    # Read what we got last time.  A missing or unreadable cache file is
    # not an error; we simply have no defaults to offer.
    if ( open( my $cache_in, "<", $cache_filepath ) ) {
        while ( my $line = <$cache_in> ) {
            chomp $line;

            # Split on the first colon only, so values that themselves
            # contain ":" (e.g. passwords) are preserved intact.
            my ( $pref, $value ) = split( /:/, $line, 2 );
            next unless ( defined $pref && length $pref );
            $prefs{$pref} = $value;
        }
        close $cache_in;
    }

    # Make sure both entries exist so the prompts below never see undef.
    for my $key ( "email", "password" ) {
        $prefs{$key} = "" unless defined $prefs{$key};
    }

    # Prompt them for current values.  Empty input keeps the default.
    print "Email [" . $prefs{"email"} . "]: ";
    my $res = <STDIN>;
    $res = "" unless defined $res;    # EOF on STDIN
    chomp $res;
    $prefs{"email"} = $res if length $res;

    my $password_indicator = $prefs{'password'} ? '*****' : '';
    print "Password [". $password_indicator . "]: ";
    ReadMode( 'noecho' );    # From Term::ReadKey. Make password not echo.
    $res = ReadLine( 0 );
    ReadMode( 'normal' );
    $res = "" unless defined $res;
    chomp $res;
    print "\n";    # Because ReadLine won't output a new line when they hit return
    $prefs{"password"} = $res if length $res;

    # Make the cache directory if it doesn't exist.
    $self->make_cache_dir;

    # Store the new values.  We clobber the file, set it r/w by the user,
    # *then* write.
    open( my $cache_out, ">", $cache_filepath ) or croak $!;
    chmod 0600, $cache_filepath;
    print {$cache_out} "email:" . $prefs{"email"} . "\n" .
        "password:" . $prefs{"password"} . "\n";

    # "or" (not "||") so the check applies to close's result.
    close( $cache_out )
        or croak "Error closing file when writing username/password: $!";

    # Store the account info.
    $self->{account_name} = $prefs{"email"};
    $self->{password}     = $prefs{"password"};
}
655
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
=head2 logout |
657
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
Clears the current web browsing object and resets any login-specific |
659
|
|
|
|
|
|
|
internal values. Currently this drops and creates a new WWW::Mechanize |
660
|
|
|
|
|
|
|
object. This may change in the future to actually clicking "logout" |
661
|
|
|
|
|
|
|
or something. |
662
|
|
|
|
|
|
|
|
663
|
|
|
|
|
|
|
=cut |
664
|
|
|
|
|
|
|
|
665
|
0
|
|
|
0
|
1
|
|
sub logout {

    # "Logging out" currently means throwing the browser away: _new_mech
    # replaces the internal WWW::Mechanize object.  If this is ever
    # changed to perform a real log-out request, site_login has to be
    # updated to match.
    $self->_new_mech;

    # Reset the state that only makes sense for a logged-in session.
    $self->logged_in( 0 );
    $self->error( 0 );

    # Settings supplied by the user are deliberately left alone:
    # $self->{account_name} and $self->{password} are NOT cleared.

}
680
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
#--------------------------------------------------------------------- |
682
|
|
|
|
|
|
|
# Value return methods |
683
|
|
|
|
|
|
|
# These methods return internal data that is of use to outsiders |
684
|
|
|
|
|
|
|
|
685
|
0
|
|
|
0
|
|
|
# Empty placeholder sub.  It has no body; it appears to exist only as a
# visual divider in front of the status-reporting methods.
sub ____CHECK_STATUS____ { }
686
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
=head1 CHECK STATUS |
688
|
|
|
|
|
|
|
|
689
|
|
|
|
|
|
|
=head2 logged_in |
690
|
|
|
|
|
|
|
|
691
|
|
|
|
|
|
|
Returns true if login was successful. When you call the new method |
692
|
|
|
|
|
|
|
of WWW::Sitebase::Navigator, the class logs in using the username and password |
693
|
|
|
|
|
|
|
you provided (or that it prompted for). It then retrieves your "home" |
694
|
|
|
|
|
|
|
page (the one you see when you click the "Home" button that's set up in your |
695
|
|
|
|
|
|
|
site_info field), and checks it against an RE. If the page matches the RE, |
696
|
|
|
|
|
|
|
logged_in is set to a true value. Otherwise it's set to a false value. |
697
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
Notes: |
699
|
|
|
|
|
|
|
- This method is only set on login. If you're logged out somehow, |
700
|
|
|
|
|
|
|
this method won't tell you that (yet - I may add that later). |
701
|
|
|
|
|
|
|
- The internal login method calls this method to set the value. |
702
|
|
|
|
|
|
|
You can (currently) call logged_in with a value, and it'll set |
703
|
|
|
|
|
|
|
it, but that would be stupid, and it might not work later |
704
|
|
|
|
|
|
|
anyway, so don't. |
705
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
Examples, pretending we have a subclass named WWW::Bebo to navigate a site |
707
|
|
|
|
|
|
|
named bebo.com: |
708
|
|
|
|
|
|
|
|
709
|
|
|
|
|
|
|
my $bebo = new WWW::Bebo; |
710
|
|
|
|
|
|
|
unless ( $bebo->logged_in ) { |
711
|
|
|
|
|
|
|
die "Login failed\n"; |
712
|
|
|
|
|
|
|
} |
713
|
|
|
|
|
|
|
|
714
|
|
|
|
|
|
|
# This will log you in, looping forever until it succeeds. |
715
|
|
|
|
|
|
|
my $bebo; |
716
|
|
|
|
|
|
|
|
717
|
|
|
|
|
|
|
do { |
718
|
|
|
|
|
|
|
$bebo = new WWW::Bebo( $username, $password ); |
719
|
|
|
|
|
|
|
} until ( $bebo->logged_in ); |
720
|
|
|
|
|
|
|
|
721
|
|
|
|
|
|
|
=cut |
722
|
|
|
|
|
|
|
|
723
|
|
|
|
|
|
|
# Accessor for the login flag, declared with Spiffy's "field"; the
# default value is 0 (not logged in).
field 'logged_in' => 0;
724
|
|
|
|
|
|
|
|
725
|
|
|
|
|
|
|
=head2 error |
726
|
|
|
|
|
|
|
|
727
|
|
|
|
|
|
|
This value is set by some methods to return an error message. |
728
|
|
|
|
|
|
|
If there's no error, it returns a false value, so you can do this: |
729
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
$site->get_profile( 12345 ); |
731
|
|
|
|
|
|
|
if ( $site->error ) { |
732
|
|
|
|
|
|
|
warn $site->error . "\n"; |
733
|
|
|
|
|
|
|
} else { |
734
|
|
|
|
|
|
|
# Do stuff |
735
|
|
|
|
|
|
|
} |
736
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
=cut |
738
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
# Accessor for the last error message, declared with Spiffy's "field";
# the default value is 0, meaning "no error".
field error => 0;
740
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
=head2 current_page |
742
|
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
Returns a reference to an HTTP::Response object that contains the last page |
744
|
|
|
|
|
|
|
retrieved by the WWW::Sitebase::Navigator object. All methods (e.g. get_page, post_comment, |
745
|
|
|
|
|
|
|
get_profile, etc) set this value. |
746
|
|
|
|
|
|
|
|
747
|
|
|
|
|
|
|
EXAMPLE |
748
|
|
|
|
|
|
|
|
749
|
|
|
|
|
|
|
The following will print the content of the user's profile page: |
750
|
|
|
|
|
|
|
|
751
|
|
|
|
|
|
|
use WWW::Bebo; |
752
|
|
|
|
|
|
|
my $bebo = new WWW::Bebo; |
753
|
|
|
|
|
|
|
|
754
|
|
|
|
|
|
|
print $bebo->current_page->decoded_content; |
755
|
|
|
|
|
|
|
|
756
|
|
|
|
|
|
|
=cut |
757
|
|
|
|
|
|
|
|
758
|
0
|
|
|
0
|
1
|
|
sub current_page {

    # Read-only view of the "current_page" slot, which get_page and
    # submit_form fill with the most recent response.
    my $latest_response = $self->{'current_page'};

    return $latest_response;

}
763
|
|
|
|
|
|
|
|
764
|
|
|
|
|
|
|
=head2 mech |
765
|
|
|
|
|
|
|
|
766
|
|
|
|
|
|
|
The internal WWW::Mechanize object. Use at your own risk: I don't |
767
|
|
|
|
|
|
|
promise this method will stay here or work the same in the future. |
768
|
|
|
|
|
|
|
The internal methods used to access sites are subject to change at |
769
|
|
|
|
|
|
|
any time, including using something different than WWW::Mechanize. |
770
|
|
|
|
|
|
|
|
771
|
|
|
|
|
|
|
=cut |
772
|
|
|
|
|
|
|
|
773
|
|
|
|
|
|
|
# Accessor for the internal WWW::Mechanize object, declared with
# Spiffy's "field"; no default value is given here.
field( 'mech' );
774
|
|
|
|
|
|
|
|
775
|
|
|
|
|
|
|
=head2 get_page( $url, [ %options ] ) |
776
|
|
|
|
|
|
|
|
777
|
|
|
|
|
|
|
get_page returns a reference to an HTTP::Response object that contains |
778
|
|
|
|
|
|
|
the web page specified by $url. |
779
|
|
|
|
|
|
|
|
780
|
|
|
|
|
|
|
get_page will try up to 20 times until it gets the page, with a 2-second |
781
|
|
|
|
|
|
|
delay between attempts. It checks for invalid HTTP response codes, |
782
|
|
|
|
|
|
|
and error pages as defined in site_info->{error_regexs}. |
783
|
|
|
|
|
|
|
|
784
|
|
|
|
|
|
|
Options can be: |
785
|
|
|
|
|
|
|
|
786
|
|
|
|
|
|
|
re => $regular_expression |
787
|
|
|
|
|
|
|
follow => 1 |
788
|
|
|
|
|
|
|
|
789
|
|
|
|
|
|
|
"re" Is a regular expression. If provided, get_page |
790
|
|
|
|
|
|
|
will consider the page an error unless the page content matches |
791
|
|
|
|
|
|
|
the regexp. This is designed to get past network problems and such. |
792
|
|
|
|
|
|
|
|
793
|
|
|
|
|
|
|
If "follow" is set, a "Referer" header will be added, simulating |
794
|
|
|
|
|
|
|
clicking on a link on the current page to get to the URL provided. |
795
|
|
|
|
|
|
|
|
796
|
|
|
|
|
|
|
EXAMPLE |
797
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
# The following displays the HTML source of MySpace.com's home |
799
|
|
|
|
|
|
|
# page, verifying that there is evidence of a login form on the |
800
|
|
|
|
|
|
|
# retrieved page. |
801
|
|
|
|
|
|
|
my $res=get_page( "http://www.myspace.com/", re => 'E-Mail:.*?Password:' ); |
802
|
|
|
|
|
|
|
|
803
|
|
|
|
|
|
|
print $res->decoded_content; |
804
|
|
|
|
|
|
|
|
805
|
|
|
|
|
|
|
=cut |
806
|
|
|
|
|
|
|
|
807
|
0
|
|
|
0
|
1
|
|
sub get_page {

    # get_page( $url, %options )
    #
    # Fetches $url through the internal WWW::Mechanize object, retrying
    # until _page_ok accepts the response or 20 attempts have been made
    # (_page_ok itself pauses 2 seconds after each rejected attempt).
    #
    # Options:
    #   re     => pattern handed to _page_ok to verify the page content.
    #   follow => true to send a "Referer" header naming the page we are
    #             currently on.
    #
    # Always stores the last response in $self->{current_page}.
    # Returns the response, or undef if the error flag is still set
    # after the final attempt.
    my ( $url, %options ) = @_;

    # Reset error
    $self->error( 0 );

    # Build the optional Referer header.  There may be no current page
    # yet (e.g. the very first request), or the stored response may not
    # carry its request; in either case simply send no Referer instead
    # of dying on an undefined value.
    my %headers = ();
    if ( $options{follow} ) {
        my $previous = $self->{current_page};
        my $request  = ref( $previous ) ? $previous->request : undef;
        if ( $request ) {
            %headers = ( 'Referer' => $request->uri );
        }
    }

    # Try to get the page 20 times.
    # (The page cache -- _read_cache / _cache_page -- is not consulted
    # here; those calls are disabled.)
    my $attempts = 20;
    my $res;
    do {

        $res = $self->mech->get( $url, %headers );
        $attempts--;

    } until ( ( $self->_page_ok( $res, $options{re} ) ) || ( $attempts <= 0 ) );

    # We both set "current_page" and return the value.
    $self->{current_page} = $res;

    # In "human" mode, pause 6-10 seconds after every page.
    sleep ( int( rand( 5 ) ) + 6 ) if $self->human;

    return undef if $self->error;
    return ( $res );

}
843
|
|
|
|
|
|
|
|
844
|
|
|
|
|
|
|
=head2 follow_to( $url, $regexp ) |
845
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
Convenience method that calls get_page with follow => 1. |
847
|
|
|
|
|
|
|
Use this if you're stepping through pages. |
848
|
|
|
|
|
|
|
|
849
|
|
|
|
|
|
|
=cut |
850
|
|
|
|
|
|
|
|
851
|
0
|
|
|
0
|
1
|
|
sub follow_to {

    my ( $target_url, $verify_re ) = @_;

    # Same as get_page, but always with follow => 1 so the request
    # carries a Referer header for the page we are on now.
    my @page_options = ( re => $verify_re, follow => 1 );

    return $self->get_page( $target_url, @page_options );

}
858
|
|
|
|
|
|
|
|
859
|
|
|
|
|
|
|
=head2 follow_link |
860
|
|
|
|
|
|
|
|
861
|
|
|
|
|
|
|
This is the method you "should" use to navigate your sites, as it's |
862
|
|
|
|
|
|
|
the most "human"-looking. |
863
|
|
|
|
|
|
|
|
864
|
|
|
|
|
|
|
This is like a robust version of WWW::Mechanize's "follow_link" |
865
|
|
|
|
|
|
|
method. It calls "find_link" with your arguments (and as such takes |
866
|
|
|
|
|
|
|
the same arguments). It adds the "re" argument, which is passed to |
867
|
|
|
|
|
|
|
get_page to verify we in fact got the page. Returns an HTTP::Response |
868
|
|
|
|
|
|
|
object if it succeeds, sets $self->error and returns undef if it fails. |
869
|
|
|
|
|
|
|
|
870
|
|
|
|
|
|
|
$self->_go_home; |
871
|
|
|
|
|
|
|
$self->follow_link( text_regex => qr/inbox/i, re => 'Mail Center' ) |
872
|
|
|
|
|
|
|
or die $self->error; |
873
|
|
|
|
|
|
|
|
874
|
|
|
|
|
|
|
There are a lot of options, so perldoc WWW::Mechanize and search for |
875
|
|
|
|
|
|
|
$mech->find_link to see them all. |
876
|
|
|
|
|
|
|
|
877
|
|
|
|
|
|
|
=cut |
878
|
|
|
|
|
|
|
|
879
|
0
|
|
|
0
|
1
|
|
sub follow_link {

    # follow_link( %options )
    #
    # Looks up a link on the current page with WWW::Mechanize's
    # find_link (all options except "re" are passed straight through)
    # and fetches it with get_page, sending a Referer header.
    #
    #   re => optional pattern passed to get_page to verify the page.
    #
    # Returns the response from get_page, or sets the error message
    # to 'Link not found on page' and returns undef when no link matches.
    my ( %options ) = @_;

    # Take out options that are just for us.  The key is removed
    # whenever it is present -- even with an empty/false value -- so it
    # can never reach find_link as an unknown search criterion.
    my $re = delete $options{re};
    $re = '' unless $re;

    # Find the link
    my $link = $self->mech->find_link( %options );

    unless ( $link ) {
        $self->error('Link not found on page');
        return undef;
    }

    # Follow it
    my $res = $self->get_page( $link->url, re => $re, follow => 1 );
    return $res;

}
901
|
|
|
|
|
|
|
|
902
|
|
|
|
|
|
|
=head2 _cache_page( $url, $res ) |
903
|
|
|
|
|
|
|
|
904
|
|
|
|
|
|
|
Stores $res in a cache. |
905
|
|
|
|
|
|
|
|
906
|
|
|
|
|
|
|
=cut |
907
|
|
|
|
|
|
|
|
908
|
0
|
|
|
0
|
|
|
sub _cache_page {

    my ( $page_url, $response ) = @_;

    # File the response under its URL ...
    $self->{'page_cache'}{ $page_url } = $response;

    # ... then let _clean_cache prune the cache.
    return $self->_clean_cache;

}
917
|
|
|
|
|
|
|
|
918
|
|
|
|
|
|
|
=head2 _read_cache( $url ) |
919
|
|
|
|
|
|
|
|
920
|
|
|
|
|
|
|
Check the cache for this page. |
921
|
|
|
|
|
|
|
|
922
|
|
|
|
|
|
|
=cut |
923
|
|
|
|
|
|
|
|
924
|
0
|
|
|
0
|
|
|
sub _read_cache {

    my ( $page_url ) = @_;

    # A cached entry is only usable if it exists and the response
    # object still reports itself as fresh; otherwise hand back "".
    my $cached = $self->{'page_cache'}{ $page_url };
    my $usable = $cached && $cached->is_fresh;

    return $usable ? $cached : "";

}
936
|
|
|
|
|
|
|
|
937
|
|
|
|
|
|
|
=head2 _clean_cache |
938
|
|
|
|
|
|
|
|
939
|
|
|
|
|
|
|
Cleans any non-"fresh" page from the cache. |
940
|
|
|
|
|
|
|
|
941
|
|
|
|
|
|
|
=cut |
942
|
|
|
|
|
|
|
|
943
|
0
|
|
|
0
|
|
|
sub _clean_cache {

    # Removes every entry from $self->{page_cache} whose stored response
    # is missing or no longer reports itself as fresh.
    #
    # The cache maps URL strings to response objects, so is_fresh has to
    # be called on the stored value; calling it on the key (a plain URL
    # string) would die with "Can't locate object method".
    foreach my $url ( keys( %{ $self->{'page_cache'} } ) ) {
        my $response = $self->{'page_cache'}->{ $url };
        unless ( $response && $response->is_fresh ) {
            delete $self->{'page_cache'}->{ $url };
        }
    }

    return;

}
952
|
|
|
|
|
|
|
|
953
|
|
|
|
|
|
|
#---------------------------------------------------------------------
# _page_ok( $response, $regexp )
# Decides whether a response object holds a usable page.
#  - If we believe we are logged in and _check_login rejects the
#    response, the page fails with "Not logged in".
#  - If the response is not successful, the page fails and the status
#    line is both stored as the error and emitted with warn.
#  - If $regexp is given, the page content MUST match it; the known
#    error patterns are not consulted in that case.
#  - Otherwise the content is checked against each pattern in
#    site_info->{'error_regexs'} (when that key is defined).
# On failure it stores a printable message via $self->error, sleeps
# 2 seconds (so the caller can retry immediately) and returns 0.
# On success $self->error is left at 0 and 1 is returned.
# Called by get_page and submit_form.

sub _page_ok {
    my ( $res, $regexp ) = @_;

    # Reset error
    $self->error(0);

    my $failure;         # message for the first check that fails, if any
    my $warn_too = 0;    # true when the failure should also be warn()ed

    # If we think we're logged in, check for the "You must be logged-in"
    # error page.
    if ( ( $self->logged_in ) && ( ! $self->_check_login( $res ) ) ) {
        $failure = "Not logged in";
    }

    # The request itself failed.
    elsif ( ! $res->is_success ) {
        $failure  = "Error getting page: \n" . " " . $res->status_line;
        $warn_too = 1;
    }

    # Page loaded, but make sure it isn't an error page.
    else {
        my $content = $res->decoded_content;
        $content =~ s/[ \t\n\r]+/ /g;    # Collapse runs of whitespace

        if ( $regexp ) {
            # Page must match the caller's verification pattern.
            unless ( $content =~ /$regexp/ism ) {
                $failure = "Page doesn't match verification pattern.";
            }
        }
        elsif ( defined $self->site_info->{'error_regexs'} ) {
            # Otherwise, look for our known temporary errors.
            my $known_errors = $self->site_info->{'error_regexs'};
            foreach my $error_regex ( @{ $known_errors } ) {
                next unless $content =~ /$error_regex/ism;
                $failure = "Got error page.";
                last;
            }
        }
    }

    return 1 unless defined $failure;

    $self->error( $failure );
    warn $failure . "\n" if $warn_too;
    sleep 2;

    return 0;

}
1030
|
|
|
|
|
|
|
|
1031
|
|
|
|
|
|
|
|
1032
|
|
|
|
|
|
|
=head2 submit_form( %options ) |
1033
|
|
|
|
|
|
|
|
1034
|
|
|
|
|
|
|
Valid options: |
1035
|
|
|
|
|
|
|
$site->submit_form( |
1036
|
|
|
|
|
|
|
page => "http://some.url.org/formpage.html", |
1037
|
|
|
|
|
|
|
form_no => 1, |
1038
|
|
|
|
|
|
|
form_name => "myform", # Use this OR form_no OR form |
1039
|
|
|
|
|
|
|
form => $form, # HTML::Form object with a ready-to-post form. |
1040
|
|
|
|
|
|
|
# (page, form_no, form_name, fields_ref and action will |
1041
|
|
|
|
|
|
|
# be ignored). |
1042
|
|
|
|
|
|
|
button => "mybutton", |
1043
|
|
|
|
|
|
|
no_click => 0, # 0 or 1. |
1044
|
|
|
|
|
|
|
fields_ref => { field => 'value', field2 => 'value' }, |
1045
|
|
|
|
|
|
|
re1 => 'something unique.?about this[ \t\n]+page', |
1046
|
|
|
|
|
|
|
re2 => 'something unique about the submitted page', |
1047
|
|
|
|
|
|
|
action => 'http://some.url.org/newpostpage.cgi', # Only needed in weird occasions |
1048
|
|
|
|
|
|
|
); |
1049
|
|
|
|
|
|
|
|
1050
|
|
|
|
|
|
|
This powerful little method reads the web page specified by "page", |
1051
|
|
|
|
|
|
|
finds the form specified by "form_no" or "form_name", fills in the values |
1052
|
|
|
|
|
|
|
specified in "fields_ref", and clicks the button named "button". |
1053
|
|
|
|
|
|
|
|
1054
|
|
|
|
|
|
|
You may or may not need this method - it's used internally by |
1055
|
|
|
|
|
|
|
any method that needs to fill in and post a form. I made it |
1056
|
|
|
|
|
|
|
public just in case you need to fill in and post a form that's not |
1057
|
|
|
|
|
|
|
handled by another method (in which case, see CONTRIBUTING below :). |
1058
|
|
|
|
|
|
|
|
1059
|
|
|
|
|
|
|
"page" can either be a text string that is a URL or a reference to an |
1060
|
|
|
|
|
|
|
HTTP::Response object that contains the source of the page |
1061
|
|
|
|
|
|
|
that contains the form. If it is an empty string or not specified, |
1062
|
|
|
|
|
|
|
the current page ( $site->current_page ) is used. |
1063
|
|
|
|
|
|
|
|
1064
|
|
|
|
|
|
|
"form_no" is used to numerically identify the form on the page. It's a |
1065
|
|
|
|
|
|
|
simple counter starting from 1. If there are 3 forms on the page and |
1066
|
|
|
|
|
|
|
you want to fill in and submit the second form, set "form_no => 2". |
1067
|
|
|
|
|
|
|
For the first form, use "form_no => 1". |
1068
|
|
|
|
|
|
|
|
1069
|
|
|
|
|
|
|
"form_name" is used to identify the form by name. In actuality, |
1070
|
|
|
|
|
|
|
submit_form simply uses "form_name" to iterate through the forms |
1071
|
|
|
|
|
|
|
and sets "form_no" for you. |
1072
|
|
|
|
|
|
|
|
1073
|
|
|
|
|
|
|
"form" can be used if you have a customized form you want to submit. |
1074
|
|
|
|
|
|
|
Pass an HTML::Form object and set "button", "no_click", and "re2" |
1075
|
|
|
|
|
|
|
as desired, and you can use submit_form's tenacious submission routine |
1076
|
|
|
|
|
|
|
with your own values. |
1077
|
|
|
|
|
|
|
|
1078
|
|
|
|
|
|
|
"button" is the name of the button to submit. This will frequently |
1079
|
|
|
|
|
|
|
be "submit", but if they've named the button something clever like |
1080
|
|
|
|
|
|
|
"Submit22" (as MySpace did in their login form), then you may have to |
1081
|
|
|
|
|
|
|
use that. If no button is specified (either by button => '' or by |
1082
|
|
|
|
|
|
|
not specifying button at all), the first button on the form |
1083
|
|
|
|
|
|
|
is clicked. |
1084
|
|
|
|
|
|
|
|
1085
|
|
|
|
|
|
|
If "no_click" is set to 1, the form will be submitted without |
1086
|
|
|
|
|
|
|
clicking any button. This is used to simulate the JavaScript |
1087
|
|
|
|
|
|
|
form submits Myspace does on the browse pages. |
1088
|
|
|
|
|
|
|
|
1089
|
|
|
|
|
|
|
"fields_ref" is a reference to a hash that contains field names |
1090
|
|
|
|
|
|
|
and values you want to fill in on the form. |
1091
|
|
|
|
|
|
|
For checkboxes with no "value" attribute, specify a value of "on" |
1092
|
|
|
|
|
|
|
to check it, "off" to uncheck it. |
1093
|
|
|
|
|
|
|
|
1094
|
|
|
|
|
|
|
"re1" is an optional Regular Expression that will be used to make |
1095
|
|
|
|
|
|
|
sure the proper form page has been loaded. The page content will |
1096
|
|
|
|
|
|
|
be matched to the RE, and will be treated as an error page and retried |
1097
|
|
|
|
|
|
|
until it matches. See get_page for more info. |
1098
|
|
|
|
|
|
|
|
1099
|
|
|
|
|
|
|
"re2" is an optional RE that will be used to make sure that the |
1100
|
|
|
|
|
|
|
post was successful. USE THIS CAREFULLY! If your RE breaks, you could |
1101
|
|
|
|
|
|
|
end up repeatedly posting a form. |
1102
|
|
|
|
|
|
|
|
1103
|
|
|
|
|
|
|
"action" is the post action for the form, as in: |
1104
|
|
|
|
|
|
|
|
1105
|
|
|
|
|
|
|
<form action="http://some.url.org/newpostpage.cgi" method="post"> |
1106
|
|
|
|
|
|
|
|
1107
|
|
|
|
|
|
|
This is here because Myspace likes to do weird things like reset |
1108
|
|
|
|
|
|
|
form actions with Javascript then post them without clicking form buttons. |
1109
|
|
|
|
|
|
|
|
1110
|
|
|
|
|
|
|
EXAMPLE |
1111
|
|
|
|
|
|
|
|
1112
|
|
|
|
|
|
|
This is how WWW::Myspace's post_comment method posted a comment: |
1113
|
|
|
|
|
|
|
|
1114
|
|
|
|
|
|
|
# Submit the comment to $friend_id's page |
1115
|
|
|
|
|
|
|
$self->submit_form( |
page => "${VIEW_COMMENT_FORM}${friend_id}", |
form_no => 1, |
button => "submit", |
fields_ref => { 'f_comments' => "$message" }, |
re2 => 'f_comments', |
); |
1118
|
|
|
|
|
|
|
|
1119
|
|
|
|
|
|
|
# Confirm it |
1120
|
|
|
|
|
|
|
$self->submit_form( form_no => 1, button => "submit", fields_ref => {} ); |
1121
|
|
|
|
|
|
|
|
1122
|
|
|
|
|
|
|
=cut |
1123
|
|
|
|
|
|
|
|
1124
|
|
|
|
|
|
|
|
1125
|
0
|
|
|
0
|
1
|
|
sub submit_form {

    # submit_form( %options )
    #
    # Fills in and submits a form, retrying the submission up to 5 times
    # until _page_ok accepts the resulting page (verified against "re2"
    # when given).
    #
    # Either pass a ready-made HTML::Form object in "form", or let the
    # method locate the form: "page" (URL, HTTP::Response, or empty for
    # the current page) together with "form_no" (1-based) and/or
    # "form_name".  With neither selector the first form is used.
    # "action" and "fields_ref" are applied only to a form the method
    # located itself.  "button" / "no_click" choose how to submit.
    #
    # Returns true on success.  Returns 0 (or false) with $self->error
    # set when the page cannot be loaded, the requested form is not on
    # the page, or the submission fails.
    my ( %options ) = @_;

    # Initialize our variables
    my $res = "";

    # If they gave us a form, use it. Otherwise, get it and fill it in.
    my $f = "";
    if ( $options{'form'} ) {
        $f = $options{'form'};
    } else {
        # Get the page
        if ( ref( $options{'page'} ) eq "HTTP::Response" ) {
            # They gave us a page already
            $res = $options{'page'};
        } elsif ( ! $options{'page'} ) {
            $res = $self->current_page;
        } else {
            # Get the page
            $res = $self->get_page( $options{'page'}, re => $options{'re1'} );
            # If we couldn't get the page, return failure.
            return 0 if $self->error;
        }

        # Nothing to parse (e.g. no page has been loaded yet).
        unless ( $res ) {
            $self->error( "No page to read the form from in submit_form!" );
            return 0;
        }

        # Select the form they wanted, or return failure if we can't.
        my @forms = HTML::Form->parse( $res );
        my $form_index;    # 0-based position of the chosen form

        if ( $options{'form_no'} ) {
            $form_index = $options{'form_no'} - 1;    # form_no counts from 1
            unless ( ( $form_index >= 0 ) && ( @forms > $form_index ) ) {
                $self->error( "Form not on page in submit_form!" );
                return 0;
            }
        }

        if ( $options{'form_name'} ) {
            my $position = 0;
            my $found    = 0;
            foreach my $form ( @forms ) {
                my $name = $form->attr( 'name' );
                if ( ( $name ) && ( $name eq $options{'form_name'} ) ) {
                    $form_index = $position;
                    $found      = 1;
                    last;
                }
                $position++;
            }

            # A name that matches nothing is a failure, unless form_no
            # already picked a valid form to fall back on.  Without this
            # check the first form on the page would be submitted instead.
            unless ( $found || defined $form_index ) {
                $self->error( "Form not on page in submit_form!" );
                return 0;
            }
        }

        # No selector at all: use the first form.
        $form_index = 0 unless defined $form_index;

        $f = $forms[ $form_index ];
        unless ( $f ) {
            # The page contains no forms.
            $self->error( "Form not on page in submit_form!" );
            return 0;
        }

        # Set the action if they gave us one
        if ( $options{'action'} ) { $f->action( $options{'action'} ) }

        # Loop through the fields in the form and set them.
        foreach my $field ( keys %{ $options{'fields_ref'} || {} } ) {
            # If the field "exists" on the form, just fill it in,
            # otherwise, add it as a hidden field.
            my $input = $f->find_input( $field );
            if ( $input ) {
                # Read-only inputs must be unlocked before they can be set.
                $input->readonly(0) if $input->readonly;
                $f->param( $field, $options{'fields_ref'}->{ $field } );
            } else {
                $f = $self->_add_to_form( $f, $field, $options{'fields_ref'}->{ $field } );
            }
        }
    }

    # Debugging aid: dump the prepared form and stop.
    if ( $options{'die'} ) { print $f->dump; die }

    # Submit the form.  Try up to $attempts times.
    my $attempts = 5;
    my $trying_again = 0;
    do
    {
        # If we're trying again, mention it.
        warn $self->error . "\n" if $trying_again;

        eval {
            if ( $options{'button'} ) {
                $res = $self->mech->request( $f->click( $options{'button'} ) );
            } elsif ( $options{'no_click'} ) {
                # We use make_request because some sites like submitting forms
                # that have buttons by using Javascript. make_request submits
                # the form without clicking anything, whereas "click" clicks
                # the first button, which can break things.
                $res = $self->mech->request( $f->make_request );
            } else {
                # Just click the first button
                $res = $self->mech->request( $f->click );
            }
        };

        # If it died (it will if there's no button), just return failure.
        if ( $@ ) {
            $self->error( $@ );
            return 0;
        }

        $attempts--;
        $trying_again = 1;

    } until ( ( $self->_page_ok( $res, $options{'re2'} ) ) || ( $attempts <= 0 ) );

    # Return the result
    $self->{current_page} = $res;
    return ( ! $self->error );

}
1233
|
|
|
|
|
|
|
|
1234
|
|
|
|
|
|
|
=head2 _add_to_form |
1235
|
|
|
|
|
|
|
|
1236
|
|
|
|
|
|
|
Internal method to add a hidden field to a form. HTML::Form thinks we |
1237
|
|
|
|
|
|
|
don't want to change hidden fields, and if a hidden field has no value, |
1238
|
|
|
|
|
|
|
it won't even create an input object for it. If that's way over your |
1239
|
|
|
|
|
|
|
head don't worry, it just means we're fixing things with this method, |
1240
|
|
|
|
|
|
|
and submit_form will call this method for you if you pass it a field that |
1241
|
|
|
|
|
|
|
doesn't show up on the form. |
1242
|
|
|
|
|
|
|
|
1243
|
|
|
|
|
|
|
Returns a form object that is the old form with the new field in it. |
1244
|
|
|
|
|
|
|
|
1245
|
|
|
|
|
|
|
# Add field $fieldname to form $form (a HTML::Form object) and |
1246
|
|
|
|
|
|
|
# set it's value to $value. |
1247
|
|
|
|
|
|
|
$self->_add_to_form( $form, $fieldname, $value ) |
1248
|
|
|
|
|
|
|
|
1249
|
|
|
|
|
|
|
=cut |
1250
|
|
|
|
|
|
|
|
1251
|
0
|
|
|
0
|
|
|
sub _add_to_form {

    my ( $form, $field_name, $field_value ) = @_;

    # Append a new hidden input carrying the requested name and value.
    my %attributes = ( name => $field_name, value => $field_value );
    $form->push_input( 'hidden', \%attributes );

    # Hand the (modified) form object back to the caller.
    return $form;
}
1259
|
|
|
|
|
|
|
|
1260
|
|
|
|
|
|
|
=head2 _go_home |
1261
|
|
|
|
|
|
|
|
1262
|
|
|
|
|
|
|
Internal method to go to the home page. Checks to see if we're already |
1263
|
|
|
|
|
|
|
there. If not, tries to click the Home button on the page. If there |
1264
|
|
|
|
|
|
|
isn't one, loads the page explicitly. |
1265
|
|
|
|
|
|
|
|
1266
|
|
|
|
|
|
|
=cut |
1267
|
|
|
|
|
|
|
|
1268
|
0
|
|
|
0
|
|
|
sub _go_home {

    my $home_link_pattern = $self->site_info->{'home_link_re'};
    my $home_uri_pattern  = $self->site_info->{'home_uri_re'};

    # Not logged in: just load the site's home page and stop.
    # NOTE(review): this branch reads site_info->{'home_page'} while the
    # fallback at the bottom reads site_info->{'home_url'} -- confirm
    # that both keys are intended.
    if ( ! $self->logged_in ) {
        $self->get_page( $self->site_info->{'home_page'} );
        return;
    }

    # Already on the home page?  Then there is nothing to do.
    return if $self->mech->uri =~ /$home_uri_pattern/i;

    # Otherwise prefer "clicking" a Home link found on the current page.
    my $home_link =
        $self->mech->find_link( url_regex => qr/$home_link_pattern/i );
    if ( $home_link ) {
        $self->get_page( $home_link->url );
        return;
    }

    # No such link: load the home URL explicitly.
    $self->get_page( $self->site_info->{'home_url'} );

    return;

}
1301
|
|
|
|
|
|
|
|
1302
|
|
|
|
|
|
|
=head2 make_cache_dir |
1303
|
|
|
|
|
|
|
|
1304
|
|
|
|
|
|
|
Creates the cache directory in cache_dir. Only creates the |
1305
|
|
|
|
|
|
|
top-level directory, croaks if it can't create it. |
1306
|
|
|
|
|
|
|
|
1307
|
|
|
|
|
|
|
$myspace->cache_dir("/path/to/dir"); |
1308
|
|
|
|
|
|
|
$myspace->make_cache_dir; |
1309
|
|
|
|
|
|
|
|
1310
|
|
|
|
|
|
|
This function mainly exists for the internal login method to use, |
1311
|
|
|
|
|
|
|
and for related sub-modules that store their cache files by |
1312
|
|
|
|
|
|
|
default in WWW::Myspace's cache directory. |
1313
|
|
|
|
|
|
|
|
1314
|
|
|
|
|
|
|
=cut |
1315
|
|
|
|
|
|
|
|
1316
|
0
|
|
|
0
|
1
|
|
sub make_cache_dir {

    # Creates the directory named by $self->cache_dir if it does not
    # exist yet.  Only the top-level directory is created.
    # Croaks, including the system error, if the directory cannot be
    # created.
    my $dir = $self->cache_dir;

    return if -d $dir;

    unless ( mkdir $dir ) {
        # Capture the reason before the -d test below can overwrite $!.
        my $reason = $!;

        # Another process may have created the directory between the
        # check above and the mkdir; that is not an error.
        croak "Can't create cache directory " . $dir . ": $reason"
            unless -d $dir;
    }

    return;

}
1325
|
|
|
|
|
|
|
|
1326
|
|
|
|
|
|
|
=head2 debug( message ); |
1327
|
|
|
|
|
|
|
|
1328
|
|
|
|
|
|
|
Accepts a debugging message. Output is currently disabled (the warn call in the method body is commented out), so calling it has no visible effect. |
1329
|
|
|
|
|
|
|
|
1330
|
|
|
|
|
|
|
=cut |
1331
|
|
|
|
|
|
|
|
1332
|
0
|
|
|
0
|
1
|
|
sub debug {
    # Debugging hook: accepts a message and, as shipped, does nothing
    # with it.
    #
    # NOTE(review): assumes the invocant has already been removed from
    # @_ (this file uses a Spiffy-style "-Base" import), so the first
    # remaining argument is the message -- confirm.
    my ( $message ) = @_;

    # Output is disabled: re-enable the line below to have messages
    # written via warn, each followed by a newline.
    # warn $message . "\n";

}
1338
|
|
|
|
|
|
|
|
1339
|
|
|
|
|
|
|
=head1 AUTHOR |
1340
|
|
|
|
|
|
|
|
1341
|
|
|
|
|
|
|
Grant Grueninger, C<< >> |
1342
|
|
|
|
|
|
|
|
1343
|
|
|
|
|
|
|
=head1 BUGS |
1344
|
|
|
|
|
|
|
|
1345
|
|
|
|
|
|
|
Please report any bugs or feature requests to |
1346
|
|
|
|
|
|
|
C, or through the web interface at |
1347
|
|
|
|
|
|
|
L. |
1348
|
|
|
|
|
|
|
I will be notified, and then you'll automatically be notified of progress on |
1349
|
|
|
|
|
|
|
your bug as I make changes. |
1350
|
|
|
|
|
|
|
|
1351
|
|
|
|
|
|
|
=head1 SUPPORT |
1352
|
|
|
|
|
|
|
|
1353
|
|
|
|
|
|
|
You can find documentation for this module with the perldoc command. |
1354
|
|
|
|
|
|
|
|
1355
|
|
|
|
|
|
|
perldoc WWW::Sitebase::Navigator |
1356
|
|
|
|
|
|
|
|
1357
|
|
|
|
|
|
|
You can also look for information at: |
1358
|
|
|
|
|
|
|
|
1359
|
|
|
|
|
|
|
=over 4 |
1360
|
|
|
|
|
|
|
|
1361
|
|
|
|
|
|
|
=item * AnnoCPAN: Annotated CPAN documentation |
1362
|
|
|
|
|
|
|
|
1363
|
|
|
|
|
|
|
L |
1364
|
|
|
|
|
|
|
|
1365
|
|
|
|
|
|
|
=item * CPAN Ratings |
1366
|
|
|
|
|
|
|
|
1367
|
|
|
|
|
|
|
L |
1368
|
|
|
|
|
|
|
|
1369
|
|
|
|
|
|
|
=item * RT: CPAN's request tracker |
1370
|
|
|
|
|
|
|
|
1371
|
|
|
|
|
|
|
L |
1372
|
|
|
|
|
|
|
|
1373
|
|
|
|
|
|
|
=item * Search CPAN |
1374
|
|
|
|
|
|
|
|
1375
|
|
|
|
|
|
|
L |
1376
|
|
|
|
|
|
|
|
1377
|
|
|
|
|
|
|
=back |
1378
|
|
|
|
|
|
|
|
1379
|
|
|
|
|
|
|
=head1 ACKNOWLEDGEMENTS |
1380
|
|
|
|
|
|
|
|
1381
|
|
|
|
|
|
|
=head1 COPYRIGHT & LICENSE |
1382
|
|
|
|
|
|
|
|
1383
|
|
|
|
|
|
|
Copyright 2006 Grant Grueninger, all rights reserved. |
1384
|
|
|
|
|
|
|
|
1385
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it |
1386
|
|
|
|
|
|
|
under the same terms as Perl itself. |
1387
|
|
|
|
|
|
|
|
1388
|
|
|
|
|
|
|
=cut |
1389
|
|
|
|
|
|
|
|
1390
|
|
|
|
|
|
|
1; # End of WWW::Sitebase::Navigator |