| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package WWW::Crawler::Mojo::UserAgent; |
|
2
|
11
|
|
|
11
|
|
295508
|
use strict; |
|
|
11
|
|
|
|
|
24
|
|
|
|
11
|
|
|
|
|
265
|
|
|
3
|
11
|
|
|
11
|
|
43
|
use warnings; |
|
|
11
|
|
|
|
|
18
|
|
|
|
11
|
|
|
|
|
246
|
|
|
4
|
11
|
|
|
11
|
|
48
|
use Mojo::Base 'Mojo::UserAgent'; |
|
|
11
|
|
|
|
|
17
|
|
|
|
11
|
|
|
|
|
79
|
|
|
5
|
11
|
|
|
11
|
|
1693818
|
use Mojo::URL; |
|
|
11
|
|
|
|
|
21
|
|
|
|
11
|
|
|
|
|
45
|
|
|
6
|
11
|
|
|
11
|
|
420
|
use 5.010; |
|
|
11
|
|
|
|
|
34
|
|
|
7
|
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
has active_conn => 0; |
|
9
|
|
|
|
|
|
|
has active_conn_per_host => sub { {} }; |
|
10
|
|
|
|
|
|
|
has '_creds'; |
|
11
|
|
|
|
|
|
|
has keep_credentials => 1; |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
sub new { |
|
14
|
16
|
|
|
16
|
1
|
904
|
my $class = shift; |
|
15
|
16
|
|
|
|
|
81
|
my $self = $class->SUPER::new(@_); |
|
16
|
|
|
|
|
|
|
|
|
17
|
16
|
50
|
|
|
|
112
|
if ($self->keep_credentials) { |
|
18
|
16
|
|
|
|
|
133
|
$self->_creds({}); |
|
19
|
|
|
|
|
|
|
$self->on( |
|
20
|
|
|
|
|
|
|
start => sub { |
|
21
|
22
|
|
|
22
|
|
9100
|
my ($self, $tx) = @_; |
|
22
|
22
|
|
|
|
|
69
|
my $url = $tx->req->url; |
|
23
|
22
|
50
|
|
|
|
134
|
my $host_key = _host_key($url) or return; |
|
24
|
22
|
100
|
|
|
|
74
|
if ($url->userinfo) { |
|
25
|
2
|
|
|
|
|
9
|
$self->{_creds}->{$host_key} = $url->userinfo; |
|
26
|
|
|
|
|
|
|
} |
|
27
|
|
|
|
|
|
|
else { |
|
28
|
20
|
|
|
|
|
138
|
$url->userinfo($self->{_creds}->{$host_key}); |
|
29
|
|
|
|
|
|
|
} |
|
30
|
|
|
|
|
|
|
} |
|
31
|
16
|
|
|
|
|
166
|
); |
|
32
|
|
|
|
|
|
|
} |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
$self->on( |
|
35
|
|
|
|
|
|
|
start => sub { |
|
36
|
22
|
|
|
22
|
|
221
|
my ($self, $tx) = @_; |
|
37
|
22
|
|
|
|
|
54
|
my $url = $tx->req->url; |
|
38
|
22
|
|
|
|
|
135
|
$self->active_host($url, 1); |
|
39
|
22
|
|
|
|
|
120
|
$tx->on(finish => sub { $self->active_host($url, -1) }); |
|
|
22
|
|
|
|
|
250110
|
|
|
40
|
|
|
|
|
|
|
} |
|
41
|
16
|
|
|
|
|
153
|
); |
|
42
|
|
|
|
|
|
|
|
|
43
|
16
|
|
|
|
|
110
|
return $self; |
|
44
|
|
|
|
|
|
|
} |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
sub active_host { |
|
47
|
83
|
|
|
83
|
1
|
2541
|
my ($self, $url, $inc) = @_; |
|
48
|
83
|
|
|
|
|
179
|
my $key = _host_key($url); |
|
49
|
83
|
|
|
|
|
272
|
my $hosts = $self->active_conn_per_host; |
|
50
|
83
|
100
|
|
|
|
348
|
if ($inc) { |
|
51
|
64
|
|
|
|
|
103
|
$self->{active_conn} += $inc; |
|
52
|
64
|
|
|
|
|
116
|
$hosts->{$key} += $inc; |
|
53
|
64
|
100
|
|
|
|
139
|
delete($hosts->{$key}) unless ($hosts->{$key}); |
|
54
|
|
|
|
|
|
|
} |
|
55
|
83
|
|
100
|
|
|
332
|
return $hosts->{$key} || 0; |
|
56
|
|
|
|
|
|
|
} |
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
sub credentials { |
|
59
|
2
|
|
|
2
|
1
|
3053
|
my ($self, %credentials) = @_; |
|
60
|
2
|
|
|
|
|
11
|
while (my ($url, $cred) = each(%credentials)) { |
|
61
|
3
|
|
|
|
|
10
|
$self->{_creds}->{_host_key($url)} = $cred; |
|
62
|
|
|
|
|
|
|
} |
|
63
|
|
|
|
|
|
|
} |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
sub _host_key { |
|
66
|
118
|
|
|
118
|
|
1080
|
state $well_known_ports = {http => 80, https => 443}; |
|
67
|
118
|
|
|
|
|
161
|
my $url = shift; |
|
68
|
118
|
100
|
|
|
|
314
|
$url = Mojo::URL->new($url) unless ref $url; |
|
69
|
118
|
100
|
100
|
|
|
2025
|
return unless $url->is_abs && (my $wkp = $well_known_ports->{$url->scheme}); |
|
70
|
116
|
|
|
|
|
1420
|
my $key = $url->scheme . '://' . $url->ihost; |
|
71
|
116
|
100
|
|
|
|
1845
|
return $key unless (my $port = $url->port); |
|
72
|
100
|
100
|
|
|
|
647
|
$key .= ':' . $port if $port != $wkp; |
|
73
|
100
|
|
|
|
|
268
|
return $key; |
|
74
|
|
|
|
|
|
|
} |
|
75
|
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
1; |
|
77
|
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
=head1 NAME |
|
79
|
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
WWW::Crawler::Mojo::UserAgent - Crawler specific featured user agent |
|
81
|
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
83
|
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
my $ua = WWW::Crawler::Mojo::UserAgent->new; |
|
85
|
|
|
|
|
|
|
$ua->keep_credentials(1); |
|
86
|
|
|
|
|
|
|
$ua->credentials( |
|
87
|
|
|
|
|
|
|
'http://example.com:8080' => 'jamadam:password1', |
|
88
|
|
|
|
|
|
|
'http://example2.com:8080' => 'jamadam:password2', |
|
89
|
|
|
|
|
|
|
); |
|
90
|
|
|
|
|
|
|
my $tx = $ua->get('http://example.com/'); |
|
91
|
|
|
|
|
|
|
say $tx->req->url # http://jamadam:passowrd@example.com/ |
|
92
|
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
if ($ua->active_conn < $max_conn) { |
|
94
|
|
|
|
|
|
|
$ua->get(...); |
|
95
|
|
|
|
|
|
|
} |
|
96
|
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
if ($ua->active_host($url) < $max_conn_per_host) { |
|
98
|
|
|
|
|
|
|
$ua->get(...); |
|
99
|
|
|
|
|
|
|
} |
|
100
|
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
102
|
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
This class inherits L and adds credential storage and |
|
104
|
|
|
|
|
|
|
active connection counter. |
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
=head1 ATTRIBUTES |
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
WWW::Crawler::Mojo::UserAgent inherits all attributes from Mojo::UserAgent. |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=head2 active_conn |
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
A number of current connections. |
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
$bot->active_conn($bot->active_conn + 1); |
|
115
|
|
|
|
|
|
|
say $bot->active_conn; |
|
116
|
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=head2 active_conn_per_host |
|
118
|
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
A number of current connections per host. |
|
120
|
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
$bot->active_conn_per_host($bot->active_conn_per_host + 1); |
|
122
|
|
|
|
|
|
|
say $bot->active_conn_per_host; |
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=head2 keep_credentials |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
Sets true to activate the feature. Defaults to 1. |
|
127
|
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
$ua->keep_credentials(1); |
|
129
|
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=head1 METHODS |
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
WWW::Crawler::Mojo::UserAgent inherits all methods from L. |
|
133
|
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=head2 active_host |
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
Maintenances the numbers of active connections. |
|
137
|
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
$ua->active_host($url, 1); |
|
139
|
|
|
|
|
|
|
$ua->active_host($url, -1); |
|
140
|
|
|
|
|
|
|
my $amount = $ua->active_host($url); |
|
141
|
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
=head2 credentials |
|
143
|
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
Stores credentials. |
|
145
|
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
$ua->credentials( |
|
147
|
|
|
|
|
|
|
'http://example.com:8080' => 'jamadam:password1', |
|
148
|
|
|
|
|
|
|
'http://example2.com:8080' => 'jamadam:password2', |
|
149
|
|
|
|
|
|
|
); |
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
=head2 new |
|
152
|
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
Constructer. |
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
$ua = WWW::Crawler::Mojo::UserAgent->new; |
|
156
|
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=head1 AUTHOR |
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
Keita Sugama, Esugama@jamadam.comE |
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
|
162
|
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
Copyright (C) Keita Sugama. |
|
164
|
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or |
|
166
|
|
|
|
|
|
|
modify it under the same terms as Perl itself. |
|
167
|
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
=cut |