line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package WWW::Crawler::Mojo::UserAgent; |
2
|
11
|
|
|
11
|
|
295508
|
use strict; |
|
11
|
|
|
|
|
24
|
|
|
11
|
|
|
|
|
265
|
|
3
|
11
|
|
|
11
|
|
43
|
use warnings; |
|
11
|
|
|
|
|
18
|
|
|
11
|
|
|
|
|
246
|
|
4
|
11
|
|
|
11
|
|
48
|
use Mojo::Base 'Mojo::UserAgent'; |
|
11
|
|
|
|
|
17
|
|
|
11
|
|
|
|
|
79
|
|
5
|
11
|
|
|
11
|
|
1693818
|
use Mojo::URL; |
|
11
|
|
|
|
|
21
|
|
|
11
|
|
|
|
|
45
|
|
6
|
11
|
|
|
11
|
|
420
|
use 5.010; |
|
11
|
|
|
|
|
34
|
|
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
# Running total of transactions in flight; adjusted by active_host.
has 'active_conn' => 0;

# Hash reference of in-flight counters, keyed by the string _host_key builds
# for a URL.
has 'active_conn_per_host' => sub { return {} };

# Hash reference of stored userinfo strings, keyed by host key.
has '_creds';

# Switch for the credential-keeping feature; on by default.
has 'keep_credentials' => 1;
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
# Constructor. Accepts the same arguments as the parent class constructor and
# returns the new user agent with two "start" listeners attached:
#
#   1. Credential memory: userinfo seen on a request URL is remembered per
#      host key, and re-applied to later requests for the same host key that
#      carry no userinfo of their own.
#   2. Connection accounting: the counters are bumped when a transaction
#      starts and decremented again when it finishes.
sub new {
    my $class = shift;
    my $self  = $class->SUPER::new(@_);

    # Make sure the credential store exists regardless of the current
    # keep_credentials setting, so the feature can be switched on later.
    $self->_creds({}) unless $self->_creds;

    $self->on(
        start => sub {
            my ($ua, $tx) = @_;

            # Consult the attribute on every request instead of once at
            # construction time, so $ua->keep_credentials(...) called after
            # new() actually takes effect.
            return unless $ua->keep_credentials;

            my $url = $tx->req->url;

            # URLs without a host key (relative, or not http/https) are
            # left untouched.
            my $host_key = _host_key($url) or return;

            if ($url->userinfo) {
                $ua->{_creds}->{$host_key} = $url->userinfo;
            }
            else {
                $url->userinfo($ua->{_creds}->{$host_key});
            }
        }
    );

    $self->on(
        start => sub {
            my ($ua, $tx) = @_;
            my $url = $tx->req->url;
            $ua->active_host($url, 1);

            # Balance the increment above once the transaction is done.
            $tx->on(finish => sub { $ua->active_host($url, -1) });
        }
    );

    return $self;
}
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
# Reads or adjusts the active-connection counters.
#
#   $ua->active_host($url, 1);    # one more connection for $url's host
#   $ua->active_host($url, -1);   # one fewer
#   my $n = $ua->active_host($url);
#
# $url may be a string or a URL object. A true $inc is added both to the
# overall active_conn counter and to the per-host counter; a host whose
# counter drops to zero is removed from active_conn_per_host. Returns the
# current count for the URL's host (0 when none).
sub active_host {
    my ($self, $url, $inc) = @_;

    # _host_key yields undef for URLs it cannot key (relative, or a scheme
    # other than http/https). Using undef as a hash key stringifies to ''
    # with an "uninitialized value" warning on every call; normalise it
    # explicitly so such URLs share the '' bucket silently.
    my $key   = _host_key($url) // '';
    my $hosts = $self->active_conn_per_host;

    if ($inc) {
        $self->{active_conn} += $inc;
        $hosts->{$key}       += $inc;
        delete $hosts->{$key} unless $hosts->{$key};
    }

    return $hosts->{$key} || 0;
}
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
# Stores credentials for one or more hosts.
#
#   $ua->credentials('http://example.com:8080' => 'user:password', ...);
#
# Takes a list of URL => userinfo pairs. Each URL is reduced to its host key
# and the userinfo string is stored under that key. Returns nothing.
sub credentials {
    my ($self, %credentials) = @_;

    for my $url (keys %credentials) {
        my $key = _host_key($url);

        # A URL without a host key (relative, or not http/https) can never
        # be matched by the request hook in this class, which bails out on
        # such URLs. Skip it rather than storing under an undef key, which
        # also triggered an "uninitialized value" warning.
        next unless defined $key;

        $self->{_creds}->{$key} = $credentials{$url};
    }

    return;
}
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
# Builds the key used to group URLs by host: "scheme://host", followed by
# ":port" only when an explicit port differs from the scheme's well-known
# port.
#
# $url may be a string (parsed with Mojo::URL) or an object providing
# is_abs, scheme, ihost and port. Returns nothing (undef in scalar context)
# when the URL is not absolute or its scheme is neither http nor https.
sub _host_key {
    state $well_known_ports = {http => 80, https => 443};

    my $url = shift;
    $url = Mojo::URL->new($url) unless ref $url;

    return unless $url->is_abs;

    # Scheme and host are case-insensitive, so fold both to lower case;
    # otherwise "HTTP://Example.com" is rejected or keyed separately from
    # "http://example.com".
    my $scheme = lc $url->scheme;
    my $wkp    = $well_known_ports->{$scheme} or return;

    # Guard against an absent host so the concatenation does not warn.
    my $key = $scheme . '://' . lc($url->ihost // '');

    my $port = $url->port;
    $key .= ':' . $port if $port && $port != $wkp;

    return $key;
}
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
1; |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
=head1 NAME |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
WWW::Crawler::Mojo::UserAgent - Crawler specific featured user agent |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=head1 SYNOPSIS |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
my $ua = WWW::Crawler::Mojo::UserAgent->new; |
85
|
|
|
|
|
|
|
$ua->keep_credentials(1); |
86
|
|
|
|
|
|
|
$ua->credentials( |
87
|
|
|
|
|
|
|
'http://example.com:8080' => 'jamadam:password1', |
88
|
|
|
|
|
|
|
'http://example2.com:8080' => 'jamadam:password2', |
89
|
|
|
|
|
|
|
); |
90
|
|
|
|
|
|
|
my $tx = $ua->get('http://example.com:8080/'); |
91
|
|
|
|
|
|
|
say $tx->req->url; # http://jamadam:password1@example.com:8080/ |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
if ($ua->active_conn < $max_conn) { |
94
|
|
|
|
|
|
|
$ua->get(...); |
95
|
|
|
|
|
|
|
} |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
if ($ua->active_host($url) < $max_conn_per_host) { |
98
|
|
|
|
|
|
|
$ua->get(...); |
99
|
|
|
|
|
|
|
} |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
=head1 DESCRIPTION |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
This class inherits L<Mojo::UserAgent> and adds credential storage and |
104
|
|
|
|
|
|
|
active connection counter. |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
=head1 ATTRIBUTES |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
WWW::Crawler::Mojo::UserAgent inherits all attributes from Mojo::UserAgent. |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=head2 active_conn |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
The number of current connections. |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
$bot->active_conn($bot->active_conn + 1); |
115
|
|
|
|
|
|
|
say $bot->active_conn; |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=head2 active_conn_per_host |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
A hash reference holding the number of current connections per host, keyed by scheme://host[:port]. |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
$bot->active_conn_per_host->{'http://example.com'}++; |
122
|
|
|
|
|
|
|
say $bot->active_conn_per_host->{'http://example.com'}; |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=head2 keep_credentials |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
Set to true to remember the userinfo of requested URLs per host and reuse it for later requests to the same host. Defaults to 1. |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
$ua->keep_credentials(1); |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=head1 METHODS |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
WWW::Crawler::Mojo::UserAgent inherits all methods from L<Mojo::UserAgent>. |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=head2 active_host |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
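Maintains the number of active connections. Given an increment, it adjusts the total and per-host counters; without one, it only returns the current count for the URL's host. |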
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
$ua->active_host($url, 1); |
139
|
|
|
|
|
|
|
$ua->active_host($url, -1); |
140
|
|
|
|
|
|
|
my $amount = $ua->active_host($url); |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
=head2 credentials |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
Stores credentials. |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
$ua->credentials( |
147
|
|
|
|
|
|
|
'http://example.com:8080' => 'jamadam:password1', |
148
|
|
|
|
|
|
|
'http://example2.com:8080' => 'jamadam:password2', |
149
|
|
|
|
|
|
|
); |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
=head2 new |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
Constructor. |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
$ua = WWW::Crawler::Mojo::UserAgent->new; |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=head1 AUTHOR |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
Keita Sugama, E<lt>sugama@jamadam.comE<gt> |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
Copyright (C) Keita Sugama. |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or |
166
|
|
|
|
|
|
|
modify it under the same terms as Perl itself. |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
=cut |