| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package WWW::Crawl4AI::Strategy; |
|
2
|
|
|
|
|
|
|
# ABSTRACT: role for a single crawl strategy in the WWW::Crawl4AI fallback chain |
|
3
|
3
|
|
|
3
|
|
24287
|
use Moo::Role; |
|
|
3
|
|
|
|
|
3
|
|
|
|
3
|
|
|
|
|
16
|
|
|
4
|
3
|
|
|
3
|
|
1118
|
use WWW::Crawl4AI::Request (); |
|
|
3
|
|
|
|
|
3
|
|
|
|
3
|
|
|
|
|
552
|
|
|
5
|
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
our $VERSION = '0.001'; |
|
7
|
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
# Methods every class consuming this role has to supply itself; composing the
# role into a class that lacks either of them fails at composition time.
requires qw(
    name
    cost_class
);
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# Chain-membership predicate: does this strategy take part in the fallback
# chain for the given crawler? The role-level default answers yes no matter
# what it is passed, which is what Plain/Browser/Stealth rely on.
# CloakBrowser/Proxy/Callback override it to gate on configuration.
sub applicable {
    return 1;
}
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
# Default way to run a strategy. The consuming class builds the request via
# build_request( $crawler, $url, %opts ); that request and this strategy's name
# are handed to the crawler's client, and the first element of the array
# reference the client returns is passed back as the single normalized page.
# Strategies that do not fetch through Crawl4AI (Callback) override crawl
# itself rather than supplying build_request.
sub crawl {
    my $self    = shift;
    my $crawler = shift;
    my $url     = shift;
    my %opts    = @_;

    my $request = $self->build_request( $crawler, $url, %opts );

    # Resolve the client before the strategy name, keeping the call order of a
    # chained $crawler->client->crawl( ..., $self->name ) expression.
    my $client  = $crawler->client;
    my $results = $client->crawl( $request, $self->name );

    return $results->[0];
}
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
# Convenience constructor for build_request implementations. Wraps $url and the
# optional parameter hashes into a WWW::Crawl4AI::Request:
#   browser => passed on as browser_params
#   crawler => passed on as crawler_params
# A missing or false value for either key is replaced by a fresh empty hash
# reference.
sub _request {
    my $self = shift;
    my $url  = shift;
    my %p    = @_;

    my $browser_params = $p{browser} || {};
    my $crawler_params = $p{crawler} || {};

    return WWW::Crawl4AI::Request->new(
        urls           => $url,
        browser_params => $browser_params,
        crawler_params => $crawler_params,
    );
}
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
1; |
|
44
|
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
__END__ |