| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Net::Async::WebSearch::Provider::DuckDuckGo; |
|
2
|
|
|
|
|
|
|
our $VERSION = '0.002'; |
|
3
|
|
|
|
|
|
|
# ABSTRACT: DuckDuckGo HTML endpoint provider (keyless) |
|
4
|
1
|
|
|
1
|
|
934
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
28
|
|
|
5
|
1
|
|
|
1
|
|
3
|
use warnings; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
51
|
|
|
6
|
1
|
|
|
1
|
|
4
|
use parent 'Net::Async::WebSearch::Provider'; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
4
|
|
|
7
|
|
|
|
|
|
|
|
|
8
|
1
|
|
|
1
|
|
45
|
use Future; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
24
|
|
|
9
|
1
|
|
|
1
|
|
4
|
use HTTP::Request::Common qw( POST ); |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
36
|
|
|
10
|
1
|
|
|
1
|
|
4
|
use URI; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
18
|
|
|
11
|
1
|
|
|
1
|
|
1002
|
use HTML::TreeBuilder; |
|
|
1
|
|
|
|
|
27115
|
|
|
|
1
|
|
|
|
|
9
|
|
|
12
|
1
|
|
|
1
|
|
36
|
use Net::Async::WebSearch::Result; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
480
|
|
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
sub _init { |
|
15
|
0
|
|
|
0
|
|
|
my ( $self ) = @_; |
|
16
|
0
|
|
0
|
|
|
|
$self->{endpoint} ||= 'https://html.duckduckgo.com/html/'; |
|
17
|
0
|
|
0
|
|
|
|
$self->{name} ||= 'duckduckgo'; |
|
18
|
|
|
|
|
|
|
} |
|
19
|
|
|
|
|
|
|
|
|
20
|
0
|
|
|
0
|
1
|
|
sub endpoint { $_[0]->{endpoint} } |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
sub search { |
|
23
|
0
|
|
|
0
|
1
|
|
my ( $self, $http, $query, $opts ) = @_; |
|
24
|
0
|
|
0
|
|
|
|
$opts ||= {}; |
|
25
|
0
|
|
0
|
|
|
|
my $limit = $opts->{limit} || 10; |
|
26
|
|
|
|
|
|
|
|
|
27
|
0
|
|
|
|
|
|
my %form = ( q => $query ); |
|
28
|
0
|
0
|
|
|
|
|
$form{kl} = $opts->{region} if defined $opts->{region}; |
|
29
|
0
|
0
|
|
|
|
|
$form{kp} = $opts->{safesearch} if defined $opts->{safesearch}; |
|
30
|
|
|
|
|
|
|
|
|
31
|
0
|
|
|
|
|
|
my $req = POST( $self->endpoint, [ %form ] ); |
|
32
|
0
|
|
|
|
|
|
$req->header( 'User-Agent' => $self->user_agent_string ); |
|
33
|
0
|
|
|
|
|
|
$req->header( 'Accept' => 'text/html' ); |
|
34
|
0
|
|
0
|
|
|
|
$req->header( 'Accept-Language' => $opts->{language} // 'en-US,en;q=0.5' ); |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
return $http->do_request( request => $req )->then(sub { |
|
37
|
0
|
|
|
0
|
|
|
my ( $resp ) = @_; |
|
38
|
0
|
0
|
|
|
|
|
unless ( $resp->is_success ) { |
|
39
|
0
|
|
|
|
|
|
return Future->fail( |
|
40
|
|
|
|
|
|
|
"duckduckgo: HTTP ".$resp->status_line, 'websearch', $self->name, |
|
41
|
|
|
|
|
|
|
); |
|
42
|
|
|
|
|
|
|
} |
|
43
|
0
|
|
|
|
|
|
my $results = $self->_parse_html( $resp->decoded_content, $limit ); |
|
44
|
0
|
|
|
|
|
|
return Future->done($results); |
|
45
|
0
|
|
|
|
|
|
}); |
|
46
|
|
|
|
|
|
|
} |
|
47
|
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
sub _parse_html { |
|
49
|
0
|
|
|
0
|
|
|
my ( $self, $html, $limit ) = @_; |
|
50
|
0
|
|
|
|
|
|
my @out; |
|
51
|
0
|
|
|
|
|
|
my $tree = HTML::TreeBuilder->new_from_content( $html ); |
|
52
|
0
|
|
|
|
|
|
my @blocks = $tree->look_down( _tag => 'div', class => qr/\bresult\b/ ); |
|
53
|
0
|
|
|
|
|
|
my $rank = 0; |
|
54
|
0
|
|
|
|
|
|
for my $b (@blocks) { |
|
55
|
0
|
|
|
|
|
|
my $a = $b->look_down( _tag => 'a', class => qr/\bresult__a\b/ ); |
|
56
|
0
|
0
|
|
|
|
|
next unless $a; |
|
57
|
0
|
0
|
|
|
|
|
my $href = $a->attr('href') or next; |
|
58
|
|
|
|
|
|
|
# DDG wraps real URL in //duckduckgo.com/l/?uddg=... |
|
59
|
0
|
0
|
|
|
|
|
if ( $href =~ m{[?&]uddg=([^&]+)} ) { |
|
60
|
0
|
|
|
|
|
|
require URI::Escape; |
|
61
|
0
|
|
|
|
|
|
$href = URI::Escape::uri_unescape($1); |
|
62
|
|
|
|
|
|
|
} |
|
63
|
0
|
|
|
|
|
|
my $title = $a->as_trimmed_text; |
|
64
|
0
|
|
|
|
|
|
my $sn = $b->look_down( _tag => 'a', class => qr/\bresult__snippet\b/ ); |
|
65
|
0
|
0
|
|
|
|
|
my $snippet = $sn ? $sn->as_trimmed_text : undef; |
|
66
|
0
|
|
|
|
|
|
$rank++; |
|
67
|
0
|
|
|
|
|
|
push @out, Net::Async::WebSearch::Result->new( |
|
68
|
|
|
|
|
|
|
url => $href, |
|
69
|
|
|
|
|
|
|
title => $title, |
|
70
|
|
|
|
|
|
|
snippet => $snippet, |
|
71
|
|
|
|
|
|
|
provider => $self->name, |
|
72
|
|
|
|
|
|
|
rank => $rank, |
|
73
|
|
|
|
|
|
|
); |
|
74
|
0
|
0
|
|
|
|
|
last if $rank >= $limit; |
|
75
|
|
|
|
|
|
|
} |
|
76
|
0
|
|
|
|
|
|
$tree->delete; |
|
77
|
0
|
|
|
|
|
|
return \@out; |
|
78
|
|
|
|
|
|
|
} |
|
79
|
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
1; |
|
81
|
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
__END__ |