| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Net::Async::WebSearch::Provider::Yandex; |
|
2
|
|
|
|
|
|
|
our $VERSION = '0.002'; |
|
3
|
|
|
|
|
|
|
# ABSTRACT: Yandex Search API (XML) provider |
|
4
|
1
|
|
|
1
|
|
930
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
27
|
|
|
5
|
1
|
|
|
1
|
|
3
|
use warnings; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
35
|
|
|
6
|
1
|
|
|
1
|
|
4
|
use parent 'Net::Async::WebSearch::Provider'; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
5
|
|
|
7
|
|
|
|
|
|
|
|
|
8
|
1
|
|
|
1
|
|
47
|
use Carp qw( croak ); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
46
|
|
|
9
|
1
|
|
|
1
|
|
5
|
use Future; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
12
|
|
|
10
|
1
|
|
|
1
|
|
3
|
use URI; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
20
|
|
|
11
|
1
|
|
|
1
|
|
3
|
use HTTP::Request::Common qw( GET ); |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
33
|
|
|
12
|
1
|
|
|
1
|
|
866
|
use XML::LibXML (); |
|
|
1
|
|
|
|
|
24237
|
|
|
|
1
|
|
|
|
|
28
|
|
|
13
|
1
|
|
|
1
|
|
6
|
use Net::Async::WebSearch::Result; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
639
|
|
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
sub _init { |
|
16
|
0
|
|
|
0
|
|
|
my ( $self ) = @_; |
|
17
|
0
|
0
|
|
|
|
|
croak "Yandex provider requires 'api_key'" unless $self->{api_key}; |
|
18
|
|
|
|
|
|
|
croak "Yandex provider requires 'folderid' (Yandex Cloud folder)" |
|
19
|
0
|
0
|
|
|
|
|
unless $self->{folderid}; |
|
20
|
0
|
|
0
|
|
|
|
$self->{endpoint} ||= 'https://yandex.com/search/xml'; |
|
21
|
0
|
|
0
|
|
|
|
$self->{name} ||= 'yandex'; |
|
22
|
0
|
|
0
|
|
|
|
$self->{l10n} ||= 'en'; # en | ru | tr | be | kk | uk |
|
23
|
|
|
|
|
|
|
} |
|
24
|
|
|
|
|
|
|
|
|
25
|
0
|
|
|
0
|
1
|
|
sub endpoint { $_[0]->{endpoint} } |
|
26
|
0
|
|
|
0
|
1
|
|
sub api_key { $_[0]->{api_key} } |
|
27
|
0
|
|
|
0
|
1
|
|
sub folderid { $_[0]->{folderid} } |
|
28
|
0
|
|
|
0
|
1
|
|
sub l10n { $_[0]->{l10n} } |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
sub search { |
|
31
|
0
|
|
|
0
|
1
|
|
my ( $self, $http, $query, $opts ) = @_; |
|
32
|
0
|
|
0
|
|
|
|
$opts ||= {}; |
|
33
|
0
|
|
0
|
|
|
|
my $limit = $opts->{limit} || 10; |
|
34
|
|
|
|
|
|
|
|
|
35
|
0
|
|
|
|
|
|
my $uri = URI->new( $self->endpoint ); |
|
36
|
|
|
|
|
|
|
my %q = ( |
|
37
|
|
|
|
|
|
|
folderid => $self->folderid, |
|
38
|
|
|
|
|
|
|
apikey => $self->api_key, |
|
39
|
|
|
|
|
|
|
query => $query, |
|
40
|
|
|
|
|
|
|
l10n => $opts->{l10n} // $self->l10n, |
|
41
|
|
|
|
|
|
|
sortby => $opts->{sortby} // 'rlv', |
|
42
|
|
|
|
|
|
|
groupby => $opts->{groupby} |
|
43
|
0
|
|
0
|
|
|
|
// sprintf('attr=d.mode=deep.groups-on-page=%d.docs-in-group=1', $limit), |
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
44
|
|
|
|
|
|
|
); |
|
45
|
0
|
0
|
|
|
|
|
$q{filter} = $opts->{safesearch} if defined $opts->{safesearch}; |
|
46
|
0
|
0
|
|
|
|
|
$q{lr} = $opts->{region} if defined $opts->{region}; |
|
47
|
0
|
|
|
|
|
|
$uri->query_form(%q); |
|
48
|
|
|
|
|
|
|
|
|
49
|
0
|
|
|
|
|
|
my $req = GET( $uri->as_string ); |
|
50
|
0
|
|
|
|
|
|
$req->header( 'User-Agent' => $self->user_agent_string ); |
|
51
|
0
|
|
|
|
|
|
$req->header( 'Accept' => 'application/xml' ); |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
return $http->do_request( request => $req )->then(sub { |
|
54
|
0
|
|
|
0
|
|
|
my ( $resp ) = @_; |
|
55
|
0
|
0
|
|
|
|
|
unless ( $resp->is_success ) { |
|
56
|
0
|
|
|
|
|
|
return Future->fail( |
|
57
|
|
|
|
|
|
|
$self->name.": HTTP ".$resp->status_line, 'websearch', $self->name, |
|
58
|
|
|
|
|
|
|
); |
|
59
|
|
|
|
|
|
|
} |
|
60
|
0
|
|
|
|
|
|
my $doc = eval { XML::LibXML->new->parse_string( $resp->decoded_content ) }; |
|
|
0
|
|
|
|
|
|
|
|
61
|
0
|
0
|
|
|
|
|
if ( my $e = $@ ) { |
|
62
|
0
|
|
|
|
|
|
return Future->fail( $self->name.": XML parse: $e", 'websearch', $self->name ); |
|
63
|
|
|
|
|
|
|
} |
|
64
|
|
|
|
|
|
|
# Yandex signals API-level errors inside the XML envelope. |
|
65
|
0
|
0
|
|
|
|
|
if ( my ($err) = $doc->findnodes('/yandexsearch/response/error') ) { |
|
66
|
0
|
|
0
|
|
|
|
my $code = $err->getAttribute('code') // ''; |
|
67
|
0
|
|
|
|
|
|
my $msg = $err->textContent; |
|
68
|
0
|
|
|
|
|
|
return Future->fail( |
|
69
|
|
|
|
|
|
|
$self->name.": API error $code: $msg", 'websearch', $self->name, |
|
70
|
|
|
|
|
|
|
); |
|
71
|
|
|
|
|
|
|
} |
|
72
|
0
|
|
|
|
|
|
my @out; |
|
73
|
0
|
|
|
|
|
|
my $rank = 0; |
|
74
|
0
|
|
|
|
|
|
for my $doc_node ( $doc->findnodes('/yandexsearch/response/results/grouping/group/doc') ) { |
|
75
|
0
|
|
|
|
|
|
my ($url_node) = $doc_node->findnodes('./url'); |
|
76
|
0
|
|
|
|
|
|
my ($title_node) = $doc_node->findnodes('./title'); |
|
77
|
0
|
|
|
|
|
|
my @passages = $doc_node->findnodes('./passages/passage'); |
|
78
|
0
|
|
|
|
|
|
my @headline = $doc_node->findnodes('./headline'); |
|
79
|
0
|
0
|
|
|
|
|
my $url = $url_node ? $url_node->textContent : next; |
|
80
|
0
|
0
|
|
|
|
|
my $title = $title_node ? $title_node->textContent : ''; |
|
81
|
0
|
|
|
|
|
|
my $snippet; |
|
82
|
0
|
0
|
|
|
|
|
if (@passages) { |
|
|
|
0
|
|
|
|
|
|
|
83
|
0
|
|
|
|
|
|
$snippet = join ' … ', map { $_->textContent } @passages; |
|
|
0
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
} elsif (@headline) { |
|
85
|
0
|
|
|
|
|
|
$snippet = $headline[0]->textContent; |
|
86
|
|
|
|
|
|
|
} |
|
87
|
0
|
|
|
|
|
|
$rank++; |
|
88
|
0
|
|
|
|
|
|
push @out, Net::Async::WebSearch::Result->new( |
|
89
|
|
|
|
|
|
|
url => $url, |
|
90
|
|
|
|
|
|
|
title => $title, |
|
91
|
|
|
|
|
|
|
snippet => $snippet, |
|
92
|
|
|
|
|
|
|
provider => $self->name, |
|
93
|
|
|
|
|
|
|
rank => $rank, |
|
94
|
|
|
|
|
|
|
); |
|
95
|
0
|
0
|
|
|
|
|
last if $rank >= $limit; |
|
96
|
|
|
|
|
|
|
} |
|
97
|
0
|
|
|
|
|
|
return Future->done(\@out); |
|
98
|
0
|
|
|
|
|
|
}); |
|
99
|
|
|
|
|
|
|
} |
|
100
|
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
1; |
|
102
|
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
__END__ |