File Coverage

blib/lib/Net/Async/WebSearch/Provider/Reddit.pm
Criterion Covered Total %
statement 53 75 70.6
branch 10 32 31.2
condition 10 38 26.3
subroutine 15 17 88.2
pod 6 6 100.0
total 94 168 55.9


line stmt bran cond sub pod time code
1             package Net::Async::WebSearch::Provider::Reddit;
2             our $VERSION = '0.002';
3             # ABSTRACT: Reddit search provider (keyless JSON endpoint)
4 2     2   1464 use strict;
  2         3  
  2         55  
5 2     2   7 use warnings;
  2         4  
  2         70  
6 2     2   7 use parent 'Net::Async::WebSearch::Provider';
  2         2  
  2         9  
7              
8 2     2   114 use Future;
  2         4  
  2         45  
9 2     2   6 use JSON::MaybeXS qw( decode_json );
  2         3  
  2         84  
10 2     2   8 use URI;
  2         4  
  2         47  
11 2     2   7 use HTTP::Request::Common qw( GET );
  2         3  
  2         80  
12 2     2   8 use Net::Async::WebSearch::Result;
  2         2  
  2         1562  
13              
14             sub _init {
15 13     13   19 my ( $self ) = @_;
16 13   50     21 $self->{endpoint} ||= 'https://www.reddit.com';
17 13         64 $self->{endpoint} =~ s{/+$}{};
18 13   50     22 $self->{name} ||= 'reddit';
19 13   50     54 $self->{subreddit} //= undef;
20 13   50     41 $self->{sort} ||= 'relevance'; # relevance | hot | top | new | comments
21 13   50     50 $self->{time} ||= 'all'; # hour | day | week | month | year | all
22 13   50     30 $self->{link_base} ||= 'https://www.reddit.com';
23             }
24              
25 9     9 1 40 sub endpoint { $_[0]->{endpoint} }
26 9 50   9 1 22 sub subreddit { @_ > 1 ? ($_[0]->{subreddit} = $_[1]) : $_[0]->{subreddit} }
27 9 50   9 1 56 sub sort { @_ > 1 ? ($_[0]->{sort} = $_[1]) : $_[0]->{sort} }
28 9 50   9 1 44 sub time { @_ > 1 ? ($_[0]->{time} = $_[1]) : $_[0]->{time} }
29 7     7 1 17 sub link_base { $_[0]->{link_base} }
30              
31             sub search {
32 0     0 1 0 my ( $self, $http, $query, $opts ) = @_;
33 0   0     0 $opts ||= {};
34 0   0     0 my $limit = $opts->{limit} || 10;
35              
36 0 0       0 my $sub = defined $opts->{subreddit} ? $opts->{subreddit} : $self->subreddit;
37 0 0 0     0 my $path = defined $sub && length $sub ? "/r/$sub/search.json" : "/search.json";
38              
39 0         0 my $uri = URI->new( $self->endpoint . $path );
40             my %q = (
41             q => $query,
42             limit => $limit,
43             sort => $opts->{sort} // $self->sort,
44 0   0     0 t => $opts->{time} // $self->time,
      0        
45             );
46 0 0 0     0 $q{restrict_sr} = 1 if defined $sub && length $sub;
47 0 0       0 $q{include_over_18} = $opts->{include_nsfw} ? 'on' : 'off';
48 0         0 $uri->query_form(%q);
49              
50 0         0 my $req = GET( $uri->as_string );
51             # Reddit is picky — generic UAs get rate-limited hard.
52 0         0 $req->header( 'User-Agent' => $self->user_agent_string );
53 0         0 $req->header( 'Accept' => 'application/json' );
54              
55             return $http->do_request( request => $req )->then(sub {
56 0     0   0 my ( $resp ) = @_;
57 0 0       0 unless ( $resp->is_success ) {
58 0         0 return Future->fail(
59             $self->name.": HTTP ".$resp->status_line, 'websearch', $self->name,
60             );
61             }
62 0         0 my $data = eval { decode_json( $resp->decoded_content ) };
  0         0  
63 0 0       0 if ( my $e = $@ ) {
64 0         0 return Future->fail( $self->name.": invalid JSON: $e", 'websearch', $self->name );
65             }
66 0         0 return Future->done( $self->_parse_listing($data, $limit) );
67 0         0 });
68             }
69              
70             sub _parse_listing {
71 9     9   16 my ( $self, $data, $limit ) = @_;
72 9         12 my @out;
73 9         10 my $rank = 0;
74 9 50 50     10 for my $child ( @{ ($data->{data} || {})->{children} || [] } ) {
  9         30  
75 7 50       18 my $d = $child->{data} or next;
76 7         9 $rank++;
77 7   50     14 my $permalink = $self->link_base . ( $d->{permalink} // '' );
78 7   33     15 my $target = $d->{url} // $permalink;
79 7         7 my $iso;
80 7 50       12 if ( defined $d->{created_utc} ) {
81 7         24 my @t = gmtime( $d->{created_utc} );
82 7         38 $iso = sprintf '%04d-%02d-%02dT%02d:%02d:%02dZ',
83             $t[5]+1900, $t[4]+1, $t[3], $t[2], $t[1], $t[0];
84             }
85             push @out, Net::Async::WebSearch::Result->new(
86             url => $target,
87             title => $d->{title},
88             snippet => ( $d->{selftext} && length $d->{selftext}
89             ? substr( $d->{selftext}, 0, 400 )
90             : undef ),
91             provider => $self->name,
92             rank => $rank,
93             published_at => $iso,
94             nsfw => ( $d->{over_18} ? 1 : 0 ),
95             domain => $d->{domain},
96             raw => $d,
97             extra => {
98             permalink => $permalink,
99             subreddit => $d->{subreddit},
100             author => $d->{author},
101             reddit_score => $d->{score},
102             num_comments => $d->{num_comments},
103 7 50 33     32 ( defined $d->{created_utc} ? ( created_utc => $d->{created_utc} ) : () ),
    50          
    50          
104             },
105             );
106 7 50       26 last if $rank >= $limit;
107             }
108 9         27 return \@out;
109             }
110              
111             1;
112              
113             __END__