File Coverage

blib/lib/App/DuckPAN/Fathead.pm
Criterion Covered Total %
statement 21 120 17.5
branch 0 26 0.0
condition 0 5 0.0
subroutine 7 18 38.8
pod 0 1 0.0
total 28 170 16.4


line stmt bran cond sub pod time code
1             package App::DuckPAN::Fathead;
2             our $AUTHORITY = 'cpan:DDG';
3             # ABSTRACT: Searches a given output.txt file for a query match
4             $App::DuckPAN::Fathead::VERSION = '1018';
5 1     1   1280 use Moo;
  1         2  
  1         7  
6             with 'App::DuckPAN::HasApp';
7              
8 1     1   1573 use DBI;
  1         13601  
  1         67  
9 1     1   626 use JSON;
  1         3983  
  1         4  
10 1     1   119 use Path::Tiny;
  1         3  
  1         42  
11 1     1   6 use HTML::TreeBuilder;
  1         2  
  1         12  
12 1     1   21 use HTML::Element;
  1         2  
  1         4  
13 1     1   20 use Data::Printer return_value => 'dump';
  1         6  
  1         8  
14              
15             has selected => (
16             is => 'rw',
17             lazy => 1,
18             required => 0,
19             predicate => 1,
20             trigger => 1
21             );
22              
23             sub _trigger_selected {
24 0     0     my ( $self, $id ) = @_;
25 0           my $dir = path("lib/fathead/$id");
26 0 0         unless ($dir->is_dir) {
27 0           my $full_path = $dir->realpath;
28 0           $self->app->emit_and_exit(1, "Directory not found: $full_path") ;
29             }
30 0           my $file = $dir->child("output.txt");
31 0 0         unless ($file->exists){
32 0           my $full_path = $file->realpath;
33 0           $self->app->emit_and_exit(1, "No output.txt was found in $full_path");
34             }
35 0           $self->_set_output_txt($file);
36 0           return $dir;
37             }
38              
39             has _trigger_words => (
40             is => 'ro',
41             builder => 1,
42             lazy => 1,
43             );
44              
45             sub _build__trigger_words {
46 0     0     my ($self) = @_;
47 0           my $tf = 'trigger_words.txt';
48 0 0         return [] unless $self->has_selected;
49 0           my $file = path("lib/fathead/", $self->selected, $tf);
50 0 0         unless ($file->exists){
51 0           my $full_path = $file->realpath;
52 0           $self->app->emit_debug("No $tf was found in $full_path");
53 0           return [];
54             }
55 0           chomp (my @words = $file->lines);
56 0           return \@words;
57             }
58              
59             has _trigger_re => (
60             is => 'ro',
61             lazy => 1,
62             builder => 1,
63             );
64              
65             sub _build__trigger_re {
66 0     0     my ($self) = @_;
67 0           my @words = @{$self->_trigger_words};
  0            
68 0           my $text = join '|', map { quotemeta $_ } @words;
  0            
69 0           return qr/\b(?:$text)\b/i;
70             }
71              
72             has output_txt => (
73             is => 'rwp',
74             lazy => 1,
75             required => 0
76             );
77              
78             has dbh => (
79             is => 'rw',
80             lazy => 1,
81             required => 0,
82             builder => 1
83             );
84              
85             sub _build_dbh {
86 0     0     my ( $self ) = @_;
87              
88             # Open output.txt file for searching
89             # Handles as a CSV with "\t" separator
90             # Provide numbered column names
91 0 0         my $dbh = DBI->connect ("dbi:CSV:", undef, undef, {
92             f_dir => $self->output_txt->parent,
93             f_ext => ".txt/r",
94             csv_sep_char => "\t",
95             csv_quote_char => undef,
96             csv_escape_char => undef,
97             csv_allow_whitespace => 1,
98             csv_allow_quotes => 1,
99             RaiseError => 1,
100             PrintError => 0,
101             csv_tables => {
102             output => {
103             file => 'output.txt',
104             col_names => [
105             "title",
106             "type",
107             "redirect",
108             "col4",
109             "categories",
110             "col6",
111             "related_topics",
112             "col8",
113             "external_links",
114             "disambiguation",
115             "images",
116             "abstract",
117             "abstract_url",
118             ],
119             },
120             },
121             }) or die $DBI::errstr;
122              
123 0           return $dbh;
124             }
125              
126             # Get a Fathead result from the DB
127             # Requery when we get a Redirect
128             sub _search_output {
129              
130 0     0     my ($self, $query) = @_;
131              
132 0           my $trigger_re = $self->_trigger_re;
133 0           $query =~ s/^$trigger_re\s+|\s+$trigger_re$//;
134 0           my $result = $self->_db_lookup($query);
135              
136 0   0       while ($result && $result->{type} eq 'R') {
137 0           my $redirect = $result->{redirect};
138 0           $self->app->emit_notice("Following Redirect: '$result->{title}' -> '$redirect'");
139 0           $result = $self->_db_lookup($redirect);
140             }
141 0           return $result;
142             }
143              
144             # Attempt to get a result from DB (output.txt)
145             # Capture & display any raised errors
146             sub _db_lookup {
147 0     0     my ($self, $query) = @_;
148              
149 0           my $result;
150 0           $@ = '';
151              
152 0           eval {
153             # TODO lowercase all titles first
154 0           my $sth = $self->dbh->prepare("SELECT * FROM output WHERE lower(title) = ?");
155 0           $sth->execute(lc $query);
156 0           while (my $row = $sth->fetchrow_hashref) {
157 0           $result = $row;
158             }
159 0           $sth->finish();
160             };
161 0 0         $self->app->emit_error("SQL database error: $@") if $@;
162 0           return $result;
163             }
164              
165             sub structured_answer_for_query {
166 0     0 0   my ($self, $query) = @_;
167 0 0         my $result = $self->_search_output($query) or return undef;
168 0           return $self->_build_structured_answer($result);
169             }
170              
171             # Build a Structured Answer hash
172             # Properties depend on Fathead result type
173             sub _build_structured_answer {
174 0     0     my ($self, $data) = @_;
175              
176             # Get IA Metadata via ID lookup
177             # Assume selected is an ID
178 0   0       my $metadata = DDG::Meta::Data->get_ia(id => $self->selected) // {};
179 0 0         $self->app->emit_error("No Metadata found for ID: ".$self->selected) unless keys %$metadata;
180              
181             # DBD::Csv ignores col_names letter casing
182             # So, manually map columns to template properties
183             # TODO update info_detail template to use lowercase variable names
184 0           my %extra_data;
185              
186 0           my $out = {
187             id => $self->selected,
188             signal => "high",
189             meta => $metadata,
190             };
191              
192             # Article Result
193 0 0         if ($data->{type} eq 'A') {
194 0           $out->{duckbar_topic} = 'About';
195 0           $out->{model} = 'FatheadArticle';
196 0           $out->{templates} = { detail => 'info_detail' };
197             %extra_data = (
198             Heading => $data->{title},
199             Abstract => $self->_replace_newlines($data->{abstract}),
200             AbstractURL => $data->{abstract_url},
201             FirstURL => $metadata->{src_url},
202 0           Image => $self->_get_image($data->{images}),
203             );
204             }
205              
206             # Disambiguation Result
207 0 0         if ($data->{type} eq 'D') {
208 0           $out->{duckbar_topic} = 'Meanings';
209 0           $out->{model} = 'FatheadListItem';
210 0           $out->{templates} = { item => 'meanings_item' };
211             %extra_data = (
212             Heading => $data->{title}." (".$metadata->{name}.")",
213 0           RelatedTopics => $self->_parse_disambiguations($data->{disambiguation}, $out)
214             );
215             }
216              
217             # Category Pages Result
218 0 0         if ($data->{type} eq 'C') {
219 0           $out->{duckbar_topic} = 'List';
220 0           $out->{model} = 'FatheadListItem';
221 0           $out->{templates} = { item => 'categories_item' };
222             }
223              
224 0           $out->{data} = { %$data, %extra_data };
225 0           return $out;
226             }
227              
228             # Emulate internal processing to build JSON
229             # matching DDG API result format
230             sub _parse_disambiguations {
231 0     0     my ($self, $disambiguations, $out) = @_;
232 0           my @out;
233 0           my @disambiguations = split /\\n/, $disambiguations;
234 0           foreach my $disambiguation (@disambiguations){
235 0           my $result = {};
236 0 0         if ($disambiguation =~ m/^\*\[\[(.+)\]\],(.+)$/) {
237              
238 0           my $title = $1;
239 0           my $html = $2;
240              
241             # Parse HTML into plaintext
242 0           my $root = HTML::TreeBuilder->new_from_content($html);
243 0           $root->elementify();
244 0           my $text = $root->as_trimmed_text;
245              
246             # Build URL Path
247 0           my $href = "/?q=$title&ia=about";
248 0           my $a = HTML::Element->new('a', href => $href);
249 0           $a->push_content($title);
250              
251 0           $result = {
252             Result => $a->as_HTML . "$text", # generates `<a href="$url">$title</a>$text` which gets parsed by template helpers
253             FirstURL => $href,
254             Text => $text
255             };
256             }
257 0           push @out, $result;
258             }
259 0           return \@out;
260             }
261              
262             # Emulate internal processing to build JSON
263             # matching DDG API result format
264             sub _get_image {
265 0     0     my ($self, $image) = @_;
266 0           my $url = "";
267 0 0         if ($image =~ m/^\[\[Image:(.+)\]\]$/) {
268 0           $url = $1;
269             }
270 0           return $url;
271             }
272              
273              
274             sub _replace_newlines {
275 0     0     my ($self, $abstract) = @_;
276 0           $abstract =~ s/\\n/<br>/g;
277 0           return $abstract;
278             }
279              
280              
281             1;
282              
283             __END__
284              
285             =pod
286              
287             =head1 NAME
288              
289             App::DuckPAN::Fathead - Searches a given output.txt file for a query match
290              
291             =head1 VERSION
292              
293             version 1018
294              
295             =head1 AUTHOR
296              
297             DuckDuckGo <open@duckduckgo.com>, Zach Thompson <zach@duckduckgo.com>, Zaahir Moolla <moollaza@duckduckgo.com>, Torsten Raudssus <torsten@raudss.us> L<https://raudss.us/>
298              
299             =head1 COPYRIGHT AND LICENSE
300              
301             This software is Copyright (c) 2013 by DuckDuckGo, Inc. L<https://duckduckgo.com/>.
302              
303             This is free software, licensed under:
304              
305             The Apache License, Version 2.0, January 2004
306              
307             =cut