line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package App::DuckPAN::Fathead; |
2
|
|
|
|
|
|
|
our $AUTHORITY = 'cpan:DDG'; |
3
|
|
|
|
|
|
|
# ABSTRACT: Searches a given output.txt file for a query match |
4
|
|
|
|
|
|
|
$App::DuckPAN::Fathead::VERSION = '1018'; |
5
|
1
|
|
|
1
|
|
1280
|
use Moo; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
7
|
|
6
|
|
|
|
|
|
|
with 'App::DuckPAN::HasApp'; |
7
|
|
|
|
|
|
|
|
8
|
1
|
|
|
1
|
|
1573
|
use DBI; |
|
1
|
|
|
|
|
13601
|
|
|
1
|
|
|
|
|
67
|
|
9
|
1
|
|
|
1
|
|
626
|
use JSON; |
|
1
|
|
|
|
|
3983
|
|
|
1
|
|
|
|
|
4
|
|
10
|
1
|
|
|
1
|
|
119
|
use Path::Tiny; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
42
|
|
11
|
1
|
|
|
1
|
|
6
|
use HTML::TreeBuilder; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
12
|
|
12
|
1
|
|
|
1
|
|
21
|
use HTML::Element; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
4
|
|
13
|
1
|
|
|
1
|
|
20
|
use Data::Printer return_value => 'dump'; |
|
1
|
|
|
|
|
6
|
|
|
1
|
|
|
|
|
8
|
|
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
# Identifier of the selected Fathead (used as the directory name under
# lib/fathead). Assigning a value runs _trigger_selected; the generated
# has_selected predicate reports whether a value has been set.
has selected => (
    is        => 'rw',
    required  => 0,
    lazy      => 1,
    predicate => 1,
    trigger   => 1,
);
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
# Trigger for the 'selected' attribute.
#
# Checks that lib/fathead/<id> is a directory containing output.txt, stores
# the output.txt path via _set_output_txt, and returns the directory path.
# When either check fails, the application is asked to emit a message and
# exit with status 1.
#
#   $id - value just assigned to 'selected'
sub _trigger_selected {
    my ( $self, $id ) = @_;

    my $dir = path("lib/fathead/$id");

    # absolute() is used for the messages because it never consults the
    # filesystem; realpath() throws when a parent directory is missing,
    # which is exactly the situation being reported here.
    $self->app->emit_and_exit(1, "Directory not found: " . $dir->absolute)
        unless $dir->is_dir;

    my $file = $dir->child("output.txt");

    # Report the directory that was searched, not the missing file itself.
    $self->app->emit_and_exit(1, "No output.txt was found in " . $dir->absolute)
        unless $file->exists;

    $self->_set_output_txt($file);
    return $dir;
}
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
# Array reference of trigger words for the selected Fathead, produced on
# first access by _build__trigger_words.
has _trigger_words => (
    is      => 'ro',
    lazy    => 1,
    builder => 1,
);
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
# Builder for '_trigger_words'.
#
# Reads lib/fathead/<selected>/trigger_words.txt and returns an array
# reference with one trigger per non-blank line. Returns an empty array
# reference when no Fathead is selected or the file does not exist (the
# latter is reported through the app's debug channel).
sub _build__trigger_words {
    my ($self) = @_;

    my $tf = 'trigger_words.txt';
    return [] unless $self->has_selected;

    my $file = path("lib/fathead/", $self->selected, $tf);
    unless ($file->exists) {
        # absolute() cannot throw on unresolved paths, unlike realpath().
        my $full_path = $file->absolute;
        $self->app->emit_debug("No $tf was found in $full_path");
        return [];
    }

    # Strip surrounding whitespace (including "\r" from CRLF files) and skip
    # blank lines so every entry is a usable trigger.
    my @words;
    for my $line ($file->lines) {
        $line =~ s/\A\s+|\s+\z//g;
        push @words, $line if length $line;
    }
    return \@words;
}
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
# Compiled regex matching any trigger word, produced on first access by
# _build__trigger_re.
has _trigger_re => (
    is      => 'ro',
    builder => 1,
    lazy    => 1,
);
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
# Builder for '_trigger_re'.
#
# Returns a case-insensitive regex that matches any one of the trigger
# words as a whole token. With no trigger words it returns a pattern that
# can never match.
sub _build__trigger_re {
    my ($self) = @_;

    # Longest first: regex alternation takes the first alternative that
    # succeeds, so "perl" listed before "perl function" would otherwise
    # always win and leave "function" behind in the query.
    my @words = sort { length $b <=> length $a || $a cmp $b }
                grep { defined && length }
                @{ $self->_trigger_words };

    return qr/(?!)/ unless @words;

    my $alternation = join '|', map { quotemeta } @words;

    # Lookarounds behave like \b for triggers that start and end with word
    # characters, and additionally work for ones that do not ("c++", ".net").
    return qr/(?<!\w)(?:$alternation)(?!\w)/i;
}
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
# Path to the selected Fathead's output.txt. Read-only to callers; assigned
# internally through the generated _set_output_txt writer.
has output_txt => (
    is       => 'rwp',
    required => 0,
    lazy     => 1,
);
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
# DBI handle used to query output.txt, created on first access by
# _build_dbh.
has dbh => (
    is       => 'rw',
    required => 0,
    builder  => 1,
    lazy     => 1,
);
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
# Builder for 'dbh'.
#
# Connects through the "dbi:CSV:" driver so output.txt can be queried with
# SQL as a table named "output". The file is read from the directory that
# holds output_txt, fields are tab separated, and quote/escape characters
# are disabled. Errors are raised (RaiseError) rather than printed.
sub _build_dbh {
    my ( $self ) = @_;

    # Column names applied to output.txt, in file order.
    my @columns = (
        "title",
        "type",
        "redirect",
        "col4",
        "categories",
        "col6",
        "related_topics",
        "col8",
        "external_links",
        "disambiguation",
        "images",
        "abstract",
        "abstract_url",
    );

    my %output_table = (
        file      => 'output.txt',
        col_names => \@columns,
    );

    my %attributes = (
        f_dir                => $self->output_txt->parent,
        f_ext                => ".txt/r",
        csv_sep_char         => "\t",
        csv_quote_char       => undef,
        csv_escape_char      => undef,
        csv_allow_whitespace => 1,
        csv_allow_quotes     => 1,
        RaiseError           => 1,
        PrintError           => 0,
        csv_tables           => { output => \%output_table },
    );

    my $dbh = DBI->connect("dbi:CSV:", undef, undef, \%attributes)
        or die $DBI::errstr;

    return $dbh;
}
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
# Get a Fathead result for a query.
#
# One leading or trailing trigger word (with its adjoining whitespace) is
# removed from the query before the lookup. While the row found is a
# redirect (type 'R') its target is looked up in turn.
#
# Returns the final row hash, a false value when nothing matches, or nothing
# (after emitting an error) when the redirects form a cycle.
sub _search_output {
    my ($self, $query) = @_;

    my $trigger_re = $self->_trigger_re;
    $query =~ s/^$trigger_re\s+|\s+$trigger_re$//;
    my $result = $self->_db_lookup($query);

    # Titles already followed; lookups are case-insensitive, so compare
    # lower-cased. Without this a redirect cycle would loop forever.
    my %visited;

    while ($result && ($result->{type} // '') eq 'R') {
        my $title    = $result->{title} // '';
        my $redirect = $result->{redirect};

        if ($visited{lc $title}++) {
            $self->app->emit_error("Redirect loop detected at '$title'");
            return;
        }

        $self->app->emit_notice("Following Redirect: '$result->{title}' -> '$redirect'");
        $result = $self->_db_lookup($redirect);
    }

    return $result;
}
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
# Look up a single title in the DB (output.txt).
#
# Runs a case-insensitive match on the title column and returns the last
# matching row as a hash reference, or undef when nothing matches. Any
# exception raised during the query is caught and reported through the
# app's error channel.
sub _db_lookup {
    my ($self, $query) = @_;

    my $match;

    # eval resets $@ itself, so a true $@ afterwards means this block died.
    eval {
        # TODO lowercase all titles first
        my $sth = $self->dbh->prepare("SELECT * FROM output WHERE lower(title) = ?");
        $sth->execute(lc $query);

        # Drain every row; the last one fetched is the one kept.
        my $row;
        $match = $row while $row = $sth->fetchrow_hashref;

        $sth->finish();
    };
    $self->app->emit_error("SQL database error: $@") if $@;

    return $match;
}
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
# Public entry point: search output.txt for $query and, when a row is
# found, return the structured answer built from it. Returns undef when the
# search yields nothing.
sub structured_answer_for_query {
    my ($self, $query) = @_;

    my $hit = $self->_search_output($query);
    return undef unless $hit;

    return $self->_build_structured_answer($hit);
}
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
# Build a Structured Answer hash from one output.txt row.
#
#   $data - row hash reference as returned by the DB lookup
#
# The result always carries id, signal and meta. Topic, model, templates
# and the extra template properties depend on the row type:
# 'A' (article), 'D' (disambiguation) or 'C' (category page).
sub _build_structured_answer {
    my ($self, $data) = @_;

    # Get IA Metadata via ID lookup
    # Assume selected is an ID
    my $id       = $self->selected;
    my $metadata = DDG::Meta::Data->get_ia(id => $id) // {};
    $self->app->emit_error("No Metadata found for ID: ".$id) unless keys %$metadata;

    my $out = {
        id     => $id,
        signal => "high",
        meta   => $metadata,
    };

    # DBD::Csv ignores col_names letter casing
    # So, manually map columns to template properties
    # TODO update info_detail template to use lowercase variable names
    my %extra_data;
    my $type = $data->{type};

    if ($type eq 'A') {
        # Article Result
        @{$out}{qw(duckbar_topic model templates)}
            = ('About', 'FatheadArticle', { detail => 'info_detail' });
        %extra_data = (
            Heading     => $data->{title},
            Abstract    => $self->_replace_newlines($data->{abstract}),
            AbstractURL => $data->{abstract_url},
            FirstURL    => $metadata->{src_url},
            Image       => $self->_get_image($data->{images}),
        );
    }
    elsif ($type eq 'D') {
        # Disambiguation Result
        @{$out}{qw(duckbar_topic model templates)}
            = ('Meanings', 'FatheadListItem', { item => 'meanings_item' });
        %extra_data = (
            Heading       => $data->{title}." (".$metadata->{name}.")",
            RelatedTopics => $self->_parse_disambiguations($data->{disambiguation}, $out)
        );
    }
    elsif ($type eq 'C') {
        # Category Pages Result
        @{$out}{qw(duckbar_topic model templates)}
            = ('List', 'FatheadListItem', { item => 'categories_item' });
    }

    # Template properties override same-named row columns.
    $out->{data} = { %$data, %extra_data };
    return $out;
}
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
# Emulate internal processing to build JSON
# matching DDG API result format
#
#   $disambiguations - entries separated by a literal backslash-n sequence,
#                      each expected to look like "*[[Title]],<html>"
#   $out             - accepted but not used here
#
# Returns an array reference with one hash per entry: Result (link HTML
# followed by the plain text), FirstURL and Text. An entry that does not
# have the expected shape contributes an empty hash.
sub _parse_disambiguations {
    my ($self, $disambiguations, $out) = @_;

    my @entries;

    for my $line (split /\\n/, $disambiguations) {
        my ($title, $html) = $line =~ m/^\*\[\[(.+)\]\],(.+)$/;

        unless (defined $title) {
            push @entries, {};
            next;
        }

        # Parse HTML into plaintext
        my $tree = HTML::TreeBuilder->new_from_content($html);
        $tree->elementify();
        my $text = $tree->as_trimmed_text;

        # Build URL Path
        my $href = "/?q=$title&ia=about";
        my $link = HTML::Element->new('a', href => $href);
        $link->push_content($title);

        push @entries, {
            # generates `<a href="$url">$title</a>$text` which gets parsed by template helpers
            Result   => $link->as_HTML . "$text",
            FirstURL => $href,
            Text     => $text
        };
    }

    return \@entries;
}
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
# Extract the image URL from an output.txt images field.
#
# A field of the form "[[Image:<url>]]" yields <url>; anything else yields
# the empty string.
sub _get_image {
    my ($self, $image) = @_;

    return $image =~ m/^\[\[Image:(.+)\]\]$/ ? $1 : "";
}
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
# Return $abstract with every literal backslash-n sequence (the two
# characters "\" and "n", not a real newline) replaced by "<br>".
sub _replace_newlines {
    my ($self, $abstract) = @_;

    # Works on the local copy taken from @_; the caller's value is untouched.
    s{\\n}{<br>}g for $abstract;

    return $abstract;
}
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
1; |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
__END__ |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
=pod |
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
=head1 NAME |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
App::DuckPAN::Fathead - Searches a given output.txt file for a query match |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
=head1 VERSION |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
version 1018 |
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
=head1 AUTHOR |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
DuckDuckGo <open@duckduckgo.com>, Zach Thompson <zach@duckduckgo.com>, Zaahir Moolla <moollaza@duckduckgo.com>, Torsten Raudssus <torsten@raudss.us> L<https://raudss.us/> |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
This software is Copyright (c) 2013 by DuckDuckGo, Inc. L<https://duckduckgo.com/>. |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
This is free software, licensed under: |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
The Apache License, Version 2.0, January 2004 |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
=cut |