| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package App::DuckPAN::Cmd::Server; |
|
2
|
|
|
|
|
|
|
our $AUTHORITY = 'cpan:DDG'; |
|
3
|
|
|
|
|
|
|
# ABSTRACT: Starting up the web server to test instant answers |
|
4
|
|
|
|
|
|
|
$App::DuckPAN::Cmd::Server::VERSION = '1017'; |
|
5
|
1
|
|
|
1
|
|
951
|
use Moo; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
5
|
|
|
6
|
|
|
|
|
|
|
with qw( App::DuckPAN::Cmd App::DuckPAN::Restart ); |
|
7
|
|
|
|
|
|
|
|
|
8
|
1
|
|
|
1
|
|
218
|
use MooX::Options protect_argv => 0; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
6
|
|
|
9
|
1
|
|
|
1
|
|
1421
|
use Plack::Runner; |
|
|
1
|
|
|
|
|
1648
|
|
|
|
1
|
|
|
|
|
23
|
|
|
10
|
1
|
|
|
1
|
|
447
|
use File::ShareDir::ProjectDistDir; |
|
|
1
|
|
|
|
|
3732
|
|
|
|
1
|
|
|
|
|
7
|
|
|
11
|
1
|
|
|
1
|
|
326
|
use File::Copy; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
51
|
|
|
12
|
1
|
|
|
1
|
|
3
|
use Path::Tiny; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
34
|
|
|
13
|
1
|
|
|
1
|
|
3
|
use LWP::Simple; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
12
|
|
|
14
|
1
|
|
|
1
|
|
1130
|
use HTML::TreeBuilder; |
|
|
1
|
|
|
|
|
22707
|
|
|
|
1
|
|
|
|
|
9
|
|
|
15
|
1
|
|
|
1
|
|
36
|
use Config::INI; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
18
|
|
|
16
|
1
|
|
|
1
|
|
647
|
use Term::ProgressBar; |
|
|
1
|
|
|
|
|
48594
|
|
|
|
1
|
|
|
|
|
2051
|
|
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
# Command-line option --port / -p: the integer port number that _run_app
# passes to Plack::Runner. Falls back to 5000 when not supplied.
option 'port' => (
    is      => 'ro',
    lazy    => 1,
    default => sub { 5000 },
    format  => 'i',
    short   => 'p',
    doc     => 'set port on which server should listen. defaults to 5000',
);
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
# Registry of the page assets the server caches and serves, grouped by kind.
# Built on first access by _build_page_info.
has 'page_info' => (
    is      => 'ro',
    lazy    => 1,
    builder => '_build_page_info',
);
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
# Builder for the lazy `page_info` attribute.
#
# Returns a hash reference keyed by asset group (js, locales, css, templates,
# root, spice). Every group is an array reference of asset descriptors:
#   name            - human readable label
#   internal        - location of the local copy below asset_cache_path
#   external        - path of the remote original, or undef when the file
#                     only exists locally
#   desc            - short note on what the asset is for
#   load_sub_assets - set on the SERP entry only
# The js, locales and css groups start out empty.
sub _build_page_info {
    my ($self) = @_;

    my $cache_dir = $self->asset_cache_path;

    my %template_compiler = (
        name     => 'Template Compiling JS',
        internal => $cache_dir->child('template_compiler.js'),
        # stored locally, no need to make web request for this
        external => undef,
        desc     => 'Small script DuckPAN runs on SERP load; compiles Spice IA templates',
    );

    my %homepage = (
        name     => 'DDG Homepage',
        internal => $cache_dir->child('page_root.html'),
        external => '/',
        desc     => 'used for error page when no instant answers trigger',
    );

    my %serp = (
        name            => 'DDG SERP',
        internal        => $cache_dir->child('page_spice.html'),
        external        => '/?q=duckduckhack-template-for-spice2',
        load_sub_assets => 1,
        desc            => 'this is the page we inject Spice and Goodie results into',
    );

    return {
        js        => [],
        locales   => [],
        css       => [],
        templates => [ \%template_compiler ],
        root      => [ \%homepage ],
        spice     => [ \%serp ],
    };
}
|
66
|
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
# Name of the remote host that assets are fetched from and that rewritten
# links point to. Built on first access by _build_hostname.
has 'hostname' => (
    is      => 'ro',
    lazy    => 1,
    builder => '_build_hostname',
);
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
# Builder for the lazy `hostname` attribute: returns whatever the application
# object reports as its server_hostname.
sub _build_hostname {
    my $self = shift;

    my $app = $self->app;
    return $app->server_hostname;
}
|
77
|
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
# Directory that holds the cached assets for the configured hostname.
# Built on first access by _build_asset_cache_path.
has 'asset_cache_path' => (
    is      => 'ro',
    lazy    => 1,
    builder => '_build_asset_cache_path',
);
|
83
|
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
# Builder for the lazy `asset_cache_path` attribute.
#
# Resolves the per-hostname directory below the configured cache path,
# creates it when it does not exist yet, and returns the path object.
sub _build_asset_cache_path {
    my ($self) = @_;

    my $host_dir = $self->app->cfg->cache_path->child($self->hostname);

    if (!$host_dir->exists) {
        $host_dir->mkpath;
    }

    return $host_dir;
}
|
92
|
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
# Entry point into app |
|
94
|
|
|
|
|
|
|
sub run {
    # Everything after the invocant is forwarded, as one array reference, to
    # run_restarter; its result is this method's result.
    my $self = shift;
    my @args = @_;

    return $self->run_restarter(\@args);
}
|
99
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
# Starts the Plack server on the designated port. Will be launched in a child |
|
101
|
|
|
|
|
|
|
# process since it blocks. Will be killed by user ctrl-c or parent explicitly |
|
102
|
|
|
|
|
|
|
# kill'ing it. |
|
103
|
|
|
|
|
|
|
sub _run_app {
    my ($self, $args) = @_;

    # NOTE(review): this value is not used below; the accessor call is kept
    # as it was in case its first invocation is relied upon -- confirm.
    my $cache_path = $self->app->cfg->cache_path;

    # Ensure everything is up to date, or exit.
    $self->app->check_requirements;

    my $found_blocks = $self->app->ddg->get_blocks_from_current_dir(@$args);
    my @blocks       = @{$found_blocks};

    $self->app->emit_debug("Hostname is: https://" . $self->hostname);
    $self->app->emit_info("Checking asset cache...");

    # The list of assets is assembled before the loop starts, so entries that
    # get added to page_info while the loop is running are not visited here.
    my @startup_assets = map { @{$self->page_info->{$_}} } qw(root spice templates);
    for my $asset (@startup_assets) {
        if (!defined $asset->{external}) {
            # Files without external sources should be copied from the distribution.
            my $cached_copy = $asset->{internal};
            my $dist_copy   = path(dist_dir('App-DuckPAN'), $cached_copy->basename);
            if ($dist_copy->exists && !$cached_copy->exists) {
                $dist_copy->copy($cached_copy);
            }
            next;
        }
        $self->retrieve_and_cache($asset);
    }

    # Pull files out of cache to be served later by DuckPAN server
    my %web_args = (
        blocks          => \@blocks,
        server_hostname => $self->hostname
    );
    for my $page (keys %{$self->page_info}) {
        my $page_assets = $self->page_info->{$page};
        $web_args{'page_' . $page} = $self->slurp_or_empty($page_assets);
    }

    $self->app->emit_info("Starting up webserver...", "You can stop the webserver with Ctrl-C");

    require App::DuckPAN::Web;

    my $web         = App::DuckPAN::Web->new(%web_args);
    my $psgi_wrapper = sub { $web->run_psgi($self->app, @_) };
    my $runner      = Plack::Runner->new(
        #loader => 'Restarter',
        includes => ['lib'],
        app      => $psgi_wrapper,
    );
    #$runner->loader->watch("./lib");
    $runner->parse_options("--port", $self->port);

    # The runner's return value becomes the process exit status.
    exit $runner->run;
}
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
# Concatenate the cached contents of a group of assets.
#
#   $which - array reference of asset descriptors (hashes with an `internal`
#            path object and, optionally, a `name`)
#
# Descriptors without an `internal` path, or whose file does not exist, are
# skipped. Each remaining file is prefixed with the result of
# make_source_comment and passed through change_js, change_css or
# change_html depending on its extension. Returns the combined string, which
# is empty when nothing qualified.
sub slurp_or_empty {
    my ($self, $which) = @_;

    # NOTE(review): this value is not used below; the accessor call is kept
    # as it was in case its first invocation is relied upon -- confirm.
    my $cache_path = $self->asset_cache_path;

    my @pieces;
    for my $entry (@$which) {
        next unless $entry->{internal};

        my $file = $entry->{internal};

        my $rewriter = 'change_html';
        if ($file =~ m/\.js$/) {
            $rewriter = 'change_js';
        }
        elsif ($file =~ m/\.css$/) {
            $rewriter = 'change_css';
        }

        next unless $file->exists;

        my $header = $self->make_source_comment($entry);
        my $body   = $self->$rewriter($file->slurp);
        push @pieces, $header . $body;
    }

    return join('', @pieces);
}
|
164
|
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
# Build a one-line banner naming an asset, in the comment syntax of the
# asset's file type.
#
#   $file_info - asset descriptor; `internal` is the file (its extension picks
#                the comment syntax) and `name` is the preferred label, with
#                `internal` itself as the fallback label
#
# Returns the banner wrapped in newlines. For file types other than .js,
# .css and .html the banner is empty, so the result is just two newlines.
sub make_source_comment {
    my ($self, $file_info) = @_;

    my $internal = $file_info->{internal};
    my $title    = $file_info->{name} || $internal;

    # Extensions are matched with their leading dot, the same way
    # slurp_or_empty picks the rewrite method for a file.
    my $comment
        = $internal =~ /\.js$/   ? '// ' . $title
        : $internal =~ /\.css$/  ? '/* ' . $title . ' */'
        : $internal =~ /\.html$/ ? '<!-- ' . $title . ' -->'
        :                          '';

    return "\n$comment\n";
}
|
183
|
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
# Force DuckPAN to ignore requests for certain files |
|
185
|
|
|
|
|
|
|
# that are not needed (ie. d.js, s.js, g.js, post2.html) |
|
186
|
|
|
|
|
|
|
sub change_js {
    my ( $self, $js ) = @_;

    # Both substitutions point the request at the "ignore" query parameter.
    for ($js) {
        # versioned d/s/g scripts, duckduck.js and duckgo_dev.js
        s!/([dsg]\d+?|duckduck|duckgo_dev)\.js\?!/?duckduckhack_ignore=1&!g;
        # post2.html
        s!/post2\.html!/?duckduckhack_ignore=1&!g;
    }

    # The result is additionally run through the CSS url() rewrite.
    return $self->change_css($js);
}
|
192
|
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
# Rewrite all relative asset links in CSS |
|
194
|
|
|
|
|
|
|
# E.g. url("/assets/background.png") => url("http://duckduckgo.com/assets/background.png") |
|
195
|
|
|
|
|
|
|
sub change_css {
    my ( $self, $css ) = @_;

    my $hostname = $self->hostname;

    # Point root-relative url(...) values at the remote host.
    #  - The quote capture is written as (["']?) so that $1 is always defined,
    #    also for unquoted url(/path).
    #  - (?!/) leaves protocol-relative url(//other.host/...) values alone;
    #    they already name their host.
    $css =~ s{:\s*url\((["']?)/(?!/)}{:url(${1}http://$hostname/}g;

    return $css;
}
|
201
|
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
# Rewrite a cached HTML page so that it can be served locally.
#
#   $html - the page source
#
# Stylesheet and script references that the local server answers itself are
# pointed at its /?duckduckhack_* endpoints; other root-relative links and
# image sources are pointed at the remote hostname. The serialised page is
# finally passed through change_css and change_js. Returns the new source.
sub change_html {
    my ( $self, $html ) = @_;

    my $hostname = $self->hostname;

    my $root = HTML::TreeBuilder->new;
    $root->parse($html);
    # Signal end of input so that anything the parser is still buffering
    # makes it into the tree.
    $root->eof;

    # Make sure DuckPAN serves DDG CSS (already pulled down at startup)
    # ie <link href="/s123.css"> becomes <link href="/?duckduckhack_css=1">
    # Also rewrite root-relative links to hostname
    my $has_css = 0;
    for my $element ($root->look_down("_tag", "a"), $root->look_down("_tag", "link")) {
        my $type = $element->attr('type');
        if ($type && $type eq 'text/css') {
            # We only want to load the CSS file once, because
            # /?duckduckhack_css=1 already has all of the CSS in a single page.
            if ($has_css) {
                $element->attr('href', '/?duckduckhack_ignore=1');
            }
            else {
                $element->attr('href', '/?duckduckhack_css=1');
                $has_css = 1;
            }
            next;
        }

        # A single leading slash only: protocol-relative //host/... links
        # already name their host and are left untouched.
        my $href = $element->attr('href');
        if (defined $href && $href =~ m{\A/(?!/)}) {
            $element->attr('href', 'http://' . $hostname . $href);
        }
    }

    # Make sure DuckPAN serves DDG JS (already pulled down at startup)
    # ie <script src="/d123.js"> becomes <script src="/?duckduckhack_js=1">
    # Also rewrite root-relative links to hostname

    # Temp Fix: Force ignore of d.js & duckduck.
    # This logic needs to be improved!

    my $has_ddh = 0;
    for my $script ($root->look_down("_tag", "script")) {
        my $src = $script->attr('src') or next;

        # Already updated, no need to do again
        next if $src =~ m/^\/\?duckduckhack_/;

        if ($src =~ m/^\/(dpan\d+|duckpan)\.js/) {
            if ($has_ddh) {
                $script->attr('src', '/?duckduckhack_ignore=1');
            }
            else {
                $script->attr('src', '/?duckduckhack_js=1');
                $has_ddh = 1;
            }
        }
        elsif ($src =~ m/^\/(g\d+|serp)\.js/) {
            $script->attr('src', '/?duckduckhack_templates=1');
        }
        elsif ($src =~ m/^\/(d\d+|base)\.js/) {
            # If dpan.js is not present (ie. homepage)
            # make sure we serve the js rather than blocking
            # the call to d.js
            $script->attr('src', $has_ddh ? '/?duckduckhack_ignore=1' : '/?duckduckhack_js=1');
        }
        elsif ($src =~ /locales/) {
            $script->attr('src', '/?duckduckhack_locales=1');
        }
        elsif ($src =~ m{\A/(?!/)}) {
            $script->attr('src', 'http://' . $hostname . $src);
        }
    }

    # Rewrite img links to be requested from hostname
    for my $img ($root->look_down("_tag", "img")) {
        my $src = $img->attr('src');
        next unless $src;

        # Sources that already carry a scheme (http:, https:, data:, ...) or
        # are protocol-relative must not get the hostname glued in front.
        next if $src =~ m{\A(?:[A-Za-z][A-Za-z0-9+.-]*:|//)};

        # Relative sources without a leading slash need one as separator.
        my $separator = ($src =~ m{\A/}) ? '' : '/';
        $img->attr('src', 'http://' . $hostname . $separator . $src);
    }

    my $newhtml = $root->as_HTML;

    # Release the tree explicitly; older HTML::Tree releases do not free it
    # when the variable goes out of scope.
    $root->delete;

    return $self->change_js($self->change_css($newhtml));
}
|
301
|
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
# This is where we cache and check for newer versions |
|
303
|
|
|
|
|
|
|
# of DDG JS and CSS by parsing the HTML requested from |
|
304
|
|
|
|
|
|
|
# DuckDuckGo. If new files exist, we grab them, rewrite |
|
305
|
|
|
|
|
|
|
# any links and store them in the cache. Otherwise we |
|
306
|
|
|
|
|
|
|
# serve the current versions from the cache. |
|
307
|
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
sub get_sub_assets {
    #   $from - descriptor of an already cached HTML page; its `internal`
    #           file is parsed for <script src> and <link type="text/css">
    #           references.
    # Recognised references are added to the front of the js, templates,
    # locales and css groups of page_info, and every asset in those groups is
    # then passed to retrieve_and_cache with $from as its parent.
    my ($self, $from) = @_;

    my $root = HTML::TreeBuilder->new;
    $root->parse($from->{internal}->slurp);
    # Signal end of input so that anything the parser is still buffering
    # makes it into the tree.
    $root->eof;

    # Only the attribute values are needed, so collect them and let go of the
    # tree right away (older HTML::Tree releases need the explicit delete).
    my @script_srcs = grep { $_ } map { $_->attr('src') } $root->look_down("_tag", "script");
    my @css_hrefs   = grep { $_ }
        map  { $_->attr('href') }
        grep { $_->attr('type') && $_->attr('type') eq 'text/css' }
        $root->look_down("_tag", "link");
    $root->delete;

    my $cache_path = $self->asset_cache_path;

    # Find version no. for d.js and g.js
    # Each capture is copied into a named variable before any further call is
    # made, rather than relying on $1 staying intact.
    for my $src (@script_srcs) {
        if ($src =~ m/^\/((?:dpan\d+|duckpan)\.js)/) {
            my $file = $1;
            unshift @{$self->page_info->{js}},
              {
                name     => 'Main JS',
                internal => $cache_path->child($file),
                external => $file
              };
        }
        elsif ($src =~ m/^\/((?:g\d+|serp)\.js)/) {
            my $file = $1;
            unshift @{$self->page_info->{templates}},
              {
                name     => 'Templating JS',
                internal => $cache_path->child($file),
                external => $file
              };
        }
        elsif ($src =~ m/^\/(locales(?:.*)\.js)/) {
            my $long_path  = $1;
            my $cache_name = $long_path;
            $cache_name =~ s#^.+(\.\d+\.\d+\.js)#locales$1#g;    # Turn long path into cacheable name
            unshift @{$self->page_info->{locales}},
              {
                name     => 'Locales JS',
                internal => $cache_path->child($cache_name),
                external => $long_path
              };
        }
    }

    # We're looking for txxx.css and sxxx.css.
    # style.css and static.css are for development mode.
    my @cssfile;
    for my $href (@css_hrefs) {
        if ($href =~ m/^\/((?:[str]\d+|style|static|serp)\.css)/) {
            push @cssfile, $1;
        }
    }

    # Names are visited in sorted order and each is put at the front, so the
    # css group ends up holding them in reverse sorted order.
    foreach my $name (sort @cssfile) {
        unshift @{$self->page_info->{css}},
          {
            name     => $name . ' CSS',
            internal => $cache_path->child($name),
            external => $name
          };
    }

    # Check if we need to request any new assets from hostname, otherwise use cached copies
    foreach my $curr_asset (grep { defined $_ && $_->{internal} } map { @{$self->page_info->{$_}} } (qw(js templates css locales))) {
        $self->retrieve_and_cache($curr_asset, $from);
    }
}
|
384
|
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
# Make sure an asset is present in the local cache, downloading it if needed.
#
#   $asset  - asset descriptor; both `internal` (cache file) and `external`
#             (path on the remote host) must be set, otherwise nothing is done
#   $sub_of - optional descriptor of the page that referenced this asset;
#             only used to label messages
#
# A cache file younger than the application's `cachesec` is reused without a
# request. Otherwise the file is re-created and filled from
# https://<hostname>/<external>, with a progress bar in verbose mode. A failed
# or incomplete download removes the cache file and ends the run through
# emit_and_exit. Assets flagged `load_sub_assets` are afterwards handed to
# get_sub_assets. Always returns nothing.
sub retrieve_and_cache {
    my ($self, $asset, $sub_of) = @_;

    return unless ($asset->{internal} && $asset->{external});

    my $to_file    = $asset->{internal};
    my $path_start = (substr($asset->{external}, 0, 1) eq '/') ? '' : '/';
    my $url        = 'https://' . $self->hostname . $path_start . $asset->{external};
    my $prefix     = ($sub_of) ? '[via ' . $sub_of->{name} . '] ' : '';
    $prefix .= '[' . $asset->{name} . '] ';

    if ($to_file->exists && (time - $to_file->stat->ctime) < $self->app->cachesec) {
        $self->app->emit_debug($prefix . $to_file->basename . " recently cached -- no request made.");
    }
    else {
        $self->app->emit_debug($prefix . 'requesting from: ' . $url . '...');

        # Start from an empty file; the response body is appended chunk by chunk.
        $to_file->remove;
        $to_file->touchpath;

        my ($expected_length, $progress);
        my $bytes_received = 0;
        my $next_update    = 0;
        my $res = $self->app->http->request(
            HTTP::Request->new(GET => $url),
            sub {
                my ($chunk, $response) = @_;
                $bytes_received += length($chunk);
                $to_file->append($chunk);
                # Taken from the first chunk's response; 0 means "unknown".
                $expected_length //= $response->content_length || 0;
                return unless $self->app->verbose;    # Progress bar is just for verbose mode;
                if ($expected_length && !defined($progress)) {
                    $progress = Term::ProgressBar->new({
                        name   => $prefix,
                        count  => $expected_length,
                        remove => 1,
                        ETA    => 'linear',
                        fh     => \*STDOUT,
                    });
                    $progress->minor(0);
                }
                elsif ($progress && $bytes_received > $next_update) {
                    $next_update = $progress->update($bytes_received);
                }
            });

        # NOTE(review): assumes app->http is an LWP::UserAgent, which reports a
        # die() inside the content callback through the X-Died response
        # header -- confirm.
        my $callback_error = $res->header('X-Died');

        if (!$res->is_success || defined $callback_error) {
            # Do not leave the empty or partial file behind: it would pass the
            # "recently cached" check above on the next run.
            $to_file->remove;
            my $reason = defined $callback_error ? 'aborted (' . $callback_error . ')' : $res->status_line;
            $self->app->emit_and_exit(1, qq~$prefix request failed with response: ~ . $reason . "\n");
        }
        elsif ($expected_length && $bytes_received < $expected_length) {
            $to_file->remove;
            $self->app->emit_and_exit(1, qq~$prefix only $bytes_received of $expected_length bytes received~);
        }
        else {
            $progress->update($expected_length) if ($progress && $expected_length);
            $self->app->emit_debug($prefix . 'written to cache: ' . $to_file);
        }
    }

    # We need to load the assets on the SERPs for reuse.
    if ($asset->{load_sub_assets}) {
        $self->app->emit_debug($prefix . 'parsing for additional assets');
        $self->get_sub_assets($asset);
        $self->app->emit_debug($prefix . 'assets loaded');
    }

    return;
}
|
447
|
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
1; |
|
449
|
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
__END__ |
|
451
|
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
=pod |
|
453
|
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
=head1 NAME |
|
455
|
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
App::DuckPAN::Cmd::Server - Starting up the web server to test instant answers |
|
457
|
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
=head1 VERSION |
|
459
|
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
version 1017 |
|
461
|
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
=head1 AUTHOR |
|
463
|
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
DuckDuckGo <open@duckduckgo.com>, Zach Thompson <zach@duckduckgo.com>, Zaahir Moolla <moollaza@duckduckgo.com>, Torsten Raudssus <torsten@raudss.us> L<https://raudss.us/> |
|
465
|
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
|
467
|
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
This software is Copyright (c) 2013 by DuckDuckGo, Inc. L<https://duckduckgo.com/>. |
|
469
|
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
This is free software, licensed under: |
|
471
|
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
The Apache License, Version 2.0, January 2004 |
|
473
|
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
=cut |