File Coverage

blib/lib/App/DuckPAN/Cmd/Server.pm
Criterion Covered Total %
statement 30 198 15.1
branch 0 82 0.0
condition 0 35 0.0
subroutine 10 24 41.6
pod 0 8 0.0
total 40 347 11.5


line stmt bran cond sub pod time code
1             package App::DuckPAN::Cmd::Server;
2             our $AUTHORITY = 'cpan:DDG';
3             # ABSTRACT: Starting up the web server to test instant answers
4             $App::DuckPAN::Cmd::Server::VERSION = '1019';
5 1     1   926 use Moo;
  1         2  
  1         6  
6             with qw( App::DuckPAN::Cmd App::DuckPAN::Restart );
7              
8 1     1   272 use MooX::Options protect_argv => 0;
  1         2  
  1         9  
9 1     1   1850 use Plack::Runner;
  1         1656  
  1         24  
10 1     1   399 use File::ShareDir::ProjectDistDir;
  1         4222  
  1         7  
11 1     1   522 use File::Copy;
  1         3  
  1         53  
12 1     1   6 use Path::Tiny;
  1         1  
  1         36  
13 1     1   5 use LWP::Simple;
  1         2  
  1         13  
14 1     1   923 use HTML::TreeBuilder;
  1         22774  
  1         8  
15 1     1   32 use Config::INI;
  1         3  
  1         19  
16 1     1   464 use Term::ProgressBar;
  1         49450  
  1         1938  
17              
18             option port => (
19             is => 'ro',
20             format => 'i',
21             lazy => 1,
22             short => 'p',
23             default => sub { 5000 },
24             doc => 'set port on which server should listen. defaults to 5000',
25             );
26              
27             has page_info => (
28             is => 'ro',
29             builder => '_build_page_info',
30             lazy=> 1,
31             );
32              
33             sub _build_page_info {
34 0     0     my $self = shift;
35 0           my $cache_path = $self->asset_cache_path;
36              
37             return +{
38 0           js => [],
39             locales => [],
40             css => [],
41             templates => [{
42             name => 'Template Compiling JS',
43             internal => $cache_path->child('template_compiler.js'),
44             # stored locally, no need to make web request for this
45             external => undef,
46             desc => 'Small script DuckPAN runs on SERP load; compiles Spice IA templates',
47             },
48             ],
49             root => [{
50             name => 'DDG Homepage',
51             internal => $cache_path->child('page_root.html'),
52             external => '/',
53             desc => 'used for error page when no instant answers trigger',
54             },
55             ],
56             spice => [{
57             name => 'DDG SERP',
58             internal => $cache_path->child('page_spice.html'),
59             external => '/?q=duckduckhack-template-for-spice2',
60             load_sub_assets => 1,
61             desc => 'this is the page we inject Spice and Goodie results into',
62             },
63             ],
64             };
65             }
66              
67             has hostname => (
68             is => 'ro',
69             builder => '_build_hostname',
70             lazy => 1,
71             );
72              
73             sub _build_hostname {
74 0     0     my ( $self ) = @_;
75 0           return $self->app->server_hostname;
76             }
77              
78             has asset_cache_path => (
79             is => 'ro',
80             builder => 1,
81             lazy => 1,
82             );
83              
84             sub _build_asset_cache_path {
85 0     0     my $self = shift;
86              
87 0           my $asset_path = $self->app->cfg->cache_path->child($self->hostname);
88 0 0         $asset_path->mkpath unless $asset_path->exists;
89              
90 0           return $asset_path;
91             }
92              
93             # Entry point into app
94             sub run {
95 0     0 0   my ($self, @args) = @_;
96              
97 0           $self->run_restarter(\@args);
98             }
99              
100             # Starts the Plack server on the designated port. Will be launched in a child
101             # process since it blocks. Will be killed by user ctrl-c or parent explicitly
102             # kill'ing it.
103             sub _run_app {
104 0     0     my ($self, $args) = @_;
105              
106 0           my $cache_path = $self->app->cfg->cache_path;
107              
108 0           $self->app->check_requirements; # Ensure eveything is up do date, or exit.
109              
110 0           my @blocks = @{$self->app->ddg->get_blocks_from_current_dir(@$args)};
  0            
111              
112 0           $self->app->emit_debug("Hostname is: https://" . $self->hostname);
113 0           $self->app->emit_info("Checking asset cache...");
114              
115 0           foreach my $asset (map { @{$self->page_info->{$_}} } (qw(root spice templates))) {
  0            
  0            
116 0 0         if (defined $asset->{external}) {
117 0           $self->retrieve_and_cache($asset);
118             }
119             else {
120             # Files without external sources should be copied from the distribution.
121 0           my $to_file = $asset->{internal};
122 0           my $from_file = path(dist_dir('App-DuckPAN'), $to_file->basename);
123 0 0 0       $from_file->copy($to_file) if ($from_file->exists && !$to_file->exists);
124             }
125             }
126              
127             # Pull files out of cache to be served later by DuckPAN server
128 0           my %web_args = (
129             blocks => \@blocks,
130             server_hostname => $self->hostname
131             );
132 0           foreach my $page (keys %{$self->page_info}) {
  0            
133 0           $web_args{'page_' . $page} = $self->slurp_or_empty($self->page_info->{$page});
134             }
135              
136 0           $self->app->emit_info("Starting up webserver...", "You can stop the webserver with Ctrl-C");
137              
138 0           require App::DuckPAN::Web;
139              
140 0           my $web = App::DuckPAN::Web->new(%web_args);
141             my $runner = Plack::Runner->new(
142             #loader => 'Restarter',
143             includes => ['lib'],
144 0     0     app => sub { $web->run_psgi($self->app, @_) },
145 0           );
146             #$runner->loader->watch("./lib");
147 0           $runner->parse_options("--port", $self->port);
148 0           exit $runner->run;
149             }
150              
151             sub slurp_or_empty {
152 0     0 0   my ($self, $which) = @_;
153 0           my $cache_path = $self->asset_cache_path;
154 0           my $contents = '';
155 0           foreach my $which_file (grep { $_->{internal} } (@$which)) {
  0            
156 0           my $where = $which_file->{internal};
157 0 0         my $change_method = ($where =~ m/\.js$/) ? 'change_js' : ($where =~ m/\.css$/) ? 'change_css' : 'change_html';
    0          
158              
159 0 0         $contents .= $self->make_source_comment($which_file) . $self->$change_method($where->slurp) if ($where->exists);
160             }
161              
162 0           return $contents;
163             }
164              
165             sub make_source_comment {
166 0     0 0   my ($self, $file_info) = @_;
167              
168 0           my $comment = '';
169 0           my $internal = $file_info->{internal};
170 0   0       my $title = $file_info->{name} || $internal;
171 0 0         if ($internal =~ /js$/) {
    0          
    0          
172 0           $comment = '// ' . $title;
173             }
174             elsif ($internal =~ /css$/) {
175 0           $comment = '/* ' . $title . '*/';
176             }
177             elsif ($internal =~ /html$/) {
178 0           $comment = '<!-- ' . $title . ' -->';
179             }
180              
181 0           return "\n$comment\n"; # Just two blank lines if we don't know how to comment for the file type.
182             }
183              
184             # Force DuckPAN to ignore requests for certain files
185             # that are not needed (ie. d.js, s.js, g.js, post2.html)
186             sub change_js {
187 0     0 0   my ( $self, $js ) = @_;
188 0           $js =~ s!/([dsg]\d+?|duckduck|duckgo_dev)\.js\?!/?duckduckhack_ignore=1&!g;
189 0           $js =~ s!/post2\.html!/?duckduckhack_ignore=1&!g;
190 0           return $self->change_css($js);
191             }
192              
193             # Rewrite all relative asset links in CSS
194             # Capture leading quote, insert $hostname, append filename
195             # E.g url("/assets/background.png") => url("http://duckduckgo.com/assets/background.png")
196             sub change_css {
197 0     0 0   my ( $self, $css ) = @_;
198 0           my $hostname = $self->hostname;
199 0           $css =~ s!:\s*url\((["'])?/!:url\($1http://$hostname/!g;
200 0           return $css;
201             }
202              
203             sub change_html {
204 0     0 0   my ( $self, $html ) = @_;
205              
206 0           my $root = HTML::TreeBuilder->new;
207 0           $root->parse($html);
208              
209 0           my @a = $root->look_down(
210             "_tag", "a"
211             );
212              
213 0           my @link = $root->look_down(
214             "_tag", "link"
215             );
216              
217             # Make sure DuckPAN serves DDG CSS (already pulled down at startup)
218             # ie <link href="/s123.css"> becomes <link href="/?duckduckhack_css=1">
219             # Also rewrite relative links to hostname
220 0           my $has_css = 0;
221 0           for (@a, @link) {
222 0 0 0       if ($_->attr('type') && $_->attr('type') eq 'text/css') {
    0 0        
223             # We only want to load the CSS file once.
224             # We only load it once because /?duckduckhack_css=1 already has all of the CSS
225             # in a single page.
226 0 0         unless($has_css) {
227 0           $_->attr('href','/?duckduckhack_css=1');
228 0           $has_css = 1;
229             }
230             else {
231 0           $_->attr('href','/?duckduckhack_ignore=1');
232             }
233             }
234             elsif (defined $_->attr('href') && substr($_->attr('href'),0,1) eq '/') {
235 0           $_->attr('href','http://'.$self->hostname.''.$_->attr('href'));
236             }
237             }
238              
239 0           my @script = $root->look_down(
240             "_tag", "script"
241             );
242              
243             # Make sure DuckPAN serves DDG JS (already pulled down at startup)
244             # ie <link href="/d123.js"> becomes <link href="/?duckduckhack_js=1">
245             # Also rewrite relative links to hostname
246              
247             # Temp Fix: Force ignore of d.js & duckduck.
248             # This logic needs to be improved!
249              
250 0           my $has_ddh = 0;
251 0           for (@script) {
252 0 0         if (my $src = $_->attr('src')) {
253 0 0         next if ($src =~ m/^\/\?duckduckhack_/); # Already updated, no need to do again
254 0 0         if ($src =~ m/^\/(dpan\d+|duckpan)\.js/) {
    0          
    0          
    0          
    0          
255 0 0         if ($has_ddh){
256 0           $_->attr('src','/?duckduckhack_ignore=1');
257             }
258             else {
259 0           $_->attr('src','/?duckduckhack_js=1');
260 0           $has_ddh = 1;
261             }
262             }
263             elsif ($src =~ m/^\/(g\d+|serp)\.js/) {
264 0           $_->attr('src','/?duckduckhack_templates=1');
265             }
266             elsif ($src =~ m/^\/(d\d+|base)\.js/) {
267              
268             # If dpan.js is not present (ie. homepage)
269             # make sure we serve the js rather than blocking
270             # the call to d.js
271 0 0         if ($has_ddh){
272 0           $_->attr('src','/?duckduckhack_ignore=1');
273             }
274             else {
275 0           $_->attr('src','/?duckduckhack_js=1');
276             }
277             }
278             elsif ($src =~ /locales/) {
279 0           $_->attr('src','/?duckduckhack_locales=1');
280             }
281             elsif (substr($src,0,1) eq '/') {
282 0           $_->attr('src','http://'.$self->hostname.''.$_->attr('src'));
283             }
284             }
285             }
286              
287 0           my @img = $root->look_down(
288             "_tag", "img"
289             );
290              
291             # Rewrite img links to be requested from hostname
292 0           for (@img) {
293 0 0         if ($_->attr('src')) {
294 0           $_->attr('src','http://'.$self->hostname.''.$_->attr('src'));
295             }
296             }
297              
298 0           my $newhtml = $root->as_HTML;
299              
300 0           return $self->change_js($self->change_css($newhtml));
301             }
302              
303             # This is where we cache and check for newer versions
304             # of DDG JS and CSS by parsing the HTML requested from
305             # DuckDuckGo. If new files exits, we grab them, rewrite
306             # any links and store them in the cache. Otherwise we
307             # serve the current versions from the cache.
308              
309             sub get_sub_assets {
310 0     0 0   my ($self, $from) = @_;
311              
312 0           my $html = $from->{internal}->slurp;
313 0           my $root = HTML::TreeBuilder->new;
314 0           $root->parse($html);
315              
316 0           my @script = $root->look_down(
317             "_tag", "script"
318             );
319              
320 0           my @link = $root->look_down(
321             "_tag", "link"
322             );
323              
324 0           my $cache_path = $self->asset_cache_path;
325              
326             # Find version no. for d.js and g.js
327 0           for (@script) {
328 0 0         if (my $src = $_->attr('src')) {
329 0 0         if ($src =~ m/^\/((?:dpan\d+|duckpan)\.js)/) {
    0          
    0          
330 0           unshift @{$self->page_info->{js}},
  0            
331             {
332             name => 'Main JS',
333             internal => $cache_path->child($1),
334             external => $1
335             };
336             }
337             elsif ($src =~ m/^\/((?:g\d+|serp)\.js)/) {
338 0           unshift @{$self->page_info->{templates}},
  0            
339             {
340             name => 'Templating JS',
341             internal => $cache_path->child($1),
342             external => $1
343             };
344             }
345             elsif ($src =~ m/^\/(locales(?:.*)\.js)/) {
346 0           my $long_path = $1;
347 0           my $cache_name = $long_path;
348 0           $cache_name =~ s#^.+(\.\d+\.\d+\.js)#locales$1#g; # Turn long path into cacheable name
349 0           unshift @{$self->page_info->{locales}},
  0            
350             {
351             name => 'Locales JS',
352             internal => $cache_path->child($cache_name),
353             external => $long_path
354             };
355             }
356             }
357             }
358              
359 0           my @cssfile;
360 0 0         for (grep { $_->attr('type') && $_->attr('type') eq 'text/css' } @link) {
  0            
361 0 0         if (my $href = $_->attr('href')) {
362             # We're looking for txxx.css and sxxx.css.
363             # style.css and static.css are for development mode.
364 0 0         if ($href =~ m/^\/((?:[str]\d+|style|static|serp)\.css)/) {
365 0           my $name = $1;
366 0           push @cssfile, $name;
367             }
368             }
369             }
370 0           foreach (sort @cssfile) {
371 0           my $name = $_;
372 0           unshift @{$self->page_info->{css}},
  0            
373             {
374             name => $name . ' CSS',
375             internal => $cache_path->child($name),
376             external => $name
377             };
378             }
379              
380             # Check if we need to request any new assets from hostname, otherwise use cached copies
381 0 0         foreach my $curr_asset (grep { defined $_ && $_->{internal} } map { @{$self->page_info->{$_}} } (qw(js templates css locales))) {
  0            
  0            
  0            
382 0           $self->retrieve_and_cache($curr_asset, $from);
383             }
384             }
385              
386             sub retrieve_and_cache {
387 0     0 0   my ($self, $asset, $sub_of) = @_;
388              
389 0 0 0       return unless ($asset->{internal} && $asset->{external});
390              
391 0           my $to_file = $asset->{internal};
392 0 0         my $path_start = (substr($asset->{external}, 0, 1) eq '/') ? '' : '/';
393 0           my $url = 'https://' . $self->hostname . $path_start . $asset->{external};
394 0 0         my $prefix = ($sub_of) ? '[via ' . $sub_of->{name} . '] ' : '';
395 0           $prefix .= '[' . $asset->{name} . '] ';
396 0 0 0       if ($to_file->exists && (time - $to_file->stat->ctime) < $self->app->cachesec) {
397 0           $self->app->emit_debug($prefix . $to_file->basename . " recently cached -- no request made.");
398             }
399             else {
400 0           $self->app->emit_debug($prefix . 'requesting from: ' . $url . '...');
401 0           $to_file->remove;
402 0           $to_file->touchpath;
403 0           my ($expected_length, $bytes_received, $progress);
404 0           my $next_update = 0;
405             my $res = $self->app->http->request(
406             HTTP::Request->new(GET => $url),
407             sub {
408 0     0     my ($chunk, $res) = @_;
409 0           $bytes_received += length($chunk);
410 0           $to_file->append($chunk);
411 0   0       $expected_length //= $res->content_length || 0;
      0        
412 0 0         return unless $self->app->verbose; # Progress bar is just for verbose mode;
413 0 0 0       if ($expected_length && !defined($progress)) {
    0 0        
414 0           $progress = Term::ProgressBar->new({
415             name => $prefix,
416             count => $expected_length,
417             remove => 1,
418             ETA => 'linear',
419             fh => \*STDOUT,
420             });
421 0           $progress->minor(0);
422             }
423             elsif ($progress && $bytes_received > $next_update) {
424 0           $next_update = $progress->update($bytes_received);
425             }
426 0           });
427 0 0 0       if (!$res->is_success) {
    0          
428 0           $self->app->emit_and_exit(1, qq~$prefix request failed with response: ~ . $res->status_line . "\n");
429             }
430             elsif ($expected_length && $bytes_received < $expected_length) {
431 0           $to_file->remove;
432 0           $self->app->emit_and_exit(1, qq~$prefix only $bytes_received of $expected_length bytes received~);
433             }
434             else {
435 0 0 0       $progress->update($expected_length) if ($progress && $expected_length);
436 0           $self->app->emit_debug($prefix . 'written to cache: ' . $to_file);
437             }
438             }
439             # We need to load the assets on the SERPs for reuse.
440 0 0         if ($asset->{load_sub_assets}) {
441 0           $self->app->emit_debug($prefix . 'parsing for additional assets');
442 0           $self->get_sub_assets($asset);
443 0           $self->app->emit_debug($prefix . 'assets loaded');
444             }
445              
446 0           return;
447             }
448              
449             1;
450              
451             __END__
452              
453             =pod
454              
455             =head1 NAME
456              
457             App::DuckPAN::Cmd::Server - Starting up the web server to test instant answers
458              
459             =head1 VERSION
460              
461             version 1019
462              
463             =head1 AUTHOR
464              
465             DuckDuckGo <open@duckduckgo.com>, Zach Thompson <zach@duckduckgo.com>, Zaahir Moolla <moollaza@duckduckgo.com>, Torsten Raudssus <torsten@raudss.us> L<https://raudss.us/>
466              
467             =head1 COPYRIGHT AND LICENSE
468              
469             This software is Copyright (c) 2013 by DuckDuckGo, Inc. L<https://duckduckgo.com/>.
470              
471             This is free software, licensed under:
472              
473             The Apache License, Version 2.0, January 2004
474              
475             =cut