line |
true |
false |
branch |
108
|
0 |
2 |
unless $Log::Log4perl::Logger::INITIALIZED
|
195
|
2 |
71 |
if ($content // '') eq ''
|
203
|
1 |
70 |
if ($content_len >= 3 and substr($content, 0, 1) eq "\357" and substr($content, 1, 1) eq "\273" and substr($content, 2, 1) eq "\277") { }
|
|
1 |
69 |
elsif ($content_len >= 2 and substr($content, 0, 1) eq "\377" and substr($content, 1, 1) eq "\376") { }
|
|
1 |
136 |
elsif ($content_len >= 2 and substr($content, 0, 1) eq "\376" and substr($content, 1, 1) eq "\377") { }
|
227
|
0 |
71 |
($content_type // '') ne '' && lc($content_type // '') =~ m[^text/html] ? :
|
231
|
3 |
68 |
if ($is_html_type or ($content // '') =~ /$SIMPLE_HTML_PATTERN/)
|
232
|
1 |
2 |
if (not +($content // '') =~ /$USER_AGENT_PATTERN/) { }
|
240
|
0 |
2 |
if ($is_html_type) { }
|
263
|
30 |
1805 |
if $has_html
|
266
|
1799 |
36 |
if (my $hash_idx = index($line, '#'))
|
267
|
13 |
1786 |
if $hash_idx >= 0
|
272
|
274 |
1561 |
if length $line == 0
|
276
|
923 |
638 |
if ($robot_token->directive->is_user_agent)
|
281
|
494 |
144 |
if ($robot_token->directive->is_disallow)
|
286
|
57 |
87 |
if ($robot_token->directive->is_allow)
|
291
|
17 |
70 |
if ($robot_token->directive->is_crawl_delay)
|
296
|
19 |
51 |
if ($robot_token->directive->is_sitemap)
|
301
|
1 |
50 |
if ($robot_token->directive->is_http)
|
306
|
13 |
37 |
if ($robot_token->directive->is_missing)
|
319
|
26 |
11 |
if ($robot_token->directive->is_unknown)
|
335
|
1 |
69 |
if ($robot_rules->crawl_delay > $MAX_CRAWL_DELAY) { }
|
354
|
138 |
413 |
if $state->is_skip_agents
|
358
|
148 |
265 |
unless $state->is_adding_rules
|
364
|
11 |
254 |
if (length $path == 0) { }
|
386
|
5 |
12 |
if $state->is_skip_agents
|
390
|
3 |
9 |
unless $state->is_adding_rules
|
410
|
1 |
0 |
if (index $url_fragment, 'sitemap') { }
|
441
|
20 |
0 |
if $host ne ''
|
451
|
121 |
802 |
if ($state->is_matched_real_name)
|
452
|
41 |
80 |
if $state->is_finished_agent_fields
|
456
|
62 |
740 |
if ($state->is_finished_agent_fields)
|
465
|
39 |
1323 |
if ($agent_name eq '*' and not $state->is_matched_wildcard) { }
|
|
1315 |
8 |
elsif ($agent_name ne '') { }
|
471
|
33 |
1307 |
if (index($target_name_split, $agent_name) == 0)
|
488
|
21 |
18 |
if $warning_count == 1
|
491
|
34 |
5 |
if $warning_count < $MAX_WARNINGS
|
503
|
1522 |
39 |
if ($directive =~ /^acap\-/ or 'CrawlerCommons::RobotDirective'->directive_exists($directive)) { }
|
508
|
10 |
1512 |
unless defined $data
|
522
|
2 |
1520 |
$directive =~ /^acap-/i ? :
|
531
|
26 |
13 |
$lower_line =~ /[ \t]*:[ \t]*(.*)/ ? :
|