line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Archive::Heritrix; |
2
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
24591
|
use 5.008005; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
32
|
|
4
|
1
|
|
|
1
|
|
4
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
29
|
|
5
|
1
|
|
|
1
|
|
4
|
use warnings; |
|
1
|
|
|
|
|
5
|
|
|
1
|
|
|
|
|
33
|
|
6
|
1
|
|
|
1
|
|
4392
|
use Compress::Zlib; |
|
1
|
|
|
|
|
111531
|
|
|
1
|
|
|
|
|
343
|
|
7
|
1
|
|
|
1
|
|
11
|
use File::Find; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
78
|
|
8
|
1
|
|
|
1
|
|
7708
|
use HTTP::Response; |
|
1
|
|
|
|
|
39198
|
|
|
1
|
|
|
|
|
1856
|
|
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
our $VERSION = 0.02; |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
sub new { |
13
|
2
|
|
|
2
|
0
|
236
|
my ( $class, %arg ) = @_; |
14
|
2
|
|
|
|
|
8
|
my $self = bless {}, $class; |
15
|
|
|
|
|
|
|
|
16
|
2
|
50
|
66
|
|
|
17
|
if ( $arg{ 'file' } && $arg{ 'directory' } ) { |
17
|
0
|
|
|
|
|
0
|
die "file or directory, not both"; |
18
|
|
|
|
|
|
|
} |
19
|
|
|
|
|
|
|
|
20
|
2
|
|
|
|
|
5
|
my @files = (); |
21
|
|
|
|
|
|
|
|
22
|
2
|
100
|
|
|
|
9
|
if ( $arg{ 'file' } ) { |
23
|
1
|
50
|
|
|
|
29
|
if ( ! -f $arg{ 'file' } ) { |
24
|
0
|
|
|
|
|
0
|
die "no such file"; |
25
|
|
|
|
|
|
|
} |
26
|
1
|
|
|
|
|
5
|
@files = ( $arg{ 'file' } ); |
27
|
|
|
|
|
|
|
} |
28
|
2
|
100
|
|
|
|
14
|
if ( $arg{ 'directory' } ) { |
29
|
1
|
50
|
|
|
|
44
|
if ( ! -d $arg{ 'directory' } ) { |
30
|
0
|
|
|
|
|
0
|
die "no such directory"; |
31
|
|
|
|
|
|
|
} |
32
|
1
|
100
|
|
3
|
|
98
|
find( sub { push @files, $File::Find::name if $File::Find::name =~ /\.arc\.gz$/ }, $arg{ 'directory' } ); |
|
3
|
|
|
|
|
136
|
|
33
|
|
|
|
|
|
|
} |
34
|
|
|
|
|
|
|
|
35
|
2
|
|
|
|
|
15
|
$self->{ 'files' } = \@files; |
36
|
2
|
|
|
|
|
9
|
$self->next_file(); |
37
|
2
|
|
|
|
|
9
|
return $self; |
38
|
|
|
|
|
|
|
} |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
sub next_file { |
41
|
5
|
|
|
5
|
0
|
10
|
my $self = shift; |
42
|
5
|
|
|
|
|
9
|
my $f = shift @{ $self->{ 'files' } }; |
|
5
|
|
|
|
|
16
|
|
43
|
5
|
100
|
|
|
|
18
|
if ( ! $f ) { |
44
|
2
|
|
|
|
|
5
|
$self->{ '_fh' } = undef; |
45
|
2
|
|
|
|
|
5
|
return undef; |
46
|
|
|
|
|
|
|
} |
47
|
3
|
|
|
|
|
17
|
my $gz = gzopen( $f, 'rb' ); |
48
|
3
|
50
|
|
|
|
11936
|
return undef unless $gz; |
49
|
3
|
|
|
|
|
14
|
$self->{ '_fh' } = $gz; |
50
|
|
|
|
|
|
|
} |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
sub next_record { |
53
|
89
|
|
|
89
|
0
|
60741
|
my $self = shift; |
54
|
89
|
|
|
|
|
339
|
my $fh = $self->_fh(); |
55
|
|
|
|
|
|
|
|
56
|
89
|
50
|
|
|
|
272
|
if ( ! $fh ) { |
57
|
0
|
0
|
|
|
|
0
|
if ( $self->next_file() ) { |
58
|
0
|
|
|
|
|
0
|
$fh = $self->_fh(); |
59
|
|
|
|
|
|
|
} |
60
|
|
|
|
|
|
|
else { |
61
|
0
|
|
|
|
|
0
|
return undef; |
62
|
|
|
|
|
|
|
} |
63
|
|
|
|
|
|
|
} |
64
|
|
|
|
|
|
|
|
65
|
89
|
|
|
|
|
128
|
my $head; |
66
|
89
|
|
|
|
|
321
|
$fh->gzreadline( $head ); |
67
|
89
|
|
|
|
|
10181
|
chomp $head; |
68
|
89
|
|
|
|
|
1068
|
my ($url,$ip,$stamp,$type,$length) = $head =~ m/^(.+?) ([\d\.]+) (\d+) (\S+) (\d+)$/; |
69
|
89
|
100
|
|
|
|
323
|
if ( ! $url ) { |
70
|
3
|
|
|
|
|
14
|
$self->next_file(); |
71
|
3
|
|
|
|
|
10
|
$fh = $self->_fh(); |
72
|
3
|
100
|
|
|
|
46
|
return undef unless $fh; |
73
|
1
|
|
|
|
|
130
|
$fh->gzreadline( $head ); |
74
|
1
|
|
|
|
|
107
|
chomp $head; |
75
|
1
|
|
|
|
|
14
|
($url,$ip,$stamp,$type,$length) = $head =~ m/^(.+?) ([\d\.]+) (\d+) (\S+) (\d+)$/; |
76
|
|
|
|
|
|
|
} |
77
|
87
|
50
|
|
|
|
219
|
return undef unless $url; |
78
|
|
|
|
|
|
|
|
79
|
87
|
|
|
|
|
144
|
my $buf = undef; |
80
|
87
|
|
|
|
|
119
|
my $read = 0; |
81
|
87
|
|
|
|
|
112
|
my $l0 = undef; |
82
|
87
|
|
|
|
|
276
|
while ( $read <= $length ) { |
83
|
64780
|
|
|
|
|
89052
|
my $bbuf; |
84
|
64780
|
|
|
|
|
175793
|
$fh->gzreadline( $bbuf ); |
85
|
64780
|
|
66
|
|
|
6133682
|
$l0 ||= $bbuf; |
86
|
64780
|
|
|
|
|
93202
|
my $got = length( $bbuf ); |
87
|
64780
|
|
|
|
|
89125
|
$buf .= $bbuf; |
88
|
64780
|
|
|
|
|
153900
|
$read += $got; |
89
|
|
|
|
|
|
|
} |
90
|
87
|
|
|
|
|
1169
|
my ( $code, $msg ) = $l0 =~ m#(\d{3})\s+(.+?)[\r\n]*$#; |
91
|
87
|
|
|
|
|
716
|
my $res = HTTP::Response->parse( $buf ); |
92
|
87
|
|
|
|
|
60977
|
$res->{'_headers'}->content_length($length); |
93
|
87
|
|
|
|
|
3168
|
$res->{'_headers'}->referer($url); |
94
|
87
|
|
|
|
|
2891
|
$res->code( $code ); |
95
|
87
|
|
|
|
|
908
|
$res->message( $msg ); |
96
|
|
|
|
|
|
|
|
97
|
87
|
|
|
|
|
1065
|
return $res; |
98
|
|
|
|
|
|
|
} |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
sub _fh { |
101
|
92
|
|
|
92
|
|
306
|
return shift->{ '_fh' }; |
102
|
|
|
|
|
|
|
} |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
1; |
105
|
|
|
|
|
|
|
__END__ |