File Coverage

blib/lib/WARC/Record/Logical/Block.pm
Criterion Covered Total %
statement 165 166 99.4
branch 66 68 97.0
condition 28 29 96.5
subroutine 21 21 100.0
pod n/a
total 280 284 98.5


line stmt bran cond sub pod time code
1             package WARC::Record::Logical::Block; # -*- CPerl -*-
2              
3 2     2   58765 use strict;
  2         10  
  2         50  
4 2     2   8 use warnings;
  2         3  
  2         77  
5              
6             our @ISA = qw();
7              
8 2     2   342 use WARC; *WARC::Record::Logical::Block::VERSION = \$WARC::VERSION;
  2         4  
  2         97  
9              
10 2     2   10 use Carp;
  2         4  
  2         108  
11 2     2   11 use Fcntl qw/SEEK_SET SEEK_CUR SEEK_END/;
  2         3  
  2         94  
12              
13             # This implementation uses an array as the underlying object.
14              
15 2     2   9 use constant { PARENT => 0, SEGMENT => 1, HANDLE => 2 };
  2         4  
  2         121  
16 2     2   10 use constant OBJECT_INIT => undef, 0, undef;
  2         4  
  2         83  
17              
18             # Invariant: HANDLE is always valid if SEGMENT is within range.
19              
20 2     2   426 BEGIN { require WARC::Record::Logical; }
21             BEGIN { $WARC::Record::Logical::Block::{$_} = $WARC::Record::Logical::{$_}
22 2     2   775 for WARC::Record::Logical::SEGMENT_INDEX; }
23              
24             sub _dbg_dump {
25 62     62   3435 my $self = shift;
26              
27 62         94 my $out = 'logical record block';
28             $out .= ' @['.($self->[SEGMENT]).' / '
29 62         126 .($#{$self->[PARENT]{segments}}).']';
  62         189  
30 62 100       90 $out .= ' [EOF]' if $self->[SEGMENT] > $#{$self->[PARENT]{segments}};
  62         165  
31 62         83 $out .= "\n";
32 0         0 $out .= ' '.((tied *{$self->[HANDLE]})->_dbg_dump)
33 62 50 66     129 if $self->[HANDLE] && UNIVERSAL::can(tied *{$self->[HANDLE]}, '_dbg_dump');
  57         224  
34              
35 62         174 return $out;
36             }
37              
38             sub TIEHANDLE {
39 22     22   5556 my $class = shift;
40 22         31 my $parent = shift;
41              
42 22         43 my $ob = [OBJECT_INIT];
43 22         41 $ob->[PARENT] = $parent;
44 22         57 $ob->[HANDLE] = $parent->{segments}[0][SEG_REC]->open_block;
45              
46 22         3757 bless $ob, $class;
47             }
48              
49             # advance to next segment; return false if already at last segment
50             sub _next_segment {
51 79     79   184 my $self = shift;
52              
53 79 100       166 unless ($self->[PARENT]{segments}[1+$self->[SEGMENT]])
54 16         18 { $self->[SEGMENT]++; return 0 }
  16         52  
55              
56 63         124 close $self->[HANDLE];
57             $self->[HANDLE] =
58 63         245 $self->[PARENT]{segments}[++$self->[SEGMENT]][SEG_REC]->open_block;
59 63         8361 return $self->[HANDLE];
60             }
61              
62             sub READLINE {
63 63     63   3878 my $self = shift;
64              
65 63 100       130 if (wantarray) { # data slurp; we might run out of memory...
66 1         2 my @data = ();
67 1         9 while (defined(my $line = $self->READLINE())) { push @data, $line }
  4         9  
68             return @data
69 1         10 }
70              
71 62 100       81 return undef if $self->[SEGMENT] > $#{$self->[PARENT]{segments}};
  62         160  
72              
73 57 100       186 if (not defined $/) { # file slurp; we might run out of memory...
    100          
    100          
74 4         12 my $data = readline $self->[HANDLE];
75 4 100       11 $data = '' unless defined $data;
76 4         9 $data .= readline $self->[HANDLE] while $self->_next_segment;
77 4 100       23 return (length($data) ? $data : undef);
78             } elsif (ref $/) { # record read; fill a block...
79 2     2   15 use bytes;
  2         3  
  2         13  
80 10         13 my $rec_len = 0+${$/};
  10         15  
81 10         24 my $rec = readline $self->[HANDLE];
82 10 100       24 $rec = '' unless defined $rec;
83 10         22 while (length $rec < $rec_len) {
84 9         10 local $/ = \(do {$rec_len - length $rec});
  9         29  
85 9 100       15 last unless $self->_next_segment;
86 8         28 $rec .= readline $self->[HANDLE];
87             }
88 10 100       52 return (length ($rec) ? $rec : undef);
89             } elsif ($/ eq '') { # paragraph read; delimiter is empty line...
90 11         17 my $para = '';
91 11         36 my $input = readline $self->[HANDLE];
92              
93 11   100     1180 while (((defined $input and $para .= $input) or $self->_next_segment)
      100        
94             and ("\n\n" ne substr $para, -2))
95             # segment boundary was in the middle of a line, continuing read is safe
96 13         356 { $input = readline $self->[HANDLE] }
97             # paragraph delimiter may or may not span segments...
98 11 100       429 if (eof $self->[HANDLE]) {
99             # next segment may begin with newlines, if so, they must be read
100 6         104 my $ch; my $end_pos = $self->TELL;
  6         14  
101 6   100     50 $end_pos = $self->TELL, $para .= "\n"
102             while (defined ($ch = $self->GETC) and $ch eq "\n");
103 6         17 $self->SEEK($end_pos, SEEK_SET);
104 6 100       171 $input = defined($ch) ? '' : undef;
105             }
106 11 100       121 return (length ($para) ? $para : undef);
107             } else { # ordinary line read...
108 2     2   429 use bytes; # a line delimiter may be split on any octet boundary
  2         3  
  2         7  
109 32         41 my $line = '';
110 32         92 my $input = readline $self->[HANDLE];
111              
112             # read more data until we have a complete line or reach EOF
113 32   100     300 while (((defined $input and $line .= $input) or $self->_next_segment)
      100        
114             and ($/ ne substr $line, -length $/)) {
115 51 100       94 if (length $/ > 1) {
116             # each number N in this array indicates that a length-N prefix of
117             # $/ currently matches at the end of the line buffer
118             my @prefixes =
119 22         45 grep {substr($/, 0, $_) eq substr($line, -$_)} 1 .. length $/;
  128         221  
120 22         50 while (@prefixes) {
121 39         64 my $count = $self->READ($input, 1);
122 39 100       72 return $line unless $count;
123 38         43 $line .= $input;
124 38 100       89 return $line if $/ eq substr $line, -length $/;
125 34         50 unshift @prefixes, 0; @prefixes = # sieve prefixes
126 34         37 grep {++$_; substr($/, 0, $_) eq substr($line, -$_)} @prefixes;
  92         96  
  92         197  
127             } # loop iterates until no prefix of $/ matches the tail of $line
128             }
129              
130 46         196 $input = readline $self->[HANDLE];
131             }
132 27 100       57 return $input unless length $line;
133 25         112 return $line;
134             }
135             }
136              
137             # This sub must rely on the aliasing effect of @_.
138             sub READ {
139 70     70   321 my $self = shift;
140             # args now: 0: buffer 1: length 2: offset into buffer or undef
141 70         81 my $length = $_[1];
142 70   100     153 my $offset = $_[2] || 0;
143              
144 70 100       85 return 0 if $self->[SEGMENT] > $#{$self->[PARENT]{segments}};
  70         137  
145              
146 68         116 my $buf = ''; my $count = 1; my $bpos = 0;
  68         78  
  68         71  
147 68   100     150 while ($length && ($count || $self->_next_segment)) {
      100        
148 99         182 $count = read $self->[HANDLE], $buf, $length, $bpos;
149 99 50       682 return undef unless defined $count;
150 99         105 $length -= $count; $bpos += $count;
  99         181  
151             }
152              
153 68 100       111 $_[0] = '' unless defined $_[0];
154 68 100       113 $_[0] .= "\0" x ($offset - length($_[0])) if $offset > length $_[0];
155 68         105 substr $_[0], $offset, (length($_[0]) - $offset), $buf;
156 68         127 return $bpos;
157             }
158              
159             sub GETC {
160 22     22   320 my $self = shift;
161              
162 22         21 my $ch;
163 22 100       34 return undef unless $self->READ($ch, 1);
164 18         59 return $ch;
165             }
166              
167             sub EOF {
168 16     16   295 my $self = shift;
169              
170 16 100       25 return 1 if $self->[SEGMENT] > $#{$self->[PARENT]{segments}};
  16         83  
171 2 100       4 return 0 if $self->[SEGMENT] < $#{$self->[PARENT]{segments}};
  2         10  
172 1         4 return eof $self->[HANDLE];
173             }
174              
175             sub SEEK {
176 12     12   20 my $self = shift;
177 12         15 my $offset = shift;
178 12         16 my $whence = shift;
179              
180 12         22 my $segments = $self->[PARENT]{segments};
181              
182 12         13 my $npos;
183              
184 12 100       29 if ($whence == SEEK_SET) { $npos = $offset }
  8 100       10  
    100          
185 1         4 elsif ($whence == SEEK_CUR) { $npos = $self->TELL + $offset }
186             elsif ($whence == SEEK_END)
187 2         6 { $npos = ($segments->[-1][SEG_BASE]
188             + $segments->[-1][SEG_LENGTH] + $offset) }
189 1         220 else { croak "unknown WHENCE $whence in call to seek" }
190              
191             # This function must be able to seek backwards, even if the underlying
192             # handles cannot, to support paragraph reads. Seeking generally requires
193             # finding the new segment, switching to that segment, and seeking
194             # forwards to the desired offset within the new segment.
195              
196             # Special handling for seek to or past end-of-file
197 11 100       35 if ($npos >= ($segments->[-1][SEG_BASE] + $segments->[-1][SEG_LENGTH])) {
198 4         9 $self->[SEGMENT] = @$segments;
199 4         11 close $self->[HANDLE]; $self->[HANDLE] = undef;
  4         46  
200 4         77 return 1;
201             }
202              
203 7         10 my $new_segment_index = @$segments;
204 7         17 for (my $i = 0; $i < $new_segment_index; $i++) {
205 23 100 100     86 $new_segment_index = $i # which also exits this loop
206             if $npos >= $segments->[$i][SEG_BASE]
207             && $npos < ($segments->[$i][SEG_BASE] + $segments->[$i][SEG_LENGTH]);
208             }
209              
210 7         10 my $new_segment = $segments->[$new_segment_index];
211 7 100       19 return 0 unless $new_segment;
212              
213 6         9 $self->[SEGMENT] = $new_segment_index;
214 6         16 close $self->[HANDLE];
215 6         50 $self->[HANDLE] = $new_segment->[SEG_REC]->open_block;
216 6         2568 seek $self->[HANDLE], $npos - $new_segment->[SEG_BASE], SEEK_SET;
217             }
218              
219             sub TELL {
220 15     15   274 my $self = shift;
221              
222 15         20 my $segments = $self->[PARENT]{segments};
223              
224             return ($segments->[$self->[SEGMENT]][SEG_BASE] + (tell $self->[HANDLE]))
225 15 100       21 if $self->[SEGMENT] <= $#{$segments};
  15         67  
226 3         11 return ($segments->[-1][SEG_BASE] + $segments->[-1][SEG_LENGTH]);
227             }
228              
229             sub CLOSE {
230 1     1   3 my $self = shift;
231 1         1 $self->[SEGMENT] = 1+$#{$self->[PARENT]{segments}};
  1         4  
232 1         4 close $self->[HANDLE]; $self->[HANDLE] = undef;
  1         4  
233             }
234              
235             1;
236             __END__