File Coverage

blib/lib/Array/LineReader.pm
Criterion Covered Total %
statement 101 104 97.1
branch 39 50 78.0
condition 13 20 65.0
subroutine 17 17 100.0
pod n/a
total 170 191 89.0


line stmt bran cond sub pod time code
1             package Array::LineReader;
2            
3 4     4   45547 use 5.005;
  4         14  
  4         161  
4 4     4   20 use strict;
  4         8  
  4         141  
5 4     4   22 use warnings;
  4         10  
  4         135  
6 4     4   20 use Carp;
  4         5  
  4         416  
7 4     4   3643 use IO::File;
  4         56198  
  4         798  
8 4     4   4764 use Tie::Array;
  4         6297  
  4         154  
9             require DynaLoader;
10            
11 4     4   28 use vars qw( @ISA $VERSION );
  4         9  
  4         349  
12             @ISA = qw(Tie::Array);
13            
14             =head1 NAME
15            
16             Array::LineReader - Access lines of a file via an array
17            
18             =head1 SYNOPSIS
19            
20             use Array::LineReader;
21             my @lines;
22            
23             # Get the content of every line as an element of @lines:
24             tie @lines, 'Array::LineReader', 'filename';
25             print scalar(@lines); # number of lines in the file
26             print $lines[0]; # content of the first line
27             print $lines[-1]; # content of the last line
28             ...
29            
30             # Get the offset and content of every line as array reference via the elements of @lines:
31             tie @lines, 'Array::LineReader', 'filename', result=>[];
32             print scalar(@lines); # number of lines in the file
33             print $lines[5]->[0],":",$lines[5]->[1]; # offset and content of the 5th line
34             print $lines[-1]->[0],":",$lines[-1]->[1]; # offset and content of the last line
35             ...
36            
37             # Get the offset and content of every line as hash reference via the elements of @lines:
38             tie @lines, 'Array::LineReader', 'filename', result=>{};
39             print scalar(@lines); # number of lines in the file
40             print $lines[4]->{OFFSET},":",$lines[4]->{CONTENT}; # offset and content of the 4th line
41             print $lines[-1]->{OFFSET},":",$lines[-1]->{CONTENT}; # offset and content of the last line
42             ...
43            
44             =cut
45            
46             BEGIN {
47 4     4   12445 $VERSION = sprintf("%d.%02d", q$Revision: 1.1 $ =~ /(\d+)\.(\d+)/);
48             }
49            
50             =head1 VERSION and VOLATILITY
51            
52             $Revision: 1.1 $
53             $Date: 2004/06/10 18:17:23 $
54            
55             =head1 DESCRIPTION
56            
57             Array::LineReader gives you the possibility to access lines of some file by the
58             elements of an array.
59             This modul inherites methods from C (see L).
60             You save a lot of memory, because the file's content is read only on demand, i.e. in
61             the case you access an element of the array. The offset and length of all the lines
62             is hold in memory as long as you tie your array.
63            
64             The underlying file is opened for reading in binary mode.
65             (Yes, there are some OSs, that make a difference in
66             interpreting the C-sequence, i.e. C and the C-character, i.e.
67             C what is the character C<"\x1A">).
68             The bytes read are neigther translated nor suppressed.
69            
70             Lines are build up to and including the C-sequence.
71             The C-sequence is assumed to be C<"\x0D\x0A"> or C<"\x0A\x0D"> or C<"\x0D"> or
72             C<"\x0A">.
73            
74             The file is not closed until you C the array.
75            
76             It's up to you to define the kind of access:
77            
78             =head2 Access content by element
79            
80             tie @lines, 'Array::LineReader', 'filename';
81            
82             You get the content of every line of the file by the elements of the array C<@lines>:
83            
84             print "@lines";
85            
86             =head2 Access offset and content by array references
87            
88             tie @lines, 'Array::LineReader', 'filename', result=>[];
89            
90             You get offset and content of every line of the file via the elements of the array C<@lines>:
91            
92             foreach (@lines){
93             print $_->[0],":"; # offset
94             print $_->[1]; # content
95             }
96            
97             =head2 Access offset and content by hash references
98            
99             tie @lines, 'Array::LineReader', 'filename', result=>{};
100            
101             You get offset and content of every line of the file via the elements of the array C<@lines>:
102            
103             foreach (@lines){
104             print $_->{OFFSET},":"; # offset
105             print $_->{CONTENT}; # content
106             }
107            
108             =head1 METHODS
109            
110             =head2 TIEARRAY
111            
112             =over 4
113            
114             =item TIEARRAY Create this class
115            
116             Overwrites the method C of C (see L). You never should have to call it.
117             It is used to create this class.
118            
119             This method croaks if the C is missing.
120             It croaks too if the additional parameters do not have the form of a hash,
121             i.e. have an odd number.
122            
123             The file is opened in binary mode for reading only. If the file does not exist or can not
124             be opened you will get an emtpy array without a warning.
125            
126             The offset of every line and its length is hold in arrays corresponding to the lines of the file.
127             The content of a given line is read only if you access the corresponding element of the tied array.
128            
129             =back
130            
131             =cut
132            
133             sub TIEARRAY{
134 18     18   2577 my $class = shift;
135 18 100       313 croak "Usage: tie \@lines, 'Array::LineReader', 'filename' [, result=>type_of_result]\n" unless @_;
136 17         33 my $filename = shift;
137 17         30 my $self = {};
138 17         71 $self->{PARMS} = {result=>''};
139 17 100       264 croak "Odd number of parameters to be used in a hash!\n" unless scalar(@_) % 2 == 0;
140 16 100       64 $self->{PARMS} = {@_} if @_; # carps if odd number of parameters
141 16         40 $self->{FH} = undef;
142 16 100       415 if (-f $filename){
143 15         152 $self->{FH} = new IO::File;
144 15 50       667 $self->{FH}->open($filename) or $self->{FH} = undef;
145 15 50       798 binmode($self->{FH}) if $self->{FH};
146             }
147 16         53 $self->{OFFSETS} = [0];
148 16         45 $self->{LENGTHS} = [0];
149 16 100       50 $self->{EOF} = 1 unless $self->{FH};
150 16         102 bless $self, $class;
151             }
152            
153             =head2 FETCHSIZE
154            
155             =over 4
156            
157             =item FETCHSIZE Define the size of the tied array.
158            
159             Overwrites the method C of C (see L). You never should have to call it.
160             This method is called any time the size of the underlying array has to be defined.
161            
162             The size of the tied array is defined to be the number of B.
163            
164             Lets have an example:
165            
166             tie @lines, 'Array::LineReader', 'filename';
167             $line5 = $lines[4]; # access the 5th line.
168             print scalar(@lines); # prints: 5
169             $lastline = $lines[-1]; # access the last line of the file
170             print scalar(@lines); # prints: number of lines in the file
171            
172             =back
173            
174             =cut
175            
176             sub FETCHSIZE{
177 31     31   5242 my $self = shift;
178 31         43 my $index = $#{$self->{OFFSETS}}; # use current number of elements
  31         76  
179 31         104 while (!$self->{EOF}){ $self->_readIt($index++); } # read until EOF
  923         2458  
180 31         224 return $index;
181             }
182            
183             =head2 FETCH
184            
185             =over 4
186            
187             =item FETCH access a specified element of the tied array
188            
189             Overwrites the method C of C (see L). You never should have to call it.
190             This method is called any time you want to access a given element of the tied array.
191            
192             If you access an already known element, the offset of the line to read is sought and the
193             line is read with the already known length.
194             If you access a not yet known element, the file is read up to the corresponding line.
195            
196             Lets have an example:
197            
198             tie @lines, 'Array::LineReader', 'filename';
199             foreach (@lines){
200             print $_; # access one line after the other
201             }
202             ...
203             print $lines[5]; # seeks the offset of the 6th line and reads it
204            
205             You should use the tie command with additional parameter defining the type of the result,
206             if you want to have access not only to the content of a line but also to its offset.
207            
208             tie @lines, 'Array::LinesReader', 'filename', result=>{};
209             print $lines[8]->{OFFSET}; # Offset of the 9th line.
210             print $lines[8]->{CONTENT}; # Content of the 9th line.
211            
212             or to get the offset and content by reference to an array:
213            
214             tie @lines, 'Array::LinesReader', 'filename', result=>[];
215             print $lines[8]->[0]; # Offset of the 9th line.
216             print $lines[8]->[1]; # Content of the 9th line.
217            
218             =back
219            
220             =cut
221            
222             sub FETCH{
223 1240     1240   3495 my ($self,$index) = @_;
224             # $index = scalar(@{$self->{OFFSETS}}) + $index if $index < 0; # correct negative index
225             # croak "Array index out of bounds" if $index < 0;
226 1240 100       1224 if ($index > $#{$self->{OFFSETS}}){
  1240         3076  
227 11         33 $self->_readUpTo($index);
228 11         16 $index = $#{$self->{OFFSETS}};
  11         25  
229             }
230 1240         2321 my $out = $self->_readIt($index);
231 1240         3466 for (ref $self->{PARMS}->{result}){
232 1240 100       5301 /^HASH$/ && return {OFFSET=>$self->{OFFSETS}->[$index],CONTENT=>$out};
233 715 100       3780 /^ARRAY$/ && return [$self->{OFFSETS}->[$index],$out];
234             }
235 190         778 return $out;
236             }
237            
238             =head2 DESTROY
239            
240             =over 4
241            
242             =item DESTROY
243            
244             Overwrites the method C of C (see L).
245             Closes the file to free some memory.
246            
247             =back
248            
249             =cut
250            
251             sub DESTROY{
252 15     15   2919 my $self = shift;
253 15 100       1024 close $self->{FH} if $self->{FH};
254             }
255            
256             =head2 EXISTS
257            
258             =over 4
259            
260             =item EXISTS
261            
262             Overwrites the method C of C (see L).
263             Returns true if the array's element was already read.
264            
265             =back
266            
267             =cut
268            
269             sub EXISTS{
270 2     2   231 my ($self, $index) = @_;
271 2   66     13 return ($index >= 0) && ($index < $self->FETCHSIZE());
272             }
273            
274 4     4   763 sub _readOnly{ croak "The tied array is readonly and can not be modified in any way\n"; }
275             *STORE = \&_readonly;
276             *STORESIZE = \&_readOnly;
277             *CLEAR = \&_readOnly;
278             *POP = \&_readOnly;
279             *PUSH = \&_readOnly;
280             *SHIFT = \&_readOnly;
281             *UNSHIFT = \&_readOnly;
282             *DELETE = \&_readOnly;
283             *EXTEND = \&_readOnly;
284            
285             sub _readIt{
286 2358     2358   2842 my ($self,$index) = @_;
287 2358         2557 my $out = undef;
288 2358 50       2278 die "Invalid call of privat method _readIt" if $index > $#{$self->{OFFSETS}};
  2358         5507  
289 2358 50       5409 return $out unless $self->{FH};
290 2358         29132 seek($self->{FH}, $self->{OFFSETS}->[$index], 0);
291 2358 100       2481 if ($index == $#{$self->{OFFSETS}}){
  2358         13233  
292 1133 100       3471 $self->{EOF} or $out = $self->_readLine();
293 1133 100       2335 if (defined $out){
294 1130         1972 $self->{LENGTHS}->[-1] = length($out);
295 1130         1192 push(@{$self->{OFFSETS}},tell($self->{FH}));
  1130         2737  
296 1130         1586 push(@{$self->{LENGTHS}},0);
  1130         2145  
297             }
298             }else{
299 1225         2689 $out = $self->_readLine($self->{LENGTHS}->[$index]);
300             }
301 2358         18870 return $out;
302             }
303            
304             sub _readUpTo{
305 11     11   22 my ($self,$index) = @_;
306 11 50       36 return unless $self->{FH};
307 11 100       36 return if $self->{EOF};
308 9 50       15 die "Invalid call of privat method _readUpTo" if $index <= $#{$self->{OFFSETS}};
  9         33  
309 9         13 for (my $idx = $#{$self->{OFFSETS}}; $idx < $index; $idx++){
  9         36  
310 195         345 $self->_readIt($idx);
311 195 50       665 last if $self->{EOF};
312             }
313             }
314            
315             sub _readLine{
316 2355     2355   2690 my $self = shift;
317 2355   100     6206 my $length = shift || 0;
318 2355         2911 my $fh = $self->{FH};
319 2355         2773 my $line = "";
320 2355 100 66     6735 $length && $fh->read($line, $length) && return $line; # read $length byte if requested
321            
322 1130         1265 my $c = 0;
323 1130   66     2815 while (defined ($c = $fh->getc) && $c !~ /^[\x0A\x0D]$/){
324 37134         360971 $line .= $c;
325             }
326 1130 50       11702 if (!defined $c){
327 0         0 $self->{EOF} = 1;
328 0 0       0 return (length($line)) ? $line : $c;
329             }
330 1130         1338 $line .= $c;
331 1130         2629 my $nl = $fh->getc;
332 1130 100 66     15537 if (defined $nl && ($nl eq $c || $nl !~ /^[\x0A\x0D]$/)){
    50 66        
      33        
333 1116         3260 $fh->ungetc(ord($nl));
334             }elsif(defined $nl && $nl ne $c){
335 0         0 $line .= $nl;
336             }else{
337 14         794 $self->{EOF} = 1;
338             }
339 1130         3076 return $line;
340             }
341            
342            
343             1;
344             __END__