File Coverage

blib/lib/File/RandomAccess.pm
Criterion Covered Total %
statement 106 224 47.3
branch 49 120 40.8
condition 9 36 25.0
subroutine 11 13 84.6
pod 9 11 81.8
total 184 404 45.5


line stmt bran cond sub pod time code
1             #------------------------------------------------------------------------------
2             # File: RandomAccess.pm
3             #
4             # Description: Buffer to support random access reading of sequential file
5             #
6             # Revisions: 02/11/2004 - P. Harvey Created
7             # 02/20/2004 - P. Harvey Added flag to disable SeekTest in new()
8             # 11/18/2004 - P. Harvey Fixed bug with seek relative to end of file
9             # 01/02/2005 - P. Harvey Added DEBUG code
10             # 01/09/2006 - P. Harvey Fixed bug in ReadLine() when using
11             # multi-character EOL sequences
12             # 02/20/2006 - P. Harvey Fixed bug where seek past end of file could
13             # generate "substr outside string" warning
14             # 06/10/2006 - P. Harvey Decreased $CHUNK_SIZE from 64k to 8k
15             # 11/23/2006 - P. Harvey Limit reads to < 0x80000000 bytes
16             # 11/26/2008 - P. Harvey Fixed bug in ReadLine when reading from a
17             # scalar with a multi-character newline
18             # 01/24/2009 - PH Protect against reading too much at once
19             # 10/04/2018 - PH Added NoBuffer option
20             # 01/20/2024 - PH Set ERROR on file read error
21             #
22             # Notes: Calls the normal file i/o routines unless SeekTest() fails, in
23             # which case the file is buffered in memory to allow random access.
24             # SeekTest() is called automatically when the object is created
25             # unless specified.
26             #
27             # May also be used for string i/o (just pass a scalar reference)
28             #
29             # Sets internal ERROR member from $! if there is an error reading
30             # the file.
31             #
32             # Legal: Copyright (c) 2003-2026, Phil Harvey (philharvey66 at gmail.com)
33             # This library is free software; you can redistribute it and/or
34             # modify it under the same terms as Perl itself.
35             #------------------------------------------------------------------------------
36              
37             package File::RandomAccess;
38              
39 113     113   734 use strict;
  113         319  
  113         7540  
40             require 5.002;
41             require Exporter;
42              
43 113     113   785 use vars qw($VERSION @ISA @EXPORT_OK);
  113         298  
  113         292166  
44             $VERSION = '1.13';
45             @ISA = qw(Exporter);
46              
47             sub Read($$$);
48              
49             # constants
50             my $CHUNK_SIZE = 8192; # size of chunks to read from file (must be power of 2)
51             my $SKIP_SIZE = 65536; # size to skip when fast-forwarding over sequential data
52             my $SLURP_CHUNKS = 16; # read this many chunks at a time when slurping
53              
54             #------------------------------------------------------------------------------
55             # Create new RandomAccess object
56             # Inputs: 0) reference to RandomAccess object or RandomAccess class name
57             # 1) file reference or scalar reference
58             # 2) flag set if file is already random access (disables automatic SeekTest)
59             sub new($$;$)
60             {
61 1751     1751 1 6292 my ($that, $filePt, $isRandom) = @_;
62 1751   33     9898 my $class = ref($that) || $that;
63 1751         3476 my $self;
64              
65 1751 100       5876 if (ref $filePt eq 'SCALAR') {
66             # string i/o
67 1020         6853 $self = {
68             BUFF_PT => $filePt,
69             BASE => 0,
70             POS => 0,
71             LEN => length($$filePt),
72             TESTED => -1,
73             };
74 1020         2794 bless $self, $class;
75             } else {
76             # file i/o
77 731         1904 my $buff = '';
78 731         6951 $self = {
79             FILE_PT => $filePt, # file pointer
80             BUFF_PT => \$buff, # reference to file data
81             BASE => 0, # location of start of buffer in file
82             POS => 0, # current position in buffer
83             LEN => 0, # length of data in buffer
84             TESTED => 0, # 0=untested, 1=passed, -1=failed (requires buffering)
85             };
86 731         2116 bless $self, $class;
87 731 100       3944 $self->SeekTest() unless $isRandom;
88             }
89 1751         6182 return $self;
90             }
91              
92             #------------------------------------------------------------------------------
93             # Enable DEBUG code
94             # Inputs: 0) reference to RandomAccess object
95             sub Debug($)
96             {
97 0     0 0 0 my $self = shift;
98 0         0 $self->{DEBUG} = { };
99             }
100              
101             #------------------------------------------------------------------------------
102             # Perform seek test and turn on buffering if necessary
103             # Inputs: 0) reference to RandomAccess object
104             # Returns: 1 if seek test passed (ie. no buffering required)
105             # Notes: Must be done before any other i/o
106             sub SeekTest($)
107             {
108 758     758 1 1827 my $self = shift;
109 758 100       3835 unless ($self->{TESTED}) {
110 730         1912 my $fp = $self->{FILE_PT};
111 730 50 33     11513 if (seek($fp, 1, 1) and seek($fp, -1, 1)) {
112 730         2885 $self->{TESTED} = 1; # test passed
113             } else {
114 0         0 $self->{TESTED} = -1; # test failed (requires buffering)
115             }
116             }
117 758 100       3090 return $self->{TESTED} == 1 ? 1 : 0;
118             }
119              
120             #------------------------------------------------------------------------------
121             # Get current position in file
122             # Inputs: 0) reference to RandomAccess object
123             # Returns: current position in file
124             sub Tell($)
125             {
126 9378     9378 1 17647 my $self = shift;
127 9378         14162 my $rtnVal;
128 9378 100       23329 if ($self->{TESTED} < 0) {
129 3791         7529 $rtnVal = $self->{POS} + $self->{BASE};
130             } else {
131 5587         14586 $rtnVal = tell($self->{FILE_PT});
132             }
133 9378         23909 return $rtnVal;
134             }
135              
136             #------------------------------------------------------------------------------
137             # Seek to position in file
138             # Inputs: 0) reference to RandomAccess object
139             # 1) position, 2) whence (0 or undef=from start, 1=from cur pos, 2=from end)
140             # Returns: 1 on success
141             # Notes: When buffered, this doesn't quite behave like seek() since it will return
142             # success even if you seek outside the limits of the file. However if you
143             # do this, you will get an error on your next Read().
144             sub Seek($$;$)
145             {
146 8636     8636 1 20447 my ($self, $num, $whence) = @_;
147 8636 100       19797 $whence = 0 unless defined $whence;
148 8636         13197 my $rtnVal;
149 8636 100       20323 if ($self->{TESTED} < 0) {
150 1380         2276 my $newPos;
151 1380 100 33     3884 if ($whence == 0) {
    100          
    50          
152 1052         2033 $newPos = $num - $self->{BASE}; # from start of file
153             } elsif ($whence == 1) {
154 233         573 $newPos = $num + $self->{POS}; # relative to current position
155             } elsif ($self->{NoBuffer} and $self->{FILE_PT}) {
156 0         0 $newPos = -1; # (can't seek relative to end if no buffering)
157             } else {
158 95         465 $self->Slurp(); # read whole file into buffer
159 95         215 $newPos = $num + $self->{LEN}; # relative to end of file
160             }
161 1380 50 33     6185 if ($newPos >= 0 and
      66        
162             # can't go backwards in unbuffered non-seekable file
163             (not $self->{NoBuffer} or $newPos >= $self->{POS}))
164             {
165 1367         2647 $self->{POS} = $newPos;
166 1367         2681 $rtnVal = 1;
167             }
168             } else {
169 7256         73003 $rtnVal = seek($self->{FILE_PT}, $num, $whence);
170             }
171 8636         44774 return $rtnVal;
172             }
173              
174             #------------------------------------------------------------------------------
175             # Read from the file
176             # Inputs: 0) reference to RandomAccess object, 1) buffer, 2) bytes to read
177             # Returns: Number of bytes read
178             sub Read($$$)
179             {
180 28925     28925 1 47128 my $self = shift;
181 28925         43775 my $len = $_[1];
182 28925         40488 my $rtnVal;
183              
184             # protect against reading too much at once
185             # (also from dying with a "Negative length" error)
186 28925 50       63254 if ($len & 0xf8000000) {
187 0 0       0 return 0 if $len < 0;
188             # read in smaller blocks because Windows attempts to pre-allocate
189             # memory for the full size, which can lead to an out-of-memory error
190 0         0 my $maxLen = 0x4000000; # (MUST be less than bitmask in "if" above)
191 0         0 my $num = Read($self, $_[0], $maxLen);
192 0 0       0 return $num if $num < $maxLen;
193 0         0 for (;;) {
194 0         0 $len -= $maxLen;
195 0 0       0 last if $len <= 0;
196 0 0       0 my $l = $len < $maxLen ? $len : $maxLen;
197 0         0 my $buff;
198 0         0 my $n = Read($self, $buff, $l);
199 0 0       0 last unless $n;
200 0         0 $_[0] .= $buff;
201 0         0 $num += $n;
202 0 0       0 last if $n < $l;
203             }
204 0         0 return $num;
205             }
206             # read through our buffer if necessary
207 28925 100       59598 if ($self->{TESTED} < 0) {
208             # purge old data before reading in NoBuffer mode
209 9187 50 0     21778 $self->Purge() or return 0 if $self->{NoBuffer};
210 9187         13895 my $buff;
211 9187         16410 my $newPos = $self->{POS} + $len;
212             # number of bytes to read from file
213 9187         16171 my $num = $newPos - $self->{LEN};
214 9187 50 66     23262 if ($num > 0 and $self->{FILE_PT}) {
215             # read data from file in multiples of $CHUNK_SIZE
216 0         0 $num = (($num - 1) | ($CHUNK_SIZE - 1)) + 1;
217 0         0 $num = read($self->{FILE_PT}, $buff, $num);
218 0 0       0 if ($num) {
    0          
219 0         0 ${$self->{BUFF_PT}} .= $buff;
  0         0  
220 0         0 $self->{LEN} += $num;
221             } elsif (not defined $num) {
222 0         0 $self->{ERROR} = $!;
223             }
224             }
225             # number of bytes left in data buffer
226 9187         15946 $num = $self->{LEN} - $self->{POS};
227 9187 100       18085 if ($len <= $num) {
    100          
228 8584         13256 $rtnVal = $len;
229             } elsif ($num <= 0) {
230 489         1026 $_[0] = '';
231 489         1867 return 0;
232             } else {
233 114         192 $rtnVal = $num;
234             }
235             # return data from our buffer
236 8698         12452 $_[0] = substr(${$self->{BUFF_PT}}, $self->{POS}, $rtnVal);
  8698         27298  
237 8698         16498 $self->{POS} += $rtnVal;
238             } else {
239             # read directly from file
240 19738 100       42743 $_[0] = '' unless defined $_[0];
241 19738         431672 $rtnVal = read($self->{FILE_PT}, $_[0], $len);
242 19738 50       47973 unless (defined $rtnVal) {
243 0         0 $self->{ERROR} = $!;
244 0         0 $rtnVal = 0;
245             }
246             }
247 28436 50       81293 if ($self->{DEBUG}) {
248 0         0 my $pos = $self->Tell() - $rtnVal;
249 0 0 0     0 unless ($self->{DEBUG}->{$pos} and $self->{DEBUG}->{$pos} > $rtnVal) {
250 0         0 $self->{DEBUG}->{$pos} = $rtnVal;
251             }
252             }
253 28436         92863 return $rtnVal;
254             }
255              
256             #------------------------------------------------------------------------------
257             # Read a line from file (end of line is $/)
258             # Inputs: 0) reference to RandomAccess object, 1) buffer
259             # Returns: Number of bytes read
260             sub ReadLine($$)
261             {
262 6841     6841 1 12078 my $self = shift;
263 6841         9985 my $rtnVal;
264 6841         13424 my $fp = $self->{FILE_PT};
265              
266 6841 100       16319 if ($self->{TESTED} < 0) {
267 123         251 my ($num, $buff);
268 123 50 0     372 $self->Purge() or return 0 if $self->{NoBuffer};
269 123         235 my $pos = $self->{POS};
270 123 50       303 if ($fp) {
271             # make sure we have some data after the current position
272 0         0 while ($self->{LEN} <= $pos) {
273 0         0 $num = read($fp, $buff, $CHUNK_SIZE);
274 0 0       0 unless ($num) {
275 0 0       0 defined $num or $self->{ERROR} = $!;
276 0         0 return 0;
277             }
278 0         0 ${$self->{BUFF_PT}} .= $buff;
  0         0  
279 0         0 $self->{LEN} += $num;
280             }
281             # scan and read until we find the EOL (or hit EOF)
282 0         0 for (;;) {
283 0         0 $pos = index(${$self->{BUFF_PT}}, $/, $pos);
  0         0  
284 0 0       0 if ($pos >= 0) {
285 0         0 $pos += length($/);
286 0         0 last;
287             }
288 0         0 $pos = $self->{LEN}; # have scanned to end of buffer
289 0         0 $num = read($fp, $buff, $CHUNK_SIZE);
290 0 0       0 unless ($num) {
291 0 0       0 defined $num or $self->{ERROR} = $!;
292 0         0 last;
293             }
294 0         0 ${$self->{BUFF_PT}} .= $buff;
  0         0  
295 0         0 $self->{LEN} += $num;
296             }
297             } else {
298             # string i/o
299 123         199 $pos = index(${$self->{BUFF_PT}}, $/, $pos);
  123         573  
300 123 100       334 if ($pos < 0) {
301 18         34 $pos = $self->{LEN};
302 18 50       70 $self->{POS} = $pos if $self->{POS} > $pos;
303             } else {
304 105         229 $pos += length($/);
305             }
306             }
307             # read the line from our buffer
308 123         242 $rtnVal = $pos - $self->{POS};
309 123         190 $_[0] = substr(${$self->{BUFF_PT}}, $self->{POS}, $rtnVal);
  123         410  
310 123         297 $self->{POS} = $pos;
311             } else {
312 6718         31571 $_[0] = <$fp>;
313 6718 100       14083 if (defined $_[0]) {
314 6696         10857 $rtnVal = length($_[0]);
315             } else {
316 22         57 $rtnVal = 0;
317             }
318             }
319 6841 50       16387 if ($self->{DEBUG}) {
320 0         0 my $pos = $self->Tell() - $rtnVal;
321 0 0 0     0 unless ($self->{DEBUG}->{$pos} and $self->{DEBUG}->{$pos} > $rtnVal) {
322 0         0 $self->{DEBUG}->{$pos} = $rtnVal;
323             }
324             }
325 6841         22577 return $rtnVal;
326             }
327              
328             #------------------------------------------------------------------------------
329             # Read whole file into buffer (without changing read pointer)
330             # Inputs: 0) reference to RandomAccess object
331             sub Slurp($)
332             {
333 95     95 1 224 my $self = shift;
334 95   50     374 my $fp = $self->{FILE_PT} || return;
335             # read whole file into buffer (in large chunks)
336 0         0 my ($buff, $num);
337 0         0 for (;;) {
338 0         0 $num = read($fp, $buff, $CHUNK_SIZE * $SLURP_CHUNKS);
339 0 0       0 unless ($num) {
340 0 0       0 defined $num or $self->{ERROR} = $!;
341 0         0 last;
342             }
343 0         0 ${$self->{BUFF_PT}} .= $buff;
  0         0  
344 0         0 $self->{LEN} += $num;
345             }
346             }
347              
348             #------------------------------------------------------------------------------
349             # Purge internal buffer [internal use only]
350             # Inputs: 0) reference to RandomAccess object
351             # Returns: 1 on success, or 0 if current buffer position is negative
352             # Notes: This is called only in NoBuffer mode
353             sub Purge($)
354             {
355 0     0 0 0 my $self = shift;
356 0 0       0 return 1 unless $self->{FILE_PT};
357 0 0       0 return 0 if $self->{POS} < 0; # error if we can't read from here
358 0 0       0 if ($self->{POS} > $CHUNK_SIZE) {
359 0         0 my $purge = $self->{POS} - ($self->{POS} % $CHUNK_SIZE);
360 0 0       0 if ($purge >= $self->{LEN}) {
    0          
361             # read up to current position in 64k chunks, discarding as we go
362 0         0 while ($self->{POS} > $self->{LEN}) {
363 0         0 $self->{BASE} += $self->{LEN};
364 0         0 $self->{POS} -= $self->{LEN};
365 0         0 ${$self->{BUFF_PT}} = '';
  0         0  
366 0         0 $self->{LEN} = read($self->{FILE_PT}, ${$self->{BUFF_PT}}, $SKIP_SIZE);
  0         0  
367 0 0       0 if (not defined $self->{LEN}) {
368 0         0 $self->{ERROR} = $!;
369 0         0 last;
370             }
371 0 0       0 last if $self->{LEN} < $SKIP_SIZE;
372             }
373             } elsif ($purge > 0) {
374 0         0 ${$self->{BUFF_PT}} = substr ${$self->{BUFF_PT}}, $purge;
  0         0  
  0         0  
375 0         0 $self->{BASE} += $purge;
376 0         0 $self->{POS} -= $purge;
377 0         0 $self->{LEN} -= $purge;
378             }
379             }
380 0         0 return 1;
381             }
382              
383             #------------------------------------------------------------------------------
384             # Set binary mode
385             # Inputs: 0) reference to RandomAccess object
386             sub BinMode($)
387             {
388 796     796 1 1817 my $self = shift;
389 796 100       4976 binmode($self->{FILE_PT}) if $self->{FILE_PT};
390             }
391              
392             #------------------------------------------------------------------------------
393             # Close the file and free the buffer
394             # Inputs: 0) reference to RandomAccess object
395             sub Close($)
396             {
397 516     516 1 1247 my $self = shift;
398              
399 516 50       2611 if ($self->{DEBUG}) {
400 0         0 local $_;
401 0 0       0 if ($self->Seek(0,2)) {
402 0         0 $self->{DEBUG}->{$self->Tell()} = 0; # set EOF marker
403 0         0 my $last;
404 0         0 my $tot = 0;
405 0         0 my $bad = 0;
406 0         0 foreach (sort { $a <=> $b } keys %{$self->{DEBUG}}) {
  0         0  
  0         0  
407 0         0 my $pos = $_;
408 0         0 my $len = $self->{DEBUG}->{$_};
409 0 0 0     0 if (defined $last and $last < $pos) {
410 0         0 my $bytes = $pos - $last;
411 0         0 $tot += $bytes;
412 0         0 $self->Seek($last);
413 0         0 my $buff;
414 0         0 $self->Read($buff, $bytes);
415 0         0 my $warn = '';
416 0 0       0 if ($buff =~ /[^\0]/) {
417 0         0 $bad += ($pos - $last);
418 0         0 $warn = ' - NON-ZERO!';
419             }
420 0         0 printf "0x%.8x - 0x%.8x (%d bytes)$warn\n", $last, $pos, $bytes;
421             }
422 0         0 my $cur = $pos + $len;
423 0 0 0     0 $last = $cur unless defined $last and $last > $cur;
424             }
425 0         0 print "$tot bytes missed";
426 0 0       0 $bad and print ", $bad non-zero!";
427 0         0 print "\n";
428             } else {
429 0         0 warn "File::RandomAccess DEBUG not working (file already closed?)\n";
430             }
431 0         0 delete $self->{DEBUG};
432             }
433             # close the file
434 516 100       2113 if ($self->{FILE_PT}) {
435 514         13839 close($self->{FILE_PT});
436 514         1978 delete $self->{FILE_PT};
437             }
438             # reset the buffer
439 516         1541 my $emptyBuff = '';
440 516         2009 $self->{BUFF_PT} = \$emptyBuff;
441 516         1547 $self->{BASE} = 0;
442 516         1465 $self->{LEN} = 0;
443 516         1881 $self->{POS} = 0;
444             }
445              
446             #------------------------------------------------------------------------------
447             1; # end