File Coverage

blib/lib/PDL/DiskCache.pm
Criterion Covered Total %
statement 69 126 54.7
branch 23 72 31.9
condition 4 9 44.4
subroutine 11 14 78.5
pod 3 4 75.0
total 110 225 48.8


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             PDL::DiskCache -- Non-memory-resident array object
4              
5             =head1 SYNOPSIS
6              
7             NON-OO:
8              
9             use PDL::DiskCache;
10             tie @a,'PDL::DiskCache', \@files, \%options;
11             imag $a[3];
12              
13             OO:
14              
15             use PDL::DiskCache;
16             $x = diskcache(\@files,\%options);
17             imag $x->[3];
18              
19             or
20              
21             use PDL::DiskCache;
22             $x = PDL::DiskCache->new(\@files,\%options);
23             imag $x->[4];
24              
25             =over 3
26              
27             =item \@files
28              
29             an array ref containing a list of file names
30              
31             =item \%options
32              
33             a hash ref containing options for the PDL::DiskCache object (see "TIEARRAY"
34             below for details)
35              
36             =back
37              
38             =head1 DESCRIPTION
39              
40             A PDL::DiskCache object is a perl L<"tied array"|perltie> that is useful
41             for operations where you have to look at a large collection of PDLs one
42             or a few at a time (such as tracking features through an image sequence).
43             You can write prototype code that uses a perl list of a few PDLs, then
44             scale up to to millions of PDLs simply by handing the prototype code
45             a DiskCache tied array instead of a native perl array. The individual
46             PDLs are stored on disk and a few of them are swapped into memory on a
47             FIFO basis. You can set whether the data are read-only or writeable.
48              
49             By default, PDL::DiskCache uses FITS files to represent the PDLs, but
50             you can use any sort of file at all -- the read/write routines are the
51             only place where it examines the underlying data, and you can specify
52             the routines to use at construction time (or, of course, subclass
53             PDL::DiskCache).
54              
55             Items are swapped out on a FIFO basis, so if you have 10 slots
56             and an expression with 10 items in it then you're OK (but you probably
57             want more slots than that); but if you use more items in an expression than
58             there are slots, thrashing will occur!
59              
60             The hash ref interface is kept for historical reasons; you can access
61             the sync() and purge() method calls directly from the returned array ref.
62              
63             =head1 Shortcomings & caveats
64              
65             There's no file locking, so you could really hose yourself by having two of
66             these things going at once on the same files.
67              
68             Since this is a tied array, things like Dumper traverse it transparently.
69             That is sort-of good but also sort-of dangerous. You wouldn't want to
70             PDL::Dumper::sdump() a large PDL::DiskCache, for example -- that would defeat
71             the purpose of using a PDL::DiskCache in the first place.
72              
73             =head1 Author, license, no warranty
74              
75             Copyright 2001, Craig DeForest
76              
77             This code may be distributed under the same terms as Perl itself
78             (license available at L). Copying, reverse engineering,
79             distribution, and modification are explicitly allowed so long as this notice
80             is preserved intact and modified versions are clearly marked as such.
81              
82             If you modify the code and it's useful, please send a copy of the modified
83             version to cdeforest@solar.stanford.edu.
84              
85             This package comes with NO WARRANTY.
86              
87             =head1 FUNCTIONS
88              
89             =cut
90              
91             package PDL::DiskCache;
92 1     1   143517 use strict;
  1         3  
  1         68  
93 1     1   7 use warnings;
  1         3  
  1         75  
94 1     1   9 use Carp;
  1         2  
  1         103  
95 1     1   7 use base qw(Exporter);
  1         2  
  1         2546  
96              
97             our $VERSION = 1.1;
98             our @EXPORT = qw(
99             diskcache
100             );
101              
102             =head2 diskcache
103              
104             Object constructor.
105              
106             =for usage
107              
108             $x = diskcache(\@f,\%options);
109              
110             Options
111              
112             =over 3
113              
114             =item
115              
116             See the TIEARRAY options, below.
117              
118             =back
119              
120             =cut
121              
122 2     2 1 264812 sub diskcache { __PACKAGE__->new(@_) }
123              
124             sub new {
125 2     2 0 224 my($class,$f,$opt) = @_;
126 2         11 my($x)=[];
127              
128 2         5 my($y) = tie @{$x},$class,$f,$opt;
  2         14  
129 2 50       9 if($opt->{bless}) {
130 0         0 $x = bless($x,$class);
131             }
132              
133 2 50       8 if(wantarray) {
134 2         14 return ($x,bless($y,$class));
135             } else {
136 0         0 return $x;
137             }
138             }
139              
140             =head2 TIEARRAY
141              
142             =for ref
143              
144             Tied-array constructor; invoked by perl during object construction.
145              
146             =for usage
147              
148             TIEARRAY(class,\@f,\%options)
149              
150             Options
151              
152             =over 3
153              
154             =item ro (default 0)
155              
156             If set, treat the files as read-only (modifications
157             to the tied array will only persist until the changed elements are
158             swapped out)
159              
160             =item rw (default 1)
161              
162             If set, allow reading and writing to the files.
163             Because there's currently no way to determine reliably whether a PDL
164             has been modified, rw files are always written to disk when they're
165             swapped out -- this causes a slight performance hit.
166              
167             =item mem (default 20)
168              
169             Number of files to be cached in memory at once.
170              
171             =item read (default \&rfits)
172              
173             A function ref pointing to code that will read
174             list objects from disk. The function must have the same syntax as
175             rfits: $object = rfits(filename).
176              
177             =item write (default \&wfits)
178              
179             A function ref pointing to code that will
180             write list objects to disk. The function must have the same syntax as
181             wfits: func(object,filename).
182              
183             =item bless (default 0)
184              
185             If set to a nonzero value, then the array ref gets
186             blessed into the DiskCache class for for easier access to the "purge"
187             and "sync" methods. This means that you can say C<< $x->sync >> instead
188             of the more complex C<< (%{tied @$x})->sync >>, but C will return
189             "PDL::DiskCache" instead of "ARRAY", which could break some code.
190              
191             =item verbose (default 0)
192              
193             Get chatty.
194              
195             =back
196              
197             =cut
198              
199             sub TIEARRAY {
200 2     2   6 my($class,$f,$opt) = @_;
201 2 50       12 croak "PDL::DiskCache needs array ref as 2nd arg (did you pass an array instead?)\n"
202             if(ref $f ne 'ARRAY');
203             my($new) = {files => $f # File list
204 2         57 , n => scalar(@{$f}) # no. of el.
205             , write => $opt->{write} || \&main::wfits # Write routine
206             , read => $opt->{read} || \&main::rfits # Read routine
207             , mem => $opt->{mem} || 20 # No. of mem slots
208 2   50     4 , rw => (!($opt->{ro})) # rw or ro
      50        
      50        
209             , fdex => [] # Current file stored in each slot, by slot
210             , slot => [] # Current slot in which each file is stored
211             , cache => [] # Actual cached stuff gets held here
212             , opt => {} # Options stashed here for later reference
213             , cache_next => 0 # Next cache slot to be used
214             };
215 2         10 @{$new->{opt}}{keys %$opt} = values %$opt;
  2         8  
216 2         10 return bless($new,$class);
217             }
218              
219             =head2 purge
220              
221             Remove an item from the oldest slot in the cache, writing to disk as necessary.
222             You also send in how many slots to purge (default 1; sending in -1 purges
223             everything.)
224              
225             For most uses, a nice MODIFIED flag in the data structure could save
226             some hassle here. But PDLs can get modified out from under us
227             with slicing and .= -- so for now we always assume everything is tainted
228             and must be written to disk.
229              
230             =cut
231              
232             sub purge {
233 0     0 1 0 my($me,$n) = @_,1;
234 0 0       0 $me = (tied @{$me}) if("$me" =~ m/^PDL\:\:DiskCache\=ARRAY/);
  0         0  
235              
236 0 0       0 $n = $me->{mem} if($n<0);
237            
238 0 0       0 print "purging $n items..." if($me->{opt}->{verbose});
239              
240 0         0 my($dex) = $me->{cache_next};
241              
242 0         0 local($_);
243 0         0 for(1..$n) {
244 0 0       0 if($me->{rw}) {
245             print "writing $me->{files}->[$me->{fdex}->[$dex]]: "
246 0 0       0 if($me->{opt}->{verbose});
247              
248 0         0 eval {&{$me->{write}}($me->{cache}->[$dex],
  0         0  
249 0         0 $me->{files}->[$me->{fdex}->[$dex]]);
250             };
251 0 0       0 print "WARNING: PDL::DiskCache::purge: problems with write of ".$me->{files}->[$me->{fdex}->[$dex]].", item $me->{fdex}->[$dex] from slot $dex: `$@' (".$me->{opt}->{varname}.") \n" if($@);
252 0         0 $@ = 0;
253              
254 0 0       0 print "ok.\n" if($me->{opt}->{verbose});
255             }
256            
257              
258 0 0       0 print "Purging item $dex (file $me->{fdex}->[$dex])...\n" if($me->{opt}->{verbose});
259 0         0 undef $me->{slot}->[$me->{fdex}->[$dex]]; # Purge from slot location list
260 0         0 undef $me->{fdex}->[$dex]; # Purge from slot fdex list
261 0         0 undef $me->{cache}->[$dex]; # Purge from memory
262              
263 0         0 $dex++;
264 0         0 $dex %= $me->{mem};
265             }
266 0 0       0 print "...done with purge.\n" if($me->{opt}->{verbose});
267             }
268              
269             sub FETCH {
270 2     2   437 my($me,$i) = @_;
271              
272 2 50 33     18 if($i < 0 || $i >= $me->{n}) {
273 0         0 carp("PDL::DiskCache: Element $i is outside range of 0-",$me->{n}-1,"\n");
274 0         0 return undef;
275             }
276              
277 2 50       9 if(defined $me->{slot}->[$i]) {
278 0 0       0 print "Item $i is in the cache...\n" if ($me->{opt}->{verbose});
279 0         0 return ($me->{cache}->[$me->{slot}->[$i]]);
280             }
281            
282             ### Got here -- we have to get the item from disk.
283              
284             print "Item $i ($me->{files}->[$i]) not present. Retrieving..."
285 2 50       8 if($me->{opt}->{verbose});
286            
287 2 50       9 if(defined($me->{fdex}->[$me->{cache_next}])) {
288 0 0       0 print "cache full..." if($me->{opt}->{verbose});
289              
290 0         0 $me->purge(1);
291             }
292            
293 2         5 my($x) = $me->{cache_next};
294 2         7 $me->{cache}->[$x] = eval {
295 2         4 &{$me->{read}}($me->{files}->[$i])
  2         10  
296             } ;
297 2         5 undef $@; # Keep this from hanging anything else.
298             print "result is ",(defined $me->{cache}->[$x] ? "" : "un")."defined.\n"
299 2 0       8 if($me->{opt}->{verbose});
    50          
300              
301 2         6 $me->{slot}->[$i] = $me->{cache_next};
302 2         6 $me->{fdex}->[$me->{cache_next}] = $i;
303 2         5 $me->{cache_next}++;
304 2         5 $me->{cache_next} %= $me->{mem};
305 2         17 $me->{cache}->[$x];
306             }
307              
308             sub STORE {
309 3     3   11 my($me, $i, $val) = @_;
310              
311 3 50       18 if( $me->{slot}->[$i] ) {
312 0 0       0 print "Storing index $i, in cache\n" if($me->{opt}->{verbose});
313 0         0 $me->sync($i);
314 0         0 return $me->{cache}->[$me->{slot}->[$i]] = $val;
315             } else {
316 3 50       119 print "Storing index $i, not in cache\n" if($me->{opt}->{verbose});
317 3 50       17 if(defined ($me->{fdex}->[$me->{cache_next}])) {
318 0 0       0 print "cache full..." if($me->{opt}->{verbose});
319 0         0 $me->purge(1);
320             }
321            
322 3         7 my($x) = $me->{cache_next};
323 3         7 $me->{slot}->[$i] = $x;
324 3         22 $me->{fdex}->[$x] = $i;
325 3         7 $me->{cache_next}++;
326 3         7 $me->{cache_next} %= $me->{mem};
327 3         13 $me->sync($i);
328 3         20 return $me->{cache}->[$x] = $val;
329             }
330              
331 0         0 croak("This never happens");
332              
333             }
334            
335             sub FETCHSIZE {
336 0     0   0 my($me) = shift;
337              
338 0         0 $me->{n};
339             }
340              
341             sub STORESIZE {
342 0     0   0 my($me,$newsize) = @_;
343              
344 0 0       0 if($newsize > $me->{n}) {
345 0         0 croak("PDL::DiskCache: Can't augment array size (yet)!\n");
346             }
347            
348 0         0 for( my($i) = $newsize-1; $i<$me->{n}; $i++ ) {
349 0 0       0 if(defined $me->{slot}->[$i]) {
350 0 0       0 if($me->{rw}) {
351             print "Writing $me->{files}->[$me->{fdex}->[$i]]\n"
352 0 0       0 if($me->{opt}->{verbose});
353 0         0 eval {&{$me->{write}}($me->{cache}->[$me->{slot}->[$i]],
  0         0  
354 0         0 $me->{files}->[$i]);
355             };
356 0         0 $@ = 0; # Keep from hanging anything else
357             }
358 0         0 undef $me->{fdex}->[$me->{slot}->[$i]];
359             }
360             }
361 0         0 $#{$me->{slot}} = $newsize-1;
  0         0  
362 0         0 $#{$me->{files}} = $newsize-1;
  0         0  
363 0         0 $me->{n} = $newsize;
364             }
365              
366             =head2 sync
367              
368             In a rw cache, flush items out to disk but retain them in the cache.
369              
370             Accepts a single scalar argument, which is the index number of a
371             single item that should be written to disk. Passing (-1), or no
372             argument, writes all items to disk, similar to purge(-1).
373              
374             For ro caches, this is a not-too-slow (but safe) no-op.
375              
376             =cut
377              
378             sub sync {
379 5     5 1 12 my($me) = shift;
380 5 50       25 $me = (tied @{$me}) if("$me" =~ m/^PDL\:\:DiskCache\=ARRAY/);
  0         0  
381 5         10 my($syncn) = shift;
382 5 100       16 $syncn = -1 unless defined $syncn;
383 5 100       57 print "PDL::DiskCache::sync\n" if($me->{opt}->{verbose});
384            
385 5 100       27 my @list = $syncn==-1 ? (0..$me->{mem}-1) : ($syncn);
386              
387 5 100       223 if($me->{rw}) {
388 4         12 for(@list) {
389 23 100       79 if(defined $me->{fdex}->[$_]) {
390              
391             print " writing $me->{files}->[$me->{fdex}->[$_]]...\n"
392 6 50       102 if($me->{opt}->{verbose});
393              
394 6         18 eval {&{$me->{write}}($me->{cache}->[$_],
  6         27  
395 6         18 $me->{files}->[$me->{fdex}->[$_]]);
396             };
397 6         2939 $@ = 0; # keep from hanging anything else
398             }
399             }
400             }
401             }
402              
403             =head2 DESTROY
404              
405             This is the perl hook for object destruction. It just makes a call to
406             "sync", to flush the cache out to disk. Destructor calls from perl don't
407             happen at a guaranteed time, so be sure to call "sync" if you need to
408             ensure that the files get flushed out, e.g. to use 'em somewhere else.
409              
410             =cut
411              
412 2     2   335 sub DESTROY { shift->sync }
413              
414             # return true
415             1;