File Coverage

blib/lib/DB/DataStore.pm
Criterion Covered Total %
statement 169 199 84.9
branch 30 50 60.0
condition 6 15 40.0
subroutine 31 39 79.4
pod 7 7 100.0
total 243 310 78.3


line stmt bran cond sub pod time code
1             package DB::DataStore;
2              
3             =head1 NAME
4              
5             DB::DataStore - Simple and fast record based data store
6              
7             =head1 SYNPOSIS
8              
9             use DB::DataStore;
10              
11              
12             my $store = DB::DataStore->open( $directory );
13              
14             my $data = "TEXT DATA OR BYTES";
15             my $id = $store->stow( $data, $optionalID );
16              
17             my $val = $store->fetch( $id );
18              
19             $store->recycle( $id );
20              
21             my $new_id = $store->next_id; # $new_id == $id
22              
23             $store->stow( "MORE DATA", $new_id );
24              
25             =head1 DESCRIPTION
26              
27             A simple and fast way to store arbitrary text or byte data.
28             It is written entirely in perl with no non-core dependencies. It is designed to be
29             both easy to set up and easy to use.
30              
31             =head1 LIMITATIONS
32              
33             DB::DataStore is not meant to store huge amounts of data.
34             It will fail if it tries to create a file size greater than the
35             max allowed by the filesystem. This limitation will be removed in
36             subsequent versions. This limitation is most important when working
37             with sets of data that approach the max file size of the system
38             in question.
39              
40             This is not written with thread safety in mind, so unexpected behavior
41             can occur when multiple DB::DataStore objects open the same directory.
42              
43             =cut
44              
45 1     1   629 use strict;
  1         1  
  1         25  
46 1     1   5 use warnings;
  1         2  
  1         29  
47              
48 1     1   5 use File::Path qw(make_path);
  1         4  
  1         64  
49 1     1   962 use Data::Dumper;
  1         13270  
  1         70  
50              
51 1     1   7 use vars qw($VERSION);
  1         2  
  1         303  
52              
53             $VERSION = '1.03';
54              
55             =head1 METHODS
56              
57             =head2 open( directory )
58              
59             Takes a single argument - a directory, and constructs the data store in it.
60             The directory must be writeable or creatible. If a DataStore already exists
61             there, it opens it, otherwise it creates a new one.
62              
63             =cut
64             sub open {
65 1     1 1 782 my( $pkg, $directory ) = @_;
66              
67 1         165 make_path( "$directory/stores", { error => \my $err } );
68 1 50       5 if( @$err ) {
69 0         0 my( $err ) = values %{ $err->[0] };
  0         0  
70 0         0 die $err;
71             }
72 1         4 my $filename = "$directory/STORE_INDEX";
73              
74 1   33     10 bless {
75             DIRECTORY => $directory,
76             OBJ_INDEX => DB::DataStore::FixedRecycleStore->open( "IL", "$directory/OBJ_INDEX" ),
77             STORE_IDX => DB::DataStore::FixedStore->open( "I", $filename ),
78             STORES => [],
79             }, ref( $pkg ) || $pkg;
80            
81             } #open
82              
83             =head2 entry_count
84              
85             Returns how many entries are in this store. Recycling ids does
86             _not_ decrement this entry_count.
87              
88             =cut
89             sub entry_count {
90 0     0 1 0 shift->{OBJ_INDEX}->entry_count;
91             }
92              
93             =head2 ensure_entry_count( min_count )
94              
95             This makes sure there there are at least min_count
96             entries in this datastore. This creates empty
97             records if needed.
98              
99             =cut
100             sub ensure_entry_count {
101 0     0 1 0 shift->{OBJ_INDEX}->ensure_entry_count( shift );
102             }
103              
104             =head2 next_id
105              
106             This sets up a new empty record and returns the
107             id for it.
108              
109             =cut
110             sub next_id {
111 0     0 1 0 my $self = shift;
112 0         0 $self->{OBJ_INDEX}->next_id;
113             }
114              
115             =head2 stow( data, optionalID )
116              
117             This saves the text or byte data to the datastore.
118             If an id is passed in, this saves the data to the record
119             for that id, overwriting what was there.
120             If an id is not passed in, it creates a new datastore.
121              
122             Returns the id of the record written to.
123              
124             =cut
125             sub stow {
126 4     4 1 398 my( $self, $data, $id ) = @_;
127 4   33     23 $id //= $self->{OBJ_INDEX}->next_id;
128              
129 1     1   5 my $save_size = do { use bytes; length( $data ); };
  1         2  
  1         7  
  4         5  
  4         7  
130              
131 4         5 my( $current_store_id, $current_idx_in_store ) = @{ $self->{OBJ_INDEX}->get_record( $id ) };
  4         12  
132              
133             #
134             # Check if this record had been saved before, and that the
135             # store is was in has a large enough record size.
136             #
137 4 100       21 if( $current_store_id ) {
138 1         3 my $old_store = $self->_get_store( $current_store_id );
139              
140 1 50       5 warn "object '$id' references store '$current_store_id' which does not exist" unless $old_store;
141              
142 1 50       5 if( $old_store->{RECORD_SIZE} >= $save_size ) {
143 1         3 $old_store->put_record( $current_idx_in_store, [$data] );
144 1         3 return $id;
145             }
146            
147             # the old store was not big enough (or missing), so remove its record from
148             # there.
149 0 0       0 $old_store->recycle( $current_idx_in_store, 1 ) if $old_store;
150             }
151              
152 3         15 my( $store_id, $store ) = $self->_best_store_for_size( $save_size );
153 3         7 my $index_in_store = $store->next_id;
154              
155 3         11 $self->{OBJ_INDEX}->put_record( $id, [ $store_id, $index_in_store ] );
156 3         12 $store->put_record( $index_in_store, [ $data ] );
157              
158 3         9 $id;
159             } #stow
160              
161             =head2 fetch( id )
162              
163             Returns the record associated with the ID. If the ID has no
164             record associated with it, undef is returned.
165              
166             =cut
167             sub fetch {
168 2     2 1 490 my( $self, $id ) = @_;
169 2         3 my( $store_id, $id_in_store ) = @{ $self->{OBJ_INDEX}->get_record( $id ) };
  2         8  
170 2 50       7 return undef unless $store_id;
171              
172 2         5 my $store = $self->_get_store( $store_id );
173 2         3 my( $data ) = @{ $store->get_record( $id_in_store ) };
  2         4  
174 2         10 $data;
175             } #fetch
176              
177             =head2 recycle( $id )
178              
179             This marks that the record associated with the id may be reused.
180             Calling this does not decrement the number of entries reported
181             by the datastore.
182              
183             =cut
184             sub recycle {
185 1     1 1 3 my( $self, $id ) = @_;
186 1         2 my( $store_id, $id_in_store ) = @{ $self->{OBJ_INDEX}->get_record( $id ) };
  1         4  
187 1 50       7 return undef unless defined $store_id;
188            
189 1         3 my $store = $self->_get_store( $store_id );
190 1         4 $store->recycle( $id_in_store );
191 1         4 $self->{OBJ_INDEX}->recycle( $id_in_store );
192              
193             } #recycle
194              
195             sub _best_store_for_size {
196 3     3   7 my( $self, $record_size ) = @_;
197 3         4 my( $best_idx, $best_size, $best_store ); #without going over.
198              
199             # using the written record rather than the array of stores to
200             # determine how many there are.
201 3         7 for my $idx ( 1 .. $self->{STORE_IDX}->entry_count ) {
202 2         6 my $store = $self->_get_store( $idx );
203 2         4 my $store_size = $store->{RECORD_SIZE};
204 2 50       6 if( $store_size >= $record_size ) {
205 2 50 33     7 if( ! defined( $best_size ) || $store_size < $best_size ) {
206 2         4 $best_idx = $idx;
207 2         2 $best_size = $store_size;
208 2         5 $best_store = $store;
209             }
210             }
211             } #each store
212            
213 3 100       9 if( $best_store ) {
214 2         10 return $best_idx, $best_store;
215             }
216              
217             # Have to create a new store.
218             # Make one that is thrice the size of the record
219 1         2 my $store_size = 3 * $record_size;
220 1         3 my $store_id = $self->{STORE_IDX}->next_id;
221              
222             # first, make an entry in the store index, giving it that size, then
223             # fetch it?
224 1         5 $self->{STORE_IDX}->put_record( $store_id, [$store_size] );
225              
226 1         4 my $store = $self->_get_store( $store_id );
227              
228 1         3 $store_id, $store;
229              
230             } #_best_store_for_size
231              
232             sub _get_recycled_ids {
233 0     0   0 shift->{OBJ_INDEX}->get_recycled_ids;
234             }
235              
236             sub _get_store {
237 7     7   12 my( $self, $store_index ) = @_;
238              
239 7 100       19 if( $self->{STORES}[ $store_index ] ) {
240 6         14 return $self->{STORES}[ $store_index ];
241             }
242              
243 1         2 my( $store_size ) = @{ $self->{ STORE_IDX }->get_record( $store_index ) };
  1         3  
244              
245             # since we are not using a pack template with a definite size, the size comes from the record
246              
247 1         10 my $store = DB::DataStore::FixedRecycleStore->open( "A*", "$self->{DIRECTORY}/${store_index}_OBJSTORE", $store_size );
248 1         3 $self->{STORES}[ $store_index ] = $store;
249 1         2 $store;
250             } #_get_store
251              
252             # ----------- end DB::DataStore
253             =head1 HELPER PACKAGES
254              
255             DB::DataStore relies on two helper packages that are useful in
256             their own right and are documented here.
257              
258             =head1 HELPER PACKAGE
259              
260             DB::DataStore::FixedStore
261              
262             =head1 DESCRIPTION
263              
264             A fixed record store that uses perl pack and unpack templates to store
265             identically sized sets of data and uses a single file to do so.
266              
267             =head1 SYNOPSIS
268              
269             my $template = "LII"; # perl pack template. See perl pack/unpack.
270              
271             my $size; #required if the template does not have a definite size, like A*
272              
273             my $store = DB::DataStore::FixedStore->open( $template, $filename, $size );
274              
275             my $new_id = $store->next_id;
276              
277             $store->put_record( $id, [ 321421424243, 12, 345 ] );
278              
279             my $more_data = $store->get_record( $other_id );
280              
281             my $removed_last = $store->pop;
282              
283             my $last_id = $store->push( $data_at_the_end );
284              
285             my $entries = $store->entry_count;
286              
287             if( $entries < $min ) {
288              
289             $store->ensure_entry_count( $min );
290              
291             }
292              
293             $store->emtpy;
294              
295             $store->unlink_store;
296              
297             =head1 METHODS
298              
299             =cut
300             package DB::DataStore::FixedStore;
301              
302 1     1   705 use strict;
  1         2  
  1         22  
303 1     1   10 use warnings;
  1         1  
  1         33  
304              
305 1     1   5 use Fcntl qw( SEEK_SET LOCK_EX LOCK_UN );
  1         2  
  1         120  
306              
307             =head2 open( template, filename, size )
308              
309             Opens or creates the file given as a fixed record
310             length data store. If a size is not given,
311             it calculates the size from the template, if it can.
312             This will die if a zero byte record size is determined.
313              
314             =cut
315             sub open {
316 5     5   9 my( $pkg, $template, $filename, $size ) = @_;
317 5   33     23 my $class = ref( $pkg ) || $pkg;
318 5         7 my $FH;
319 1   66 1   5 my $useSize = $size || do { use bytes; length( pack( $template ) ) };
  1         2  
  1         5  
  5         26  
320 5 50       12 die "Cannot open a zero record sized fixed store" unless $useSize;
321 5 50       92 unless( -e $filename ) {
322 5         261 open $FH, ">$filename";
323 5         11 print $FH "";
324 5         33 close $FH;
325             }
326 5 50       133 open $FH, "+<$filename" or die "$@ $!";
327 5         51 bless { TMPL => $template,
328             RECORD_SIZE => $useSize,
329             FILENAME => $filename,
330             FILEHANDLE => $FH,
331             }, $class;
332             } #open
333              
334             =head2 empty
335              
336             This empties out the database, setting it to zero records.
337              
338             =cut
339             sub empty {
340 0     0   0 my $self = shift;
341 0         0 my $fh = $self->_filehandle;
342 0         0 truncate $self->{FILENAME}, 0;
343 0         0 undef;
344             } #empty
345              
346             =head2 ensure_entry_count( count )
347              
348             Makes sure the data store has at least as many entries
349             as the count given. This creates empty records if needed
350             to rearch the target record count.
351              
352             =cut
353             sub ensure_entry_count {
354 0     0   0 my( $self, $count ) = @_;
355 0         0 my $fh = $self->_filehandle;
356              
357 0         0 my $entries = $self->entry_count;
358 0 0       0 if( $count > $entries ) {
359 0         0 for( (1+$entries)..$count ) {
360 0         0 $self->put_record( $_, [] );
361             }
362             }
363             } #ensure_entry_count
364              
365             =head2
366              
367             Returns the number of entries in this store.
368             This is the same as the size of the file divided
369             by the record size.
370              
371             =cut
372             sub entry_count {
373             # return how many entries this index has
374 18     18   22 my $self = shift;
375 18         33 my $fh = $self->_filehandle;
376 18         284 my $filesize = -s $self->{FILENAME};
377 18         51 int( $filesize / $self->{RECORD_SIZE} );
378             }
379              
380             =head2 get_record( idx )
381              
382             Returns an arrayref representing the record with the given id.
383             The array in question is the unpacked template.
384              
385             =cut
386             sub get_record {
387 12     12   19 my( $self, $idx ) = @_;
388              
389 12         22 my $fh = $self->_filehandle;
390 12 50       64 sysseek $fh, $self->{RECORD_SIZE} * ($idx-1), SEEK_SET or die "Could not seek ($self->{RECORD_SIZE} * ($idx-1)) : $@ $!";
391 12         56 my $srv = sysread $fh, my $data, $self->{RECORD_SIZE};
392 12 50       25 defined( $srv ) or die "Could not read : $@ $!";
393 12         48 [unpack( $self->{TMPL}, $data )];
394             } #get_record
395              
396             =head2 next_id
397              
398             adds an empty record and returns its id, starting with 1
399              
400             =cut
401             sub next_id {
402 6     6   11 my( $self ) = @_;
403 6         12 my $fh = $self->_filehandle;
404 6         18 my $next_id = 1 + $self->entry_count;
405 6         20 $self->put_record( $next_id, [] );
406 6         16 $next_id;
407             } #next_id
408              
409              
410             =head2 pop
411              
412             Remove the last record and return it.
413              
414             =cut
415             sub pop {
416 7     7   9 my( $self ) = @_;
417              
418 7         16 my $entries = $self->entry_count;
419 7 100       44 return undef unless $entries;
420 2         5 my $ret = $self->get_record( $entries );
421 2         6 truncate $self->_filehandle, ($entries-1) * $self->{RECORD_SIZE};
422 2         9 $ret;
423             } #pop
424              
425             =head2 push( data )
426              
427             Add a record to the end of this store. Returns the id assigned
428             to that record. The data must be a scalar or list reference.
429             If a list reference, it should conform to the pack template
430             assigned to this store.
431              
432             =cut
433             sub push {
434 2     2   3 my( $self, $data ) = @_;
435 2         8 my $fh = $self->_filehandle;
436 2         6 my $next_id = 1 + $self->entry_count;
437 2         5 $self->put_record( $next_id, $data );
438 2         5 $next_id;
439             } #push
440              
441             =head2 push( idx, data )
442              
443             Saves the data to the record and the record to the filesystem.
444             The data must be a scalar or list reference.
445             If a list reference, it should conform to the pack template
446             assigned to this store.
447              
448             =cut
449             sub put_record {
450 16     16   28 my( $self, $idx, $data ) = @_;
451 16         30 my $fh = $self->_filehandle;
452 16 50       68 my $to_write = pack ( $self->{TMPL}, ref $data ? @$data : $data );
453              
454 1     1   716 my $to_write_length = do { use bytes; length( $to_write ); };
  1         2  
  1         3  
  16         17  
  16         23  
455 16 100       65 if( $to_write_length < $self->{RECORD_SIZE} ) {
456 6         8 my $del = $self->{RECORD_SIZE} - $to_write_length;
457 6         13 $to_write .= "\0" x $del;
458 1     1   72 my $to_write_length = do { use bytes; length( $to_write ); };
  1         2  
  1         4  
  6         6  
  6         7  
459 6 50       27 die "$to_write_length vs $self->{RECORD_SIZE}" unless $to_write_length == $self->{RECORD_SIZE};
460             }
461 16 50       264 sysseek( $fh, $self->{RECORD_SIZE} * ($idx-1), SEEK_SET ) && ( my $swv = syswrite( $fh, $to_write ) );
462 16 50       37 defined( $swv ) or die "Could not write : $@ $!";
463 16         28 1;
464             } #put_record
465              
466             =head2 unlink_store
467              
468             Removes the file for this record store entirely from the file system.
469              
470             =cut
471             sub unlink_store {
472             # TODO : more checks
473 0     0   0 my $self = shift;
474 0         0 close $self->_filehandle;
475 0         0 unlink $self->{FILENAME};
476             }
477              
478             sub _filehandle {
479 56     56   88 my $self = shift;
480 56         350 close $self->{FILEHANDLE};
481 56         1474 CORE::open( $self->{FILEHANDLE}, "+<$self->{FILENAME}" );
482 56         245 $self->{FILEHANDLE};
483             }
484              
485              
486             # ----------- end DB::DataStore::FixedStore
487              
488              
489              
490             =head1 HELPER PACKAGE
491              
492             DB::DataStore::FixedRecycleStore
493              
494             =head1 SYNOPSIS
495              
496             A subclass DB::DataStore::FixedRecycleStore. This allows
497             indexes to be recycled and their record space reclaimed.
498              
499             my $store = DB::DataStore::FixedRecycleStore->open( $template, $filename, $size );
500              
501             my $id = $store->next_id;
502              
503             $store->put_record( $id, ["SOMEDATA","FOR","PACK" ] );
504              
505             my $id2 = $store->next_id; # == 2
506              
507             $store->recycle( $id );
508              
509             my $avail_ids = $store->get_recycled_ids; # [ 1 ]
510              
511             my $id3 = $store->next_id;
512              
513             $id3 == $id;
514              
515             =cut
516             package DB::DataStore::FixedRecycleStore;
517              
518 1     1   244 use strict;
  1         2  
  1         17  
519 1     1   22 use warnings;
  1         1  
  1         305  
520              
521             our @ISA='DB::DataStore::FixedStore';
522              
523             sub open {
524 2     2   3 my( $pkg, $template, $filename, $size ) = @_;
525 2         10 my $self = DB::DataStore::FixedStore->open( $template, $filename, $size );
526 2         7 $self->{RECYCLER} = DB::DataStore::FixedStore->open( "L", "${filename}.recycle" );
527 2         10 bless $self, $pkg;
528             } #open
529              
530             =head1 METHODS
531              
532             =head2 recycle( $idx )
533              
534             Recycles the given id and reclaims its space.
535              
536             =cut
537             sub recycle {
538 2     2   5 my( $self, $idx ) = @_;
539 2         6 $self->{RECYCLER}->push( [$idx] );
540             } #recycle
541              
542             =head2 get_recycled_ids
543              
544             Returns a list reference of ids that are available
545             to be reused.
546              
547             =cut
548             sub get_recycled_ids {
549 0     0   0 my $self = shift;
550 0         0 my $R = $self->{RECYCLER};
551 0         0 my $max = $R->entry_count;
552 0         0 my @ids;
553 0         0 for( 1 .. $max ) {
554 0         0 push @ids, @{ $R->get_record( $_ ) };
  0         0  
555             }
556 0         0 \@ids;
557             } #get_recycled_ids
558              
559             sub next_id {
560 7     7   11 my $self = shift;
561              
562 7 100       10 my( $recycled_id ) = @{ $self->{RECYCLER}->pop || []};
  7         39  
563 7 100       30 $recycled_id = $recycled_id ? $recycled_id : $self->SUPER::next_id;
564             } #next_id
565              
566             # ----------- end package DB::DataStore::FixedRecycleStore;
567              
568             1;
569              
570             __END__