File Coverage

blib/lib/FlatFile.pm
Criterion Covered Total %
statement 237 247 95.9
branch 45 64 70.3
condition 12 18 66.6
subroutine 48 49 97.9
pod 9 14 64.2
total 351 392 89.5


line stmt bran cond sub pod time code
1 17     17   1000678 use strict; use warnings;
  17     17   167  
  17         475  
  17         75  
  17         27  
  17         579  
2              
3             package FlatFile;
4 17     17   10810 use Tie::File;
  17         273500  
  17         725  
5             our $VERSION = '0.12';
6 17     17   144 use Carp 'croak';
  17         26  
  17         2008  
7              
8             =head1 NAME
9              
10             FlatFile - Manipulate flat-file databases
11              
12             =head1 SYNOPSIS
13              
14             # Usage pattern A: direct use
15             use FlatFile;
16              
17             my $password = FlatFile->new(FILE => $filename,
18             FIELDS => [qw(username password uid gid gecos home shell)],
19             MODE => "+<", # "<" for read-only access
20             RECSEP => "\n", FIELDSEP => ":");
21              
22             my ($mjd) = $file->lookup(username => "mjd");
23             print "mjd: ", $mjd->uid, "\n";
24              
25             # Look up all records for which function returns true
26             sub is_chen { $_{gecos} =~ /\bChen$/ }
27             my @chens = $file->c_lookup(\&is_chen);
28             for (@chens) { $_->set_shell("/bin/false") }
29              
30             $mjd->delete; # delete MJD from file
31              
32             $password->flush; # write out changes to file
33              
34             # Usage pattern B: subclass
35             # PasswordFile.pm:
36             package PasswordFile;
37             use FlatFile;
38             our @ISA = 'FlatFile';
39             our @FIELDS = qw(username password uid gid gecos home shell);
40             our $RECSEP = "\n";
41             our $FIELDSEP = ":";
42             our $MODE = "<";
43             our $FILE = "/etc/passwd";
44              
45             # Main program uses subclass:
46             package main;
47             use PasswordFile;
48             my $password = PasswordFile->new;
49              
50             ... the rest as above ...
51              
52             =head1 DESCRIPTION
53              
54             C is a class for manipulating flat-file (plain text)
55             databases. One first opens the database, obtaining a database object.
56             Queries may be perfomed on the database object, yielding record
57             objects, which can be queried to retrieve information from the
58             database. If the database is writable, the objects can be updated,
59             and the updates written back to the file.
60              
61             Subclasses of this module can be created to represent specific files,
62             such as the Unix password file or the Account Management C file.
63              
64             =cut
65              
66             my %default_default =
67             (FILE => undef,
68             TMPFILE => "", # overwritten later
69             MODE => "<",
70             FIELDS => undef,
71             RECSEP => "\n",
72             FIELDSEP => qr/\s+/,
73             FIELDSEPSTR => " ",
74             RECBASECLASS => "FlatFile::Rec",
75             RECCLASS => "", # Will be overwritten in ->new method
76             DEFAULTS => {},
77             );
78              
79             sub _classvars {
80 44     44   80 my $class = shift;
81 44 100       177 return {} if $class eq __PACKAGE__;
82 20         27 my %cv;
83 20         75 for my $k (keys %default_default) {
84 17     17   111 my $val = do { no strict 'refs'; $ {"$class\::$k"} };
  17         28  
  17         6812  
  200         213  
  200         198  
  200         354  
85 200 100       326 $cv{$k} = $val if defined $val;
86             }
87 20         49 \%cv;
88             }
89              
90             =head1 Methods
91              
92             =head2 C<< $db = FlatFile->new(FILE => $filename, FIELDS => [...], ...); >>
93              
94             The C method opens the database. At least two arguments are
95             required: the C argument that gives the path at which the data
96             can be accessed, and the C argument that names the fields, in
97             order.
98              
99             By default, the file will be opened for reading only. To override
100             this, supply a C argument whose value is a mode string like the
101             one given as the second argument to the Perl built-in C
102             function. For read-write access, you should probably use C<< MODE => "+<" >>. As of version 0.10, only C<< < >>, C<< +< >>, and C<< +> >> are supported.
103              
104             The file will be assumed to contain "records" that are divided into
105             "fields". By default, records are assumed to be terminated with a
106             newline character; to override this, use C<< RECSEP => $separator >>.
107             Fields are assumed to be separated by whitespace; to override, use
108             C<< FIELDSEP => $pattern >>. C<$pattern> may be a compiled regex
109             object or a literal string. If it is a pattern, you must also supply
110             an example string with C<> that will be used when writing
111             out records. For example, for the Unix password file, whose fields
112             are separated by colons, use:
113              
114             FIELDSEP => ":"
115              
116             but for a file whose fields are separated by one or more space
117             characters, use:
118              
119             FIELDSEP => qr/ +/, FIELDSEPSTR => " "
120              
121             The C argument tells the module to use two spaces between
122             fields when writing out new records.
123              
124             You may supply a
125              
126             DEFAULTS => { field => default_value, ... }
127              
128             argument that specifies default values for some or all of the fields. Fields for which no default value
129              
130             When changes are written to the disk, the module first copies the
131             modified data to a temporary file, then atomically replaces the old
132             file with the temporary file. To specify a temporary filename, use
133             C<< TMPFILE => $filename >>. Otherwise, it will default to the name
134             of the main file with C<".tmp"> appended.
135              
136             Record objects will be allocated in dynamically generated classes
137             named C,
138             C, and so on, which inherit from common
139             base class C. To override this choice of
140             class, supply a class name with C<< RECCLASS => $classname >>. You
141             may want your custom class to inherit from
142             C.
143              
144             =cut
145              
146             my $classid = "A";
147             sub new {
148 44     44 1 56063 my ($class, %opts) = @_;
149 44         125 my $self = {recno => 0};
150              
151 44         88 bless $self => $class;
152              
153             # acquire object properties from argument list (%opts)
154             # or from class defaults or default defaults, as appropriate.
155             # _default will detect missing required values
156             # and unknown key names
157 44         163 for my $source (\%opts, $class->_classvars) {
158 88         206 $self->_acquire_settings($source, check_keys => 1);
159             }
160              
161             # TODO: TESTS for this logic
162 44 100       132 if (exists $self->{FIELDSEP}) {
163 16 50       49 if (ref $self->{FIELDSEP}) {
164             defined($self->{FIELDSEPSTR})
165 0 0       0 or croak "FIELDSEPSTR required in conjunction with FIELDSEP";
166             } else {
167             # literal string; compile it to a pattern
168 16         30 my $str = $self->{FIELDSEP};
169 16         29 $self->{FIELDSEPSTR} = $str;
170 16         47 $self->{FIELDSEP} = "\Q$str";
171             }
172             }
173              
174 44         116 $self->_acquire_settings(\%default_default, mandatory => 1);
175              
176             $self->{RECCLASS} = join "::", $self->{RECBASECLASS}, $classid++
177 44 100       227 unless $self->{RECCLASS};
178              
179             $self->{TMPFILE} = $self->{FILE} . ".tmp"
180 44 50       145 unless exists $opts{TMPFILE};
181              
182 44         155 $self->_calculate_field_offsets;
183              
184 44         133 $self->_generate_record_class;
185              
186              
187 44 50       133 return $self->_open_file ? $self : ();
188             }
189              
190             sub _acquire_settings {
191 132     132   256 my ($self, $settings, %opt) = @_;
192 132         340 for my $k (keys %$settings) {
193 593 50 66     1079 if ($opt{check_keys} && not exists $default_default{$k}) {
194 0         0 croak "unknown key '$k'";
195             }
196 593 50 66     1272 if (! exists $self->{$k} && exists $settings->{$k}) {
197 424 50 66     892 if ($opt{mandatory} && not defined $settings->{$k}) {
198 0         0 croak "Required key '$k' unspecified";
199             }
200 424         712 $self->{$k} = $settings->{$k};
201             }
202             }
203             }
204              
205 17     17   116 use Fcntl qw(O_RDONLY O_RDWR O_TRUNC);
  17         26  
  17         6370  
206             my %MODE_OK = ('<', O_RDONLY, '+<', O_RDWR,
207             '+>', O_RDWR|O_TRUNC);
208             sub _mode_flags {
209 44     44   67 my $self = shift;
210 44         100 $MODE_OK{$self->{MODE}};
211             }
212              
213             sub _writable {
214 98     98   133 my $self = shift;
215 98         385 $self->{MODE} ne "<"; # "<" is the only read-only mode
216             }
217              
218             sub _open_file {
219 44     44   67 my $self = shift;
220 44         66 my $file = $self->{FILE};
221 44         67 my $mode = $self->{MODE};
222 44         103 my $flags = $self->_mode_flags();
223 44 50       105 defined $flags or croak "Invalid mode '$mode'";
224              
225             tie my(@file), "Tie::File", $file, mode => $flags,
226 44 50       328 recsep => $self->{RECSEP}, autochomp => 1,
227             or return;
228 44         6868 $self->{file} = \@file;
229 44         202 return 1;
230             }
231              
232              
233             sub _calculate_field_offsets {
234 44     44   66 my $self = shift;
235 44         64 my @f = @{$self->{FIELDS}};
  44         112  
236 44         63 my %off;
237 44         133 for my $i (0 .. $#f) {
238 101 50       187 if (exists $off{$f[$i]}) {
239 0         0 croak "duplicate field name '$f[$i]'";
240             } else {
241 101         201 $off{$f[$i]} = $i;
242             }
243             }
244 44         83 $self->{OFF} = \%off;
245 44         79 return 1;
246             }
247              
248             sub _generate_record_class {
249 44     44   65 my ($self) = shift;
250 44         77 my $classname = $self->{RECCLASS};
251              
252             # create 'get' methods
253 44         63 for my $field (@{$self->{FIELDS}}) {
  44         101  
254 101         150 my $ff = $field;
255             my $code = sub {
256 306     306   15167 return $_[0]{data}{$ff};
257 101         309 };
258 17     17   135 no strict 'refs';
  17         33  
  17         2004  
259 101         134 *{"$classname\::$field"} = $code;
  101         500  
260 101         128 *{"$classname\::get_$field"} = $code;
  101         341  
261             }
262              
263             # create 'set' methods
264 44 100       128 if ($self->_writable) {
265 28         41 for my $field (@{$self->{FIELDS}}) {
  28         56  
266 56         75 my $ff = $field;
267             my $code = sub {
268 6     6   21 my ($rec, $val) = @_;
269 6         15 $rec->{data}{$ff} = $val;
270 6         34 $rec->db->_update($rec);
271 56         164 };
272 17     17   101 no strict 'refs';
  17         31  
  17         1076  
273 56         74 *{"$classname\::set_$field"} = $code;
  56         207  
274             }
275             }
276              
277 17     17   101 no strict 'refs';
  17         29  
  17         15895  
278 44         119 @{"$classname\::ISA"} = ($self->{RECBASECLASS});
  44         845  
279 44         138 *{"$classname\::FIELD"} = $self->{OFF}; # create %FIELD hash
  44         276  
280 44         78 *{"$classname\::FIELD"} = $self->{FIELDS}; # create @FIELD hash
  44         117  
281 44         68 *{"$classname\::DEFAULT"} = $self->{DEFAULTS}; # create %DEFAULT hash
  44         153  
282 44         82 return 1;
283             }
284              
285             =head2 C<< $db->lookup($field, $value) >>
286              
287             Returns an array of all records in the database for which the field
288             C<$field> contains the value C<$value>. For information about record
289             objects, see L<"Record objects"> below.
290              
291             Field contents are always compared stringwise. For numeric or other
292             comparisons, use C instead.
293              
294             The behavior in scalar context is undefined.
295              
296             =cut
297              
298             # Locate records for which field $f contains value $v
299             # return all such
300             # TODO: iterator interface?
301             sub lookup {
302 46     46 1 11051 my ($self, $f, $v) = @_;
303              
304             # If called as a class method, try to instantiate the database
305             # for the duration of a single query
306             # Note that since we don't give the new call the required FILE and FIELD
307             # arguments, this will only work if $self is actually the name of a subclass
308             # in which those things are predefined
309 46 100       134 $self = $self->new if not ref $self;
310              
311 46         70 my @result;
312 46 50       126 $self->rewind or croak "Couldn't rewind handle";
313 46         158 while (my $rec = $self->nextrec) {
314 249 100       544 if ($rec->$f eq $v) {
315 57 100       137 return $rec unless wantarray();
316 55         149 push @result, $rec;
317             }
318             }
319 44         170 return @result;
320             }
321              
322             =head2 C<< $db->c_lookup($predicate) >>
323              
324             Returns an array of all records in the database for which the
325             predicate function C<$predicate> returns true. For information about
326             record objects, see L<"Record objects"> below.
327              
328             The predicate function will be called repeatedly, once for each record
329             in the database.
330              
331             Each record will be passed to the predicate function as a hash, with
332             field names as the hash keys and record data as the hash values. The
333             global variable C<%_> will also be initialized to contain the current
334             record hash. For example, if C<$db> is the Unix password file, then
335             we can search for people named "Chen" like this:
336              
337             sub is_chen {
338             my %data = @_;
339             $data{gecos} =~ /\bChen$/;
340             }
341              
342             @chens = $db->c_lookup(\&is_chen);
343              
344             Or, using the C<%_> variable, like this:
345              
346             sub is_chen { $_{gecos} =~ /\bChen$/ }
347              
348             @chens = $db->c_lookup(\&is_chen);
349              
350             The behavior in scalar context is undefined.
351              
352             =cut
353              
354             # return all records for which some callback yields true
355             sub c_lookup {
356 13     13 1 1849 my ($self, $cb) = @_;
357 13         18 my @result;
358              
359             # If called as a class method, try to instantiate the database
360             # for the duration of a single query
361             # Note that since we don't give the new call the required FILE and FIELD
362             # arguments, this will only work if $self is actually the name of a subclass
363             # in which those things are predefined
364 13 100       51 $self = $self->new if not ref $self;
365              
366 13 50       32 $self->rewind or croak "Couldn't rewind handle";
367 13         28 while (my $rec = $self->nextrec) {
368 64         135 local %_ = $rec->as_hash;
369 64 100       181 push @result, $rec if $cb->(%_);
370             }
371 13         80 return @result;
372             }
373              
374             sub rewind {
375 59     59 0 102 my $self = shift;
376 59         85 $self->{recno} = 0;
377 59         124 return 1;
378             }
379              
380             =head2 C<< $db->rec_count >>
381              
382             Return a count of the number of records in the database.
383              
384             =cut
385              
386             sub rec_count {
387 2     2 1 374 my $self = shift;
388              
389 2         4 return scalar(@{$self->{file}});
  2         9  
390             }
391              
392             sub save_position {
393 6     6 0 8 my $self = shift;
394 6         35 FlatFile::Position->new(\($self->{recno}));
395             }
396              
397             =head2 C<< my $record = $db->nextrec >>
398              
399             Get the next record from the database and return a record object
400             representing it. Each call to C returns a different record.
401             Returns an undefined value when there are no more records left.
402              
403             For information about record objects, see L<"Record objects"> below.
404              
405             To rewind the database so that C will start at the beginning,
406             use the C method.
407              
408             The following code will scan all the records in the database:
409              
410             $db->rewind;
411             while (my $rec = $db->nextrec) {
412             ... do something with $rec...
413             }
414              
415             =cut
416              
417              
418              
419             sub nextrec {
420 373     373 1 1918 my $self = shift;
421 373         502 my $recno = $self->{recno};
422              
423 373         720 $recno++ while $self->{DELETE}{$recno};
424              
425             # Someone may have done an in-memory update of the record
426             # we just read. If so, discard the disk data and
427             # proceed with the in-memory version of the record instead.
428             # if it wasn't updated, the continue processing
429             # with the disk data
430             my $line = exists $self->{UPDATE}{$recno}
431             ? $self->{UPDATE}{$recno}
432 373 100       1410 : $self->{file}[$recno];
433 373 100       41837 return unless defined $line;
434 316         2576 my @data = split $self->{FIELDSEP}, $line, -1;
435 316         561 $self->{recno} = $recno+1;
436 316         638 return $self->make_rec($recno, @data);
437             }
438              
439             sub make_rec {
440 322     322 0 657 my ($self, $recno, @data) = @_;
441 322         887 return $self->{RECCLASS}->new($self, $recno, @data);
442             }
443              
444             =head2 C<< $db->append(@data) >>
445              
446             Create a new record and add it to the database. New records may not be
447             written out until the C<< ->flush >> method is called. The new
448             records will be added at the end of the file.
449              
450             C<@data> is a complete set of data values for the new record, in the
451             appropriate order. It is a fatal error to pass too many or too few
452             values.
453              
454             =cut
455              
456              
457             # TODO: fail unless ->_writable
458             sub append {
459 6     6 1 3657 my ($self, @data) = @_;
460 6         19 my $pos = $self->save_position;
461 6 50       24 push @{$self->{file}}, $self->make_rec(0, @data)->as_string or return;
  6         21  
462 6         2278 return 1;
463             }
464              
465             sub _update {
466 6     6   34 my ($self, $new_rec) = @_;
467 6         36 my $id = $new_rec->id;
468 6 50       33 return if $self->{DELETE}{$id};
469 6         56 $self->{UPDATE}{$id} = $new_rec->as_string;
470             }
471              
472             =head2 C<< $db->delete_rec($record) >>
473              
474             Delete a record from the database. C<$record> should be a record
475             object, returned from a previous call to C, C, or
476             some similar function. The record will be removed from the disk file
477             when the C method is called.
478              
479             Returns true on success, false on failure.
480              
481             =cut
482              
483             sub delete_rec {
484 6     6 1 30 my ($self, $rec) = @_;
485 6         27 my $id = $rec->id;
486 6         12 delete $self->{UPDATE}{$id};
487 6         19 $self->{DELETE}{$id} = 1;
488             }
489              
490             =head2 C<< $db->flush >>
491              
492             Adding new records, deleting and modifying old records is performed
493             in-memory only until C is called. At this point, the program
494             will copy the original data file, making all requested modifications,
495             and then atomically replace the original file with the new copy.
496              
497             Returns true on success, false if the update was not performed.
498              
499             C is also called automatically when the program exits.
500              
501             =cut
502              
503             # Old behavior was lost:
504             ### copy input file, writing out updated records
505             ### then atomically replace input file with updated copy
506             # Fix this XXX
507             sub flush {
508 54     54 1 2406 my $self = shift;
509              
510             # Quick return if there's nothing to do
511 54 100       140 return unless $self->_writable;
512 38         149 return if keys %{$self->{UPDATE}} == 0
513 38 100 100     54 && keys %{$self->{DELETE}} == 0;
  32         195  
514              
515 12         19 my $f = tied(@{$self->{file}});
  12         32  
516 12         51 $f->defer;
517              
518 12         154 for my $k (keys %{$self->{UPDATE}}) {
  12         52  
519 6         37 $self->{file}[$k] = $self->{UPDATE}{$k};
520             }
521 12         744 for my $k (sort {$b <=> $a} keys %{$self->{DELETE}}) {
  0         0  
  12         51  
522 6         9 splice @{$self->{file}}, $k, 1;
  6         32  
523             }
524              
525 12         3229 %{$self->{UPDATE}} = %{$self->{DELETE}} = ();
  12         23  
  12         30  
526 12         50 $f->flush;
527 12         1795 return 1;
528             }
529              
530             sub DESTROY {
531 44     44   10842 my $self = shift;
532 44         140 $self->flush('DESTROY');
533             }
534              
535 15     15 0 827 sub field_separator_string { $_[0]->{FIELDSEPSTR} }
536 13     13 0 35 sub record_separator { $_[0]{RECSEP} }
537              
538             =head2 C<< $db->has_field($fieldname) >>
539              
540             Returns true if the database contains a field with the specified name.
541              
542             =cut
543              
544             sub has_field {
545 0     0 1 0 my ($self, $field) = @_;
546 0         0 exists $self->{OFF}{$field};
547             }
548              
549             =head1 Record objects
550              
551             Certain methods return "record objects", each of which represents a
552             single record. The data can be accessed and the database can be
553             modified by operating on these record objects.
554              
555             Each object supports a series of accessor methods that are named after
556             the fields in the database. If the database contains a field "color",
557             for example, record objects resulting from queries on that database
558             will support a C method to retrieve the color value from a
559             record, and a synonymous method that does the exact same
560             thing. If the database was opened for writing, the record objects will
561             also support a C method to modify the color in a record.
562             The effects of the C methods will be propagated to the file
563             when the database is flushed.
564              
565             Other methods follow.
566              
567             =cut
568              
569             package FlatFile::Rec;
570 17     17   135 use Carp 'croak';
  17         42  
  17         1252  
571              
572             sub default {
573 10     10   31 my $self = shift;
574 10   33     27 my $class = ref($self) || $self;
575 10         12 my $field = shift;
576 17     17   102 no strict 'refs';
  17         25  
  17         1615  
577 10         13 my $d = \%{"$class\::DEFAULT"};
  10         25  
578 10 50       51 return wantarray ? (exists $d->{$field}, $d->{$field}) : $d->{$field};
579             }
580              
581             =head2 C<< $record->fields >>
582              
583             Returns a list of the fields in the object, in order.
584              
585             =cut
586              
587             sub fields {
588 334     334   390 my $self = shift;
589 334   66     924 my $class = ref($self) || $self;
590 17     17   103 no strict 'refs';
  17         27  
  17         8147  
591 334         386 return @{"$class\::FIELD"};
  334         1190  
592             }
593              
594             sub new {
595 322     322   685 my ($class, $db, $id, @data) = @_;
596 322         465 my $self = {};
597 322         371 my %data;
598              
599 322         625 my @f = $class->fields;
600 322         972 @data{@f} = @data;
601              
602             # set default values in data hash
603 322         506 for my $f (@f) {
604 989 100       1541 if (not defined $data{$f}) {
605 10         24 my ($has_default, $default_value) = $class->default($f);
606 10 50       23 if ($has_default) {
607 10         21 $data{$f} = $default_value;
608             } else {
609 0         0 my $msg = "required field '$f' missing from record";
610 0 0       0 $msg .= " $id" if $id;
611 0         0 croak $msg;
612             }
613             }
614             }
615              
616 322         539 $self->{data} = \%data;
617 322         467 $self->{db} = $db;
618 322         503 $self->{id} = $id;
619 322         1269 bless $self => $class;
620             }
621              
622             =head2 C<< $record->db >>
623              
624             Returns the database object from which the record was originally selected.
625             This example shows how one might modify a record and then write the
626             change to disk, even if the original database object was unavailable:
627              
628             $employee->set_salary(1.06 * $employee->salary);
629             $employee->db->flush;
630              
631             =cut
632              
633             sub db {
634 33     33   111 $_[0]{db};
635             }
636              
637             sub id {
638 12     12   31 $_[0]{id};
639             }
640              
641             =head2 C<< %hash = $record->as_hash >>
642              
643             Returns a hash containing all the data in the record. The keys in the
644             hash are the field names, and the corresponding values are the record
645             data.
646              
647             =cut
648              
649             sub as_hash {
650 64     64   73 my $self = shift;
651 64         59 return %{$self->{data}};
  64         271  
652             }
653              
654             =head2 C<< @data = $record->as_array >>
655              
656             Return the record data values only.
657              
658             =cut
659              
660             sub as_array {
661 12     12   53 my $self = shift;
662 12         49 my @f = $self->fields;
663 12         76 return @{$self->{data}}{@f};
  12         397  
664             }
665              
666             =head2 C<< $line = $record->as_string >>
667              
668             Return the record data in the same form that it appeared in the
669             original file. For example, if the record were selected from the Unix
670             password file, this might return the string
671             C<"root:x:0:0:Porpoise Super-User:/:/sbin/sh">.
672              
673             =cut
674              
675             sub as_string {
676 12     12   22 my $self = shift;
677 12         36 my $fsep = $self->db->field_separator_string;
678 12         48 my $rsep = $self->db->record_separator;
679 12         66 my @data = $self->as_array;
680 12         83 return join($fsep, @data) . $rsep;
681             }
682              
683             =head2 C<< $line = $record->delete >>
684              
685             Delete this record from its associated database. It will be removed
686             from the disk file the next time the database object is flushed.
687              
688             =cut
689              
690             # delete this record from its database
691             sub delete {
692 3     3   548 my $self = shift;
693 3         37 $self->db->delete_rec($self);
694             }
695              
696             package FlatFile::Position;
697              
698             sub new {
699 6     6   14 my ($class, $record_number_ref) = @_;
700 6         9 my $recno = $$record_number_ref;
701             my $self = sub {
702 6     6   27 $$record_number_ref = $recno;
703 6         22 };
704 6         24 bless $self => $class;
705             }
706              
707             sub DESTROY {
708 6     6   10 my $self = shift;
709 6         22 $self->();
710             }
711              
712             1;
713              
714             __END__