File Coverage

blib/lib/FileHash/FormatString.pm

Criterion	Covered	Total	%
statement	15	133	11.2
branch	0	56	0.0
condition	0	18	0.0
subroutine	5	30	16.6
pod	4	4	100.0
total	24	241	9.9

line	stmt	bran	cond	sub	pod	time	code
1							#============================= FormatString.pm ===============================
2							# Filename: FormatString.pm
3							# Description: Format lines to describe directory text lines.
4							# Original Author: Dale M. Amon
5							# Revised by: $Author: amon $
6							# Date: $Date: 2008-08-28 23:35:28 $
7							# Version: $Revision: 1.7 $
8							# License: LGPL 2.1, Perl Artistic or BSD
9							#
10							#=============================================================================
11	1			1		5	use strict;
	1					3
	1					34
12	1			1		4	use File::Spec;
	1					1
	1					16
13	1			1		1966	use HTTP::Date;
	1					6598
	1					79
14	1			1		11	use Fault::Notepad;
	1					14
	1					42
15
16							package FileHash::FormatString;
17	1			1		6	use vars qw{@ISA};
	1					2
	1					3050
18							@ISA = qw( UNIVERSAL );
19
20							#=============================================================================
21							# INTERNAL OPS
22							#=============================================================================
23
24							my %FieldNames =
25							('pathQuoted' => 1, 'path' => 1,
26							'deviceQuoted' => 1, 'device' => 1,
27							'directoryQuoted' => 1, 'directory' => 1,
28							'fileQuoted' => 1, 'file' => 1,
29							'mode' => 1, 'modeOctal' => 1, 'modeChars' => 1,
30							'atime' => 1, 'atimeQuoted' => 1,
31							'atimeDate' => 1, 'atimeTime' => 1,
32							'ctime' => 1, 'ctimeQuoted' => 1,
33							'ctimeDate' => 1, 'ctimeTime' => 1,
34							'mtime' => 1, 'mtimeQuoted' => 1,
35							'mtimeDate' => 1, 'mtimeTime' => 1,
36							'uidName' => 1, 'uid' => 1,
37							'gidName' => 1, 'gid' => 1,
38							'hardlinks' => 1,
39							'sizeBytes' => 1,
40							'inode' => 1,
41							'blocksAllocated' => 1,
42							'blocksizePreference' => 1,
43							'deviceSpecialId' => 1,
44							'deviceNumber' => 1,
45							'md5sum' => 1,
46							'SKIP' => 1
47							);
48
49							#=============================================================================
50							# Path ops
51							#=============================================================================
52
53							sub _selectBestString ($$$$$) {
54	0			0			my ($s,$a,$b,$argnames,$out) = @_;
55
56	0	0	0				$s->{'notepad'}->add
			0
57							("$argnames both present and are different: $a ne $b\n")
58							if ((defined $a and defined $b and ($a ne $b)));
59
60	0	0					return (defined $a) ? $a : ((defined $b) ? $b : undef);
		0
61							}
62
63							#-----------------------------------------------------------------------------
64
65							sub _bestPath ($$$) {
66	0			0			my ($self,$in,$out) = @_;
67
68	0						my $dev = $self->_selectBestString
69							(@$in{'deviceQuoted', 'device'}, "deviceQuoted and Device", $out);
70
71	0						my $dir = $self->_selectBestString
72							(@$in{'directoryQuoted','directory'}, "directoryQuoted and Directory",
73							$out);
74
75	0						my $file = $self->_selectBestString
76							(@$in{'fileQuoted', 'file'}, "fileQuoted and File", $out);
77
78	0						my $fullpath = $self->_selectBestString
79							(@$in{'pathQuoted', 'path'}, "pathQuoted and Path", $out);
80
81	0						my $catpath = undef;
82	0	0	0				if (defined $dir or defined $file) {
83	0	0					defined $dir or ($dir = "");
84	0	0					defined $file or ($file = "");
85	0						$catpath = File::Spec->catpath($dev,$dir,$file);
86							}
87
88	0						my $val = $self->_selectBestString
89							($fullpath,$catpath, "Path or pathQuoted and constructed path", $out);
90
91							# splitpath will now force device to "" instead of undef as well.
92							#
93	0						return (File::Spec->splitpath($val));
94							}
95
96							#=============================================================================
97							# File Mode ops
98							#=============================================================================
99
100							my %TypeNames =
101							(
102							'c' => 0020000,
103							'd' => 0040000,
104							'b' => 0060000,
105							'-' => 0100000,
106							'l' => 0120000,
107							's' => 0140000,
108							'p' => 0160000
109							);
110
111							sub _string2mode ($$$) {
112	0			0			my ($s,$modestr,$out) = @_;
113
114							# Coding trick to get rid of leading and trailing whitespace.
115	0						$_ = $modestr; ($modestr) = split;
	0
116
117	0	0					if (length $modestr != 10) {
118	0						$s->{'notepad'}->add
119							("Modestring ignored. It must be 10 characters long: \'$modestr\'");
120	0						return undef;
121							}
122
123	0						my ($type,$sticky,$sgid,$suid) =
124							($modestr =~
125							/([cdblsp-])[r-][w-]([xtT-])[r-][w-]([xsS-])[r-][w-]([xsS-])/);
126
127	0	0					if (!defined $type) {
128	0						$s->{'notepad'}->add ("Invalid mode string: \'$modestr\'");
129	0						return undef;
130							}
131
132	0						my $typeval = $TypeNames{$type};
133	0						my $mode = lc (substr $modestr, 1);
134	0						$mode =~ tr/\-rwxSsTt/01/;
135
136							return (
137	0	0					$typeval \|
		0
		0
138							(((lc $suid) eq "s") ? 01000 : 0) \|
139							(((lc $sgid) eq "s") ? 02000 : 0) \|
140							(((lc $sticky) eq "t") ? 04000 : 0) \|
141							oct "0b${mode}");
142							}
143
144							#-----------------------------------------------------------------------------
145
146							sub _bestMode ($$$) {
147	0			0			my ($s,$in,$out) = @_;
148	0						my ($mode1,$mode2,$mode3) = @$in{'mode','modeOctal','modeChars'};
149	0	0					($mode2 = oct $mode2) if (defined $mode2);
150	0	0					($mode3 = $s->_string2mode ($mode3,$out)) if (defined $mode3);
151
152	0						my $mode = $s->_selectBestNumber
153							($mode2,$mode3, "modeOctal and modeChars", $out);
154
155	0						my $best = $s->_selectBestNumber
156							($mode1, $mode, "mode and modeOctal or modeChars", $out);
157
158	0						return $best;
159							}
160
161							#=============================================================================
162							# File Time ops
163							#=============================================================================
164
165							sub _convertQuoted ($$$$) {
166	0			0			my ($s,$qtime,$str,$out) = @_;
167	0						my $t = undef;
168
169	0	0					if (defined $qtime) {
170	0						$t = HTTP::Date::str2time($qtime);
171	0	0					$s->{'notepad'}->add ("Could not parse ${str}Quoted: \'$qtime\'")
172							if (!defined $t);
173							}
174	0						return $t;
175							}
176
177							#-----------------------------------------------------------------------------
178
179							sub _convertQuotedParts ($$$$) {
180	0			0			my ($s,$qdate,$qtime,$str,$out) = @_;
181	0						my $t = undef;
182
183	0	0	0				if (defined $qdate or defined $qtime) {
184	0	0					$t = HTTP::Date::str2time (((defined $qdate) ? "$qdate " : "") .
		0
185							((defined $qtime) ? $qtime : ""));
186
187	0	0					$s->{'notepad'}->add ("Could not parse ${str}Date + ${str}Time: " .
188							"\'$qdate\' \'$qtime\'")
189							if (!defined $t);
190							}
191	0						return $t;
192							}
193
194							#-----------------------------------------------------------------------------
195
196							sub _selectBestNumber ($$$$$) {
197	0			0			my ($s,$a,$b,$argnames,$out) = @_;
198
199	0	0	0				$s->{'notepad'}->add ("$argnames both present and are different: $a != $b\n")
			0
200							if ((defined $a and defined $b and ($a ne $b)));
201
202	0	0					return (defined $a) ? $a : ((defined $b) ? $b : undef);
		0
203							}
204
205							#-----------------------------------------------------------------------------
206
207							sub _getTime ($$$$$$) {
208	0			0			my ($s,$t1,$qtime,$time3a,$time3b,$str,$out) = @_;
209	0						my ($best,$t2,$t3);
210
211	0						$t2 = $s->_convertQuoted ($qtime, $str,$out);
212	0						$t3 = $s->_convertQuotedParts ($time3a,$time3b,$str,$out);
213
214	0						my $time = $s->_selectBestNumber
215							($t2,$t3, "${str}TimeQuoted and ${str}Date + ${str}Time", $out);
216
217	0						return ($s->_selectBestNumber
218							($time, $t1, "${str}Time and a ${str}Quoted form", $out));
219							}
220
221							#-----------------------------------------------------------------------------
222
223							sub _bestAtime ($$$) {
224	0			0			my ($self,$in,$out) = @_;
225	0						$self->_getTime(@$in{'atimeQuoted','atime','atimeDate','atimeTime'},
226							"atime",$out);
227							}
228
229							sub _bestCtime ($$$) {
230	0			0			my ($self,$in,$out) = @_;
231	0						$self->_getTime(@$in{'ctimeQuoted','ctime','ctimeDate','ctimeTime'},
232							"ctime",$out);
233							}
234
235							sub _bestMtime ($$$) {
236	0			0			my ($self,$in,$out) = @_;
237	0						$self->_getTime(@$in{'mtimeQuoted','mtime','mtimeDate','mtimeTime'},
238							"mtime",$out);
239							}
240
241							#=============================================================================
242							# Uninterpreted fields. Some day I might add validation checking here.
243							#=============================================================================
244
245	0			0			sub _validateUID ($$) {my ($s,$p) = @_; (@$p{'uid','uidName'}); }
	0
246	0			0			sub _validateGID ($$) {my ($s,$p) = @_; (@$p{'gid','gidName'}); }
	0
247
248	0			0			sub _validateSize ($$) {my ($s,$p) = @_; $p->{'sizeBytes'}; }
	0
249	0			0			sub _validateInode ($$) {my ($s,$p) = @_; $p->{'inode'}; }
	0
250	0			0			sub _validateBlocksAllocated ($$) {my ($s,$p) = @_; $p->{'blocksAllocated'};}
	0
251	0			0			sub _validateDeviceSpecialId ($$) {my ($s,$p) = @_; $p->{'deviceSpecialId'};}
	0
252	0			0			sub _validateDeviceNumber ($$) {my ($s,$p) = @_; $p->{'deviceNumber'}; }
	0
253	0			0			sub _validateMD5SUM ($$) {my ($s,$p) = @_; $p->{'md5sum'}; }
	0
254	0			0			sub _validateHardLinks ($$) {my ($s,$p) = @_; $p->{'hardlinks'}; }
	0
255
256	0			0			sub _validateBlocksizePreference ($$) {my ($s,$p) = @_;
257	0						$p->{'blocksizePreference'};}
258							#=============================================================================
259							# CLASS METHODS
260							#=============================================================================
261
262							sub alloc ($$) {
263	0			0	1		my ($class,$line) = @_;
264	0						my $self = bless {}, $class;
265	0						@$self{'fields','format','notepad'} = undef;
266	0						return $self;
267							}
268
269							#=============================================================================
270							# INSTANCE METHODS
271							#=============================================================================
272
273							sub init ($$) {
274	0			0	1		my ($self,$line) = @_;
275
276	0	0					if ($::DEBUG) {
277	0	0					Fault::Logger->arg_check_noref ($line,"formatline") or return undef;
278							}
279
280	0						my @format = split ' ',$line;
281	0						my $fields = $#format+1;
282
283							# If the format line is empty, add a skip that will eat entire the line.
284	0	0					if ($fields == 0) {@format = ("SKIP"); $fields++;}
	0
	0
285
286	0						@$self{'fields','reqd','format','notepad'} = ($fields, $fields, [],
287							Fault::Notepad->new);
288
289	0						foreach (@format) {
290	0	0					if (defined $FieldNames{$_}) {
291	0						push @{$self->{'format'}}, $_;
	0
292							}
293							else {
294	0						$self->{'notepad'}->add ("Invalid Fieldname in Format: $_\n");
295	0						push @{$self->{'format'}}, 'SKIP';
	0
296							}
297							}
298
299							# If the last field is a SKIP, data is not required to be present at
300							# that location.
301							#
302	0	0					if (${$self->{'format'}}[$#format] eq 'SKIP') {$self->{'reqd'}--;}
	0
	0
303	0						return $self;
304							}
305
306							#-----------------------------------------------------------------------------
307							# The lexemes list arg will get checked in the various routines.
308							# NOTE: It will be up to the validate routines to detect if it contains
309							# refs or other non text?
310
311							sub parse ($\@) {
312	0			0	1		my ($self,@lexemes) = @_;
313	0						my $actual_words = $#lexemes+1;
314	0						my ($in,$out);
315
316	0						@$in{keys %FieldNames} = undef;
317	0						$out->{'notepad'} = Fault::Notepad->new;
318
319							# Check for less than reqd fields as it is okay to be missing
320							# a trailing SKIP field.
321							#
322	0	0					Fault::Logger->assertion_check
323							($actual_words < $self->{'reqd'},
324							undef,"Not enough items in line to satisfy format: $_")
325							or return undef;
326
327							# Assign lexemes to their matching field name in the input hash
328	0						@$in{@{$self->{'format'}}} = (@lexemes);
	0
329
330	0						@$out{'device','directory','file',
331							'mode','atime','ctime','mtime',
332							'uid','uidName','gid','gidName',
333							'sizeBytes','inode','hardlinks',
334							'blocksAllocated','blocksizePreference',
335							'deviceSpecialId','deviceNumber','md5sum'} =
336							($self->_bestPath ($in),
337							$self->_bestMode ($in),
338							$self->_bestAtime ($in),
339							$self->_bestCtime ($in),
340							$self->_bestMtime ($in),
341							$self->_validateUID ($in),
342							$self->_validateGID ($in),
343							$self->_validateSize ($in),
344							$self->_validateInode ($in),
345							$self->_validateHardLinks ($in),
346							$self->_validateBlocksAllocated ($in),
347							$self->_validateBlocksizePreference ($in),
348							$self->_validateDeviceSpecialId ($in),
349							$self->_validateDeviceNumber ($in),
350							$self->_validateMD5SUM ($in)
351							);
352
353	0						return $out;
354							}
355
356							#-----------------------------------------------------------------------------
357
358	0			0	1		sub fields ($) {shift->{'fields'};}
359
360							#=============================================================================
361							# POD DOCUMENTATION
362							#=============================================================================
363							# You may extract and format the documention section with the 'perldoc' cmd.
364
365							=head1 NAME
366
367							FileHash::FormatString - Supports parsing of formatted lines of file data.
368
369							=head1 SYNOPSIS
370
371							use FileHash::Formatstring;
372							$obj = FileHash::FormatString->alloc;
373
374							$obj = $obj->init ($formatline);
375							$hash = $obj->parse (@lexemes);
376							$cnt = $obj->fields;
377
378							=head1 Inheritance
379
380							UNIVERSAL
381
382							=head1 Description
383
384							This is an internal class used by FileHashes.
385
386							Format strings are used to map a positionally significant list of
387							lexemes to a set of field names.
388
389							If the format line is empty, the format will default to a single
390							SKIP field which will absorb an entire line of input during parse.
391
392							It was created primarily to make it easy to read assorted
393							dumps of metadata about files that might be hanging around in one's
394							system and which might help to define what files used to be in that
395							directory you just deleted...
396
397							=head1 Field Names
398
399							The following are the field names which may appear in a format string.
400
401							pathQuoted "C:/home/amon/Photo for Dale 00000.jpg"
402							path C:/home/amon/Photo_for_Dale_00000.jpg
403							deviceQuoted "C:"
404							device C:
405							directoryQuoted "/home/amon"
406							directory /home/amon
407							fileQuoted "Photo for Dale 00000.jpg"
408							file Photo_for_Dale_00000.jpg
409							mode 33152
410							modeChars -rw-------
411							modeOctal 0600
412							atime 1214479354
413							atimeQuoted "2008-06-26 12:22"
414							atimeDate 2008-06-26
415							atimeTime 12:22
416							ctime 1203083422
417							ctimeQuoted "2008-02-15 13:50"
418							ctimeDate 2008-02-15
419							ctimeTime 13:50
420							mtime 1124835415
421							mtimeQuoted "2005-08-23 23:16"
422							mtimeDate 2005-08-23
423							mtimeTime 23:16
424							uidName amon
425							uid 1000
426							gidName amon
427							gid 1000
428							hardlinks 1
429							sizeBytes 661340
430							inode 2163352
431							blocksAllocated 1304
432							blocksizePreference 4096
433							deviceSpecialId 0
434							deviceNumber 771
435							md5sum 2d6431f79028879f7aa2976e8222e76e
436							SKIP arbitraryword
437
438							Any space delimited item which does not match one of these items
439							exactly, down to the capitalization, is replaced with the no op
440							field name 'SKIP'. Later, during parsing, this will cause the
441							corresponding item in a list of lexemes to be ignored, ie dumped
442							into the 'SKIP' bucket.
443
444							If field names are repeated in a field string, only the last instance
445							will be meaningful. Parsed values for the earlier tokens are
446							overwritten by later ones. This is also true of 'SKIP' tokens, including
447							ones that are added as replacements for unknown field names.
448
449							If there is likely to be junk at the end of the line, a single SKIP at
450							the end will absorb all of the remaing text to the end of the line.
451
452							If more than one possibility is available for a given bit of
453							information about a file, all should have the same value, but only
454							the 'best' will be selected. The prioritization is done thusly:
455
456							For the path name of the file
457
458							1 pathQuoted
459							2 Path
460							3 1 deviceQuoted 1 directoryQuoted 1 fileQuoted
461							2 device 2 directory 2 file
462
463							The end result will be strings for device,directory and file, and the
464							null string for any that are missing.
465
466							For atime, ctime and mtime:
467
468							1 *time
469							2 *timeQuoted
470							3 1 timeDate 1 timeTime
471
472							For the mode value:
473
474							1 mode
475							2 modeOctal
476							3 modeChars
477
478							If the original line contains incomplete path data, it may
479							be supplied by the calling object pre-pending a pathQuoted or
480							directoryQuoted. If deviceQuoted is not null on the file system
481							and is missing, it should be included.
482
483							=head1 Examples
484
485							use FileHash::FormatString;
486							my $fmt = "modeChars hardlinks uidName gidName sizeBytes mtimeDate mtimeTime file";
487							my $line = "-rwxr-xr-x 1 root root 262 2003-08-23 15:58 20030823-ipsec1";
488							my $a = FileHash::FormatString->alloc;
489
490							$a->init ($fmt);
491							my @lexemes = split $line,$a->fields;
492							$hash = $a->parse (@lexemes);
493
494							=head1 Class Variables
495
496							None.
497
498							=head1 Instance Variables
499
500							fields Number of lexemes required for this line format.
501							format List of field names to match sequentially to lexemes.
502							notepad Notepad object used to record the unexpected.
503
504							=head1 Class Methods
505
506							=over 4
507
508							=item B<$obj = FileHash::FormatString-Ealloc>
509
510							Allocate an empty FormatString object.
511
512							=head1 Instance Methods
513
514							=over 4
515
516							=item B<$cnt = $obj-Efields>
517
518							Returns the number of format fields, including SKIP tokens, expected by
519							this object.
520
521							=item B<$obj = $obj-Einit ($formatline)>
522
523							Initialize a FormatString object. It has one required argument, a format
524							line which contains field names from the list given earlier.
525
526							For example, a format line useable with a current Linux 'ls -l' output
527							line is:
528
529							"modeChars hardlinks uidName gidName sizeBytes mtimeDate mtimeTime file"
530
531							=item B<$hash = $obj-Eparse (@lexemes)>
532
533							Match the format field names one to one with the list of lexemes and
534							then return a hash with the 'best data' from those fields in cases where
535							different fields should contain the same information in different forms.
536
537							The returned hash uses field names suitable for direct insertion in a
538							FileHash::Entry object.
539
540							=back 4
541
542							=head1 Private Class Method
543
544							None.
545
546							=head1 Private Instance Methods
547
548							None.
549
550							=head1 Errors and Warnings
551
552							Lots.
553
554							=head1 KNOWN BUGS
555
556							See TODO.
557
558							=head1 SEE ALSO
559
560							File::Spec, HTTP::Date, Fault::Notepad, Fault::Logger
561
562							=head1 AUTHOR
563
564							Dale Amon
565
566							=cut
567
568							#=============================================================================
569							# CVS HISTORY
570							#=============================================================================
571							# $Log: FormatString.pm,v $
572							# Revision 1.7 2008-08-28 23:35:28 amon
573							# perldoc section regularization.
574							#
575							# Revision 1.6 2008-07-27 15:16:17 amon
576							# Wrote lexical parse for Entry; error checking on eval and other minor issues.
577							#
578							# Revision 1.5 2008-07-25 14:30:42 amon
579							# Documentation improvements and corrections.
580							#
581							# Revision 1.4 2008-07-24 20:19:43 amon
582							# Just in case I missed anything.
583							#
584							# Revision 1.3 2008-07-24 13:35:26 amon
585							# switch to NeXT style alloc/init format for FileHash and Entry classes.
586							#
587							# Revision 1.2 2008-07-23 21:12:24 amon
588							# Moved notes out of file headers; a few doc updates; added assertion checks;
589							# minor bug fixes.
590							#
591							# 20080706 Dale Amon
592							# Created. Used some code from Directory::Entry class
593							1;