line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Parse::File::Metadata; |
2
|
3
|
|
|
3
|
|
3937
|
use strict; |
|
3
|
|
|
|
|
7
|
|
|
3
|
|
|
|
|
156
|
|
3
|
|
|
|
|
|
|
our $VERSION = '0.07'; |
4
|
3
|
|
|
3
|
|
16
|
use Carp; |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
299
|
|
5
|
3
|
|
|
3
|
|
17
|
use Scalar::Util qw( reftype ); |
|
3
|
|
|
|
|
10
|
|
|
3
|
|
|
|
|
6130
|
|
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 NAME |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
Parse::File::Metadata - For plain-text files that contain both metadata and data records, parse metadata first |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
=head1 SYNOPSIS |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
use Parse::File::Metadata; |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
$metaref = {}; |
16
|
|
|
|
|
|
|
@rules = ( |
17
|
|
|
|
|
|
|
{ |
18
|
|
|
|
|
|
|
rule => sub { exists $metaref->{d}; }, |
19
|
|
|
|
|
|
|
label => q{'d' key must exist}, |
20
|
|
|
|
|
|
|
}, |
21
|
|
|
|
|
|
|
{ |
22
|
|
|
|
|
|
|
rule => sub { $metaref->{d} =~ /^\d+$/; }, |
23
|
|
|
|
|
|
|
label => q{'d' key must be non-negative integer}, |
24
|
|
|
|
|
|
|
}, |
25
|
|
|
|
|
|
|
{ |
26
|
|
|
|
|
|
|
rule => sub { exists $metaref->{f}; }, |
27
|
|
|
|
|
|
|
label => q{'f' key must exist}, |
28
|
|
|
|
|
|
|
}, |
29
|
|
|
|
|
|
|
); |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
$self = Parse::File::Metadata->new( { |
32
|
|
|
|
|
|
|
file => 'path/to/myfile', |
33
|
|
|
|
|
|
|
header_split => '\s*=\s*', |
34
|
|
|
|
|
|
|
metaref => $metaref, |
35
|
|
|
|
|
|
|
rules => \@rules, |
36
|
|
|
|
|
|
|
} ); |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
$dataprocess = sub { my @fields = split /,/, $_[0], -1; print "@fields\n"; }; |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
$self->process_metadata_and_proceed( $dataprocess ); |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
$self->process_metadata_only(); |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
$metadata_out = $self->get_metadata(); |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
$exception = $self->get_exception(); |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
=head1 DESCRIPTION |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
This module is useful when you have to parse a plain-text file that meets the |
51
|
|
|
|
|
|
|
following conditions: |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
=over 4 |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
=item * |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
The file consists of two types of records: |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=over 4 |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
=item * |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
A I section consisting of key-value pairs which constitute, in some |
64
|
|
|
|
|
|
|
sense, I. |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=item * |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
A I section consisting mainly or entirely of I records, which may be either delimited or fixed-width. |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
=item * |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
The header and the body are separated by one or more empty records. |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
=back |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
=item * |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
Your program must parse the metadata first, then make a decision on the basis |
79
|
|
|
|
|
|
|
of the metadata whether to proceed with parsing of the data. The metadata may |
80
|
|
|
|
|
|
|
or may not be used in the parsing of the data. |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=back |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
=head2 Example |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
Below is a plain-text file in which the header consists of key-value pairs |
87
|
|
|
|
|
|
|
delimited by C<=> signs. The key is the to the left of the first delimiter. |
88
|
|
|
|
|
|
|
Everything to the right is part of the value (including any additional |
89
|
|
|
|
|
|
|
delimiter characters). |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
The body consists of comma-delimited strings. Whether in the body or the |
92
|
|
|
|
|
|
|
header, comments begin with a C<#> sign and are ignored. |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
# comment |
95
|
|
|
|
|
|
|
a=alpha |
96
|
|
|
|
|
|
|
b=beta,charlie,delta |
97
|
|
|
|
|
|
|
c=epsilon zeta eta |
98
|
|
|
|
|
|
|
d=1234567890 |
99
|
|
|
|
|
|
|
e=This is a string |
100
|
|
|
|
|
|
|
f=, |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
some,body,loves,me |
103
|
|
|
|
|
|
|
I,wonder,wonder,who |
104
|
|
|
|
|
|
|
could,it,be,you |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
Suppose you are told that you should proceed to parse the body if and only if |
107
|
|
|
|
|
|
|
the following conditions are met in the header: |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
=over 4 |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
=item * There must be a metadata element keyed on C. |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
=item * The value of metadata element C must be a non-negative integer. |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=item * There must be a metadata element keyed on C. |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=back |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
This file would meet all three criteria and the program would proceed to parse |
120
|
|
|
|
|
|
|
the three data records. |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
If, however, metadata element C |
123
|
|
|
|
|
|
|
were commented out: |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
#f=, |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
the file would no longer meet the criteria and the program would cease before |
128
|
|
|
|
|
|
|
parsing the data records. |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=head1 METHODS |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
=head2 C |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=over 4 |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
=item * Purpose |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
Parse::File::Metadata constructor. Validates input. |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
=item * Arguments |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
$self = Parse::File::Metadata->new( { |
143
|
|
|
|
|
|
|
file => 'path/to/myfile', |
144
|
|
|
|
|
|
|
header_split => '\s*=\s*', |
145
|
|
|
|
|
|
|
metaref => $metaref, |
146
|
|
|
|
|
|
|
rules => \@rules, |
147
|
|
|
|
|
|
|
} ); |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
Single hash reference. Hash has the following elements: |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
=over 4 |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=item * C |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
Path, relative or absolute, to the file needing parsing. |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=item * C |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
Hard-quoted string holding a Perl 5 regex to be used for parsing metadata |
160
|
|
|
|
|
|
|
records. |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
=item * C |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
Empty hash-reference. |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
=item * C |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
Reference to an array of hashrefs. Each such hashref has two elements: |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
=over 4 |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
=item * C |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
Reference to a subroutine describing a criterion which the header must pass |
175
|
|
|
|
|
|
|
before parsing of the body begins. The subroutine returns a true value when |
176
|
|
|
|
|
|
|
the criterion is met and an undefined value when the criterion is not met. |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
=item * C |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
A human-friendly string which will be used to populate exceptions if the |
181
|
|
|
|
|
|
|
criteria are not met. |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
=back |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
The rules are applied in the order specified in the array. |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
=back |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
=item * Return Value |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
Parse::File::Metadata object. |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
=back |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
=cut |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
sub new { |
198
|
14
|
|
|
14
|
1
|
12690
|
my ($class, $args) = @_; |
199
|
13
|
|
|
|
|
310
|
croak "Metadata hash must start out empty: $!" |
200
|
|
|
|
|
|
|
unless ( reftype($args->{metaref}) eq 'HASH' and |
201
|
14
|
100
|
100
|
|
|
211
|
! keys %{ $args->{metaref} } ); |
202
|
12
|
100
|
|
|
|
198
|
croak "Rules must be in array ref: $!" |
203
|
|
|
|
|
|
|
unless ( reftype($args->{rules}) eq 'ARRAY' ); |
204
|
|
|
|
|
|
|
|
205
|
11
|
|
|
|
|
30
|
my $self = bless $args, $class; |
206
|
|
|
|
|
|
|
|
207
|
11
|
|
|
|
|
34
|
return $self; |
208
|
|
|
|
|
|
|
} |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
=head2 C |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
=over 4 |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
=item * Purpose |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
Process metadata rows found in file header and test the resulting hash against |
217
|
|
|
|
|
|
|
the criteria specified in the rules. If all criteria are met, proceed to |
218
|
|
|
|
|
|
|
parse the data rows with the subroutine specified as argument to this method. |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
=item * Arguments |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
$dataprocess = sub { my @fields = split /,/, $_[0], -1; print "@fields\n"; }; |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
$self->process_metadata_and_proceed( $dataprocess ); |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
=item * Return Values |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
None. Use C and C methods to obtain that |
229
|
|
|
|
|
|
|
data. |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
=back |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
=cut |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
sub process_metadata_and_proceed { |
236
|
8
|
|
|
8
|
1
|
3829
|
my ($self, $dataprocess) = @_; |
237
|
8
|
100
|
100
|
|
|
331
|
croak "Must define subroutine for processing data rows: $!" |
238
|
|
|
|
|
|
|
unless ( defined($dataprocess) and reftype($dataprocess) eq 'CODE' ); |
239
|
|
|
|
|
|
|
|
240
|
6
|
|
|
|
|
17
|
$self->_process_metadata_engine($dataprocess); |
241
|
|
|
|
|
|
|
} |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
sub _process_metadata_engine { |
244
|
9
|
|
|
9
|
|
14
|
my $self = shift; |
245
|
9
|
|
100
|
|
|
32
|
my $dataprocess = shift || undef; |
246
|
9
|
|
|
|
|
11
|
my $header_seen; |
247
|
9
|
|
|
|
|
16
|
my $exception = []; |
248
|
9
|
50
|
|
|
|
453
|
open my $FILE, '<', $self->{file} |
249
|
|
|
|
|
|
|
or croak "Unable to open file for reading"; |
250
|
9
|
|
|
|
|
220
|
THISFILE: while (my $l = <$FILE>) { |
251
|
96
|
100
|
|
|
|
318
|
next if $l =~ /^#/; |
252
|
84
|
|
|
|
|
301
|
$l =~ s/[\r\n]+$//g; |
253
|
84
|
100
|
|
|
|
175
|
if (! $header_seen) { |
254
|
62
|
100
|
|
|
|
116
|
if ($l eq '') { |
255
|
9
|
|
|
|
|
32
|
$header_seen++; |
256
|
|
|
|
|
|
|
} |
257
|
|
|
|
|
|
|
else { |
258
|
53
|
100
|
|
|
|
505
|
next unless $l =~ /^(.+?)$self->{header_split}(.*)$/; |
259
|
51
|
|
|
|
|
212
|
my ($k, $v) = ($1, $2); |
260
|
51
|
|
|
|
|
252
|
$self->{metaref}->{$k} = $v; |
261
|
|
|
|
|
|
|
} |
262
|
|
|
|
|
|
|
} |
263
|
|
|
|
|
|
|
else { |
264
|
22
|
|
|
|
|
26
|
foreach my $r ( @{ $self->{rules} } ) { |
|
22
|
|
|
|
|
52
|
|
265
|
66
|
100
|
|
|
|
256
|
unless ( &{ $r->{rule} } ) { |
|
66
|
|
|
|
|
174
|
|
266
|
4
|
|
|
|
|
22
|
push @{$exception}, $r->{label}; |
|
4
|
|
|
|
|
14
|
|
267
|
|
|
|
|
|
|
} |
268
|
|
|
|
|
|
|
} |
269
|
22
|
100
|
|
|
|
102
|
last THISFILE if scalar @{$exception}; |
|
22
|
|
|
|
|
56
|
|
270
|
19
|
100
|
|
|
|
63
|
&{ $dataprocess }($l) |
|
13
|
|
|
|
|
29
|
|
271
|
|
|
|
|
|
|
if defined $dataprocess; |
272
|
|
|
|
|
|
|
} |
273
|
|
|
|
|
|
|
} |
274
|
9
|
50
|
|
|
|
180
|
close $FILE or croak "Unable to close"; |
275
|
9
|
|
|
|
|
138
|
$self->{exception} = $exception; |
276
|
|
|
|
|
|
|
}; |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
=head2 C |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
=over 4 |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
=item * Purpose |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
Same as L, except that it returns before |
285
|
|
|
|
|
|
|
beginning any processing of the data records. |
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
=item * Arguments |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
$self->process_metadata_only(); |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
=item * Return Values |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
None. |
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
=back |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
=cut |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
sub process_metadata_only { |
300
|
3
|
|
|
3
|
1
|
2198
|
my $self = shift; |
301
|
3
|
|
|
|
|
10
|
$self->_process_metadata_engine(); |
302
|
|
|
|
|
|
|
} |
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
=head2 C |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
=over 4 |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
=item * Purpose |
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
Access metadata in file's header section. |
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
=item * Arguments |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
$metadata_out = $self->get_metadata() |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
None. |
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
=item * Return Values |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
Hash of metadata found in file's header. |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
=back |
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
=cut |
325
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
sub get_metadata { |
327
|
9
|
|
|
9
|
1
|
41
|
my $self = shift; |
328
|
9
|
|
|
|
|
29
|
return $self->{metaref}; |
329
|
|
|
|
|
|
|
} |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
=head2 C |
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
=over 4 |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
=item * Purpose |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
Access reasons, if any, why file failed to meet specified criteria. |
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
=item * Arguments |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
$exception = $self->get_exception() |
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
None. |
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
=item * Return Values |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
Reference to an array holding lists of C |
348
|
|
|
|
|
|
|
metadata fails. |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
=back |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
=cut |
353
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
sub get_exception { |
355
|
9
|
|
|
9
|
1
|
49
|
my $self = shift; |
356
|
9
|
|
|
|
|
28
|
return $self->{exception}; |
357
|
|
|
|
|
|
|
} |
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
=head1 SUPPORT |
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
L |
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
=head1 AUTHOR |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
James E Keenan |
366
|
|
|
|
|
|
|
CPAN ID: jkeenan |
367
|
|
|
|
|
|
|
Perl Seminar NY |
368
|
|
|
|
|
|
|
jkeenan@cpan.org |
369
|
|
|
|
|
|
|
http://thenceforward.net/perl/modules/Parse-File-Metadata |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
=head1 COPYRIGHT |
372
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
Copyright 2010 James E Keenan |
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
This program is free software; you can redistribute |
376
|
|
|
|
|
|
|
it and/or modify it under the same terms as Perl itself. |
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
The full text of the license can be found in the |
379
|
|
|
|
|
|
|
LICENSE file included with this module. |
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
=head1 SEE ALSO |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
perl(1). |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
=cut |