line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Spreadsheet::Read::Ingester ; |
2
|
|
|
|
|
|
|
$Spreadsheet::Read::Ingester::VERSION = '0.010'; |
3
|
2
|
|
|
2
|
|
679168
|
use strict; |
|
2
|
|
|
|
|
10
|
|
|
2
|
|
|
|
|
49
|
|
4
|
2
|
|
|
2
|
|
9
|
use warnings; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
43
|
|
5
|
|
|
|
|
|
|
|
6
|
2
|
|
|
2
|
|
538
|
use Storable; |
|
2
|
|
|
|
|
2950
|
|
|
2
|
|
|
|
|
90
|
|
7
|
2
|
|
|
2
|
|
12
|
use File::Spec; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
26
|
|
8
|
2
|
|
|
2
|
|
440
|
use File::Signature; |
|
2
|
|
|
|
|
3235
|
|
|
2
|
|
|
|
|
32
|
|
9
|
2
|
|
|
2
|
|
441
|
use File::UserConfig; |
|
2
|
|
|
|
|
32539
|
|
|
2
|
|
|
|
|
49
|
|
10
|
2
|
|
|
2
|
|
37
|
use base qw (Spreadsheet::Read); |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
717
|
|
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
### Public methods ### |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
# Override constructor |
15
|
|
|
|
|
|
|
sub new { |
16
|
3
|
|
|
3
|
1
|
4766
|
my $s = shift; |
17
|
3
|
|
|
|
|
8
|
my $data = $s->_fetch_data(@_); |
18
|
3
|
|
|
|
|
17
|
return $data; |
19
|
|
|
|
|
|
|
} |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
# wrap functions that require 'Spreadsheet::Read' objects |
22
|
|
|
|
|
|
|
my @funcs = qw (parses rows row col2label cr2cell cell2cr cellrow); |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
foreach my $func (@funcs) { |
25
|
2
|
|
|
2
|
|
404509
|
{ no strict; |
|
2
|
|
|
|
|
9
|
|
|
2
|
|
|
|
|
1529
|
|
26
|
|
|
|
|
|
|
*$func = sub { |
27
|
2
|
|
|
2
|
|
328
|
my $s = shift; |
28
|
2
|
|
|
|
|
5
|
bless $s, 'Spreadsheet::Read'; |
29
|
2
|
|
|
|
|
6
|
my $super_func = "SUPER::$func"; |
30
|
2
|
100
|
|
|
|
6
|
if (wantarray) { |
31
|
1
|
|
|
|
|
6
|
my @result = $s->$super_func(shift); |
32
|
1
|
|
|
|
|
31
|
bless $s, 'Spreadsheet::Read::Ingester'; |
33
|
1
|
|
|
|
|
3
|
return @result; |
34
|
|
|
|
|
|
|
} else { |
35
|
1
|
|
|
|
|
7
|
my $result = $s->$super_func(shift); |
36
|
1
|
|
|
|
|
28
|
bless $s, 'Spreadsheet::Read::Ingester'; |
37
|
1
|
|
|
|
|
8
|
return $result; |
38
|
|
|
|
|
|
|
} |
39
|
|
|
|
|
|
|
} |
40
|
|
|
|
|
|
|
} |
41
|
|
|
|
|
|
|
} |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
# Override add function |
44
|
|
|
|
|
|
|
sub add { |
45
|
1
|
|
|
1
|
1
|
38
|
my $book = shift; |
46
|
1
|
|
|
|
|
3
|
my $data = $book->_fetch_data(@_); |
47
|
|
|
|
|
|
|
$book && (ref $book eq "ARRAY" || |
48
|
1
|
50
|
33
|
|
|
15
|
ref $book eq __PACKAGE__) && $book->[0]{sheets} or return $data; |
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
49
|
|
|
|
|
|
|
|
50
|
1
|
|
|
|
|
3
|
my $c1 = $book->[0]; |
51
|
1
|
|
|
|
|
2
|
my $c2 = $data->[0]; |
52
|
|
|
|
|
|
|
|
53
|
1
|
50
|
|
|
|
3
|
unless ($c1->{parsers}) { |
54
|
0
|
|
|
|
|
0
|
$c1->{parsers}[0]{$_} = $c1->{$_} for qw( type parser version ); |
55
|
0
|
|
|
|
|
0
|
$book->[$_]{parser} = 0 for 1 .. $c1->{sheets}; |
56
|
|
|
|
|
|
|
} |
57
|
1
|
|
|
|
|
2
|
my ($pidx) = (grep { my $p = $c1->{parsers}[$_]; |
58
|
|
|
|
|
|
|
$p->{type} eq $c2->{type} && |
59
|
|
|
|
|
|
|
$p->{parser} eq $c2->{parser} && |
60
|
1
|
50
|
33
|
|
|
2
|
$p->{version} eq $c2->{version} } 0 .. $#{$c1->{parsers}}); |
|
1
|
|
|
|
|
8
|
|
|
1
|
|
|
|
|
4
|
|
61
|
1
|
50
|
|
|
|
4
|
unless (defined $pidx) { |
62
|
0
|
|
|
|
|
0
|
$pidx = scalar @{$c1->{parsers}}; |
|
0
|
|
|
|
|
0
|
|
63
|
0
|
|
|
|
|
0
|
$c1->{parsers}[$pidx]{$_} = $c2->{$_} for qw( type parser version ); |
64
|
|
|
|
|
|
|
} |
65
|
|
|
|
|
|
|
|
66
|
1
|
|
|
|
|
2
|
foreach my $sn (sort { $c2->{sheet}{$a} <=> $c2->{sheet}{$b} } keys %{$c2->{sheet}}) { |
|
0
|
|
|
|
|
0
|
|
|
1
|
|
|
|
|
4
|
|
67
|
1
|
|
|
|
|
2
|
my $s = $sn; |
68
|
1
|
|
|
|
|
3
|
my $v = 2; |
69
|
1
|
|
|
|
|
3
|
while (exists $c1->{sheet}{$s}) { |
70
|
0
|
|
|
|
|
0
|
$s = $sn."[".$v++."]"; |
71
|
|
|
|
|
|
|
} |
72
|
1
|
|
|
|
|
3
|
$c1->{sheet}{$s} = $c1->{sheets} + $c2->{sheet}{$sn}; |
73
|
1
|
|
|
|
|
2
|
$data->[$c2->{sheet}{$sn}]{parser} = $pidx; |
74
|
1
|
|
|
|
|
3
|
push @$book, $data->[$c2->{sheet}{$sn}]; |
75
|
|
|
|
|
|
|
} |
76
|
1
|
|
|
|
|
3
|
$c1->{sheets} += $c2->{sheets}; |
77
|
|
|
|
|
|
|
|
78
|
1
|
|
|
|
|
13
|
return $book; |
79
|
|
|
|
|
|
|
} |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
# Fetch data from stored variable, if available |
82
|
|
|
|
|
|
|
sub _fetch_data { |
83
|
4
|
|
|
4
|
|
8
|
my $s = shift; |
84
|
4
|
|
|
|
|
6
|
my $file = shift; |
85
|
4
|
|
|
|
|
8
|
my @args = @_; |
86
|
|
|
|
|
|
|
|
87
|
4
|
|
|
|
|
7
|
my $sig = ''; |
88
|
4
|
|
|
|
|
7
|
eval { $sig = File::Signature->new($file)->{digest} }; |
|
4
|
|
|
|
|
27
|
|
89
|
|
|
|
|
|
|
|
90
|
4
|
|
|
|
|
492
|
my %args = @args; |
91
|
4
|
|
|
|
|
7
|
my $suffix; |
92
|
4
|
|
|
|
|
17
|
foreach my $key (sort keys %args) { |
93
|
4
|
|
|
|
|
6
|
$suffix .= $key; |
94
|
4
|
|
|
|
|
9
|
$suffix .= $args{$key}; |
95
|
|
|
|
|
|
|
} |
96
|
4
|
100
|
|
|
|
11
|
if ($suffix) { |
97
|
2
|
|
|
|
|
4
|
$sig .= "-$suffix"; |
98
|
|
|
|
|
|
|
} |
99
|
4
|
|
|
|
|
21
|
my $configdir = File::UserConfig->new(dist => 'Spreadsheet-Read-Ingester')->configdir; |
100
|
4
|
|
|
|
|
1670
|
my $parsed_file = File::Spec->catfile($configdir, $sig); |
101
|
|
|
|
|
|
|
|
102
|
4
|
|
|
|
|
9
|
my $data; |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
# try to retrieve parsed data |
105
|
4
|
|
|
|
|
7
|
eval { $data = retrieve $parsed_file }; |
|
4
|
|
|
|
|
13
|
|
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
# otherwise reingest from raw file |
108
|
4
|
100
|
|
|
|
1072
|
if (!$data) { |
109
|
2
|
|
|
|
|
15
|
$data = $s->SUPER::new($file, @_); |
110
|
2
|
|
|
|
|
1978
|
my $error = $data->[0]{error}; |
111
|
2
|
50
|
|
|
|
7
|
die "Unable to read data from file: $file. Error: $error" if $data->[0]{error}; |
112
|
2
|
|
|
|
|
7
|
store $data, $parsed_file; |
113
|
|
|
|
|
|
|
} |
114
|
|
|
|
|
|
|
|
115
|
4
|
|
|
|
|
357
|
return $data; |
116
|
|
|
|
|
|
|
} |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
sub cleanup { |
119
|
3
|
|
|
3
|
1
|
2002668
|
my $s = shift; |
120
|
3
|
|
|
|
|
7
|
my $age = shift; |
121
|
|
|
|
|
|
|
|
122
|
3
|
100
|
|
|
|
21
|
if (!defined $age) { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
123
|
1
|
|
|
|
|
4
|
$age = 30; |
124
|
|
|
|
|
|
|
} elsif ($age eq '0') { |
125
|
1
|
|
|
|
|
4
|
$age = -1 |
126
|
|
|
|
|
|
|
} elsif ($age !~ /^\d+$/) { |
127
|
1
|
|
|
|
|
19
|
warn 'cleanup method accepts only positive integer values or 0'; |
128
|
1
|
|
|
|
|
68
|
return; |
129
|
|
|
|
|
|
|
} |
130
|
|
|
|
|
|
|
|
131
|
2
|
|
|
|
|
19
|
my $configdir = File::UserConfig->new(dist => 'Spreadsheet-Read-Ingester')->configdir; |
132
|
|
|
|
|
|
|
|
133
|
2
|
50
|
|
|
|
1326
|
opendir (DIR, $configdir) or die 'Could not open directory.'; |
134
|
2
|
|
|
|
|
60
|
my @files = readdir (DIR); |
135
|
2
|
|
|
|
|
28
|
closedir (DIR); |
136
|
2
|
|
|
|
|
8
|
foreach my $file (@files) { |
137
|
12
|
|
|
|
|
116
|
$file = File::Spec->catfile($configdir, $file); |
138
|
12
|
100
|
|
|
|
161
|
next if (-d $file); |
139
|
6
|
100
|
|
|
|
86
|
if (-M $file >= $age) { |
140
|
3
|
50
|
|
|
|
186
|
unlink $file or die 'Cannot remove file: $file'; |
141
|
|
|
|
|
|
|
} |
142
|
|
|
|
|
|
|
} |
143
|
|
|
|
|
|
|
} |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
1; # Magic true value |
146
|
|
|
|
|
|
|
# ABSTRACT: ingest and save csv and spreadsheet data to a perl data structure to avoid reparsing |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
__END__ |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
=pod |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
=head1 NAME |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
Spreadsheet::Read::Ingester - ingest and save csv and spreadsheet data to a perl data structure to avoid reparsing |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
=head1 SYNOPSIS |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
use Spreadsheet::Read::Ingester; |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
# ingest raw file, store parsed data file, and return data object |
161
|
|
|
|
|
|
|
my $data = Spreadsheet::Read::Ingester->new('/path/to/file'); |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
# the returned data object has all the methods of a L<Spreadsheet::Read> object |
164
|
|
|
|
|
|
|
my $num_cols = $data->sheet(1)->maxcol; |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
# delete old data files older than 30 days to save disk space |
167
|
|
|
|
|
|
|
Spreadsheet::Read::Ingester->cleanup; |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
=head1 DESCRIPTION |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
This module is intended to be a drop-in replacement for L<Spreadsheet::Read> and |
172
|
|
|
|
|
|
|
is a simple, unobtrusive wrapper for it. |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
Parsing spreadsheet and csv data files is time consuming, especially with large |
175
|
|
|
|
|
|
|
data sets. If a data file is ingested more than once, much time and processing |
176
|
|
|
|
|
|
|
power is wasted reparsing the same data. To avoid reparsing, this module uses |
177
|
|
|
|
|
|
|
L<Storable> to save a parsed version of the data to disk when a new file is |
178
|
|
|
|
|
|
|
ingested. All subsequent ingestions are retrieved from the stored Perl data |
179
|
|
|
|
|
|
|
structure. Files are saved in the directory determined by L<File::UserConfig> |
180
|
|
|
|
|
|
|
and is a function of the user's OS. |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
The stored data file names are the unique file signatures for the raw data file. |
183
|
|
|
|
|
|
|
The signature is used to detect if the original file changed, in which case the |
184
|
|
|
|
|
|
|
data is reingested from the raw file and a new parsed file is saved using an |
185
|
|
|
|
|
|
|
updated file signature. Arguments passed to the constructor are appended to the |
186
|
|
|
|
|
|
|
name of the file to ensure different parse options are accounted for. Parsed |
187
|
|
|
|
|
|
|
data files are kept indefinitely but can be deleted with the C<cleanup()> |
188
|
|
|
|
|
|
|
method. |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
Consult the L<Spreadsheet::Read> documentation for accessing the data object |
191
|
|
|
|
|
|
|
returned by this module. |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
=head1 METHODS |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
=head2 new( $path_to_file ) |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
my $data = Spreadsheet::Read::Ingester->new('/path/to/file'); |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
Takes same arguments as the new constructor in L<Spreadsheet::Read> module. |
200
|
|
|
|
|
|
|
Returns an object identical to the object returned by the L<Spreadsheet::Read> |
201
|
|
|
|
|
|
|
module along with its corresponding methods. |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
=head2 cleanup( $file_age_in_days ) |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
=head2 cleanup() |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
Spreadsheet::Read::Ingester->cleanup(0); |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
Deletes all stored files from the user's application data directory. Takes an |
210
|
|
|
|
|
|
|
optional argument indicating the minimum number of days old the file must be |
211
|
|
|
|
|
|
|
before it is deleted. Defaults to 30 days. Passing a value of 0 deletes all |
212
|
|
|
|
|
|
|
files. |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
=head1 REQUIRES |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
=over 4 |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
=item * L<File::Signature|File::Signature> |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
=item * L<File::Spec|File::Spec> |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
=item * L<File::UserConfig|File::UserConfig> |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
=item * L<Storable|Storable> |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
=item * L<strict|strict> |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
=item * L<warnings|warnings> |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
=back |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
=for :stopwords cpan testmatrix url annocpan anno bugtracker rt cpants kwalitee diff irc mailto metadata placeholders metacpan |
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
=head1 SUPPORT |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
=head2 Perldoc |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
You can find documentation for this module with the perldoc command. |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
perldoc Spreadsheet::Read::Ingester |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
=head2 Websites |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
The following websites have more information about this module, and may be of help to you. As always, |
245
|
|
|
|
|
|
|
in addition to those websites please use your favorite search engine to discover more resources. |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
=over 4 |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
=item * |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
MetaCPAN |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
A modern, open-source CPAN search engine, useful to view POD in HTML format. |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
L<https://metacpan.org/release/Spreadsheet-Read-Ingester> |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
=back |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
=head2 Source Code |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
The code is open to the world, and available for you to hack on. Please feel free to browse it and play |
262
|
|
|
|
|
|
|
with it, or whatever. If you want to contribute patches, please send me a diff or prod me to pull |
263
|
|
|
|
|
|
|
from your repository :) |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
L<https://github.com/sdondley/Spreadsheet-Read-Ingester> |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
git clone git://github.com/sdondley/Spreadsheet-Read-Ingester.git |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
=head1 BUGS |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
Please report any bugs or feature requests on the bugtracker website |
272
|
|
|
|
|
|
|
L<https://github.com/sdondley/Spreadsheet-Read-Ingester/issues> |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
When submitting a bug or request, please include a test-file or a |
275
|
|
|
|
|
|
|
patch to an existing test-file that illustrates the bug or desired |
276
|
|
|
|
|
|
|
feature. |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
=head1 LIMITATIONS |
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
If a new parser is installed (e.g. L<Text::CSV_XS>) and a previous ingestion |
281
|
|
|
|
|
|
|
used a different parser (e.g. L<Text::CSV_PP>), results from the previous parser |
282
|
|
|
|
|
|
|
will be returned. Most likely, this will have no practical consequence. But if |
283
|
|
|
|
|
|
|
you are concerned, you can avoid the problem by specifying the same parser using |
284
|
|
|
|
|
|
|
an environment variable per the L<Spreadsheet::Read> documentation: |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
env SPREADSHEET_READ_CSV=Text::CSV_PP ... |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
Similarly, upgrading to a newer version of a parser can cause the same problem. |
289
|
|
|
|
|
|
|
Currently, the only workaround is to delete the stored data files parsed with |
290
|
|
|
|
|
|
|
the old older parser version. |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
=head1 INSTALLATION |
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
See perlmodinstall for information and options on installing Perl modules. |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
=head1 SEE ALSO |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
L<Spreadsheet::Read> |
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
=head1 AUTHOR |
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
Steve Dondley <s@dondley.com> |
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
This software is copyright (c) 2019 by Steve Dondley. |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
309
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
=cut |