line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Spreadsheet::ExcelTableReader; |
2
|
4
|
|
|
4
|
|
6390
|
use Moo 2; |
|
4
|
|
|
|
|
57850
|
|
|
4
|
|
|
|
|
26
|
|
3
|
4
|
|
|
4
|
|
11490
|
use Spreadsheet::ParseExcel; |
|
4
|
|
|
|
|
288958
|
|
|
4
|
|
|
|
|
150
|
|
4
|
4
|
|
|
4
|
|
57
|
use Spreadsheet::ParseExcel::Utility 'int2col'; |
|
4
|
|
|
|
|
6
|
|
|
4
|
|
|
|
|
216
|
|
5
|
4
|
|
|
4
|
|
3290
|
use Spreadsheet::XLSX; |
|
4
|
|
|
|
|
346864
|
|
|
4
|
|
|
|
|
164
|
|
6
|
4
|
|
|
4
|
|
877
|
use Log::Any '$log'; |
|
4
|
|
|
|
|
5514
|
|
|
4
|
|
|
|
|
35
|
|
7
|
4
|
|
|
4
|
|
9894
|
use Spreadsheet::ExcelTableReader::Field; |
|
4
|
|
|
|
|
15
|
|
|
4
|
|
|
|
|
141
|
|
8
|
4
|
|
|
4
|
|
30
|
use Carp 'croak'; |
|
4
|
|
|
|
|
8
|
|
|
4
|
|
|
|
|
199
|
|
9
|
4
|
|
|
4
|
|
21
|
use IO::Handle; |
|
4
|
|
|
|
|
6
|
|
|
4
|
|
|
|
|
13658
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
our $VERSION= '0.000001_002'; |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
# ABSTRACT: Module to extract a table from somewhere within an Excel spreadsheet |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
has file => ( is => 'ro' ); |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
has sheet => ( is => 'ro' ); |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
# Arrayref of all sheets we can search |
22
|
|
|
|
|
|
|
has _sheets => ( is => 'lazy' ); |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
has fields => ( is => 'ro', required => 1, coerce => \&_coerce_field_list ); |
26
|
17
|
|
|
17
|
1
|
582
|
sub field_list { @{ shift->fields } } |
|
17
|
|
|
|
|
71
|
|
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
has find_table_args => ( is => 'rw' ); |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
has _table_location => ( is => 'rw', lazy_build => 1 ); |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
sub _build__sheets { |
34
|
21
|
|
|
21
|
|
1853
|
my $self= shift; |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
# If we have ->sheet and it is a worksheet object, then no need to do anything else |
37
|
21
|
50
|
66
|
|
|
204
|
if ($self->sheet && ref($self->sheet) && ref($self->sheet)->can('get_cell')) { |
|
|
|
66
|
|
|
|
|
38
|
9
|
|
|
|
|
195
|
return [ $self->sheet ]; |
39
|
|
|
|
|
|
|
} |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
# Else we need to scan sheets from the excel file. Make sure we have the file |
42
|
12
|
|
|
|
|
41
|
my $wbook= $self->_open_workbook($self->file); |
43
|
12
|
|
|
|
|
72
|
my @sheets= $wbook->worksheets; |
44
|
12
|
50
|
|
|
|
98
|
@sheets or croak "No worksheets in file?"; |
45
|
12
|
50
|
|
|
|
57
|
if (defined $self->sheet) { |
46
|
0
|
0
|
|
|
|
0
|
if (ref($self->sheet) eq 'Regexp') { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
47
|
0
|
|
|
|
|
0
|
@sheets= grep { $_->get_name =~ $self->sheet } @sheets; |
|
0
|
|
|
|
|
0
|
|
48
|
|
|
|
|
|
|
} elsif (ref($self->sheet) eq 'CODE') { |
49
|
0
|
|
|
|
|
0
|
@sheets= grep { $self->sheet->($_) } @sheets; |
|
0
|
|
|
|
|
0
|
|
50
|
|
|
|
|
|
|
} elsif (!ref $self->sheet) { |
51
|
0
|
|
|
|
|
0
|
@sheets= grep { $_->get_name eq $self->sheet } @sheets; |
|
0
|
|
|
|
|
0
|
|
52
|
|
|
|
|
|
|
} else { |
53
|
0
|
|
|
|
|
0
|
croak "Unknown type of sheet specification: ".$self->sheet; |
54
|
|
|
|
|
|
|
} |
55
|
|
|
|
|
|
|
} |
56
|
12
|
50
|
|
|
|
28
|
@sheets or croak "No worksheets match the specification"; |
57
|
|
|
|
|
|
|
|
58
|
12
|
|
|
|
|
398
|
return \@sheets; |
59
|
|
|
|
|
|
|
} |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
sub _open_workbook { |
62
|
12
|
|
|
12
|
|
54
|
my ($self, $f)= @_; |
63
|
|
|
|
|
|
|
|
64
|
12
|
50
|
|
|
|
33
|
defined $f or croak "workbook file is undefined"; |
65
|
|
|
|
|
|
|
|
66
|
12
|
|
|
|
|
16
|
my $wbook; |
67
|
12
|
50
|
66
|
|
|
92
|
if (ref($f) && ref($f)->can('worksheets')) { |
68
|
0
|
|
|
|
|
0
|
$wbook= $f; |
69
|
|
|
|
|
|
|
} else { |
70
|
12
|
|
|
|
|
15
|
my $type= "xlsx"; |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
# Probe the file to determine type |
73
|
12
|
100
|
100
|
|
|
352
|
if (ref($f) eq 'GLOB' or ref($f) && ref($f)->can('read')) { |
|
|
50
|
66
|
|
|
|
|
74
|
4
|
|
|
|
|
27
|
my $fpos= $f->tell; |
75
|
4
|
50
|
|
|
|
37
|
$fpos >= 0 or croak "File handle must be seekable"; |
76
|
4
|
50
|
|
|
|
23
|
$f->read(my $buf, 4) == 4 or croak "read($f,4): $!"; |
77
|
4
|
50
|
|
|
|
130
|
$f->seek($fpos, 0) or croak "failed to seek back to start of file"; |
78
|
4
|
100
|
|
|
|
45
|
$type= 'xls' if $buf eq "\xD0\xCF\x11\xE0"; |
79
|
|
|
|
|
|
|
} |
80
|
|
|
|
|
|
|
elsif (-e $f) { |
81
|
8
|
|
|
|
|
92
|
$f= "$f"; # force stringification |
82
|
8
|
50
|
|
|
|
377
|
open my $fh, '<', $f or croak "open($f): $!"; |
83
|
8
|
50
|
|
|
|
163
|
read($fh, my $buf, 4) == 4 or croak "read($f,4): $!"; |
84
|
8
|
100
|
|
|
|
282
|
$type= 'xls' if $buf eq "\xD0\xCF\x11\xE0"; |
85
|
|
|
|
|
|
|
} |
86
|
|
|
|
|
|
|
else { |
87
|
0
|
0
|
|
|
|
0
|
$log->notice("Can't determine parser for '$f', guessing '$type'") if $log->is_notice; |
88
|
|
|
|
|
|
|
} |
89
|
|
|
|
|
|
|
|
90
|
12
|
100
|
|
|
|
32
|
if ($type eq 'xlsx') { |
91
|
|
|
|
|
|
|
# Spreadsheet::XLSX uses Archive::Zip which can *only* work on IO::Handle |
92
|
|
|
|
|
|
|
# instances, not plain globrefs. (seems like a bug, hm) |
93
|
6
|
100
|
|
|
|
24
|
if (ref($f) eq 'GLOB') { |
94
|
1
|
|
|
|
|
8
|
require IO::File; |
95
|
1
|
|
|
|
|
9
|
my $f_obj= IO::File->new; |
96
|
1
|
50
|
|
|
|
49
|
$f_obj->fdopen($f, 'r') or croak "Can't convert GLOBref to IO::File"; |
97
|
1
|
|
|
|
|
78
|
$f= $f_obj; |
98
|
|
|
|
|
|
|
} |
99
|
6
|
|
|
|
|
51
|
$wbook= Spreadsheet::XLSX->new($f); |
100
|
|
|
|
|
|
|
} else { |
101
|
6
|
|
|
|
|
50
|
$wbook= Spreadsheet::ParseExcel->new->parse($f); |
102
|
|
|
|
|
|
|
} |
103
|
12
|
50
|
|
|
|
317956
|
defined $wbook or croak "Can't parse file '".$self->file."'"; |
104
|
|
|
|
|
|
|
} |
105
|
12
|
|
|
|
|
39
|
return $wbook; |
106
|
|
|
|
|
|
|
} |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
sub _build__table_location { |
109
|
0
|
|
|
0
|
|
0
|
my $self= shift; |
110
|
0
|
|
|
|
|
0
|
my $args= $self->find_table_args; |
111
|
0
|
0
|
|
|
|
0
|
$self->find_table( !$args? () : (ref($args) eq 'ARRAY')? @$args : %$args ) |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
112
|
|
|
|
|
|
|
or croak "No match for table header in excel file"; |
113
|
0
|
|
|
|
|
0
|
$self->{_table_location}; # find_table sets the value already, in a slight violation of this builder method. |
114
|
|
|
|
|
|
|
} |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
sub _coerce_field_list { |
117
|
21
|
|
|
21
|
|
44840
|
my ($list)= @_; |
118
|
21
|
50
|
33
|
|
|
160
|
defined $list and ref $list eq 'ARRAY' or croak "'fields' must be a non-empty arrayref"; |
119
|
21
|
|
|
|
|
59
|
my @list= @$list; # clone it, to make sure we don't unexpectedly alter the caller's data |
120
|
21
|
|
|
|
|
45
|
for (@list) { |
121
|
81
|
100
|
|
|
|
602
|
if (!ref $_) { |
|
|
50
|
|
|
|
|
|
122
|
61
|
|
|
|
|
2277
|
$_= Spreadsheet::ExcelTableReader::Field->new( |
123
|
|
|
|
|
|
|
name => $_, |
124
|
|
|
|
|
|
|
header => qr/^\s*\Q$_\E\s*$/i, |
125
|
|
|
|
|
|
|
); |
126
|
|
|
|
|
|
|
} elsif (ref $_ eq 'HASH') { |
127
|
20
|
|
|
|
|
69
|
my %args= %$_; |
128
|
|
|
|
|
|
|
# "isa" alias for the 'type' attribute |
129
|
20
|
50
|
33
|
|
|
64
|
$args{type}= delete $args{isa} if defined $args{isa} && !defined $args{type}; |
130
|
|
|
|
|
|
|
# default header to field name with optional whitespace |
131
|
20
|
100
|
|
|
|
203
|
$args{header}= qr/^\s*\Q$args{name}\E\s*$/i unless defined $args{header}; |
132
|
20
|
|
|
|
|
451
|
$_= Spreadsheet::ExcelTableReader::Field->new( %args ) |
133
|
|
|
|
|
|
|
} else { |
134
|
0
|
|
|
|
|
0
|
croak "Can't coerce '$_' to a Field object" |
135
|
|
|
|
|
|
|
} |
136
|
|
|
|
|
|
|
} |
137
|
21
|
|
|
|
|
532
|
return \@list; |
138
|
|
|
|
|
|
|
} |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
sub BUILD { |
142
|
21
|
|
|
21
|
0
|
376
|
my $self= shift; |
143
|
|
|
|
|
|
|
# Any errors getting the list of searchable worksheets should happen now, during construction time |
144
|
21
|
|
|
|
|
359
|
$self->_sheets; |
145
|
|
|
|
|
|
|
} |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
sub _cell_name { |
149
|
243
|
|
|
243
|
|
348
|
my ($row, $col)= @_; |
150
|
243
|
|
|
|
|
513
|
return int2col($col).($row+1); |
151
|
|
|
|
|
|
|
} |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
sub find_table { |
154
|
12
|
|
|
12
|
1
|
127
|
my $self= shift; |
155
|
|
|
|
|
|
|
|
156
|
12
|
|
|
|
|
16
|
my $location; |
157
|
12
|
|
|
|
|
17
|
my @sheets= @{$self->_sheets}; |
|
12
|
|
|
|
|
233
|
|
158
|
12
|
|
|
|
|
108
|
my @fields= $self->field_list; |
159
|
12
|
|
|
|
|
24
|
my $num_required_fields= grep { $_->required } @fields; |
|
47
|
|
|
|
|
100
|
|
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
# Algorithm is O(N^4) in worst case, but the regex should make it more like O(N^2) in most |
162
|
|
|
|
|
|
|
# real world cases. The worst case would be if every row of every sheet of the workbook almost |
163
|
|
|
|
|
|
|
# matched the header row (which could happen with extremely lax field header patterns) |
164
|
12
|
|
|
|
|
16
|
my $header_regex= qr/(?:@{[ join('|', map { $_->header_regex } @fields) ]})/ms; |
|
12
|
|
|
|
|
20
|
|
|
47
|
|
|
|
|
869
|
|
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
# Scan top-down across all sheets at once, since headers are probably at the top of the document. |
167
|
12
|
|
|
|
|
33
|
my $row= 0; |
168
|
12
|
|
|
|
|
20
|
my $in_range= 1; # flag turns false if we pass the bottom of all sheets |
169
|
12
|
|
|
|
|
29
|
row_loop: while ($in_range) { |
170
|
14
|
|
|
|
|
18
|
$in_range= 0; |
171
|
14
|
|
|
|
|
28
|
for my $sheet (@sheets) { |
172
|
14
|
50
|
|
|
|
63
|
$log->trace("row $row sheet $sheet") if $log->is_trace; |
173
|
14
|
|
|
|
|
136
|
my %field_found; |
174
|
14
|
|
|
|
|
45
|
my ($rmin, $rmax)= $sheet->row_range(); |
175
|
14
|
|
|
|
|
104
|
my ($cmin, $cmax)= $sheet->col_range(); |
176
|
14
|
100
|
66
|
|
|
136
|
next unless $row >= $rmin && $row <= $rmax; |
177
|
12
|
|
|
|
|
14
|
$in_range++; |
178
|
12
|
50
|
|
|
|
29
|
my @row_vals= map { my $c= $sheet->get_cell($row, $_); $c? $c->value : '' } 0..$cmax; |
|
53
|
|
|
|
|
265
|
|
|
53
|
|
|
|
|
514
|
|
179
|
12
|
|
|
|
|
74
|
my $match_count= grep { $_ =~ $header_regex } @row_vals; |
|
53
|
|
|
|
|
235
|
|
180
|
12
|
|
|
|
|
74
|
$log->trace("str=@row_vals, regex=$header_regex, match_count=$match_count"); |
181
|
12
|
50
|
|
|
|
262
|
if ($match_count >= $num_required_fields) { |
182
|
12
|
|
|
|
|
38
|
my $field_col= $self->_resolve_field_columns($sheet, $row, \@row_vals); |
183
|
12
|
100
|
|
|
|
40
|
if ($field_col) { |
184
|
10
|
|
|
|
|
40
|
$location= { |
185
|
|
|
|
|
|
|
sheet => $sheet, |
186
|
|
|
|
|
|
|
header_row => $row, |
187
|
|
|
|
|
|
|
min_row => $row+1, |
188
|
|
|
|
|
|
|
field_col => $field_col, |
189
|
|
|
|
|
|
|
}; |
190
|
10
|
|
|
|
|
39
|
last row_loop; |
191
|
|
|
|
|
|
|
} |
192
|
|
|
|
|
|
|
} |
193
|
|
|
|
|
|
|
} |
194
|
4
|
|
|
|
|
9
|
++$row; |
195
|
|
|
|
|
|
|
} |
196
|
|
|
|
|
|
|
|
197
|
12
|
100
|
|
|
|
41
|
return '' unless defined $location; |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
# Calculate a few more fields for location |
200
|
10
|
|
|
|
|
14
|
my @cols_used= sort { $a <=> $b } values %{ $location->{field_col} }; |
|
53
|
|
|
|
|
102
|
|
|
10
|
|
|
|
|
55
|
|
201
|
10
|
|
|
|
|
23
|
$location->{min_col}= $cols_used[0]; |
202
|
10
|
|
|
|
|
19
|
$location->{max_col}= $cols_used[-1]; |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
# Maybe should look for the last row containing data for our columns, but that seems expensive... |
205
|
10
|
|
|
|
|
38
|
$location->{max_row}= ($location->{sheet}->row_range())[1]; |
206
|
|
|
|
|
|
|
|
207
|
10
|
|
|
|
|
70
|
$location->{start_cell}= _cell_name($location->{min_row}, $location->{min_col}); |
208
|
10
|
|
|
|
|
122
|
$location->{end_cell}= _cell_name($location->{min_col}, $location->{max_col}); |
209
|
10
|
|
|
|
|
122
|
$self->_table_location($location); |
210
|
|
|
|
|
|
|
|
211
|
10
|
|
|
|
|
45
|
return 1; |
212
|
|
|
|
|
|
|
} |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
sub _resolve_field_columns { |
215
|
12
|
|
|
12
|
|
20
|
my ($self, $sheet, $row, $row_vals)= @_; |
216
|
12
|
|
|
|
|
15
|
my %col_map; |
217
|
|
|
|
|
|
|
my %field_found; |
218
|
12
|
|
|
|
|
33
|
my $fields= $self->fields; |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
# Try each cell to see if it matches each field's header |
221
|
12
|
|
|
|
|
33
|
for my $col (0..$#$row_vals) { |
222
|
53
|
|
|
|
|
344
|
my $v= $row_vals->[$col]; |
223
|
53
|
50
|
33
|
|
|
228
|
next unless defined $v and length $v; |
224
|
53
|
|
|
|
|
86
|
for my $field (@$fields) { |
225
|
224
|
100
|
|
|
|
5103
|
push @{ $field_found{$field->name} }, $col |
|
68
|
|
|
|
|
792
|
|
226
|
|
|
|
|
|
|
if $v =~ $field->header_regex; |
227
|
|
|
|
|
|
|
} |
228
|
|
|
|
|
|
|
} |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
# Is there one and only one mapping of fields to columns? |
231
|
12
|
|
|
|
|
44
|
my $ambiguous= 0; |
232
|
12
|
|
|
|
|
35
|
my @todo= @$fields; |
233
|
12
|
|
|
|
|
31
|
while (@todo) { |
234
|
61
|
|
|
|
|
90
|
my $field= shift @todo; |
235
|
61
|
100
|
|
|
|
182
|
next unless defined $field_found{$field->name}; |
236
|
60
|
|
|
|
|
110
|
my $possible= $field_found{$field->name}; |
237
|
60
|
|
|
|
|
97
|
my @available= grep { !defined $col_map{$_} } @$possible; |
|
125
|
|
|
|
|
329
|
|
238
|
125
|
|
|
|
|
711
|
$log->trace("ambiguous=$ambiguous : field ".$field->name." could be ".join(',', map { _cell_name($row,$_) } @$possible) |
239
|
60
|
|
|
|
|
191
|
." and ".join(',', map { _cell_name($row,$_) } @available)." are available"); |
|
98
|
|
|
|
|
983
|
|
240
|
60
|
50
|
|
|
|
1420
|
if (!@available) { |
|
|
100
|
|
|
|
|
|
241
|
|
|
|
|
|
|
# It is possible that two fields claim the same columns and one is required |
242
|
0
|
0
|
|
|
|
0
|
if ($field->required) { |
243
|
0
|
|
|
|
|
0
|
my $col= $possible->[0]; |
244
|
0
|
0
|
|
|
|
0
|
$log->debug("Field ".$field->name." and ".$col_map{$col}." would both claim "._cell_name($row, $col)) |
245
|
|
|
|
|
|
|
if $log->is_debug; |
246
|
0
|
|
|
|
|
0
|
return; |
247
|
|
|
|
|
|
|
} |
248
|
|
|
|
|
|
|
} |
249
|
|
|
|
|
|
|
elsif (@available > 1) { |
250
|
|
|
|
|
|
|
# It is possible for a field to match more than one column. |
251
|
|
|
|
|
|
|
# If so, we send it to the back of the list in case another more specific |
252
|
|
|
|
|
|
|
# column claims one of the options. |
253
|
16
|
100
|
|
|
|
38
|
if (++$ambiguous > @todo) { |
254
|
2
|
50
|
|
|
|
7
|
$log->debug("Can't decide between ".join(', ', map { _cell_name($row,$_) } @available)." for field ".$field->name) |
|
0
|
|
|
|
|
0
|
|
255
|
|
|
|
|
|
|
if $log->is_debug; |
256
|
2
|
|
|
|
|
22
|
return; |
257
|
|
|
|
|
|
|
} |
258
|
14
|
|
|
|
|
47
|
push @todo, $field; |
259
|
|
|
|
|
|
|
} |
260
|
|
|
|
|
|
|
else { |
261
|
44
|
|
|
|
|
120
|
$col_map{$available[0]}= $field->name; |
262
|
44
|
|
|
|
|
139
|
$ambiguous= 0; # made progress, start counting over again |
263
|
|
|
|
|
|
|
} |
264
|
|
|
|
|
|
|
} |
265
|
|
|
|
|
|
|
# Success! convert the col map to an array of col-index-per-field |
266
|
10
|
50
|
|
|
|
31
|
$log->debug("Found headers at ".join(' ', map { _cell_name($row,$_) } sort keys %col_map)) |
|
0
|
|
|
|
|
0
|
|
267
|
|
|
|
|
|
|
if $log->is_debug; |
268
|
10
|
|
|
|
|
136
|
return { reverse %col_map }; |
269
|
|
|
|
|
|
|
} |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
sub table_location { |
273
|
6
|
|
|
6
|
1
|
31
|
my ($self)= @_; |
274
|
6
|
50
|
|
|
|
15
|
return undef unless defined $self->{_table_location}; |
275
|
|
|
|
|
|
|
# Deep-clone the location |
276
|
6
|
|
|
|
|
8
|
my %loc= %{ $self->_table_location }; |
|
6
|
|
|
|
|
56
|
|
277
|
6
|
|
|
|
|
11
|
$loc{field_col}= { %{ $loc{field_col} } }; |
|
6
|
|
|
|
|
25
|
|
278
|
6
|
|
|
|
|
28
|
return \%loc; |
279
|
|
|
|
|
|
|
} |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
sub record_count { |
283
|
0
|
|
|
0
|
1
|
0
|
my $self= shift; |
284
|
0
|
0
|
|
|
|
0
|
return 0 unless defined $self->_table_location; |
285
|
0
|
|
|
|
|
0
|
return $self->_table_location->{max_row} - $self->_table_location->{min_row} + 1; |
286
|
|
|
|
|
|
|
} |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
sub records { |
290
|
4
|
|
|
4
|
1
|
13
|
my ($self, %opts)= @_; |
291
|
4
|
|
|
|
|
17
|
my $i= $self->iterator(%opts); |
292
|
4
|
|
|
|
|
8
|
my @records; |
293
|
4
|
|
|
|
|
9
|
while (my $r= $i->()) { push @records, $r; } |
|
12
|
|
|
|
|
32
|
|
294
|
4
|
|
|
|
|
18
|
return \@records; |
295
|
|
|
|
|
|
|
} |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
|
298
|
4
|
|
|
4
|
1
|
24
|
sub record_arrays { shift->records(as => 'array', @_) } |
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
our %_Iterators; |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
sub iterator { |
304
|
4
|
|
|
4
|
1
|
9
|
my ($self, %opts)= @_; |
305
|
4
|
|
|
|
|
15
|
my ($as, $blank_row, $on_error)= delete @opts{'as','blank_row','on_error'}; |
306
|
4
|
50
|
|
|
|
14
|
croak "Unknown option(s) to iterator: ".join(', ', keys %opts) |
307
|
|
|
|
|
|
|
if keys %opts; |
308
|
|
|
|
|
|
|
|
309
|
4
|
50
|
|
|
|
11
|
$as= 'hash' unless defined $as; |
310
|
4
|
|
|
|
|
5
|
my $hash= ($as eq 'hash'); |
311
|
|
|
|
|
|
|
|
312
|
4
|
50
|
|
|
|
10
|
$blank_row= 'end' unless defined $blank_row; |
313
|
4
|
|
|
|
|
7
|
my $skip_blank_row= ($blank_row eq 'skip'); |
314
|
4
|
|
|
|
|
8
|
my $end_blank_row= ($blank_row eq 'end'); |
315
|
|
|
|
|
|
|
|
316
|
4
|
|
|
|
|
9
|
my $sheet= $self->_table_location->{sheet}; |
317
|
4
|
|
|
|
|
10
|
my $min_row= $self->_table_location->{min_row}; |
318
|
4
|
|
|
|
|
5
|
my $row= $min_row - 1; |
319
|
4
|
|
|
|
|
4
|
my $col; |
320
|
4
|
|
|
|
|
10
|
my $min_col= $self->_table_location->{min_col}; |
321
|
4
|
|
|
|
|
10
|
my $remaining= $self->_table_location->{max_row} - $self->_table_location->{min_row} + 1; |
322
|
4
|
|
|
|
|
4
|
my $is_blank_row; |
323
|
4
|
|
|
|
|
6
|
my %field_col= %{ $self->_table_location->{field_col} }; |
|
4
|
|
|
|
|
19
|
|
324
|
4
|
|
|
|
|
8
|
my (@result_keys, @field_extractors, @validations); |
325
|
4
|
|
|
|
|
10
|
for my $field ($self->field_list) { |
326
|
16
|
|
|
|
|
27
|
my $blank= $field->blank; |
327
|
16
|
|
|
|
|
34
|
my $src_col= $field_col{$field->name}; |
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
# Don't need an extractor for fields not found in the table if result type is hash, |
330
|
|
|
|
|
|
|
# but if result type is array we need to pad with a null value |
331
|
16
|
50
|
|
|
|
33
|
if (!defined $src_col) { |
332
|
0
|
0
|
|
0
|
|
0
|
$hash or push @field_extractors, sub { undef; }; |
|
0
|
|
|
|
|
0
|
|
333
|
0
|
|
|
|
|
0
|
next; |
334
|
|
|
|
|
|
|
} |
335
|
|
|
|
|
|
|
|
336
|
16
|
50
|
|
|
|
31
|
push @result_keys, $field->name if $hash; |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
# If trimming, use a different implementation than if not, for a little efficiency |
339
|
|
|
|
|
|
|
push @field_extractors, $field->trim? |
340
|
|
|
|
|
|
|
sub { |
341
|
24
|
|
|
24
|
|
63
|
my $v= $sheet->get_cell($row, $src_col); |
342
|
24
|
50
|
|
|
|
233
|
return $blank unless defined $v; |
343
|
24
|
|
|
|
|
52
|
$v= $v->value; |
344
|
24
|
|
|
|
|
156
|
$v =~ s/^\s*(.*?)\s*$/$1/; |
345
|
24
|
50
|
|
|
|
55
|
return $blank unless length $v; |
346
|
24
|
|
|
|
|
26
|
$is_blank_row= 0; |
347
|
24
|
|
|
|
|
53
|
$v; |
348
|
|
|
|
|
|
|
} |
349
|
|
|
|
|
|
|
: |
350
|
|
|
|
|
|
|
sub { |
351
|
24
|
|
|
24
|
|
61
|
my $v= $sheet->get_cell($row, $src_col); |
352
|
24
|
50
|
33
|
|
|
270
|
defined $v && length($v= $v->value) |
353
|
|
|
|
|
|
|
or return $blank; |
354
|
24
|
|
|
|
|
148
|
$is_blank_row= 0; |
355
|
24
|
|
|
|
|
56
|
$v; |
356
|
16
|
100
|
|
|
|
87
|
}; |
357
|
|
|
|
|
|
|
|
358
|
16
|
50
|
|
|
|
59
|
if (defined (my $type= $field->type)) { |
359
|
|
|
|
|
|
|
# This sub will access the values array at the same position as the current field_extractor |
360
|
0
|
|
|
|
|
0
|
my $idx= $#field_extractors; |
361
|
|
|
|
|
|
|
push @validations, sub { |
362
|
0
|
0
|
|
0
|
|
0
|
return if $type->check($_[0][$idx]); |
363
|
0
|
|
|
|
|
0
|
$col= $src_col; # so the iterator->col reports the column of the error |
364
|
0
|
|
|
|
|
0
|
croak "Not a ".$type->name." at cell "._cell_name($row, $col); |
365
|
0
|
|
|
|
|
0
|
}; |
366
|
|
|
|
|
|
|
} |
367
|
|
|
|
|
|
|
} |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
# Closure over everything, for very fast access |
370
|
|
|
|
|
|
|
my $sub= sub { |
371
|
16
|
|
|
16
|
|
102
|
$log->trace("iterator: remaining=$remaining row=$row sheet=$sheet"); |
372
|
16
|
100
|
|
|
|
252
|
again: |
373
|
|
|
|
|
|
|
return unless $remaining > 0; |
374
|
12
|
|
|
|
|
13
|
++$row; |
375
|
12
|
|
|
|
|
17
|
$col= $min_col; |
376
|
12
|
|
|
|
|
13
|
--$remaining; |
377
|
12
|
|
|
|
|
14
|
$is_blank_row= 1; # This var is closured, and gets set to 0 by the next line |
378
|
12
|
|
|
|
|
20
|
my @values= map { $_->() } @field_extractors; |
|
48
|
|
|
|
|
80
|
|
379
|
12
|
50
|
33
|
|
|
34
|
goto again if $skip_blank_row && $is_blank_row; |
380
|
12
|
50
|
33
|
|
|
54
|
if ($end_blank_row && $is_blank_row) { |
381
|
0
|
|
|
|
|
0
|
$remaining= 0; |
382
|
0
|
|
|
|
|
0
|
return; |
383
|
|
|
|
|
|
|
} |
384
|
12
|
|
|
|
|
23
|
$_->(\@values) for @validations; # This can die. It can also be an empty list. |
385
|
12
|
50
|
|
|
|
43
|
return $hash? do { my %r; @r{@result_keys}= @values; \%r } : \@values; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
386
|
4
|
|
|
|
|
19
|
}; |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
# Blessed coderef, so we can call methods on it |
389
|
4
|
|
|
|
|
17
|
bless $sub, 'Spreadsheet::ExcelTableReader::Iterator'; |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
# Store references to all the closered variables so the methods can access them |
392
|
|
|
|
|
|
|
$_Iterators{$sub}= { |
393
|
|
|
|
|
|
|
r_sheet => \$sheet, |
394
|
|
|
|
|
|
|
r_row => \$row, |
395
|
|
|
|
|
|
|
r_col => \$col, |
396
|
|
|
|
|
|
|
r_remaining => \$remaining, |
397
|
|
|
|
|
|
|
min_row => $self->_table_location->{min_row}, |
398
|
|
|
|
|
|
|
max_row => $self->_table_location->{max_row}, |
399
|
4
|
|
|
|
|
38
|
}; |
400
|
|
|
|
|
|
|
|
401
|
4
|
|
|
|
|
17
|
return $sub; |
402
|
|
|
|
|
|
|
} |
403
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
package Spreadsheet::ExcelTableReader::Iterator; |
405
|
|
|
|
|
|
|
|
406
|
4
|
|
|
4
|
|
78
|
sub DESTROY { delete $_Iterators{$_[0]}; } |
407
|
0
|
|
|
0
|
|
|
sub sheet { ${ $_Iterators{$_[0]}{r_sheet} } } |
|
0
|
|
|
|
|
|
|
408
|
0
|
|
|
0
|
|
|
sub col { ${ $_Iterators{$_[0]}{r_col} } } |
|
0
|
|
|
|
|
|
|
409
|
0
|
|
|
0
|
|
|
sub row { ${ $_Iterators{$_[0]}{r_row} } } |
|
0
|
|
|
|
|
|
|
410
|
0
|
|
|
0
|
|
|
sub remaining { ${ $_Iterators{$_[0]}{r_remaining} } } |
|
0
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
sub rewind { |
413
|
0
|
|
|
0
|
|
|
my $self= $_Iterators{$_[0]}; |
414
|
0
|
|
|
|
|
|
${$self->{r_row}}= $self->{min_row} - 1; |
|
0
|
|
|
|
|
|
|
415
|
0
|
|
|
|
|
|
${$self->{r_remaining}}= $self->{max_row} - $self->{min_row} + 1; |
|
0
|
|
|
|
|
|
|
416
|
0
|
|
|
|
|
|
return 1; |
417
|
|
|
|
|
|
|
} |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
1; |
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
__END__ |