line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
#!/pro/bin/perl |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
package Spreadsheet::Read; |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=head1 NAME |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
Spreadsheet::Read - Read the data from a spreadsheet |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=head1 SYNOPSIS |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
use Spreadsheet::Read; |
12
|
|
|
|
|
|
|
my $book = ReadData ("test.csv", sep => ";"); |
13
|
|
|
|
|
|
|
my $book = ReadData ("test.sxc"); |
14
|
|
|
|
|
|
|
my $book = ReadData ("test.ods"); |
15
|
|
|
|
|
|
|
my $book = ReadData ("test.xls"); |
16
|
|
|
|
|
|
|
my $book = ReadData ("test.xlsx"); |
17
|
|
|
|
|
|
|
my $book = ReadData ("test.xlsm"); |
18
|
|
|
|
|
|
|
my $book = ReadData ($fh, parser => "xls"); |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
Spreadsheet::Read::add ($book, "sheet.csv"); |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
my $sheet = $book->[1]; # first datasheet |
23
|
|
|
|
|
|
|
my $cell = $book->[1]{A3}; # content of field A3 of sheet 1 |
24
|
|
|
|
|
|
|
my $cell = $book->[1]{cell}[1][3]; # same, unformatted |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# OO API |
27
|
|
|
|
|
|
|
my $book = Spreadsheet::Read->new ("file.csv"); |
28
|
|
|
|
|
|
|
my $sheet = $book->sheet (1); |
29
|
|
|
|
|
|
|
my $cell = $sheet->cell ("A3"); |
30
|
|
|
|
|
|
|
my $cell = $sheet->cell (1, 3); |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
$book->add ("test.xls"); |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
=cut |
35
|
|
|
|
|
|
|
|
36
|
58
|
|
|
58
|
|
5939642
|
use 5.8.1; |
|
58
|
|
|
|
|
792
|
|
37
|
58
|
|
|
58
|
|
386
|
use strict; |
|
58
|
|
|
|
|
131
|
|
|
58
|
|
|
|
|
1419
|
|
38
|
58
|
|
|
58
|
|
320
|
use warnings; |
|
58
|
|
|
|
|
121
|
|
|
58
|
|
|
|
|
3663
|
|
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
our $VERSION = "0.82"; |
41
|
1
|
|
|
1
|
1
|
137
|
sub Version { $VERSION } |
42
|
|
|
|
|
|
|
|
43
|
58
|
|
|
58
|
|
394
|
use Carp; |
|
58
|
|
|
|
|
133
|
|
|
58
|
|
|
|
|
3835
|
|
44
|
58
|
|
|
58
|
|
397
|
use Exporter; |
|
58
|
|
|
|
|
124
|
|
|
58
|
|
|
|
|
5881
|
|
45
|
|
|
|
|
|
|
our @ISA = qw( Exporter ); |
46
|
|
|
|
|
|
|
our @EXPORT = qw( ReadData cell2cr cr2cell ); |
47
|
|
|
|
|
|
|
our @EXPORT_OK = qw( parses rows cellrow row add ); |
48
|
|
|
|
|
|
|
|
49
|
58
|
|
|
58
|
|
32974
|
use Encode qw( decode ); |
|
58
|
|
|
|
|
903436
|
|
|
58
|
|
|
|
|
4416
|
|
50
|
58
|
|
|
58
|
|
52982
|
use File::Temp qw( ); |
|
58
|
|
|
|
|
1251489
|
|
|
58
|
|
|
|
|
1782
|
|
51
|
58
|
|
|
58
|
|
35635
|
use Data::Dumper; |
|
58
|
|
|
|
|
379932
|
|
|
58
|
|
|
|
|
70239
|
|
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
my @parsers = ( |
54
|
|
|
|
|
|
|
[ csv => "Text::CSV_XS", "0.71" ], |
55
|
|
|
|
|
|
|
[ csv => "Text::CSV_PP", "1.17" ], |
56
|
|
|
|
|
|
|
[ csv => "Text::CSV", "1.17" ], |
57
|
|
|
|
|
|
|
[ ods => "Spreadsheet::ParseODS", "0.25" ], |
58
|
|
|
|
|
|
|
[ ods => "Spreadsheet::ReadSXC", "0.20" ], |
59
|
|
|
|
|
|
|
[ sxc => "Spreadsheet::ParseODS", "0.25" ], |
60
|
|
|
|
|
|
|
[ sxc => "Spreadsheet::ReadSXC", "0.20" ], |
61
|
|
|
|
|
|
|
[ xls => "Spreadsheet::ParseExcel", "0.34" ], |
62
|
|
|
|
|
|
|
[ xlsx => "Spreadsheet::ParseXLSX", "0.24" ], |
63
|
|
|
|
|
|
|
[ xlsx => "Spreadsheet::XLSX", "0.13" ], |
64
|
|
|
|
|
|
|
[ prl => "Spreadsheet::Perl", "" ], |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
# Helper modules |
67
|
|
|
|
|
|
|
[ ios => "IO::Scalar", "" ], |
68
|
|
|
|
|
|
|
[ dmp => "Data::Peek", "" ], |
69
|
|
|
|
|
|
|
); |
70
|
|
|
|
|
|
|
my %can = ( supports => { map { $_->[1] => $_->[2] } @parsers }); |
71
|
|
|
|
|
|
|
foreach my $p (@parsers) { |
72
|
|
|
|
|
|
|
my $format = $p->[0]; |
73
|
|
|
|
|
|
|
$can{$format} and next; |
74
|
|
|
|
|
|
|
$can{$format} = ""; |
75
|
|
|
|
|
|
|
my $preset = $ENV{"SPREADSHEET_READ_\U$format"} or next; |
76
|
|
|
|
|
|
|
my $min_version = $can{supports}{$preset}; |
77
|
|
|
|
|
|
|
unless ($min_version) { |
78
|
|
|
|
|
|
|
# Catch weirdness like $SPREADSHEET_READ_XLSX = "DBD::Oracle" |
79
|
|
|
|
|
|
|
$can{$format} = "!$preset is not supported for the $format format"; |
80
|
|
|
|
|
|
|
next; |
81
|
|
|
|
|
|
|
} |
82
|
|
|
|
|
|
|
if (eval "local \$_; require $preset" and not $@) { |
83
|
|
|
|
|
|
|
# forcing a parser should still check the version |
84
|
|
|
|
|
|
|
my $ok; |
85
|
|
|
|
|
|
|
my $has = $preset->VERSION; |
86
|
|
|
|
|
|
|
$has =~ s/_[0-9]+$//; # Remove beta-part |
87
|
|
|
|
|
|
|
if ($min_version =~ m/^v([0-9.]+)/) { # clumsy versions |
88
|
|
|
|
|
|
|
my @min = split m/\./ => $1; |
89
|
|
|
|
|
|
|
$has =~ s/^v//; |
90
|
|
|
|
|
|
|
my @has = split m/\./ => $has; |
91
|
|
|
|
|
|
|
$ok = (($has[0] * 1000 + $has[1]) * 1000 + $has[2]) >= |
92
|
|
|
|
|
|
|
(($min[0] * 1000 + $min[1]) * 1000 + $min[2]); |
93
|
|
|
|
|
|
|
} |
94
|
|
|
|
|
|
|
else { # normal versions |
95
|
|
|
|
|
|
|
$ok = $has >= $min_version; |
96
|
|
|
|
|
|
|
} |
97
|
|
|
|
|
|
|
$ok or $preset = "!$preset"; |
98
|
|
|
|
|
|
|
} |
99
|
|
|
|
|
|
|
else { |
100
|
|
|
|
|
|
|
$preset = "!$preset"; |
101
|
|
|
|
|
|
|
} |
102
|
|
|
|
|
|
|
$can{$format} = $preset; |
103
|
|
|
|
|
|
|
} |
104
|
|
|
|
|
|
|
delete $can{supports}; |
105
|
|
|
|
|
|
|
for (@parsers) { |
106
|
|
|
|
|
|
|
my ($flag, $mod, $vsn) = @$_; |
107
|
|
|
|
|
|
|
$can{$flag} and next; |
108
|
|
|
|
|
|
|
eval "require $mod; \$vsn and ${mod}->VERSION (\$vsn); \$can{\$flag} = '$mod'" or |
109
|
|
|
|
|
|
|
$_->[0] = "! Cannot use $mod version $vsn: $@"; |
110
|
|
|
|
|
|
|
} |
111
|
|
|
|
|
|
|
$can{sc} = __PACKAGE__; # SquirelCalc is built-in |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
defined $Spreadsheet::ParseExcel::VERSION && $Spreadsheet::ParseExcel::VERSION < 0.61 and |
114
|
|
|
|
|
|
|
*Spreadsheet::ParseExcel::Workbook::get_active_sheet = sub { undef; }; |
115
|
|
|
|
|
|
|
defined $Spreadsheet::ParseODS::VERSION && $Spreadsheet::ParseODS::VERSION < 0.25 and |
116
|
|
|
|
|
|
|
*Spreadsheet::ParseODS::Workbook::get_active_sheet = sub { undef; }; |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
my $debug = 0; |
119
|
|
|
|
|
|
|
my %def_opts = ( |
120
|
|
|
|
|
|
|
rc => 1, |
121
|
|
|
|
|
|
|
cells => 1, |
122
|
|
|
|
|
|
|
attr => 0, |
123
|
|
|
|
|
|
|
clip => undef, # $opt{cells}; |
124
|
|
|
|
|
|
|
strip => 0, |
125
|
|
|
|
|
|
|
pivot => 0, |
126
|
|
|
|
|
|
|
dtfmt => "yyyy-mm-dd", # Format 14 |
127
|
|
|
|
|
|
|
debug => 0, |
128
|
|
|
|
|
|
|
passwd => undef, |
129
|
|
|
|
|
|
|
parser => undef, |
130
|
|
|
|
|
|
|
sep => undef, |
131
|
|
|
|
|
|
|
quote => undef, |
132
|
|
|
|
|
|
|
label => undef, |
133
|
|
|
|
|
|
|
merge => 0, |
134
|
|
|
|
|
|
|
); |
135
|
|
|
|
|
|
|
my @def_attr = ( |
136
|
|
|
|
|
|
|
type => "text", |
137
|
|
|
|
|
|
|
fgcolor => undef, |
138
|
|
|
|
|
|
|
bgcolor => undef, |
139
|
|
|
|
|
|
|
font => undef, |
140
|
|
|
|
|
|
|
size => undef, |
141
|
|
|
|
|
|
|
format => undef, |
142
|
|
|
|
|
|
|
halign => "left", |
143
|
|
|
|
|
|
|
valign => "top", |
144
|
|
|
|
|
|
|
bold => 0, |
145
|
|
|
|
|
|
|
italic => 0, |
146
|
|
|
|
|
|
|
uline => 0, |
147
|
|
|
|
|
|
|
wrap => 0, |
148
|
|
|
|
|
|
|
merged => 0, |
149
|
|
|
|
|
|
|
hidden => 0, |
150
|
|
|
|
|
|
|
locked => 0, |
151
|
|
|
|
|
|
|
enc => "utf-8", # $ENV{LC_ALL} // $ENV{LANG} // ... |
152
|
|
|
|
|
|
|
formula => undef, |
153
|
|
|
|
|
|
|
); |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
# Helper functions |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
sub _dump { |
158
|
7
|
|
|
7
|
|
21
|
my ($label, $ref) = @_; |
159
|
7
|
50
|
|
|
|
26
|
if ($can{dmp}) { |
160
|
7
|
|
|
|
|
46
|
print STDERR Data::Peek::DDumper ({ $label => $ref }); |
161
|
|
|
|
|
|
|
} |
162
|
|
|
|
|
|
|
else { |
163
|
0
|
|
|
|
|
0
|
print STDERR Data::Dumper->Dump ([$ref], [$label]); |
164
|
|
|
|
|
|
|
} |
165
|
7
|
|
|
|
|
1881
|
my @c = caller (1); |
166
|
7
|
|
|
|
|
51
|
print STDERR "<<- $c[1]:$c[2]|$c[3]\n"; |
167
|
|
|
|
|
|
|
} # _dump |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
sub _parser { |
170
|
420
|
100
|
|
420
|
|
1720
|
my $type = shift or return ""; |
171
|
177
|
|
|
|
|
532
|
$type = lc $type; |
172
|
177
|
100
|
|
|
|
702
|
my $ods = $can{ods} ? "ods" : "sxc"; |
173
|
|
|
|
|
|
|
# Aliases and fullnames |
174
|
177
|
100
|
|
|
|
560
|
$type eq "excel" and return "xls"; |
175
|
176
|
50
|
|
|
|
527
|
$type eq "excel2007" and return "xlsx"; |
176
|
176
|
50
|
|
|
|
483
|
$type eq "xlsm" and return "xlsx"; |
177
|
176
|
100
|
|
|
|
466
|
$type eq "oo" and return $ods; |
178
|
|
|
|
|
|
|
# $type eq "sxc" and return $ods; |
179
|
175
|
100
|
|
|
|
440
|
$type eq "openoffice" and return $ods; |
180
|
174
|
50
|
|
|
|
413
|
$type eq "libreoffice" and return $ods; |
181
|
174
|
100
|
|
|
|
524
|
$type eq "perl" and return "prl"; |
182
|
173
|
50
|
|
|
|
450
|
$type eq "squirelcalc" and return "sc"; |
183
|
173
|
100
|
|
|
|
1243
|
return exists $can{$type} ? $type : ""; |
184
|
|
|
|
|
|
|
} # _parser |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
sub new { |
187
|
13
|
|
|
13
|
1
|
7331
|
my $class = shift; |
188
|
13
|
|
|
|
|
69
|
my $r = ReadData (@_); |
189
|
13
|
100
|
|
|
|
51
|
unless ($r) { |
190
|
2
|
50
|
|
|
|
14
|
@_ and return; # new with arguments failed to open resource |
191
|
0
|
|
|
|
|
0
|
$r = [{ |
192
|
|
|
|
|
|
|
parsers => [], |
193
|
|
|
|
|
|
|
error => undef, |
194
|
|
|
|
|
|
|
sheets => 0, |
195
|
|
|
|
|
|
|
sheet => { }, |
196
|
|
|
|
|
|
|
}]; |
197
|
|
|
|
|
|
|
} |
198
|
11
|
|
|
|
|
225
|
bless $r => $class; |
199
|
|
|
|
|
|
|
} # new |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
# Spreadsheet::Read::parses ("csv") or die "Cannot parse CSV" |
202
|
|
|
|
|
|
|
sub parses { |
203
|
68
|
100
|
|
68
|
1
|
161862
|
ref $_[0] eq __PACKAGE__ and shift; |
204
|
68
|
100
|
|
|
|
307
|
my $type = _parser (shift) or return 0; |
205
|
66
|
100
|
|
|
|
625
|
if ($can{$type} =~ m/^!\s*(.*)/) { |
206
|
17
|
|
|
|
|
73
|
$@ = $1; |
207
|
17
|
|
|
|
|
61
|
return 0; |
208
|
|
|
|
|
|
|
} |
209
|
49
|
|
|
|
|
162
|
return $can{$type}; |
210
|
|
|
|
|
|
|
} # parses |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
sub sheets { |
213
|
4
|
|
|
4
|
1
|
14
|
my $ctrl = shift->[0]; |
214
|
4
|
|
|
|
|
6
|
my %s = %{$ctrl->{sheet}}; |
|
4
|
|
|
|
|
19
|
|
215
|
4
|
100
|
|
|
|
48
|
wantarray ? sort { $s{$a} <=> $s{$b} } keys %s : $ctrl->{sheets}; |
|
3
|
|
|
|
|
27
|
|
216
|
|
|
|
|
|
|
} # sheets |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
# col2label (4) => "D" |
219
|
|
|
|
|
|
|
sub col2label { |
220
|
15739
|
100
|
|
15739
|
1
|
33822
|
ref $_[0] eq __PACKAGE__ and shift; |
221
|
15739
|
|
|
|
|
20470
|
my $c = shift; |
222
|
15739
|
100
|
100
|
|
|
39898
|
defined $c && $c > 0 or return ""; |
223
|
15735
|
|
|
|
|
21038
|
my $cell = ""; |
224
|
15735
|
|
|
|
|
25414
|
while ($c) { |
225
|
58
|
|
|
58
|
|
31758
|
use integer; |
|
58
|
|
|
|
|
922
|
|
|
58
|
|
|
|
|
380
|
|
226
|
|
|
|
|
|
|
|
227
|
15752
|
|
|
|
|
29069
|
substr $cell, 0, 0, chr (--$c % 26 + ord "A"); |
228
|
15752
|
|
|
|
|
27170
|
$c /= 26; |
229
|
|
|
|
|
|
|
} |
230
|
15735
|
|
|
|
|
41770
|
$cell; |
231
|
|
|
|
|
|
|
} # col2label |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
# cr2cell (4, 18) => "D18" |
234
|
|
|
|
|
|
|
# No prototype to allow 'cr2cell (@rowcol)' |
235
|
|
|
|
|
|
|
sub cr2cell { |
236
|
15537
|
100
|
|
15537
|
1
|
225094
|
ref $_[0] eq __PACKAGE__ and shift; |
237
|
15537
|
|
|
|
|
24494
|
my ($c, $r) = @_; |
238
|
15537
|
100
|
100
|
|
|
71221
|
defined $c && defined $r && $c > 0 && $r > 0 or return ""; |
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
239
|
15531
|
|
|
|
|
23927
|
col2label ($c) . $r; |
240
|
|
|
|
|
|
|
} # cr2cell |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
# cell2cr ("D18") => (4, 18) |
243
|
|
|
|
|
|
|
sub cell2cr { |
244
|
620
|
100
|
|
620
|
1
|
690191
|
ref $_[0] eq __PACKAGE__ and shift; |
245
|
620
|
100
|
100
|
|
|
6048
|
my ($cc, $r) = (uc ($_[0]||"") =~ m/^([A-Z]+)([0-9]+)$/) or return (0, 0); |
246
|
616
|
|
|
|
|
1298
|
my $c = 0; |
247
|
616
|
|
|
|
|
2537
|
while ($cc =~ s/^([A-Z])//) { |
248
|
624
|
|
|
|
|
2497
|
$c = 26 * $c + 1 + ord ($1) - ord ("A"); |
249
|
|
|
|
|
|
|
} |
250
|
616
|
|
|
|
|
2221
|
($c, $r); |
251
|
|
|
|
|
|
|
} # cell2cr |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
# my @row = cellrow ($book->[1], 1); |
254
|
|
|
|
|
|
|
# my @row = $book->cellrow (1, 1); |
255
|
|
|
|
|
|
|
sub cellrow { |
256
|
10
|
100
|
|
10
|
1
|
70
|
my $sheet = ref $_[0] eq __PACKAGE__ ? (shift)->[shift] : shift or return; |
|
|
100
|
|
|
|
|
|
257
|
9
|
100
|
66
|
|
|
95
|
ref $sheet eq "HASH" && exists $sheet->{cell} or return; |
258
|
7
|
50
|
33
|
|
|
45
|
exists $sheet->{maxcol} && exists $sheet->{maxrow} or return; |
259
|
7
|
100
|
|
|
|
35
|
my $row = shift or return; |
260
|
6
|
100
|
100
|
|
|
51
|
$row > 0 && $row <= $sheet->{maxrow} or return; |
261
|
4
|
|
|
|
|
11
|
my $s = $sheet->{cell}; |
262
|
4
|
|
|
|
|
16
|
map { $s->[$_][$row] } 1..$sheet->{maxcol}; |
|
76
|
|
|
|
|
211
|
|
263
|
|
|
|
|
|
|
} # cellrow |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
# my @row = row ($book->[1], 1); |
266
|
|
|
|
|
|
|
# my @row = $book->row (1, 1); |
267
|
|
|
|
|
|
|
sub row { |
268
|
10
|
100
|
|
10
|
1
|
3820
|
my $sheet = ref $_[0] eq __PACKAGE__ ? (shift)->[shift] : shift or return; |
|
|
100
|
|
|
|
|
|
269
|
9
|
100
|
66
|
|
|
74
|
ref $sheet eq "HASH" && exists $sheet->{cell} or return; |
270
|
7
|
50
|
33
|
|
|
37
|
exists $sheet->{maxcol} && exists $sheet->{maxrow} or return; |
271
|
7
|
100
|
|
|
|
32
|
my $row = shift or return; |
272
|
6
|
100
|
100
|
|
|
41
|
$row > 0 && $row <= $sheet->{maxrow} or return; |
273
|
4
|
|
|
|
|
18
|
map { $sheet->{cr2cell ($_, $row)} } 1..$sheet->{maxcol}; |
|
76
|
|
|
|
|
134
|
|
274
|
|
|
|
|
|
|
} # row |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
# Convert {cell}'s [column][row] to a [row][column] list |
277
|
|
|
|
|
|
|
# my @rows = rows ($book->[1]); |
278
|
|
|
|
|
|
|
sub rows { |
279
|
33
|
50
|
|
33
|
1
|
22403
|
my $sheet = ref $_[0] eq __PACKAGE__ ? (shift)->[shift] : shift or return; |
|
|
100
|
|
|
|
|
|
280
|
30
|
100
|
100
|
|
|
169
|
ref $sheet eq "HASH" && exists $sheet->{cell} or return; |
281
|
26
|
100
|
100
|
|
|
149
|
exists $sheet->{maxcol} && exists $sheet->{maxrow} or return; |
282
|
5
|
|
|
|
|
16
|
my $s = $sheet->{cell}; |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
map { |
285
|
20
|
|
|
|
|
32
|
my $r = $_; |
286
|
20
|
|
|
|
|
34
|
[ map { $s->[$_][$r] } 1..$sheet->{maxcol} ]; |
|
80
|
|
|
|
|
181
|
|
287
|
5
|
|
|
|
|
25
|
} 1..$sheet->{maxrow}; |
288
|
|
|
|
|
|
|
} # rows |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
sub sheet { |
291
|
24
|
|
|
24
|
1
|
612
|
my ($book, $sheet) = @_; |
292
|
24
|
100
|
100
|
|
|
124
|
$book && $sheet or return; |
293
|
22
|
|
|
|
|
39
|
my $class = "Spreadsheet::Read::Sheet"; |
294
|
|
|
|
|
|
|
$sheet =~ m/^[0-9]+$/ && $sheet >= 1 && $sheet <= $book->[0]{sheets} and |
295
|
22
|
100
|
66
|
|
|
332
|
return bless $book->[$sheet] => $class; |
|
|
|
100
|
|
|
|
|
296
|
|
|
|
|
|
|
exists $book->[0]{sheet}{$sheet} and |
297
|
6
|
100
|
|
|
|
24
|
return bless $book->[$book->[0]{sheet}{$sheet}] => $class; |
298
|
5
|
|
|
|
|
16
|
foreach my $idx (1 .. $book->[0]{sheets}) { |
299
|
6
|
100
|
|
|
|
25
|
$book->[$idx]{label} eq $sheet and |
300
|
|
|
|
|
|
|
return bless $book->[$idx] => $class; |
301
|
|
|
|
|
|
|
} |
302
|
4
|
|
|
|
|
21
|
return; |
303
|
|
|
|
|
|
|
} # sheet |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
# If option "clip" is set, remove the trailing rows and |
306
|
|
|
|
|
|
|
# columns in each sheet that contain no visible data |
307
|
|
|
|
|
|
|
sub _clipsheets { |
308
|
228
|
|
|
228
|
|
631
|
my ($opt, $ref) = @_; |
309
|
|
|
|
|
|
|
|
310
|
228
|
50
|
|
|
|
736
|
$ref->[0]{sheets} or return $ref; |
311
|
|
|
|
|
|
|
|
312
|
228
|
|
|
|
|
644
|
my ($rc, $cl) = ($opt->{rc}, $opt->{cells}); |
313
|
228
|
|
|
|
|
685
|
my ($oc, $os, $oa) = ($opt->{clip}, $opt->{strip}, $opt->{attr}); |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
# Strip leading/trailing spaces |
316
|
228
|
100
|
100
|
|
|
937
|
if ($os || $oc) { |
317
|
213
|
|
|
|
|
653
|
foreach my $sheet (1 .. $ref->[0]{sheets}) { |
318
|
255
|
|
|
|
|
573
|
$ref->[$sheet]{indx} = $sheet; |
319
|
255
|
|
|
|
|
460
|
my $ss = $ref->[$sheet]; |
320
|
255
|
100
|
66
|
|
|
1109
|
$ss->{maxrow} && $ss->{maxcol} or next; |
321
|
248
|
|
|
|
|
563
|
my ($mc, $mr) = (0, 0); |
322
|
248
|
|
|
|
|
593
|
foreach my $row (1 .. $ss->{maxrow}) { |
323
|
1958
|
|
|
|
|
3308
|
foreach my $col (1 .. $ss->{maxcol}) { |
324
|
16652
|
100
|
|
|
|
25905
|
if ($rc) { |
325
|
14754
|
100
|
|
|
|
27221
|
defined $ss->{cell}[$col][$row] or next; |
326
|
5897
|
100
|
|
|
|
10078
|
$os & 2 and $ss->{cell}[$col][$row] =~ s/\s+$//; |
327
|
5897
|
100
|
|
|
|
9547
|
$os & 1 and $ss->{cell}[$col][$row] =~ s/^\s+//; |
328
|
5897
|
100
|
|
|
|
10590
|
if (length $ss->{cell}[$col][$row]) { |
329
|
4432
|
100
|
|
|
|
7185
|
$col > $mc and $mc = $col; |
330
|
4432
|
100
|
|
|
|
7530
|
$row > $mr and $mr = $row; |
331
|
|
|
|
|
|
|
} |
332
|
|
|
|
|
|
|
} |
333
|
7795
|
100
|
|
|
|
12164
|
if ($cl) { |
334
|
7576
|
|
|
|
|
11599
|
my $cell = cr2cell ($col, $row); |
335
|
7576
|
100
|
|
|
|
15528
|
defined $ss->{$cell} or next; |
336
|
6249
|
100
|
|
|
|
11234
|
$os & 2 and $ss->{$cell} =~ s/\s+$//; |
337
|
6249
|
100
|
|
|
|
10114
|
$os & 1 and $ss->{$cell} =~ s/^\s+//; |
338
|
6249
|
100
|
|
|
|
11479
|
if (length $ss->{$cell}) { |
339
|
4772
|
100
|
|
|
|
7695
|
$col > $mc and $mc = $col; |
340
|
4772
|
100
|
|
|
|
8905
|
$row > $mr and $mr = $row; |
341
|
|
|
|
|
|
|
} |
342
|
|
|
|
|
|
|
} |
343
|
|
|
|
|
|
|
} |
344
|
|
|
|
|
|
|
} |
345
|
|
|
|
|
|
|
|
346
|
248
|
100
|
66
|
|
|
1540
|
$oc && ($mc < $ss->{maxcol} || $mr < $ss->{maxrow}) or next; |
|
|
|
66
|
|
|
|
|
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
# Remove trailing empty columns |
349
|
106
|
|
|
|
|
254
|
foreach my $col (($mc + 1) .. $ss->{maxcol}) { |
350
|
243
|
100
|
|
|
|
679
|
$rc and undef $ss->{cell}[$col]; |
351
|
243
|
100
|
|
|
|
568
|
$oa and undef $ss->{attr}[$col]; |
352
|
243
|
100
|
|
|
|
445
|
$cl or next; |
353
|
193
|
|
|
|
|
337
|
my $c = col2label ($col); |
354
|
193
|
|
|
|
|
1438
|
delete $ss->{"$c$_"} for 1 .. $ss->{maxrow}; |
355
|
|
|
|
|
|
|
} |
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
# Remove trailing empty rows |
358
|
106
|
|
|
|
|
269
|
foreach my $row (($mr + 1) .. $ss->{maxrow}) { |
359
|
139
|
|
|
|
|
265
|
foreach my $col (1 .. $mc) { |
360
|
64
|
50
|
|
|
|
159
|
$cl and delete $ss->{cr2cell ($col, $row)}; |
361
|
64
|
50
|
|
|
|
213
|
$rc and undef $ss->{cell} [$col][$row]; |
362
|
64
|
50
|
|
|
|
1024
|
$oa and undef $ss->{attr} [$col][$row]; |
363
|
|
|
|
|
|
|
} |
364
|
|
|
|
|
|
|
} |
365
|
|
|
|
|
|
|
|
366
|
106
|
|
|
|
|
312
|
($ss->{maxrow}, $ss->{maxcol}) = ($mr, $mc); |
367
|
|
|
|
|
|
|
} |
368
|
|
|
|
|
|
|
} |
369
|
|
|
|
|
|
|
|
370
|
228
|
100
|
|
|
|
708
|
if ($opt->{pivot}) { |
371
|
1
|
|
|
|
|
4
|
foreach my $sheet (1 .. $ref->[0]{sheets}) { |
372
|
1
|
|
|
|
|
4
|
my $ss = $ref->[$sheet]; |
373
|
1
|
0
|
33
|
|
|
4
|
$ss->{maxrow} || $ss->{maxcol} or next; |
374
|
1
|
50
|
|
|
|
5
|
my $mx = $ss->{maxrow} > $ss->{maxcol} ? $ss->{maxrow} : $ss->{maxcol}; |
375
|
1
|
|
|
|
|
3
|
foreach my $row (2 .. $mx) { |
376
|
18
|
|
|
|
|
38
|
foreach my $col (1 .. ($row - 1)) { |
377
|
|
|
|
|
|
|
$opt->{rc} and |
378
|
|
|
|
|
|
|
($ss->{cell}[$col][$row], $ss->{cell}[$row][$col]) = |
379
|
171
|
50
|
|
|
|
520
|
($ss->{cell}[$row][$col], $ss->{cell}[$col][$row]); |
380
|
|
|
|
|
|
|
$opt->{cells} and |
381
|
|
|
|
|
|
|
($ss->{cr2cell ($col, $row)}, $ss->{cr2cell ($row, $col)}) = |
382
|
171
|
50
|
|
|
|
349
|
($ss->{cr2cell ($row, $col)}, $ss->{cr2cell ($col, $row)}); |
383
|
|
|
|
|
|
|
} |
384
|
|
|
|
|
|
|
} |
385
|
1
|
|
|
|
|
4
|
($ss->{maxcol}, $ss->{maxrow}) = ($ss->{maxrow}, $ss->{maxcol}); |
386
|
|
|
|
|
|
|
} |
387
|
|
|
|
|
|
|
} |
388
|
|
|
|
|
|
|
|
389
|
228
|
|
|
|
|
5183
|
$ref; |
390
|
|
|
|
|
|
|
} # _clipsheets |
391
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
# Convert a single color (index) to a color |
393
|
|
|
|
|
|
|
sub _xls_color { |
394
|
1758
|
|
|
1758
|
|
3249
|
my $clr = shift; |
395
|
1758
|
100
|
|
|
|
3493
|
defined $clr or return undef; |
396
|
866
|
50
|
|
|
|
1459
|
$clr eq "#000000" and return undef; |
397
|
866
|
50
|
|
|
|
1719
|
$clr =~ m/^#[0-9a-fA-F]+$/ and return lc $clr; |
398
|
866
|
100
|
66
|
|
|
2434
|
$clr == 0 || $clr == 32767 and return undef; # Default fg color |
399
|
484
|
|
|
|
|
1093
|
return "#" . lc Spreadsheet::ParseExcel->ColorIdxToRGB ($clr); |
400
|
|
|
|
|
|
|
} # _xls_color |
401
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
# Convert a fill [ $pattern, $front_color, $back_color ] to a single background |
403
|
|
|
|
|
|
|
sub _xls_fill { |
404
|
1542
|
|
|
1542
|
|
3011
|
my ($p, $fg, $bg) = @_; |
405
|
1542
|
50
|
|
|
|
2656
|
defined $p or return undef; |
406
|
1542
|
50
|
|
|
|
2580
|
$p == 32767 and return undef; # Default fg color |
407
|
1542
|
100
|
100
|
|
|
14827
|
$p == 0 && !defined $bg and return undef; # No fill bg color |
408
|
650
|
100
|
|
|
|
1104
|
$p == 1 and return _xls_color ($fg); |
409
|
434
|
50
|
33
|
|
|
6146
|
$bg < 8 || $bg > 63 and return undef; # see Workbook.pm#106 |
410
|
0
|
|
|
|
|
0
|
return _xls_color ($bg); |
411
|
|
|
|
|
|
|
} # _xls_fill |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
sub ReadData { |
414
|
280
|
100
|
|
280
|
1
|
342784
|
my $txt = shift or return; |
415
|
|
|
|
|
|
|
|
416
|
277
|
|
|
|
|
573
|
my %opt; |
417
|
277
|
100
|
|
|
|
774
|
if (@_) { |
418
|
222
|
100
|
|
|
|
1117
|
if (ref $_[0] eq "HASH") { %opt = %{shift @_} } |
|
20
|
50
|
|
|
|
31
|
|
|
20
|
|
|
|
|
79
|
|
419
|
202
|
|
|
|
|
776
|
elsif (@_ % 2 == 0) { %opt = @_ } |
420
|
|
|
|
|
|
|
} |
421
|
|
|
|
|
|
|
|
422
|
277
|
100
|
|
|
|
1075
|
exists $opt{rc} or $opt{rc} = $def_opts{rc}; |
423
|
277
|
100
|
|
|
|
1367
|
exists $opt{cells} or $opt{cells} = $def_opts{cells}; |
424
|
277
|
100
|
|
|
|
849
|
exists $opt{attr} or $opt{attr} = $def_opts{attr}; |
425
|
277
|
100
|
|
|
|
951
|
exists $opt{clip} or $opt{clip} = $opt{cells}; |
426
|
277
|
100
|
|
|
|
748
|
exists $opt{strip} or $opt{strip} = $def_opts{strip}; |
427
|
277
|
100
|
|
|
|
888
|
exists $opt{dtfmt} or $opt{dtfmt} = $def_opts{dtfmt}; |
428
|
277
|
100
|
|
|
|
771
|
exists $opt{merge} or $opt{merge} = $def_opts{merge}; |
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
# $debug = $opt{debug} || 0; |
431
|
277
|
100
|
|
|
|
783
|
$debug = defined $opt{debug} ? $opt{debug} : $def_opts{debug}; |
432
|
277
|
100
|
|
|
|
702
|
$debug > 4 and _dump (Options => \%opt); |
433
|
|
|
|
|
|
|
|
434
|
31
|
|
|
|
|
97
|
my %parser_opts = map { $_ => $opt{$_} } |
435
|
277
|
|
|
|
|
1036
|
grep { !exists $def_opts{$_} } |
|
2047
|
|
|
|
|
3971
|
|
436
|
|
|
|
|
|
|
keys %opt; |
437
|
|
|
|
|
|
|
|
438
|
277
|
|
|
|
|
1216
|
my $_parser = _parser ($opt{parser}); |
439
|
|
|
|
|
|
|
|
440
|
277
|
100
|
|
|
|
1172
|
my $io_ref = ref ($txt) =~ m/GLOB|IO/ ? $txt : undef; |
441
|
|
|
|
|
|
|
my $io_fil = $io_ref ? 0 : $txt =~ m/\0/ ? 0 |
442
|
58
|
100
|
|
58
|
|
129661
|
: do { no warnings "newline"; -f $txt }; |
|
58
|
100
|
|
|
|
168
|
|
|
58
|
|
|
|
|
458040
|
|
|
277
|
|
|
|
|
1053
|
|
|
256
|
|
|
|
|
5473
|
|
443
|
277
|
100
|
100
|
|
|
1792
|
my $io_txt = $io_ref || $io_fil ? 0 : 1; |
444
|
|
|
|
|
|
|
|
445
|
277
|
100
|
100
|
|
|
4128
|
$io_fil && ! -s $txt and do { $@ = "$txt is empty"; return }; |
|
11
|
|
|
|
|
63
|
|
|
11
|
|
|
|
|
68
|
|
446
|
266
|
100
|
100
|
|
|
1445
|
$io_ref && eof $txt and do { $@ = "Empty stream"; return }; |
|
5
|
|
|
|
|
22
|
|
|
5
|
|
|
|
|
88
|
|
447
|
|
|
|
|
|
|
|
448
|
261
|
100
|
100
|
|
|
2023
|
if ($opt{parser} ? $_parser eq "csv" : ($io_fil && $txt =~ m/\.(csv)$/i)) { |
|
|
100
|
|
|
|
|
|
449
|
106
|
50
|
|
|
|
358
|
$can{csv} or croak "CSV parser not installed"; |
450
|
|
|
|
|
|
|
|
451
|
106
|
100
|
|
|
|
357
|
my $label = defined $opt{label} ? $opt{label} : $io_fil ? $txt : "IO"; |
|
|
100
|
|
|
|
|
|
452
|
|
|
|
|
|
|
|
453
|
106
|
50
|
|
|
|
286
|
$debug and print STDERR "Opening CSV $label using $can{csv}-", $can{csv}->VERSION, "\n"; |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
my @data = ( |
456
|
|
|
|
|
|
|
{ type => "csv", |
457
|
|
|
|
|
|
|
parser => $can{csv}, |
458
|
|
|
|
|
|
|
version => $can{csv}->VERSION, |
459
|
|
|
|
|
|
|
parsers => [ { |
460
|
|
|
|
|
|
|
type => "csv", |
461
|
|
|
|
|
|
|
parser => $can{csv}, |
462
|
|
|
|
|
|
|
version => $can{csv}->VERSION, |
463
|
106
|
|
|
|
|
3215
|
}], |
464
|
|
|
|
|
|
|
error => undef, |
465
|
|
|
|
|
|
|
quote => '"', |
466
|
|
|
|
|
|
|
sepchar => ',', |
467
|
|
|
|
|
|
|
sheets => 1, |
468
|
|
|
|
|
|
|
sheet => { $label => 1 }, |
469
|
|
|
|
|
|
|
}, |
470
|
|
|
|
|
|
|
{ parser => 0, |
471
|
|
|
|
|
|
|
label => $label, |
472
|
|
|
|
|
|
|
maxrow => 0, |
473
|
|
|
|
|
|
|
maxcol => 0, |
474
|
|
|
|
|
|
|
cell => [], |
475
|
|
|
|
|
|
|
attr => [], |
476
|
|
|
|
|
|
|
merged => [], |
477
|
|
|
|
|
|
|
active => 1, |
478
|
|
|
|
|
|
|
}, |
479
|
|
|
|
|
|
|
); |
480
|
|
|
|
|
|
|
|
481
|
106
|
|
|
|
|
558
|
my ($sep, $quo, $in) = (",", '"'); |
482
|
106
|
100
|
|
|
|
323
|
defined $opt{sep} and $sep = $opt{sep}; |
483
|
106
|
100
|
|
|
|
248
|
defined $opt{quote} and $quo = $opt{quote}; |
484
|
106
|
50
|
|
|
|
282
|
$debug > 8 and _dump (debug => { |
485
|
|
|
|
|
|
|
data => \@data, txt => $txt, io_ref => $io_ref, io_fil => $io_fil }); |
486
|
106
|
100
|
|
|
|
275
|
if ($io_fil) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
487
|
94
|
100
|
100
|
|
|
331
|
unless (defined $opt{quote} && defined $opt{sep}) { |
488
|
87
|
50
|
|
|
|
3475
|
open $in, "<", $txt or return; |
489
|
87
|
|
|
|
|
2121
|
my $l1 = <$in>; |
490
|
|
|
|
|
|
|
|
491
|
87
|
100
|
|
|
|
430
|
$quo = defined $opt{quote} ? $opt{quote} : '"'; |
492
|
|
|
|
|
|
|
$sep = # If explicitly set, use it |
493
|
|
|
|
|
|
|
defined $opt{sep} ? $opt{sep} : |
494
|
|
|
|
|
|
|
# otherwise start auto-detect with quoted strings |
495
|
87
|
50
|
|
|
|
964
|
$l1 =~ m/["0-9];["0-9;]/ ? ";" : |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
496
|
|
|
|
|
|
|
$l1 =~ m/["0-9],["0-9,]/ ? "," : |
497
|
|
|
|
|
|
|
$l1 =~ m/["0-9]\t["0-9,]/ ? "\t" : |
498
|
|
|
|
|
|
|
$l1 =~ m/["0-9]\|["0-9,]/ ? "|" : |
499
|
|
|
|
|
|
|
# If neither, then for unquoted strings |
500
|
|
|
|
|
|
|
$l1 =~ m/\w;[\w;]/ ? ";" : |
501
|
|
|
|
|
|
|
$l1 =~ m/\w,[\w,]/ ? "," : |
502
|
|
|
|
|
|
|
$l1 =~ m/\w\t[\w,]/ ? "\t" : |
503
|
|
|
|
|
|
|
$l1 =~ m/\w\|[\w,]/ ? "|" : |
504
|
|
|
|
|
|
|
"," ; |
505
|
87
|
|
|
|
|
1308
|
close $in; |
506
|
|
|
|
|
|
|
} |
507
|
94
|
50
|
|
|
|
2858
|
open $in, "<", $txt or return; |
508
|
|
|
|
|
|
|
} |
509
|
|
|
|
|
|
|
elsif ($io_ref) { |
510
|
10
|
|
|
|
|
23
|
$in = $txt; |
511
|
|
|
|
|
|
|
} |
512
|
|
|
|
|
|
|
elsif (ref $txt eq "SCALAR") { |
513
|
1
|
50
|
|
|
|
14
|
open $in, "<", $txt or croak "Cannot open input: $!"; |
514
|
|
|
|
|
|
|
} |
515
|
|
|
|
|
|
|
elsif ($txt =~ m/[\r\n,;]/) { |
516
|
1
|
50
|
|
2
|
|
94
|
open $in, "<", \$txt or croak "Cannot open input: $!"; |
|
2
|
|
|
|
|
26
|
|
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
21
|
|
517
|
|
|
|
|
|
|
} |
518
|
|
|
|
|
|
|
else { |
519
|
0
|
|
|
|
|
0
|
warn "Input type ", ref $txt, |
520
|
|
|
|
|
|
|
" might not be supported. Please file a ticket\n"; |
521
|
0
|
|
|
|
|
0
|
$in = $txt; # Now pray ... |
522
|
|
|
|
|
|
|
} |
523
|
106
|
50
|
|
|
|
1398
|
$debug > 1 and print STDERR "CSV sep_char '$sep', quote_char '$quo'\n"; |
524
|
|
|
|
|
|
|
my $csv = $can{csv}->new ({ |
525
|
|
|
|
|
|
|
%parser_opts, |
526
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
sep_char => ($data[0]{sepchar} = $sep), |
528
|
106
|
50
|
|
|
|
1403
|
quote_char => ($data[0]{quote} = $quo), |
529
|
|
|
|
|
|
|
keep_meta_info => 1, |
530
|
|
|
|
|
|
|
binary => 1, |
531
|
|
|
|
|
|
|
auto_diag => 1, |
532
|
|
|
|
|
|
|
}) or croak "Cannot create a csv ('$sep', '$quo') parser!"; |
533
|
|
|
|
|
|
|
|
534
|
106
|
|
|
|
|
23294
|
while (my $row = $csv->getline ($in)) { |
535
|
413
|
50
|
|
|
|
72606
|
my @row = @$row or last; |
536
|
|
|
|
|
|
|
|
537
|
413
|
|
|
|
|
920
|
my $r = ++$data[1]{maxrow}; |
538
|
413
|
100
|
|
|
|
1042
|
@row > $data[1]{maxcol} and $data[1]{maxcol} = @row; |
539
|
413
|
|
|
|
|
1153
|
foreach my $c (0 .. $#row) { |
540
|
2427
|
|
|
|
|
3555
|
my $val = $row[$c]; |
541
|
2427
|
|
|
|
|
4306
|
my $cell = cr2cell ($c + 1, $r); |
542
|
2427
|
100
|
|
|
|
7000
|
$opt{rc} and $data[1]{cell}[$c + 1][$r] = $val; |
543
|
2427
|
100
|
|
|
|
6482
|
$opt{cells} and $data[1]{$cell} = $val; |
544
|
2427
|
50
|
|
|
|
12226
|
$opt{attr} and $data[1]{attr}[$c + 1][$r] = { @def_attr }; |
545
|
|
|
|
|
|
|
} |
546
|
|
|
|
|
|
|
} |
547
|
106
|
50
|
|
|
|
15169
|
$csv->eof () or $data[0]{error} = [ $csv->error_diag ]; |
548
|
106
|
|
|
|
|
1601
|
close $in; |
549
|
|
|
|
|
|
|
|
550
|
106
|
|
|
|
|
313
|
for (@{$data[1]{cell}}) { |
|
106
|
|
|
|
|
536
|
|
551
|
734
|
100
|
|
|
|
1624
|
defined or $_ = []; |
552
|
|
|
|
|
|
|
} |
553
|
106
|
|
|
|
|
437
|
return _clipsheets \%opt, [ @data ]; |
554
|
|
|
|
|
|
|
} |
555
|
|
|
|
|
|
|
|
556
|
155
|
100
|
|
|
|
399
|
if ($io_txt) { # && $_parser !~ m/^xlsx?$/) { |
557
|
29
|
100
|
66
|
|
|
475
|
if ( # /etc/magic: Microsoft Office Document |
|
|
50
|
66
|
|
|
|
|
|
|
50
|
66
|
|
|
|
|
|
|
100
|
|
|
|
|
|
558
|
|
|
|
|
|
|
$txt =~ m{\A(\376\067\0\043 |
559
|
|
|
|
|
|
|
|\320\317\021\340\241\261\032\341 |
560
|
|
|
|
|
|
|
|\333\245-\0\0\0)}x |
561
|
|
|
|
|
|
|
# /usr/share/misc/magic |
562
|
|
|
|
|
|
|
|| $txt =~ m{\A.{2080}Microsoft Excel 5.0 Worksheet} |
563
|
|
|
|
|
|
|
|| $txt =~ m{\A\x09\x04\x06\x00\x00\x00\x10\x00} |
564
|
|
|
|
|
|
|
) { |
565
|
1
|
50
|
|
|
|
6
|
$can{xls} or croak "Spreadsheet::ParseExcel not installed"; |
566
|
1
|
|
|
|
|
4
|
my $tmpfile; |
567
|
1
|
50
|
|
|
|
5
|
if ($can{ios}) { # Do not use a temp file if IO::Scalar is available |
568
|
1
|
|
|
|
|
3
|
$tmpfile = \$txt; |
569
|
|
|
|
|
|
|
} |
570
|
|
|
|
|
|
|
else { |
571
|
0
|
|
|
|
|
0
|
$tmpfile = File::Temp->new (SUFFIX => ".xls", UNLINK => 1); |
572
|
0
|
|
|
|
|
0
|
binmode $tmpfile; |
573
|
0
|
|
|
|
|
0
|
print $tmpfile $txt; |
574
|
0
|
|
|
|
|
0
|
close $tmpfile; |
575
|
|
|
|
|
|
|
} |
576
|
1
|
50
|
|
|
|
53
|
open $io_ref, "<", $tmpfile or do { $@ = $!; return }; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
577
|
1
|
|
|
|
|
1007
|
$io_txt = 0; |
578
|
1
|
|
|
|
|
9
|
$_parser = _parser ($opt{parser} = "xls"); |
579
|
|
|
|
|
|
|
} |
580
|
|
|
|
|
|
|
elsif ( # /usr/share/misc/magic |
581
|
|
|
|
|
|
|
$txt =~ m{\APK\003\004.{4,30}(?:\[Content_Types\]\.xml|_rels/\.rels)} |
582
|
|
|
|
|
|
|
) { |
583
|
0
|
0
|
|
|
|
0
|
$can{xlsx} or croak "XLSX parser not installed"; |
584
|
0
|
|
|
|
|
0
|
my $tmpfile; |
585
|
0
|
0
|
|
|
|
0
|
if ($can{ios}) { # Do not use a temp file if IO::Scalar is available |
586
|
0
|
|
|
|
|
0
|
$tmpfile = \$txt; |
587
|
|
|
|
|
|
|
} |
588
|
|
|
|
|
|
|
else { |
589
|
0
|
|
|
|
|
0
|
$tmpfile = File::Temp->new (SUFFIX => ".xlsx", UNLINK => 1); |
590
|
0
|
|
|
|
|
0
|
binmode $tmpfile; |
591
|
0
|
|
|
|
|
0
|
print $tmpfile $txt; |
592
|
0
|
|
|
|
|
0
|
close $tmpfile; |
593
|
|
|
|
|
|
|
} |
594
|
0
|
0
|
|
|
|
0
|
open $io_ref, "<", $tmpfile or do { $@ = $!; return }; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
595
|
0
|
|
|
|
|
0
|
$io_txt = 0; |
596
|
0
|
|
|
|
|
0
|
$_parser = _parser ($opt{parser} = "xlsx"); |
597
|
|
|
|
|
|
|
} |
598
|
|
|
|
|
|
|
elsif ( # /usr/share/misc/magic |
599
|
|
|
|
|
|
|
$txt =~ m{\APK\003\004.{9,30}\Qmimetypeapplication/vnd.oasis.opendocument.spreadsheet} |
600
|
|
|
|
|
|
|
) { |
601
|
0
|
0
|
|
|
|
0
|
$can{ods} or croak "ODS parser not installed"; |
602
|
0
|
|
|
|
|
0
|
my $tmpfile; |
603
|
0
|
0
|
|
|
|
0
|
if ($can{ios}) { # Do not use a temp file if IO::Scalar is available |
604
|
0
|
|
|
|
|
0
|
$tmpfile = \$txt; |
605
|
|
|
|
|
|
|
} |
606
|
|
|
|
|
|
|
else { |
607
|
0
|
|
|
|
|
0
|
$tmpfile = File::Temp->new (SUFFIX => ".ods", UNLINK => 1); |
608
|
0
|
|
|
|
|
0
|
binmode $tmpfile; |
609
|
0
|
|
|
|
|
0
|
print $tmpfile $txt; |
610
|
0
|
|
|
|
|
0
|
close $tmpfile; |
611
|
|
|
|
|
|
|
} |
612
|
0
|
0
|
|
|
|
0
|
open $io_ref, "<", $tmpfile or do { $@ = $!; return }; |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
613
|
0
|
|
|
|
|
0
|
$io_txt = 0; |
614
|
0
|
|
|
|
|
0
|
$_parser = _parser ($opt{parser} = "ods"); |
615
|
|
|
|
|
|
|
} |
616
|
|
|
|
|
|
|
elsif (!$io_ref && $txt =~ m/\.xls[xm]?$/i) { |
617
|
3
|
|
|
|
|
19
|
$@ = "Cannot open $txt as file"; |
618
|
3
|
|
|
|
|
20
|
return; |
619
|
|
|
|
|
|
|
} |
620
|
|
|
|
|
|
|
} |
621
|
152
|
100
|
66
|
|
|
1178
|
if ($opt{parser} ? $_parser =~ m/^(?:xlsx?)$/ |
|
|
100
|
|
|
|
|
|
622
|
|
|
|
|
|
|
: ($io_fil && $txt =~ m/\.(xls[xm]?)$/i && |
623
|
|
|
|
|
|
|
($_parser = _parser ($1)))) { |
624
|
76
|
100
|
|
|
|
380
|
my $parse_type = $_parser =~ m/x$/i ? "XLSX" : "XLS"; |
625
|
76
|
50
|
|
|
|
326
|
my $parser = $can{lc $parse_type} or |
626
|
|
|
|
|
|
|
croak "Parser for $parse_type is not installed"; |
627
|
|
|
|
|
|
|
#$debug and print STDERR __FILE__, "#", __LINE__, " | $_parser | $parser | $parse_type\n"; |
628
|
|
|
|
|
|
|
$debug and print STDERR "Opening $parse_type ", $io_ref ? "<REF>" : $txt, |
629
|
76
|
50
|
|
|
|
360
|
" using $parser-", $can{lc $parse_type}->VERSION, "\n"; |
|
|
100
|
|
|
|
|
|
630
|
76
|
50
|
|
|
|
267
|
$opt{passwd} and $parser_opts{Password} = $opt{passwd}; |
631
|
76
|
|
|
|
|
152
|
my $oBook = eval { |
632
|
|
|
|
|
|
|
$io_ref |
633
|
|
|
|
|
|
|
? $parse_type eq "XLSX" |
634
|
|
|
|
|
|
|
? $can{xlsx} =~ m/::XLSX$/ |
635
|
|
|
|
|
|
|
? $parser->new ($io_ref) |
636
|
|
|
|
|
|
|
: $parser->new (%parser_opts)->parse ($io_ref) |
637
|
|
|
|
|
|
|
: $parser->new (%parser_opts)->Parse ($io_ref) |
638
|
|
|
|
|
|
|
: $parse_type eq "XLSX" |
639
|
76
|
0
|
|
|
|
866
|
? $can{xlsx} =~ m/::XLSX$/ |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
640
|
|
|
|
|
|
|
? $parser->new ($txt) |
641
|
|
|
|
|
|
|
: $parser->new (%parser_opts)->parse ($txt) |
642
|
|
|
|
|
|
|
: $parser->new (%parser_opts)->Parse ($txt); |
643
|
|
|
|
|
|
|
}; |
644
|
76
|
50
|
|
|
|
2249905
|
unless ($oBook) { |
645
|
|
|
|
|
|
|
# cleanup will fail on folders with spaces. |
646
|
0
|
|
|
|
|
0
|
(my $msg = $@) =~ s/ at \S+ line \d+.*//s; |
647
|
0
|
|
|
|
|
0
|
croak "$parse_type parser cannot parse data: $msg"; |
648
|
|
|
|
|
|
|
} |
649
|
76
|
50
|
|
|
|
284
|
$debug > 8 and _dump (oBook => $oBook); |
650
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
# WorkBook keys: |
652
|
|
|
|
|
|
|
# aColor _CurSheet Format SheetCount |
653
|
|
|
|
|
|
|
# ActiveSheet _CurSheet_ FormatStr _skip_chart |
654
|
|
|
|
|
|
|
# Author File NotSetCell _string_contin |
655
|
|
|
|
|
|
|
# BIFFVersion Flg1904 Object Version |
656
|
|
|
|
|
|
|
# _buffer FmtClass PkgStr Worksheet |
657
|
|
|
|
|
|
|
# CellHandler Font _previous_info |
658
|
|
|
|
|
|
|
|
659
|
|
|
|
|
|
|
my @data = ( { |
660
|
|
|
|
|
|
|
type => lc $parse_type, |
661
|
|
|
|
|
|
|
parser => $can{lc $parse_type}, |
662
|
|
|
|
|
|
|
version => $can{lc $parse_type}->VERSION, |
663
|
|
|
|
|
|
|
parsers => [{ |
664
|
|
|
|
|
|
|
type => lc $parse_type, |
665
|
|
|
|
|
|
|
parser => $can{lc $parse_type}, |
666
|
|
|
|
|
|
|
version => $can{lc $parse_type}->VERSION, |
667
|
|
|
|
|
|
|
}], |
668
|
|
|
|
|
|
|
error => undef, |
669
|
76
|
|
50
|
|
|
2469
|
sheets => $oBook->{SheetCount} || 0, |
670
|
|
|
|
|
|
|
sheet => {}, |
671
|
|
|
|
|
|
|
} ); |
672
|
|
|
|
|
|
|
# Overrule the default date format strings |
673
|
|
|
|
|
|
|
my %def_fmt = ( |
674
|
|
|
|
|
|
|
0x0E => lc $opt{dtfmt}, # m-d-yy |
675
|
76
|
|
|
|
|
652
|
0x0F => "d-mmm-yyyy", # d-mmm-yy |
676
|
|
|
|
|
|
|
0x11 => "mmm-yyyy", # mmm-yy |
677
|
|
|
|
|
|
|
0x16 => "yyyy-mm-dd hh:mm", # m-d-yy h:mm |
678
|
|
|
|
|
|
|
); |
679
|
76
|
|
|
|
|
586
|
$oBook->{FormatStr}{$_} = $def_fmt{$_} for keys %def_fmt; |
680
|
|
|
|
|
|
|
my $oFmt = $parse_type eq "XLSX" |
681
|
76
|
50
|
|
|
|
742
|
? $can{xlsx} =~ m/::XLSX$/ |
|
|
100
|
|
|
|
|
|
682
|
|
|
|
|
|
|
? Spreadsheet::XLSX::Fmt2007->new |
683
|
|
|
|
|
|
|
: Spreadsheet::ParseExcel::FmtDefault->new |
684
|
|
|
|
|
|
|
: Spreadsheet::ParseExcel::FmtDefault->new; |
685
|
|
|
|
|
|
|
|
686
|
76
|
50
|
|
|
|
916
|
$debug > 20 and _dump ("oBook before conversion", $oBook); |
687
|
76
|
50
|
33
|
|
|
306
|
if ($parse_type eq "ODS" and !exists $oBook->{SheetCount}) { |
688
|
0
|
|
|
|
|
0
|
my $styles = delete $oBook->{_styles}; |
689
|
0
|
|
|
|
|
0
|
my $sheets = delete $oBook->{_sheets}; |
690
|
0
|
0
|
0
|
|
|
0
|
if ($sheets && ref $sheets eq "ARRAY") { |
691
|
0
|
|
0
|
|
|
0
|
$styles = ($styles || {})->{styles} || {}; |
692
|
0
|
|
|
|
|
0
|
$data[0]{sheets} = $oBook->{SheetCount} = scalar @{$sheets}; |
|
0
|
|
|
|
|
0
|
|
693
|
0
|
|
|
|
|
0
|
$oBook->{Worksheet} = []; |
694
|
0
|
|
|
0
|
|
0
|
*S::R::Sheet::get_merged_areas = sub { [] }; |
|
0
|
|
|
|
|
0
|
|
695
|
0
|
|
|
|
|
0
|
my $x = 0; |
696
|
0
|
|
|
|
|
0
|
foreach my $sh (@{$sheets}) { |
|
0
|
|
|
|
|
0
|
|
697
|
0
|
|
|
|
|
0
|
push @{$oBook->{Worksheet}} => bless { |
698
|
|
|
|
|
|
|
Name => $sh->{label}, |
699
|
|
|
|
|
|
|
Cells => [], |
700
|
|
|
|
|
|
|
MinRow => $sh->{col_min}, |
701
|
|
|
|
|
|
|
MaxRow => $sh->{row_max}, |
702
|
|
|
|
|
|
|
MinCol => $sh->{col_min}, |
703
|
|
|
|
|
|
|
MaxCol => $sh->{row_max}, |
704
|
|
|
|
|
|
|
RowHidden => $sh->{hidden_rows}, |
705
|
|
|
|
|
|
|
ColHidden => $sh->{hidden_cols}, |
706
|
0
|
|
|
|
|
0
|
_SheetNo => $x++, |
707
|
|
|
|
|
|
|
} => "S::R::Sheet"; |
708
|
|
|
|
|
|
|
# header_cols |
709
|
|
|
|
|
|
|
# header_rows |
710
|
|
|
|
|
|
|
# print_areas |
711
|
|
|
|
|
|
|
# sheet_hidden |
712
|
|
|
|
|
|
|
# tab_color |
713
|
0
|
|
|
0
|
|
0
|
*S::R::Cell::Value = sub { $_[0]{Raw} }; |
|
0
|
|
|
|
|
0
|
|
714
|
0
|
|
|
0
|
|
0
|
*S::R::Cell::is_merged = sub { 0 }; |
|
0
|
|
|
|
|
0
|
|
715
|
0
|
|
|
|
|
0
|
my $r = 0; |
716
|
0
|
|
|
|
|
0
|
foreach my $row (@{$sh->{data}}) { |
|
0
|
|
|
|
|
0
|
|
717
|
|
|
|
|
|
|
$#$row > $oBook->{Worksheet}[-1]{MaxCol} and |
718
|
0
|
0
|
|
|
|
0
|
$oBook->{Worksheet}[-1]{MaxCol} = $#$row; |
719
|
|
|
|
|
|
|
$oBook->{Worksheet}[-1]{Cells}[$r++] = [ map { bless { |
720
|
|
|
|
|
|
|
Code => undef, |
721
|
|
|
|
|
|
|
Format => $_->{format}, |
722
|
|
|
|
|
|
|
Formula => $_->{formula}, |
723
|
|
|
|
|
|
|
Hidden => undef, |
724
|
|
|
|
|
|
|
Merged => undef, |
725
|
|
|
|
|
|
|
# use || instead of // for now |
726
|
|
|
|
|
|
|
# even though it is undesirable |
727
|
|
|
|
|
|
|
Type => $_->{type} || "", |
728
|
|
|
|
|
|
|
Val => $_->{value} || $_->{unformatted}, |
729
|
|
|
|
|
|
|
Raw => $_->{unformatted} || $_->{value}, |
730
|
|
|
|
|
|
|
_Style => $styles->{$_->{style} || ""} |
731
|
|
|
|
|
|
|
|| $_->{style}, |
732
|
|
|
|
|
|
|
# hyperlink |
733
|
0
|
|
0
|
|
|
0
|
} => "S::R::Cell" } @{$row} ]; |
|
0
|
|
0
|
|
|
0
|
|
|
0
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
|
|
|
734
|
|
|
|
|
|
|
} |
735
|
|
|
|
|
|
|
--$r > $oBook->{Worksheet}[-1]{MaxRow} and |
736
|
0
|
0
|
|
|
|
0
|
$oBook->{Worksheet}[-1]{MaxRow} = $r; |
737
|
|
|
|
|
|
|
} |
738
|
|
|
|
|
|
|
} |
739
|
|
|
|
|
|
|
} |
740
|
|
|
|
|
|
|
|
741
|
76
|
100
|
|
|
|
347
|
$debug and print STDERR "\t$data[0]{sheets} sheets\n"; |
742
|
|
|
|
|
|
|
my $active_sheet = $oBook->get_active_sheet |
743
|
|
|
|
|
|
|
|| $oBook->{ActiveSheet} |
744
|
76
|
|
33
|
|
|
413
|
|| $oBook->{SelectedSheet}; |
745
|
76
|
|
|
|
|
939
|
my $current_sheet = 0; |
746
|
76
|
|
|
|
|
156
|
foreach my $oWkS (@{$oBook->{Worksheet}}) { |
|
76
|
|
|
|
|
270
|
|
747
|
122
|
50
|
|
|
|
354
|
$debug > 8 and _dump ("oWkS", $oWkS); |
748
|
122
|
|
|
|
|
198
|
$current_sheet++; |
749
|
122
|
100
|
100
|
|
|
548
|
$opt{clip} and !defined $oWkS->{Cells} and next; # Skip empty sheets |
750
|
|
|
|
|
|
|
my %sheet = ( |
751
|
|
|
|
|
|
|
parser => 0, |
752
|
|
|
|
|
|
|
label => $oWkS->{Name}, |
753
|
116
|
|
|
|
|
808
|
maxrow => 0, |
754
|
|
|
|
|
|
|
maxcol => 0, |
755
|
|
|
|
|
|
|
cell => [], |
756
|
|
|
|
|
|
|
attr => [], |
757
|
|
|
|
|
|
|
merged => [], |
758
|
|
|
|
|
|
|
active => 0, |
759
|
|
|
|
|
|
|
); |
760
|
|
|
|
|
|
|
# $debug and $sheet{_parser} = $oWkS; |
761
|
116
|
50
|
|
|
|
377
|
defined $sheet{label} or $sheet{label} = "-- unlabeled --"; |
762
|
116
|
100
|
|
|
|
415
|
exists $oWkS->{MinRow} and $sheet{minrow} = $oWkS->{MinRow} + 1; |
763
|
116
|
100
|
|
|
|
365
|
exists $oWkS->{MaxRow} and $sheet{maxrow} = $oWkS->{MaxRow} + 1; |
764
|
116
|
100
|
|
|
|
362
|
exists $oWkS->{MinCol} and $sheet{mincol} = $oWkS->{MinCol} + 1; |
765
|
116
|
100
|
|
|
|
295
|
exists $oWkS->{MaxCol} and $sheet{maxcol} = $oWkS->{MaxCol} + 1; |
766
|
|
|
|
|
|
|
$sheet{merged} = [ |
767
|
14
|
|
|
|
|
37
|
map { $_->[0] } |
768
|
5
|
|
|
|
|
30
|
sort { $a->[1] cmp $b->[1] } |
769
|
14
|
|
|
|
|
125
|
map {[ $_, pack "NNNN", @$_ ]} |
770
|
14
|
|
|
|
|
100
|
map {[ map { $_ + 1 } @{$_}[1,0,3,2] ]} |
|
56
|
|
|
|
|
111
|
|
|
14
|
|
|
|
|
48
|
|
771
|
116
|
100
|
|
|
|
231
|
@{$oWkS->get_merged_areas || []}]; |
|
116
|
|
|
|
|
523
|
|
772
|
116
|
|
|
|
|
1112
|
my $sheet_idx = 1 + @data; |
773
|
116
|
100
|
|
|
|
390
|
$debug and print STDERR "\tSheet $sheet_idx '$sheet{label}' $sheet{maxrow} x $sheet{maxcol}\n"; |
774
|
116
|
100
|
|
|
|
289
|
if (defined $active_sheet) { |
775
|
|
|
|
|
|
|
# _SheetNo is 0-based |
776
|
7
|
50
|
|
|
|
21
|
my $sheet_no = defined $oWkS->{_SheetNo} ? $oWkS->{_SheetNo} : $current_sheet - 1; |
777
|
7
|
100
|
|
|
|
23
|
$sheet_no eq $active_sheet and $sheet{active} = 1; |
778
|
|
|
|
|
|
|
} |
779
|
|
|
|
|
|
|
# Sheet keys: |
780
|
|
|
|
|
|
|
# _Book FooterMargin MinCol RightMargin |
781
|
|
|
|
|
|
|
# BottomMargin FooterMergin MinRow RightMergin |
782
|
|
|
|
|
|
|
# BottomMergin HCenter Name RowHeight |
783
|
|
|
|
|
|
|
# Cells Header NoColor RowHidden |
784
|
|
|
|
|
|
|
# ColFmtNo HeaderMargin NoOrient Scale |
785
|
|
|
|
|
|
|
# ColHidden HeaderMergin NoPls SheetHidden |
786
|
|
|
|
|
|
|
# ColWidth Kind Notes _SheetNo |
787
|
|
|
|
|
|
|
# Copis Landscape PageFit SheetType |
788
|
|
|
|
|
|
|
# DefColWidth LeftMargin PageStart SheetVersion |
789
|
|
|
|
|
|
|
# DefRowHeight LeftMergin PaperSize TopMargin |
790
|
|
|
|
|
|
|
# Draft LeftToRight _Pos TopMergin |
791
|
|
|
|
|
|
|
# FitHeight MaxCol PrintGrid UsePage |
792
|
|
|
|
|
|
|
# FitWidth MaxRow PrintHeaders VCenter |
793
|
|
|
|
|
|
|
# Footer MergedArea Res VRes |
794
|
116
|
100
|
|
|
|
309
|
if (exists $oWkS->{MinRow}) { |
795
|
111
|
|
50
|
|
|
550
|
my $hiddenRows = $oWkS->{RowHidden} || []; |
796
|
111
|
|
50
|
|
|
467
|
my $hiddenCols = $oWkS->{ColHidden} || []; |
797
|
111
|
100
|
|
|
|
346
|
if ($opt{clip}) { |
798
|
92
|
|
|
|
|
250
|
my ($mr, $mc) = (-1, -1); |
799
|
92
|
|
|
|
|
304
|
foreach my $r ($oWkS->{MinRow} .. $sheet{maxrow}) { |
800
|
522
|
|
|
|
|
902
|
foreach my $c ($oWkS->{MinCol} .. $sheet{maxcol}) { |
801
|
2938
|
100
|
|
|
|
5485
|
my $oWkC = $oWkS->{Cells}[$r][$c] or next; |
802
|
1717
|
50
|
|
|
|
3279
|
defined (my $val = $oWkC->{Val}) or next; |
803
|
1717
|
100
|
|
|
|
3171
|
$val eq "" and next; |
804
|
1690
|
100
|
|
|
|
2831
|
$r > $mr and $mr = $r; |
805
|
1690
|
100
|
|
|
|
2952
|
$c > $mc and $mc = $c; |
806
|
|
|
|
|
|
|
} |
807
|
|
|
|
|
|
|
} |
808
|
92
|
|
|
|
|
317
|
($sheet{maxrow}, $sheet{maxcol}) = ($mr + 1, $mc + 1); |
809
|
|
|
|
|
|
|
} |
810
|
111
|
|
|
|
|
294
|
foreach my $r ($oWkS->{MinRow} .. $sheet{maxrow}) { |
811
|
602
|
|
|
|
|
1154
|
foreach my $c ($oWkS->{MinCol} .. $sheet{maxcol}) { |
812
|
3157
|
100
|
|
|
|
6913
|
my $oWkC = $oWkS->{Cells}[$r][$c] or next; |
813
|
|
|
|
|
|
|
#defined (my $val = $oWkC->{Val}) or next; |
814
|
1818
|
|
|
|
|
2808
|
my $val = $oWkC->{Val}; |
815
|
1818
|
50
|
33
|
|
|
5564
|
if (defined $val and my $enc = $oWkC->{Code}) { |
816
|
0
|
0
|
|
|
|
0
|
$enc eq "ucs2" and $val = decode ("utf-16be", $val); |
817
|
|
|
|
|
|
|
} |
818
|
1818
|
|
|
|
|
3883
|
my $cell = cr2cell ($c + 1, $r + 1); |
819
|
1818
|
100
|
|
|
|
5312
|
$opt{rc} and $sheet{cell}[$c + 1][$r + 1] = $val; # Original |
820
|
|
|
|
|
|
|
|
821
|
1818
|
|
|
|
|
2461
|
my $fmt; |
822
|
1818
|
|
|
|
|
2669
|
my $FmT = $oWkC->{Format}; |
823
|
1818
|
100
|
|
|
|
3087
|
if ($FmT) { |
824
|
968
|
100
|
|
|
|
1888
|
unless (ref $FmT) { |
825
|
130
|
|
|
|
|
186
|
$fmt = $FmT; |
826
|
130
|
|
|
|
|
202
|
$FmT = {}; |
827
|
|
|
|
|
|
|
} |
828
|
|
|
|
|
|
|
} |
829
|
|
|
|
|
|
|
else { |
830
|
850
|
|
|
|
|
1343
|
$FmT = {}; |
831
|
|
|
|
|
|
|
} |
832
|
1818
|
|
|
|
|
3034
|
foreach my $attr (qw( AlignH AlignV FmtIdx Hidden Lock |
833
|
|
|
|
|
|
|
Wrap )) { |
834
|
10908
|
100
|
|
|
|
20618
|
exists $FmT->{$attr} or $FmT->{$attr} = 0; |
835
|
|
|
|
|
|
|
} |
836
|
1818
|
100
|
|
|
|
3764
|
exists $FmT->{Fill} or $FmT->{Fill} = [ 0 ]; |
837
|
1818
|
100
|
|
|
|
3943
|
exists $FmT->{Font} or $FmT->{Font} = undef; |
838
|
|
|
|
|
|
|
|
839
|
1818
|
100
|
|
|
|
3252
|
unless (defined $fmt) { |
840
|
|
|
|
|
|
|
$fmt = $FmT->{FmtIdx} |
841
|
|
|
|
|
|
|
? $oBook->{FormatStr}{$FmT->{FmtIdx}} |
842
|
1688
|
100
|
|
|
|
3178
|
: undef; |
843
|
|
|
|
|
|
|
} |
844
|
|
|
|
|
|
|
lc $oWkC->{Type} eq "float" and |
845
|
1818
|
50
|
|
|
|
3745
|
$oWkC->{Type} = "Numeric"; |
846
|
1818
|
100
|
|
|
|
3223
|
if ($oWkC->{Type} eq "Numeric") { |
847
|
|
|
|
|
|
|
# Fixed in 0.33 and up |
848
|
|
|
|
|
|
|
# see Spreadsheet/ParseExcel/FmtDefault.pm |
849
|
|
|
|
|
|
|
$FmT->{FmtIdx} == 0x0e || |
850
|
|
|
|
|
|
|
$FmT->{FmtIdx} == 0x0f || |
851
|
|
|
|
|
|
|
$FmT->{FmtIdx} == 0x10 || |
852
|
|
|
|
|
|
|
$FmT->{FmtIdx} == 0x11 || |
853
|
|
|
|
|
|
|
$FmT->{FmtIdx} == 0x16 || |
854
|
|
|
|
|
|
|
(defined $fmt && $fmt =~ m{^[dmy][-\\/dmy]*$}) and |
855
|
370
|
100
|
33
|
|
|
3321
|
$oWkC->{Type} = "Date"; |
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
856
|
|
|
|
|
|
|
$FmT->{FmtIdx} == 0x09 || |
857
|
|
|
|
|
|
|
$FmT->{FmtIdx} == 0x0a || |
858
|
|
|
|
|
|
|
(defined $fmt && $fmt =~ m{^0+\.0+%$}) and |
859
|
370
|
100
|
100
|
|
|
2217
|
$oWkC->{Type} = "Percentage"; |
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
860
|
|
|
|
|
|
|
} |
861
|
1818
|
100
|
|
|
|
3332
|
defined $fmt and $fmt =~ s/\\//g; |
862
|
|
|
|
|
|
|
$opt{cells} and # Formatted value |
863
|
|
|
|
|
|
|
$sheet{$cell} = defined $val ? $FmT && exists $def_fmt{$FmT->{FmtIdx}} |
864
|
1818
|
100
|
66
|
|
|
9515
|
? $oFmt->ValFmt ($oWkC, $oBook) |
|
|
50
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
865
|
|
|
|
|
|
|
: $oWkC->Value : undef; |
866
|
1818
|
100
|
|
|
|
15533
|
if ($opt{attr}) { |
867
|
1542
|
|
|
|
|
2221
|
my $FnT = $FmT->{Font}; |
868
|
|
|
|
|
|
|
my $fmi = $FmT->{FmtIdx} |
869
|
|
|
|
|
|
|
? $oBook->{FormatStr}{$FmT->{FmtIdx}} |
870
|
1542
|
100
|
|
|
|
2671
|
: undef; |
871
|
1542
|
100
|
|
|
|
2605
|
$fmi and $fmi =~ s/\\//g; |
872
|
1542
|
|
100
|
|
|
3663
|
my $merged = (defined $oWkC->{Merged} ? $oWkC->{Merged} : $oWkC->is_merged) || 0; |
873
|
|
|
|
|
|
|
$sheet{attr}[$c + 1][$r + 1] = { |
874
|
|
|
|
|
|
|
@def_attr, |
875
|
|
|
|
|
|
|
|
876
|
|
|
|
|
|
|
type => lc $oWkC->{Type}, |
877
|
|
|
|
|
|
|
enc => $oWkC->{Code}, |
878
|
|
|
|
|
|
|
merged => $merged, |
879
|
|
|
|
|
|
|
hidden => ($hiddenRows->[$r] || $hiddenCols->[$c] ? 1 : |
880
|
|
|
|
|
|
|
defined $oWkC->{Hidden} ? $oWkC->{Hidden} : $FmT->{Hidden}) || 0, |
881
|
|
|
|
|
|
|
locked => $FmT->{Lock} || 0, |
882
|
|
|
|
|
|
|
format => $fmi, |
883
|
|
|
|
|
|
|
halign => [ undef, qw( left center right |
884
|
|
|
|
|
|
|
fill justify ), undef, |
885
|
|
|
|
|
|
|
"equal_space" ]->[$FmT->{AlignH}], |
886
|
|
|
|
|
|
|
valign => [ qw( top center bottom justify |
887
|
|
|
|
|
|
|
equal_space )]->[$FmT->{AlignV}], |
888
|
|
|
|
|
|
|
wrap => $FmT->{Wrap}, |
889
|
|
|
|
|
|
|
font => $FnT->{Name}, |
890
|
|
|
|
|
|
|
size => $FnT->{Height}, |
891
|
|
|
|
|
|
|
bold => $FnT->{Bold}, |
892
|
|
|
|
|
|
|
italic => $FnT->{Italic}, |
893
|
|
|
|
|
|
|
uline => $FnT->{Underline}, |
894
|
|
|
|
|
|
|
fgcolor => _xls_color ($FnT->{Color}), |
895
|
1542
|
|
|
|
|
6211
|
bgcolor => _xls_fill (@{$FmT->{Fill}}), |
896
|
|
|
|
|
|
|
formula => $oWkC->{Formula}, |
897
|
1542
|
|
100
|
|
|
21283
|
}; |
|
|
|
100
|
|
|
|
|
898
|
|
|
|
|
|
|
#_dump "cell", $sheet{attr}[$c + 1][$r + 1]; |
899
|
1542
|
100
|
100
|
|
|
11792
|
if ($opt{merge} && $merged and |
|
|
|
66
|
|
|
|
|
900
|
|
|
|
|
|
|
my $p_cell = Spreadsheet::Read::Sheet::merged_from (\%sheet, $c + 1, $r + 1)) { |
901
|
6
|
|
|
|
|
64
|
warn $p_cell; |
902
|
6
|
|
|
|
|
2928
|
$sheet{attr}[$c + 1][$r + 1]{merged} = $p_cell; |
903
|
6
|
100
|
|
|
|
19
|
if ($cell ne $p_cell) { |
904
|
4
|
|
|
|
|
10
|
my ($C, $R) = cell2cr ($p_cell); |
905
|
|
|
|
|
|
|
$sheet{cell}[$c + 1][$r + 1] = |
906
|
4
|
|
|
|
|
14
|
$sheet{cell}[$C][$R]; |
907
|
4
|
|
|
|
|
10
|
$sheet{$cell} = $sheet{$p_cell}; |
908
|
|
|
|
|
|
|
} |
909
|
|
|
|
|
|
|
} |
910
|
|
|
|
|
|
|
} |
911
|
|
|
|
|
|
|
} |
912
|
|
|
|
|
|
|
} |
913
|
|
|
|
|
|
|
} |
914
|
116
|
|
|
|
|
221
|
for (@{$sheet{cell}}) { |
|
116
|
|
|
|
|
374
|
|
915
|
414
|
100
|
|
|
|
1023
|
defined or $_ = []; |
916
|
|
|
|
|
|
|
} |
917
|
116
|
|
|
|
|
1962
|
push @data, { %sheet }; |
918
|
|
|
|
|
|
|
# $data[0]{sheets}++; |
919
|
116
|
50
|
|
|
|
507
|
if ($sheet{label} eq "-- unlabeled --") { |
920
|
0
|
|
|
|
|
0
|
$sheet{label} = ""; |
921
|
|
|
|
|
|
|
} |
922
|
|
|
|
|
|
|
else { |
923
|
116
|
|
|
|
|
738
|
$data[0]{sheet}{$sheet{label}} = $#data; |
924
|
|
|
|
|
|
|
} |
925
|
|
|
|
|
|
|
} |
926
|
76
|
|
|
|
|
407
|
return _clipsheets \%opt, [ @data ]; |
927
|
|
|
|
|
|
|
} |
928
|
76
|
100
|
33
|
|
|
487
|
if ($opt{parser} ? $_parser =~ m/^(ods)$/ |
|
|
50
|
0
|
|
|
|
|
|
|
|
33
|
|
|
|
|
929
|
|
|
|
|
|
|
: ($io_fil && $txt =~ m/(ods)$/i && |
930
|
|
|
|
|
|
|
($_parser = _parser ($1))) |
931
|
|
|
|
|
|
|
and ($can{$_parser} || "") !~ m/sxc/i) { |
932
|
0
|
|
|
|
|
0
|
my $parse_type = "ODS"; |
933
|
0
|
0
|
|
|
|
0
|
my $parser = $can{lc $parse_type} or |
934
|
|
|
|
|
|
|
croak "Parser for $parse_type is not installed"; |
935
|
|
|
|
|
|
|
#$debug and print STDERR __FILE__, "#", __LINE__, " | $_parser | $parser | $parse_type\n"; |
936
|
|
|
|
|
|
|
$debug and print STDERR "Opening $parse_type ", $io_ref ? "<REF>" : $txt, |
937
|
0
|
0
|
|
|
|
0
|
" using $parser-", $can{lc $parse_type}->VERSION, "\n"; |
|
|
0
|
|
|
|
|
|
938
|
0
|
0
|
|
|
|
0
|
$opt{passwd} and $parser_opts{Password} = $opt{passwd}; |
939
|
0
|
|
|
|
|
0
|
my $oBook = eval { |
940
|
0
|
0
|
|
|
|
0
|
$io_ref |
941
|
|
|
|
|
|
|
? $parser->new (readonly => 1, %parser_opts)->parse ($io_ref) |
942
|
|
|
|
|
|
|
: $parser->new (readonly => 1, %parser_opts)->parse ($txt) |
943
|
|
|
|
|
|
|
}; |
944
|
0
|
0
|
|
|
|
0
|
unless ($oBook) { |
945
|
|
|
|
|
|
|
# cleanup will fail on folders with spaces. |
946
|
0
|
|
|
|
|
0
|
(my $msg = $@) =~ s/ at \S+ line \d+.*//s; |
947
|
0
|
|
|
|
|
0
|
croak "$parse_type parser cannot parse data: $msg"; |
948
|
|
|
|
|
|
|
} |
949
|
0
|
0
|
|
|
|
0
|
$debug > 8 and _dump (oBook => $oBook); |
950
|
|
|
|
|
|
|
|
951
|
|
|
|
|
|
|
my @data = ( { |
952
|
|
|
|
|
|
|
type => lc $parse_type, |
953
|
|
|
|
|
|
|
parser => $can{lc $parse_type}, |
954
|
|
|
|
|
|
|
version => $can{lc $parse_type}->VERSION, |
955
|
|
|
|
|
|
|
parsers => [{ |
956
|
|
|
|
|
|
|
type => lc $parse_type, |
957
|
|
|
|
|
|
|
parser => $can{lc $parse_type}, |
958
|
0
|
|
|
|
|
0
|
version => $can{lc $parse_type}->VERSION, |
959
|
|
|
|
|
|
|
}], |
960
|
|
|
|
|
|
|
error => undef, |
961
|
|
|
|
|
|
|
sheets => scalar $oBook->worksheets, |
962
|
|
|
|
|
|
|
sheet => {}, |
963
|
|
|
|
|
|
|
} ); |
964
|
|
|
|
|
|
|
# $debug and $data[0]{_parser} = $oBook; |
965
|
|
|
|
|
|
|
|
966
|
0
|
0
|
|
|
|
0
|
$debug and print STDERR "\t$data[0]{sheets} sheets\n"; |
967
|
0
|
|
|
|
|
0
|
my $active_sheet = $oBook->get_active_sheet; |
968
|
0
|
|
|
|
|
0
|
my $current_sheet = 0; |
969
|
0
|
|
|
|
|
0
|
foreach my $oWkS ($oBook->worksheets) { |
970
|
0
|
|
|
|
|
0
|
$current_sheet++; |
971
|
0
|
0
|
0
|
|
|
0
|
$opt{clip} && $oWkS->row_max < $oWkS->row_min |
|
|
|
0
|
|
|
|
|
972
|
|
|
|
|
|
|
&& $oWkS->col_max < $oWkS->col_min and next; # Skip empty sheets |
973
|
0
|
|
|
|
|
0
|
my %sheet = ( |
974
|
|
|
|
|
|
|
parser => 0, |
975
|
|
|
|
|
|
|
label => $oWkS->label, |
976
|
|
|
|
|
|
|
maxrow => $oWkS->row_max+1, |
977
|
|
|
|
|
|
|
maxcol => $oWkS->col_max+1, |
978
|
|
|
|
|
|
|
cell => [], |
979
|
|
|
|
|
|
|
attr => [], |
980
|
|
|
|
|
|
|
merged => [], |
981
|
|
|
|
|
|
|
active => 0, |
982
|
|
|
|
|
|
|
); |
983
|
|
|
|
|
|
|
# $debug and $sheet{_parser} = $oWkS; |
984
|
0
|
0
|
|
|
|
0
|
defined $sheet{label} or $sheet{label} = "-- unlabeled --"; |
985
|
|
|
|
|
|
|
$sheet{merged} = [ |
986
|
0
|
|
|
|
|
0
|
map { $_->[0] } |
987
|
0
|
|
|
|
|
0
|
sort { $a->[1] cmp $b->[1] } |
988
|
0
|
|
|
|
|
0
|
map {[ $_, pack "NNNN", @$_ ]} |
989
|
0
|
|
|
|
|
0
|
map {[ map { $_ + 1 } @{$_}[1,0,3,2] ]} |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
990
|
0
|
0
|
|
|
|
0
|
@{$oWkS->get_merged_areas || []}]; |
|
0
|
|
|
|
|
0
|
|
991
|
0
|
|
|
|
|
0
|
my $sheet_idx = 1 + @data; |
992
|
0
|
0
|
|
|
|
0
|
$debug and print STDERR "\tSheet $sheet_idx '$sheet{label}' $sheet{maxrow} x $sheet{maxcol}\n"; |
993
|
0
|
0
|
|
|
|
0
|
if (defined $active_sheet) { |
994
|
0
|
|
|
|
|
0
|
my $sheet_no = $current_sheet - 1; |
995
|
0
|
0
|
|
|
|
0
|
$sheet_no eq $active_sheet and $sheet{active} = 1; |
996
|
|
|
|
|
|
|
} |
997
|
0
|
|
0
|
|
|
0
|
my $hiddenRows = $oWkS->hidden_rows || []; |
998
|
0
|
|
0
|
|
|
0
|
my $hiddenCols = $oWkS->hidden_cols || []; |
999
|
0
|
0
|
|
|
|
0
|
if ($opt{clip}) { |
1000
|
0
|
|
|
|
|
0
|
my ($mr, $mc) = (-1, -1); |
1001
|
0
|
|
|
|
|
0
|
foreach my $r ($oWkS->row_min .. $sheet{maxrow}-1) { |
1002
|
0
|
|
|
|
|
0
|
foreach my $c ($oWkS->col_min .. $sheet{maxcol}-1) { |
1003
|
0
|
0
|
|
|
|
0
|
my $oWkC = $oWkS->get_cell($r, $c) or next; |
1004
|
0
|
0
|
|
|
|
0
|
defined (my $val = $oWkC->value) or next; |
1005
|
0
|
0
|
|
|
|
0
|
$val eq "" and next; |
1006
|
0
|
0
|
|
|
|
0
|
$r > $mr and $mr = $r; |
1007
|
0
|
0
|
|
|
|
0
|
$c > $mc and $mc = $c; |
1008
|
|
|
|
|
|
|
} |
1009
|
|
|
|
|
|
|
} |
1010
|
0
|
|
|
|
|
0
|
($sheet{maxrow}, $sheet{maxcol}) = ($mr + 1, $mc + 1); |
1011
|
|
|
|
|
|
|
} |
1012
|
0
|
|
|
|
|
0
|
foreach my $r ($oWkS->row_min .. $sheet{maxrow}) { |
1013
|
0
|
|
|
|
|
0
|
foreach my $c ($oWkS->col_min .. $sheet{maxcol}) { |
1014
|
0
|
0
|
|
|
|
0
|
my $oWkC = $oWkS->get_cell($r, $c) or next; |
1015
|
0
|
|
|
|
|
0
|
my $val = $oWkC->unformatted; |
1016
|
|
|
|
|
|
|
#if (defined $val and my $enc = $oWkC->{Code}) { |
1017
|
|
|
|
|
|
|
# $enc eq "ucs2" and $val = decode ("utf-16be", $val); |
1018
|
|
|
|
|
|
|
# } |
1019
|
0
|
|
|
|
|
0
|
my $cell = cr2cell ($c + 1, $r + 1); |
1020
|
0
|
0
|
|
|
|
0
|
$opt{rc} and $sheet{cell}[$c + 1][$r + 1] = $val; # Original |
1021
|
|
|
|
|
|
|
|
1022
|
0
|
|
|
|
|
0
|
my $fmt; |
1023
|
0
|
|
|
|
|
0
|
my $styleName = $oWkC->style; |
1024
|
0
|
|
|
|
|
0
|
my $FmT; |
1025
|
0
|
0
|
0
|
|
|
0
|
if ($styleName && defined (my $s = $oBook->_styles->{$styleName})) { |
1026
|
0
|
|
|
|
|
0
|
$fmt = $s; |
1027
|
|
|
|
|
|
|
} |
1028
|
|
|
|
|
|
|
|
1029
|
0
|
0
|
|
|
|
0
|
defined $fmt and $fmt =~ s/\\//g; |
1030
|
|
|
|
|
|
|
$opt{cells} and # Formatted value |
1031
|
0
|
0
|
|
|
|
0
|
$sheet{$cell} = defined $val ? $oWkC->value : undef; |
|
|
0
|
|
|
|
|
|
1032
|
0
|
0
|
|
|
|
0
|
if ($opt{attr}) { |
1033
|
0
|
0
|
|
|
|
0
|
my $FnT = $FmT ? $FmT->{font_face} : undef; |
1034
|
0
|
|
|
|
|
0
|
my $fmi; |
1035
|
|
|
|
|
|
|
#my $fmi = $FmT ? $FmT->{FmtIdx} |
1036
|
|
|
|
|
|
|
# ? $oBook->{FormatStr}{$FmT->{FmtIdx}} |
1037
|
|
|
|
|
|
|
# : undef; |
1038
|
|
|
|
|
|
|
#$fmi and $fmi =~ s/\\//g; |
1039
|
0
|
|
0
|
|
|
0
|
my $type = $oWkC->type || ''; |
1040
|
0
|
0
|
|
|
|
0
|
$type eq "float" and $type = "numeric"; |
1041
|
|
|
|
|
|
|
|
1042
|
0
|
|
0
|
|
|
0
|
my $merged = $oWkC->is_merged || 0; |
1043
|
0
|
|
0
|
|
|
0
|
$sheet{attr}[$c + 1][$r + 1] = { |
1044
|
|
|
|
|
|
|
@def_attr, |
1045
|
|
|
|
|
|
|
|
1046
|
|
|
|
|
|
|
type => $type, |
1047
|
|
|
|
|
|
|
# enc => $oWkC->{Code}, |
1048
|
|
|
|
|
|
|
merged => $merged, |
1049
|
|
|
|
|
|
|
hidden => ($hiddenRows->[$r] || $hiddenCols->[$c] ? 1 : |
1050
|
|
|
|
|
|
|
$oWkC->is_hidden ? $oWkC->is_hidden : undef) || 0, |
1051
|
|
|
|
|
|
|
# locked => $FmT->{Lock} || 0, |
1052
|
|
|
|
|
|
|
format => $fmi, |
1053
|
|
|
|
|
|
|
# halign => [ undef, qw( left center right |
1054
|
|
|
|
|
|
|
# fill justify ), undef, |
1055
|
|
|
|
|
|
|
# "equal_space" ]->[$FmT->{AlignH}], |
1056
|
|
|
|
|
|
|
# valign => [ qw( top center bottom justify |
1057
|
|
|
|
|
|
|
# equal_space )]->[$FmT->{AlignV}], |
1058
|
|
|
|
|
|
|
# wrap => $FmT->{Wrap}, |
1059
|
|
|
|
|
|
|
# font => $FnT->{Name}, |
1060
|
|
|
|
|
|
|
# size => $FnT->{Height}, |
1061
|
|
|
|
|
|
|
# bold => $FnT->{Bold}, |
1062
|
|
|
|
|
|
|
# italic => $FnT->{Italic}, |
1063
|
|
|
|
|
|
|
# uline => $FnT->{Underline}, |
1064
|
|
|
|
|
|
|
# fgcolor => _xls_color ($FnT->{Color}), |
1065
|
|
|
|
|
|
|
# bgcolor => _xls_fill (@{$FmT->{Fill}}), |
1066
|
|
|
|
|
|
|
formula => $oWkC->formula, |
1067
|
|
|
|
|
|
|
}; |
1068
|
|
|
|
|
|
|
#_dump "cell", $sheet{attr}[$c + 1][$r + 1]; |
1069
|
0
|
0
|
0
|
|
|
0
|
if ($opt{merge} && $merged and |
|
|
|
0
|
|
|
|
|
1070
|
|
|
|
|
|
|
my $p_cell = Spreadsheet::Read::Sheet::merged_from(\%sheet, $c + 1, $r + 1)) { |
1071
|
0
|
|
|
|
|
0
|
$sheet{attr}[$c + 1][$r + 1]{merged} = $p_cell; |
1072
|
0
|
0
|
|
|
|
0
|
if ($cell ne $p_cell) { |
1073
|
0
|
|
|
|
|
0
|
my ($C, $R) = cell2cr ($p_cell); |
1074
|
|
|
|
|
|
|
$sheet{cell}[$c + 1][$r + 1] = |
1075
|
0
|
|
|
|
|
0
|
$sheet{cell}[$C][$R]; |
1076
|
0
|
|
|
|
|
0
|
$sheet{$cell} = $sheet{$p_cell}; |
1077
|
|
|
|
|
|
|
} |
1078
|
|
|
|
|
|
|
} |
1079
|
|
|
|
|
|
|
} |
1080
|
|
|
|
|
|
|
} |
1081
|
|
|
|
|
|
|
} |
1082
|
0
|
|
|
|
|
0
|
for (@{$sheet{cell}}) { |
|
0
|
|
|
|
|
0
|
|
1083
|
0
|
0
|
|
|
|
0
|
defined or $_ = []; |
1084
|
|
|
|
|
|
|
} |
1085
|
0
|
|
|
|
|
0
|
push @data, { %sheet }; |
1086
|
0
|
0
|
|
|
|
0
|
if ($sheet{label} eq "-- unlabeled --") { |
1087
|
0
|
|
|
|
|
0
|
$sheet{label} = ""; |
1088
|
|
|
|
|
|
|
} |
1089
|
|
|
|
|
|
|
else { |
1090
|
0
|
|
|
|
|
0
|
$data[0]{sheet}{$sheet{label}} = $#data; |
1091
|
|
|
|
|
|
|
} |
1092
|
|
|
|
|
|
|
} |
1093
|
0
|
|
|
|
|
0
|
return _clipsheets \%opt, [ @data ]; |
1094
|
|
|
|
|
|
|
} |
1095
|
|
|
|
|
|
|
|
1096
|
76
|
100
|
|
|
|
418
|
if ($opt{parser} ? _parser ($opt{parser}) eq "sc" |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
1097
|
|
|
|
|
|
|
: $io_fil |
1098
|
|
|
|
|
|
|
? $txt =~ m/\.sc$/ |
1099
|
|
|
|
|
|
|
: $txt =~ m/^# .*SquirrelCalc/) { |
1100
|
46
|
100
|
|
|
|
111
|
if ($io_ref) { |
|
|
100
|
|
|
|
|
|
1101
|
2
|
|
|
|
|
8
|
local $/; |
1102
|
2
|
|
|
|
|
35
|
my $x = <$txt>; |
1103
|
2
|
|
|
|
|
10
|
$txt = $x; |
1104
|
|
|
|
|
|
|
} |
1105
|
|
|
|
|
|
|
elsif ($io_fil) { |
1106
|
42
|
|
|
|
|
197
|
local $/; |
1107
|
42
|
50
|
|
|
|
1546
|
open my $sc, "<", $txt or return; |
1108
|
42
|
|
|
|
|
1839
|
$txt = <$sc>; |
1109
|
42
|
|
|
|
|
750
|
close $sc; |
1110
|
|
|
|
|
|
|
} |
1111
|
46
|
50
|
|
|
|
297
|
$txt =~ m/\S/ or return; |
1112
|
46
|
50
|
|
|
|
158
|
my $label = defined $opt{label} ? $opt{label} : "sheet"; |
1113
|
46
|
|
|
|
|
593
|
my @data = ( |
1114
|
|
|
|
|
|
|
{ type => "sc", |
1115
|
|
|
|
|
|
|
parser => "Spreadsheet::Read", |
1116
|
|
|
|
|
|
|
version => $VERSION, |
1117
|
|
|
|
|
|
|
parsers => [{ |
1118
|
|
|
|
|
|
|
type => "sc", |
1119
|
|
|
|
|
|
|
parser => "Spreadsheet::Read", |
1120
|
|
|
|
|
|
|
version => $VERSION, |
1121
|
|
|
|
|
|
|
}], |
1122
|
|
|
|
|
|
|
error => undef, |
1123
|
|
|
|
|
|
|
sheets => 1, |
1124
|
|
|
|
|
|
|
sheet => { $label => 1 }, |
1125
|
|
|
|
|
|
|
}, |
1126
|
|
|
|
|
|
|
{ parser => 0, |
1127
|
|
|
|
|
|
|
label => $label, |
1128
|
|
|
|
|
|
|
maxrow => 0, |
1129
|
|
|
|
|
|
|
maxcol => 0, |
1130
|
|
|
|
|
|
|
cell => [], |
1131
|
|
|
|
|
|
|
attr => [], |
1132
|
|
|
|
|
|
|
merged => [], |
1133
|
|
|
|
|
|
|
active => 1, |
1134
|
|
|
|
|
|
|
}, |
1135
|
|
|
|
|
|
|
); |
1136
|
|
|
|
|
|
|
|
1137
|
46
|
|
|
|
|
9671
|
for (split m/\s*[\r\n]\s*/, $txt) { |
1138
|
13478
|
100
|
|
|
|
21913
|
if (m/^dimension.*of ([0-9]+) rows.*of ([0-9]+) columns/i) { |
1139
|
46
|
|
|
|
|
77
|
@{$data[1]}{qw(maxrow maxcol)} = ($1, $2); |
|
46
|
|
|
|
|
232
|
|
1140
|
46
|
|
|
|
|
112
|
next; |
1141
|
|
|
|
|
|
|
} |
1142
|
13432
|
100
|
|
|
|
32829
|
s/^r([0-9]+)c([0-9]+)\s*=\s*// or next; |
1143
|
3542
|
|
|
|
|
6394
|
my ($c, $r) = map { $_ + 1 } $2, $1; |
|
7084
|
|
|
|
|
15274
|
|
1144
|
3542
|
100
|
66
|
|
|
15102
|
if (m/.* \{(.*)}$/ or m/"(.*)"/) { |
1145
|
2714
|
|
|
|
|
5077
|
my $cell = cr2cell ($c, $r); |
1146
|
2714
|
100
|
|
|
|
8367
|
$opt{rc} and $data[1]{cell}[$c][$r] = $1; |
1147
|
2714
|
50
|
|
|
|
7592
|
$opt{cells} and $data[1]{$cell} = $1; |
1148
|
2714
|
100
|
|
|
|
6552
|
$opt{attr} and $data[1]{attr}[$c + 1][$r] = { @def_attr }; |
1149
|
2714
|
|
|
|
|
4821
|
next; |
1150
|
|
|
|
|
|
|
} |
1151
|
|
|
|
|
|
|
# Now only formula's remain. Ignore for now |
1152
|
|
|
|
|
|
|
# r67c7 = [P2L] 2*(1000*r67c5-60) |
1153
|
|
|
|
|
|
|
} |
1154
|
46
|
|
|
|
|
770
|
for (@{$data[1]{cell}}) { |
|
46
|
|
|
|
|
171
|
|
1155
|
360
|
100
|
|
|
|
713
|
defined or $_ = []; |
1156
|
|
|
|
|
|
|
} |
1157
|
46
|
|
|
|
|
155
|
return _clipsheets \%opt, [ @data ]; |
1158
|
|
|
|
|
|
|
} |
1159
|
|
|
|
|
|
|
|
1160
|
30
|
50
|
66
|
|
|
408
|
if ($opt{parser} ? _parser ($opt{parser}) eq "sxc" |
|
|
100
|
|
|
|
|
|
1161
|
|
|
|
|
|
|
: ($txt =~ m/^<\?xml/ or -f $txt)) { |
1162
|
7
|
50
|
|
|
|
997
|
$can{sxc} or croak "Spreadsheet::ReadSXC not installed"; |
1163
|
|
|
|
|
|
|
|
1164
|
0
|
0
|
0
|
|
|
0
|
ref $txt && $can{sxc}->VERSION <= 0.23 and |
1165
|
|
|
|
|
|
|
croak ("Sorry, references as input are not supported by Spreadsheet::ReadSXC before 0.23"); |
1166
|
|
|
|
|
|
|
|
1167
|
0
|
|
|
|
|
0
|
my $using = "using $can{sxc}-" . $can{sxc}->VERSION; |
1168
|
0
|
|
|
|
|
0
|
my $sxc_options = { %parser_opts, OrderBySheet => 1 }; # New interface 0.20 and up |
1169
|
0
|
|
|
|
|
0
|
my $sxc; |
1170
|
0
|
0
|
0
|
|
|
0
|
if ($txt =~ m/\.(sxc|ods)$/i) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
1171
|
0
|
0
|
|
|
|
0
|
$debug and print STDERR "Opening \U$1\E $txt $using\n"; |
1172
|
0
|
0
|
|
|
|
0
|
$debug and print STDERR __FILE__, "#", __LINE__, "\n"; |
1173
|
0
|
0
|
|
|
|
0
|
$sxc = Spreadsheet::ReadSXC::read_sxc ($txt, $sxc_options) or return; |
1174
|
|
|
|
|
|
|
} |
1175
|
|
|
|
|
|
|
# treat all refs as a filehandle |
1176
|
|
|
|
|
|
|
elsif (ref $txt) { |
1177
|
0
|
0
|
|
|
|
0
|
$debug and print STDERR "Opening SXC filehandle\n"; |
1178
|
0
|
0
|
|
|
|
0
|
$sxc = Spreadsheet::ReadSXC::read_sxc_fh ($txt, $sxc_options) or return; |
1179
|
|
|
|
|
|
|
} |
1180
|
|
|
|
|
|
|
elsif ($txt =~ m/\.xml$/i) { |
1181
|
0
|
0
|
|
|
|
0
|
$debug and print STDERR "Opening XML $txt $using\n"; |
1182
|
0
|
0
|
|
|
|
0
|
$sxc = Spreadsheet::ReadSXC::read_xml_file ($txt, $sxc_options) or return; |
1183
|
|
|
|
|
|
|
} |
1184
|
|
|
|
|
|
|
# need to test on pattern to prevent stat warning |
1185
|
|
|
|
|
|
|
# on filename with newline |
1186
|
|
|
|
|
|
|
elsif ($txt !~ m/^<\?xml/i and -f $txt) { |
1187
|
0
|
0
|
|
|
|
0
|
$debug and print STDERR "Opening XML $txt $using\n"; |
1188
|
0
|
0
|
|
|
|
0
|
open my $f, "<", $txt or return; |
1189
|
0
|
|
|
|
|
0
|
local $/; |
1190
|
0
|
|
|
|
|
0
|
$txt = <$f>; |
1191
|
0
|
|
|
|
|
0
|
close $f; |
1192
|
|
|
|
|
|
|
} |
1193
|
0
|
0
|
0
|
|
|
0
|
!$sxc && $txt =~ m/^<\?xml/i and |
1194
|
|
|
|
|
|
|
$sxc = Spreadsheet::ReadSXC::read_xml_string ($txt, $sxc_options); |
1195
|
0
|
0
|
|
|
|
0
|
$debug > 8 and _dump (sxc => $sxc); |
1196
|
0
|
0
|
|
|
|
0
|
if ($sxc) { |
1197
|
0
|
|
|
|
|
0
|
my @data = ( { |
1198
|
|
|
|
|
|
|
type => "sxc", |
1199
|
|
|
|
|
|
|
parser => "Spreadsheet::ReadSXC", |
1200
|
|
|
|
|
|
|
version => $Spreadsheet::ReadSXC::VERSION, |
1201
|
|
|
|
|
|
|
parsers => [{ |
1202
|
|
|
|
|
|
|
type => "sxc", |
1203
|
|
|
|
|
|
|
parser => "Spreadsheet::ReadSXC", |
1204
|
|
|
|
|
|
|
version => $Spreadsheet::ReadSXC::VERSION, |
1205
|
|
|
|
|
|
|
}], |
1206
|
|
|
|
|
|
|
error => undef, |
1207
|
|
|
|
|
|
|
sheets => 0, |
1208
|
|
|
|
|
|
|
sheet => {}, |
1209
|
|
|
|
|
|
|
} ); |
1210
|
|
|
|
|
|
|
my @sheets = ref $sxc eq "HASH" # < 0.20 |
1211
|
|
|
|
|
|
|
? map { |
1212
|
|
|
|
|
|
|
{ label => $_, |
1213
|
0
|
|
|
|
|
0
|
data => $sxc->{$_}, |
1214
|
|
|
|
|
|
|
} |
1215
|
|
|
|
|
|
|
} keys %$sxc |
1216
|
0
|
0
|
|
|
|
0
|
: @{$sxc}; |
|
0
|
|
|
|
|
0
|
|
1217
|
0
|
|
|
|
|
0
|
foreach my $sheet (@sheets) { |
1218
|
0
|
0
|
|
|
|
0
|
my @sheet = @{$sheet->{data} || []}; |
|
0
|
|
|
|
|
0
|
|
1219
|
|
|
|
|
|
|
my %sheet = ( |
1220
|
|
|
|
|
|
|
parser => 0, |
1221
|
|
|
|
|
|
|
label => $sheet->{label}, |
1222
|
0
|
|
|
|
|
0
|
maxrow => scalar @sheet, |
1223
|
|
|
|
|
|
|
maxcol => 0, |
1224
|
|
|
|
|
|
|
cell => [], |
1225
|
|
|
|
|
|
|
attr => [], |
1226
|
|
|
|
|
|
|
merged => [], |
1227
|
|
|
|
|
|
|
active => 0, |
1228
|
|
|
|
|
|
|
); |
1229
|
0
|
|
|
|
|
0
|
my $sheet_idx = 1 + @data; |
1230
|
0
|
0
|
|
|
|
0
|
$debug and print STDERR "\tSheet $sheet_idx '$sheet{label}' $sheet{maxrow} rows\n"; |
1231
|
0
|
|
|
|
|
0
|
foreach my $r (0 .. $#sheet) { |
1232
|
0
|
0
|
|
|
|
0
|
my @row = @{$sheet[$r]} or next; |
|
0
|
|
|
|
|
0
|
|
1233
|
0
|
|
|
|
|
0
|
foreach my $c (0 .. $#row) { |
1234
|
0
|
0
|
|
|
|
0
|
defined (my $val = $row[$c]) or next; |
1235
|
0
|
|
|
|
|
0
|
my $C = $c + 1; |
1236
|
0
|
0
|
|
|
|
0
|
$C > $sheet{maxcol} and $sheet{maxcol} = $C; |
1237
|
0
|
|
|
|
|
0
|
my $cell = cr2cell ($C, $r + 1); |
1238
|
0
|
0
|
|
|
|
0
|
$opt{rc} and $sheet{cell}[$C][$r + 1] = $val; |
1239
|
0
|
0
|
|
|
|
0
|
$opt{cells} and $sheet{$cell} = $val; |
1240
|
0
|
0
|
|
|
|
0
|
$opt{attr} and $sheet{attr}[$C][$r + 1] = { @def_attr }; |
1241
|
|
|
|
|
|
|
} |
1242
|
|
|
|
|
|
|
} |
1243
|
0
|
|
|
|
|
0
|
for (@{$sheet{cell}}) { |
|
0
|
|
|
|
|
0
|
|
1244
|
0
|
0
|
|
|
|
0
|
defined or $_ = []; |
1245
|
|
|
|
|
|
|
} |
1246
|
0
|
0
|
|
|
|
0
|
$debug and print STDERR "\tSheet $sheet_idx '$sheet{label}' $sheet{maxrow} x $sheet{maxcol}\n"; |
1247
|
0
|
|
|
|
|
0
|
push @data, { %sheet }; |
1248
|
0
|
|
|
|
|
0
|
$data[0]{sheets}++; |
1249
|
0
|
|
|
|
|
0
|
$data[0]{sheet}{$sheet->{label}} = $#data; |
1250
|
|
|
|
|
|
|
} |
1251
|
0
|
|
|
|
|
0
|
return _clipsheets \%opt, [ @data ]; |
1252
|
|
|
|
|
|
|
} |
1253
|
|
|
|
|
|
|
} |
1254
|
|
|
|
|
|
|
|
1255
|
23
|
100
|
100
|
|
|
144
|
if (!ref $txt and $txt =~ m/\.\w+$/) { |
1256
|
|
|
|
|
|
|
# Return (localized) system message |
1257
|
5
|
50
|
|
|
|
130
|
open my $fh, "<", $txt and |
1258
|
|
|
|
|
|
|
croak "I can open file $txt, but I do not know how to parse it\n"; |
1259
|
|
|
|
|
|
|
|
1260
|
5
|
|
|
|
|
93
|
$@ = $!; |
1261
|
|
|
|
|
|
|
} |
1262
|
|
|
|
|
|
|
|
1263
|
23
|
|
|
|
|
128
|
return; |
1264
|
|
|
|
|
|
|
} # ReadData |
1265
|
|
|
|
|
|
|
|
1266
|
|
|
|
|
|
|
sub add { |
1267
|
2
|
|
|
2
|
1
|
7
|
my $book = shift; |
1268
|
2
|
50
|
|
|
|
10
|
my $r = ReadData (@_) or return; |
1269
|
|
|
|
|
|
|
$book && (ref $book eq "ARRAY" || |
1270
|
2
|
50
|
33
|
|
|
28
|
ref $book eq __PACKAGE__) && $book->[0]{sheets} or return $r; |
|
|
|
33
|
|
|
|
|
|
|
|
33
|
|
|
|
|
1271
|
|
|
|
|
|
|
|
1272
|
2
|
|
|
|
|
5
|
my $c1 = $book->[0]; |
1273
|
2
|
|
|
|
|
6
|
my $c2 = $r->[0]; |
1274
|
|
|
|
|
|
|
|
1275
|
2
|
50
|
|
|
|
7
|
unless ($c1->{parsers}) { |
1276
|
0
|
|
|
|
|
0
|
$c1->{parsers}[0]{$_} = $c1->{$_} for qw( type parser version ); |
1277
|
0
|
|
|
|
|
0
|
$book->[$_]{parser} = 0 for 1 .. $c1->{sheets}; |
1278
|
|
|
|
|
|
|
} |
1279
|
2
|
|
|
|
|
5
|
my ($pidx) = (grep { my $p = $c1->{parsers}[$_]; |
1280
|
|
|
|
|
|
|
$p->{type} eq $c2->{type} && |
1281
|
|
|
|
|
|
|
$p->{parser} eq $c2->{parser} && |
1282
|
2
|
50
|
33
|
|
|
6
|
$p->{version} eq $c2->{version} } 0 .. $#{$c1->{parsers}}); |
|
2
|
|
|
|
|
26
|
|
|
2
|
|
|
|
|
7
|
|
1283
|
2
|
50
|
|
|
|
8
|
unless (defined $pidx) { |
1284
|
0
|
|
|
|
|
0
|
$pidx = scalar @{$c1->{parsers}}; |
|
0
|
|
|
|
|
0
|
|
1285
|
0
|
|
|
|
|
0
|
$c1->{parsers}[$pidx]{$_} = $c2->{$_} for qw( type parser version ); |
1286
|
|
|
|
|
|
|
} |
1287
|
|
|
|
|
|
|
|
1288
|
2
|
|
|
|
|
6
|
foreach my $sn (sort { $c2->{sheet}{$a} <=> $c2->{sheet}{$b} } keys %{$c2->{sheet}}) { |
|
0
|
|
|
|
|
0
|
|
|
2
|
|
|
|
|
12
|
|
1289
|
2
|
|
|
|
|
5
|
my $s = $sn; |
1290
|
2
|
|
|
|
|
4
|
my $v = 2; |
1291
|
2
|
|
|
|
|
9
|
while (exists $c1->{sheet}{$s}) { |
1292
|
1
|
|
|
|
|
6
|
$s = $sn."[".$v++."]"; |
1293
|
|
|
|
|
|
|
} |
1294
|
2
|
|
|
|
|
7
|
$c1->{sheet}{$s} = $c1->{sheets} + $c2->{sheet}{$sn}; |
1295
|
2
|
|
|
|
|
6
|
$r->[$c2->{sheet}{$sn}]{parser} = $pidx; |
1296
|
2
|
|
|
|
|
9
|
push @$book, $r->[$c2->{sheet}{$sn}]; |
1297
|
|
|
|
|
|
|
} |
1298
|
2
|
|
|
|
|
5
|
$c1->{sheets} += $c2->{sheets}; |
1299
|
|
|
|
|
|
|
|
1300
|
2
|
|
|
|
|
18
|
return $book; |
1301
|
|
|
|
|
|
|
} # add |
1302
|
|
|
|
|
|
|
|
1303
|
|
|
|
|
|
|
package Spreadsheet::Read::Attribute; |
1304
|
|
|
|
|
|
|
|
1305
|
58
|
|
|
58
|
|
660
|
use Carp; |
|
58
|
|
|
|
|
153
|
|
|
58
|
|
|
|
|
4960
|
|
1306
|
58
|
|
|
58
|
|
492
|
use vars qw( $AUTOLOAD ); |
|
58
|
|
|
|
|
185
|
|
|
58
|
|
|
|
|
85151
|
|
1307
|
|
|
|
|
|
|
|
1308
|
|
|
|
|
|
|
sub AUTOLOAD { |
1309
|
33
|
|
|
33
|
|
1778
|
my $self = shift; |
1310
|
33
|
|
|
|
|
132
|
(my $attr = $AUTOLOAD) =~ s/.*:://; |
1311
|
33
|
|
|
|
|
463
|
$self->{$attr}; |
1312
|
|
|
|
|
|
|
} # AUTOLOAD |
1313
|
|
|
|
|
|
|
|
1314
|
|
|
|
|
|
|
package Spreadsheet::Read::Sheet; |
1315
|
|
|
|
|
|
|
|
1316
|
|
|
|
|
|
|
sub cell { |
1317
|
55
|
|
|
55
|
|
19960
|
my ($sheet, @id) = @_; |
1318
|
|
|
|
|
|
|
@id == 2 && $id[0] =~ m/^[0-9]+$/ && $id[1] =~ m/^[0-9]+$/ and |
1319
|
55
|
50
|
66
|
|
|
508
|
return $sheet->{cell}[$id[0]][$id[1]]; |
|
|
|
66
|
|
|
|
|
1320
|
|
|
|
|
|
|
@id && $id[0] && exists $sheet->{$id[0]} and |
1321
|
27
|
50
|
33
|
|
|
269
|
return $sheet->{$id[0]}; |
|
|
|
33
|
|
|
|
|
1322
|
|
|
|
|
|
|
} # cell |
1323
|
|
|
|
|
|
|
|
1324
|
|
|
|
|
|
|
sub attr { |
1325
|
26
|
|
|
26
|
|
63
|
my ($sheet, @id) = @_; |
1326
|
26
|
|
|
|
|
36
|
my $class = "Spreadsheet::Read::Attribute"; |
1327
|
|
|
|
|
|
|
@id == 2 && $id[0] =~ m/^[0-9]+$/ && $id[1] =~ m/^[0-9]+$/ and |
1328
|
26
|
50
|
66
|
|
|
239
|
return bless $sheet->{attr}[$id[0]][$id[1]] => $class; |
|
|
|
66
|
|
|
|
|
1329
|
14
|
50
|
33
|
|
|
82
|
if (@id && $id[0] && exists $sheet->{$id[0]}) { |
|
|
|
33
|
|
|
|
|
1330
|
14
|
|
|
|
|
38
|
my ($c, $r) = $sheet->cell2cr ($id[0]); |
1331
|
14
|
|
|
|
|
124
|
return bless $sheet->{attr}[$c][$r] => $class; |
1332
|
|
|
|
|
|
|
} |
1333
|
0
|
|
|
|
|
0
|
undef; |
1334
|
|
|
|
|
|
|
} # attr |
1335
|
|
|
|
|
|
|
|
1336
|
|
|
|
|
|
|
sub maxrow { |
1337
|
2
|
|
|
2
|
|
4
|
my $sheet = shift; |
1338
|
2
|
|
|
|
|
13
|
return $sheet->{maxrow}; |
1339
|
|
|
|
|
|
|
} # maxrow |
1340
|
|
|
|
|
|
|
|
1341
|
|
|
|
|
|
|
sub maxcol { |
1342
|
2
|
|
|
2
|
|
5
|
my $sheet = shift; |
1343
|
2
|
|
|
|
|
11
|
return $sheet->{maxcol}; |
1344
|
|
|
|
|
|
|
} # maxrow |
1345
|
|
|
|
|
|
|
|
1346
|
|
|
|
|
|
|
sub col2label { |
1347
|
1
|
50
|
|
1
|
|
7
|
$_[0] =~ m/::/ and shift; # class unused |
1348
|
1
|
|
|
|
|
6
|
return Spreadsheet::Read::col2label (@_); |
1349
|
|
|
|
|
|
|
} # col2label |
1350
|
|
|
|
|
|
|
|
1351
|
|
|
|
|
|
|
sub cr2cell { |
1352
|
34
|
100
|
|
34
|
|
88
|
$_[0] =~ m/::/ and shift; # class unused |
1353
|
34
|
|
|
|
|
82
|
return Spreadsheet::Read::cr2cell (@_); |
1354
|
|
|
|
|
|
|
} # cr2cell |
1355
|
|
|
|
|
|
|
|
1356
|
|
|
|
|
|
|
sub cell2cr { |
1357
|
33
|
100
|
|
33
|
|
11908
|
$_[0] =~ m/::/ and shift; # class unused |
1358
|
33
|
|
|
|
|
93
|
return Spreadsheet::Read::cell2cr (@_); |
1359
|
|
|
|
|
|
|
} # cell2cr |
1360
|
|
|
|
|
|
|
|
1361
|
|
|
|
|
|
|
sub label { |
1362
|
2
|
|
|
2
|
|
7
|
my ($sheet, $label) = @_; |
1363
|
2
|
100
|
|
|
|
9
|
defined $label and $sheet->{label} = $label; |
1364
|
2
|
|
|
|
|
10
|
return $sheet->{label}; |
1365
|
|
|
|
|
|
|
} # label |
1366
|
|
|
|
|
|
|
|
1367
|
|
|
|
|
|
|
sub active { |
1368
|
1
|
|
|
1
|
|
4
|
my $sheet = shift; |
1369
|
1
|
|
|
|
|
5
|
return $sheet->{active}; |
1370
|
|
|
|
|
|
|
} # label |
1371
|
|
|
|
|
|
|
|
1372
|
|
|
|
|
|
|
# my @row = $sheet->cellrow (1); |
1373
|
|
|
|
|
|
|
sub cellrow { |
1374
|
4
|
|
|
4
|
|
10
|
my ($sheet, $row) = @_; |
1375
|
4
|
100
|
66
|
|
|
35
|
defined $row && $row > 0 && $row <= $sheet->{maxrow} or return; |
|
|
|
100
|
|
|
|
|
1376
|
1
|
|
|
|
|
4
|
my $s = $sheet->{cell}; |
1377
|
1
|
|
|
|
|
4
|
map { $s->[$_][$row] } 1..$sheet->{maxcol}; |
|
19
|
|
|
|
|
39
|
|
1378
|
|
|
|
|
|
|
} # cellrow |
1379
|
|
|
|
|
|
|
|
1380
|
|
|
|
|
|
|
# my @row = $sheet->row (1); |
1381
|
|
|
|
|
|
|
sub row { |
1382
|
4
|
|
|
4
|
|
9
|
my ($sheet, $row) = @_; |
1383
|
4
|
100
|
66
|
|
|
38
|
defined $row && $row > 0 && $row <= $sheet->{maxrow} or return; |
|
|
|
100
|
|
|
|
|
1384
|
1
|
|
|
|
|
8
|
map { $sheet->{$sheet->cr2cell ($_, $row)} } 1..$sheet->{maxcol}; |
|
19
|
|
|
|
|
34
|
|
1385
|
|
|
|
|
|
|
} # row |
1386
|
|
|
|
|
|
|
|
1387
|
|
|
|
|
|
|
# my @col = $sheet->cellcolumn (1); |
1388
|
|
|
|
|
|
|
sub cellcolumn { |
1389
|
4
|
|
|
4
|
|
13
|
my ($sheet, $col) = @_; |
1390
|
4
|
100
|
66
|
|
|
38
|
defined $col && $col > 0 && $col <= $sheet->{maxcol} or return; |
|
|
|
100
|
|
|
|
|
1391
|
1
|
|
|
|
|
3
|
my $s = $sheet->{cell}; |
1392
|
1
|
|
|
|
|
4
|
map { $s->[$col][$_] } 1..$sheet->{maxrow}; |
|
5
|
|
|
|
|
31
|
|
1393
|
|
|
|
|
|
|
} # cellcolumn |
1394
|
|
|
|
|
|
|
|
1395
|
|
|
|
|
|
|
# my @col = $sheet->column (1); |
1396
|
|
|
|
|
|
|
sub column { |
1397
|
4
|
|
|
4
|
|
14
|
my ($sheet, $col) = @_; |
1398
|
4
|
100
|
66
|
|
|
41
|
defined $col && $col > 0 && $col <= $sheet->{maxcol} or return; |
|
|
|
100
|
|
|
|
|
1399
|
1
|
|
|
|
|
6
|
map { $sheet->{$sheet->cr2cell ($col, $_)} } 1..$sheet->{maxrow}; |
|
5
|
|
|
|
|
11
|
|
1400
|
|
|
|
|
|
|
} # column |
1401
|
|
|
|
|
|
|
|
1402
|
|
|
|
|
|
|
# Convert {cell}'s [column][row] to a [row][column] list |
1403
|
|
|
|
|
|
|
# my @rows = $sheet->rows (); |
1404
|
|
|
|
|
|
|
sub rows { |
1405
|
1
|
|
|
1
|
|
3
|
my $sheet = shift; |
1406
|
1
|
|
|
|
|
3
|
my $s = $sheet->{cell}; |
1407
|
|
|
|
|
|
|
|
1408
|
|
|
|
|
|
|
map { |
1409
|
5
|
|
|
|
|
10
|
my $r = $_; |
1410
|
5
|
|
|
|
|
10
|
[ map { $s->[$_][$r] } 1..$sheet->{maxcol} ]; |
|
95
|
|
|
|
|
154
|
|
1411
|
1
|
|
|
|
|
8
|
} 1..$sheet->{maxrow}; |
1412
|
|
|
|
|
|
|
} # rows |
1413
|
|
|
|
|
|
|
|
1414
|
|
|
|
|
|
|
sub merged_from { |
1415
|
17
|
|
|
17
|
|
46
|
my ($sheet, @id, $col, $row) = @_; |
1416
|
17
|
50
|
|
|
|
39
|
my $ma = $sheet->{merged} or return; |
1417
|
17
|
100
|
66
|
|
|
148
|
if (@id == 2 && $id[0] =~ m/^[0-9]+$/ && $id[1] =~ m/^[0-9]+$/) { |
|
|
100
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
66
|
|
|
|
|
1418
|
10
|
|
|
|
|
23
|
($col, $row) = @id; |
1419
|
|
|
|
|
|
|
} |
1420
|
|
|
|
|
|
|
elsif (@id && $id[0] && exists $sheet->{$id[0]}) { |
1421
|
3
|
|
|
|
|
10
|
($col, $row) = cell2cr ($id[0]); |
1422
|
|
|
|
|
|
|
} |
1423
|
17
|
100
|
66
|
|
|
87
|
defined $row && $row > 0 && $row <= $sheet->{maxrow} or return; |
|
|
|
100
|
|
|
|
|
1424
|
12
|
50
|
33
|
|
|
48
|
defined $col && $col > 0 && $col <= $sheet->{maxcol} or return; |
|
|
|
33
|
|
|
|
|
1425
|
12
|
|
|
|
|
18
|
foreach my $range (@{$ma}) { |
|
12
|
|
|
|
|
21
|
|
1426
|
22
|
|
|
|
|
26
|
my ($ctl, $rtl, $cbr, $rbr) = @{$range}; |
|
22
|
|
|
|
|
32
|
|
1427
|
22
|
100
|
100
|
|
|
66
|
$col >= $ctl && $col <= $cbr or next; |
1428
|
12
|
100
|
66
|
|
|
30
|
$row >= $rtl && $row <= $rbr or next; |
1429
|
10
|
|
|
|
|
25
|
return cr2cell ($ctl, $rtl); |
1430
|
|
|
|
|
|
|
} |
1431
|
|
|
|
|
|
|
} # cell |
1432
|
|
|
|
|
|
|
|
1433
|
|
|
|
|
|
|
1; |
1434
|
|
|
|
|
|
|
|
1435
|
|
|
|
|
|
|
__END__ |
1436
|
|
|
|
|
|
|
=head1 DESCRIPTION |
1437
|
|
|
|
|
|
|
|
1438
|
|
|
|
|
|
|
Spreadsheet::Read tries to transparently read *any* spreadsheet and |
1439
|
|
|
|
|
|
|
return its content in a universal manner independent of the parsing |
1440
|
|
|
|
|
|
|
module that does the actual spreadsheet scanning. |
1441
|
|
|
|
|
|
|
|
1442
|
|
|
|
|
|
|
For OpenOffice and/or LibreOffice this module uses |
1443
|
|
|
|
|
|
|
L<Spreadsheet::ReadSXC|https://metacpan.org/release/Spreadsheet-ReadSXC> |
1444
|
|
|
|
|
|
|
|
1445
|
|
|
|
|
|
|
For Microsoft Excel this module uses |
1446
|
|
|
|
|
|
|
L<Spreadsheet::ParseExcel|https://metacpan.org/release/Spreadsheet-ParseExcel>, |
1447
|
|
|
|
|
|
|
L<Spreadsheet::ParseXLSX|https://metacpan.org/release/Spreadsheet-ParseXLSX>, or |
1448
|
|
|
|
|
|
|
L<Spreadsheet::XLSX|https://metacpan.org/release/Spreadsheet-XLSX> (strongly |
1449
|
|
|
|
|
|
|
discouraged). |
1450
|
|
|
|
|
|
|
|
1451
|
|
|
|
|
|
|
For CSV this module uses L<Text::CSV_XS|https://metacpan.org/release/Text-CSV_XS> |
1452
|
|
|
|
|
|
|
or L<Text::CSV_PP|https://metacpan.org/release/Text-CSV>. |
1453
|
|
|
|
|
|
|
|
1454
|
|
|
|
|
|
|
For SquirrelCalc there is a very simplistic built-in parser. |
1455
|
|
|
|
|
|
|
|
1456
|
|
|
|
|
|
|
=head2 Data structure |
1457
|
|
|
|
|
|
|
|
1458
|
|
|
|
|
|
|
The data is returned as an array reference: |
1459
|
|
|
|
|
|
|
|
1460
|
|
|
|
|
|
|
$book = [ |
1461
|
|
|
|
|
|
|
# Entry 0 is the overall control hash |
1462
|
|
|
|
|
|
|
{ sheets => 2, |
1463
|
|
|
|
|
|
|
sheet => { |
1464
|
|
|
|
|
|
|
"Sheet 1" => 1, |
1465
|
|
|
|
|
|
|
"Sheet 2" => 2, |
1466
|
|
|
|
|
|
|
}, |
1467
|
|
|
|
|
|
|
parsers => [ { |
1468
|
|
|
|
|
|
|
type => "xls", |
1469
|
|
|
|
|
|
|
parser => "Spreadsheet::ParseExcel", |
1470
|
|
|
|
|
|
|
version => 0.59, |
1471
|
|
|
|
|
|
|
}], |
1472
|
|
|
|
|
|
|
error => undef, |
1473
|
|
|
|
|
|
|
}, |
1474
|
|
|
|
|
|
|
# Entry 1 is the first sheet |
1475
|
|
|
|
|
|
|
{ parser => 0, |
1476
|
|
|
|
|
|
|
label => "Sheet 1", |
1477
|
|
|
|
|
|
|
maxrow => 2, |
1478
|
|
|
|
|
|
|
maxcol => 4, |
1479
|
|
|
|
|
|
|
cell => [ undef, |
1480
|
|
|
|
|
|
|
[ undef, 1 ], |
1481
|
|
|
|
|
|
|
[ undef, undef, undef, undef, undef, "Nugget" ], |
1482
|
|
|
|
|
|
|
], |
1483
|
|
|
|
|
|
|
attr => [], |
1484
|
|
|
|
|
|
|
merged => [], |
1485
|
|
|
|
|
|
|
active => 1, |
1486
|
|
|
|
|
|
|
A1 => 1, |
1487
|
|
|
|
|
|
|
B5 => "Nugget", |
1488
|
|
|
|
|
|
|
}, |
1489
|
|
|
|
|
|
|
# Entry 2 is the second sheet |
1490
|
|
|
|
|
|
|
{ parser => 0, |
1491
|
|
|
|
|
|
|
label => "Sheet 2", |
1492
|
|
|
|
|
|
|
: |
1493
|
|
|
|
|
|
|
: |
1494
|
|
|
|
|
|
|
|
1495
|
|
|
|
|
|
|
To stay as close as possible to what spreadsheet users see, row and column 1 have |
1496
|
|
|
|
|
|
|
index 1 too in the C<cell> element of the sheet hash, so cell "A1" is |
1497
|
|
|
|
|
|
|
the same as C<cell> [1, 1] (column first). To switch between the two, |
1498
|
|
|
|
|
|
|
there are helper functions available: C<cell2cr ()>, C<cr2cell ()>, |
1499
|
|
|
|
|
|
|
and C<col2label ()>. |
1500
|
|
|
|
|
|
|
|
1501
|
|
|
|
|
|
|
The C<cell> hash entry contains unformatted data, while the hash entries |
1502
|
|
|
|
|
|
|
with the traditional labels contain the formatted values (if applicable). |
1503
|
|
|
|
|
|
|
|
1504
|
|
|
|
|
|
|
The control hash (the first entry in the returned array ref), contains |
1505
|
|
|
|
|
|
|
some spreadsheet meta-data. The entry C<sheet> is there to be able to find |
1506
|
|
|
|
|
|
|
the sheets when accessing them by name: |
1507
|
|
|
|
|
|
|
|
1508
|
|
|
|
|
|
|
my %sheet2 = %{$book->[$book->[0]{sheet}{"Sheet 2"}]}; |
1509
|
|
|
|
|
|
|
|
1510
|
|
|
|
|
|
|
=head2 Formatted vs Unformatted |
1511
|
|
|
|
|
|
|
|
1512
|
|
|
|
|
|
|
The difference between formatted and unformatted cells is that the (optional) |
1513
|
|
|
|
|
|
|
format is applied to the cell or not. This part is B<completely> implemented |
1514
|
|
|
|
|
|
|
on the parser side. Spreadsheet::Read just makes both available if these are |
1515
|
|
|
|
|
|
|
supported. Options provide means to disable either. If the parser does not |
1516
|
|
|
|
|
|
|
provide formatted cells - like CSV - both values are equal. |
1517
|
|
|
|
|
|
|
|
1518
|
|
|
|
|
|
|
To show what this implies: |
1519
|
|
|
|
|
|
|
|
1520
|
|
|
|
|
|
|
use Spreadsheet::Read; |
1521
|
|
|
|
|
|
|
|
1522
|
|
|
|
|
|
|
my $file = "files/example.xlsx"; |
1523
|
|
|
|
|
|
|
my $workbook = Spreadsheet::Read->new ($file); |
1524
|
|
|
|
|
|
|
|
1525
|
|
|
|
|
|
|
my $info = $workbook->[0]; |
1526
|
|
|
|
|
|
|
say "Parsed $file with $info->{parser}-$info->{version}"; |
1527
|
|
|
|
|
|
|
|
1528
|
|
|
|
|
|
|
my $sheet = $workbook->sheet (1); |
1529
|
|
|
|
|
|
|
|
1530
|
|
|
|
|
|
|
say join "\t" => "Formatted:", $sheet->row (1); |
1531
|
|
|
|
|
|
|
say join "\t" => "Unformatted:", $sheet->cellrow (1); |
1532
|
|
|
|
|
|
|
|
1533
|
|
|
|
|
|
|
Might return very different results depending on the underlying parser (and |
1534
|
|
|
|
|
|
|
its version): |
1535
|
|
|
|
|
|
|
|
1536
|
|
|
|
|
|
|
Parsed files/example.xlsx with Spreadsheet::ParseXLSX-0.27 |
1537
|
|
|
|
|
|
|
Formatted: 8-Aug Foo & Barr < Quux |
1538
|
|
|
|
|
|
|
Unformatted: 39668 Foo & Barr < Quux |
1539
|
|
|
|
|
|
|
|
1540
|
|
|
|
|
|
|
Parsed files/example.xlsx with Spreadsheet::XLSX-0.15 |
1541
|
|
|
|
|
|
|
Formatted: 39668 Foo & Barr < Quux |
1542
|
|
|
|
|
|
|
Unformatted: 39668 Foo & Barr < Quux |
1543
|
|
|
|
|
|
|
|
1544
|
|
|
|
|
|
|
=head2 Functions and methods |
1545
|
|
|
|
|
|
|
|
1546
|
|
|
|
|
|
|
=head3 new |
1547
|
|
|
|
|
|
|
|
1548
|
|
|
|
|
|
|
my $book = Spreadsheet::Read->new (...) or die $@; |
1549
|
|
|
|
|
|
|
|
1550
|
|
|
|
|
|
|
All options accepted by ReadData are accepted by new. |
1551
|
|
|
|
|
|
|
|
1552
|
|
|
|
|
|
|
With no arguments at all, $book will be an object where sheets can be added |
1553
|
|
|
|
|
|
|
using C<add> |
1554
|
|
|
|
|
|
|
|
1555
|
|
|
|
|
|
|
my $book = Spreadsheet::Read->new (); |
1556
|
|
|
|
|
|
|
$book->add ("file.csv"); |
1557
|
|
|
|
|
|
|
$book->add ("file.xlsx"); |
1558
|
|
|
|
|
|
|
|
1559
|
|
|
|
|
|
|
=head3 ReadData |
1560
|
|
|
|
|
|
|
|
1561
|
|
|
|
|
|
|
my $book = ReadData ($source [, option => value [, ... ]]); |
1562
|
|
|
|
|
|
|
|
1563
|
|
|
|
|
|
|
my $book = ReadData ("file.csv", sep => ',', quote => '"'); |
1564
|
|
|
|
|
|
|
|
1565
|
|
|
|
|
|
|
my $book = ReadData ("file.xls", dtfmt => "yyyy-mm-dd"); |
1566
|
|
|
|
|
|
|
|
1567
|
|
|
|
|
|
|
my $book = ReadData ("file.ods"); |
1568
|
|
|
|
|
|
|
|
1569
|
|
|
|
|
|
|
my $book = ReadData ("file.sxc"); |
1570
|
|
|
|
|
|
|
|
1571
|
|
|
|
|
|
|
my $book = ReadData ("content.xml"); |
1572
|
|
|
|
|
|
|
|
1573
|
|
|
|
|
|
|
my $book = ReadData ($content); |
1574
|
|
|
|
|
|
|
|
1575
|
|
|
|
|
|
|
my $book = ReadData ($content, parser => "xlsx"); |
1576
|
|
|
|
|
|
|
|
1577
|
|
|
|
|
|
|
my $book = ReadData ($fh, parser => "xlsx"); |
1578
|
|
|
|
|
|
|
|
1579
|
|
|
|
|
|
|
my $book = ReadData (\$content, parser => "xlsx"); |
1580
|
|
|
|
|
|
|
|
1581
|
|
|
|
|
|
|
Tries to convert the given file, string, or stream to the data structure |
1582
|
|
|
|
|
|
|
described above. |
1583
|
|
|
|
|
|
|
|
1584
|
|
|
|
|
|
|
Processing Excel data from a stream or content is supported through a |
1585
|
|
|
|
|
|
|
L<File::Temp|https://metacpan.org/release/File-Temp> temporary file or |
1586
|
|
|
|
|
|
|
L<IO::Scalar|https://metacpan.org/release/IO-stringy> when available. |
1587
|
|
|
|
|
|
|
|
1588
|
|
|
|
|
|
|
L<Spreadsheet::ReadSXC|https://metacpan.org/release/Spreadsheet-ReadSXC> |
1589
|
|
|
|
|
|
|
does preserve sheet order as of version 0.20. |
1590
|
|
|
|
|
|
|
|
1591
|
|
|
|
|
|
|
Choosing between C<$content> and C<\$content> (with or without passing |
1592
|
|
|
|
|
|
|
the desired C<parser> option) may depend on trial and terror. |
1593
|
|
|
|
|
|
|
C<ReadData> does try to determine parser type on content if needed, but |
1594
|
|
|
|
|
|
|
not all combinations are checked, and not all signatures are builtin. |
1595
|
|
|
|
|
|
|
|
1596
|
|
|
|
|
|
|
Currently supported options are: |
1597
|
|
|
|
|
|
|
|
1598
|
|
|
|
|
|
|
=over 2 |
1599
|
|
|
|
|
|
|
|
1600
|
|
|
|
|
|
|
=item parser |
1601
|
|
|
|
|
|
|
X<parser> |
1602
|
|
|
|
|
|
|
|
1603
|
|
|
|
|
|
|
Force the data to be parsed by a specific format. Possible values are |
1604
|
|
|
|
|
|
|
C<csv>, C<prl> (or C<perl>), C<sc> (or C<squirelcalc>), C<sxc> (or C<oo>, |
1605
|
|
|
|
|
|
|
C<ods>, C<openoffice>, C<libreoffice>), C<xls> (or C<excel>), and C<xlsx> |
1606
|
|
|
|
|
|
|
(or C<excel2007>). |
1607
|
|
|
|
|
|
|
|
1608
|
|
|
|
|
|
|
When parsing streams, instead of files, it is highly recommended to pass |
1609
|
|
|
|
|
|
|
this option. |
1610
|
|
|
|
|
|
|
|
1611
|
|
|
|
|
|
|
Spreadsheet::Read supports several underlying parsers per spreadsheet |
1612
|
|
|
|
|
|
|
type. It will try those from most favored to least favored. When you |
1613
|
|
|
|
|
|
|
have a good reason to prefer a different parser, you can set that in |
1614
|
|
|
|
|
|
|
environment variables. The other options then will not be tested for: |
1615
|
|
|
|
|
|
|
|
1616
|
|
|
|
|
|
|
env SPREADSHEET_READ_CSV=Text::CSV_PP ... |
1617
|
|
|
|
|
|
|
|
1618
|
|
|
|
|
|
|
=item cells |
1619
|
|
|
|
|
|
|
X<cells> |
1620
|
|
|
|
|
|
|
|
1621
|
|
|
|
|
|
|
Control the generation of named cells ("C<A1>" etc). Default is true. |
1622
|
|
|
|
|
|
|
|
1623
|
|
|
|
|
|
|
=item rc |
1624
|
|
|
|
|
|
|
|
1625
|
|
|
|
|
|
|
Control the generation of the {cell}[c][r] entries. Default is true. |
1626
|
|
|
|
|
|
|
|
1627
|
|
|
|
|
|
|
=item attr |
1628
|
|
|
|
|
|
|
|
1629
|
|
|
|
|
|
|
Control the generation of the {attr}[c][r] entries. Default is false. |
1630
|
|
|
|
|
|
|
See L</Cell Attributes> below. |
1631
|
|
|
|
|
|
|
|
1632
|
|
|
|
|
|
|
=item clip |
1633
|
|
|
|
|
|
|
|
1634
|
|
|
|
|
|
|
If set, L<C<ReadData>|/ReadData> will remove all trailing rows and columns |
1635
|
|
|
|
|
|
|
per sheet that have no data, where no data means only undefined or empty |
1636
|
|
|
|
|
|
|
cells (after optional stripping). If a sheet has no data at all, the sheet |
1637
|
|
|
|
|
|
|
will be skipped entirely when this attribute is true. |
1638
|
|
|
|
|
|
|
|
1639
|
|
|
|
|
|
|
=item strip |
1640
|
|
|
|
|
|
|
|
1641
|
|
|
|
|
|
|
If set, L<C<ReadData>|/ReadData> will remove trailing- and/or |
1642
|
|
|
|
|
|
|
leading-whitespace from every field. |
1643
|
|
|
|
|
|
|
|
1644
|
|
|
|
|
|
|
strip leading trailing |
1645
|
|
|
|
|
|
|
----- ------- --------- |
1646
|
|
|
|
|
|
|
0 n/a n/a |
1647
|
|
|
|
|
|
|
1 strip n/a |
1648
|
|
|
|
|
|
|
2 n/a strip |
1649
|
|
|
|
|
|
|
3 strip strip |
1650
|
|
|
|
|
|
|
|
1651
|
|
|
|
|
|
|
=item pivot |
1652
|
|
|
|
|
|
|
|
1653
|
|
|
|
|
|
|
Swap all rows and columns. |
1654
|
|
|
|
|
|
|
|
1655
|
|
|
|
|
|
|
When a sheet contains data like |
1656
|
|
|
|
|
|
|
|
1657
|
|
|
|
|
|
|
A1 B1 C1 E1 |
1658
|
|
|
|
|
|
|
A2 C2 D2 |
1659
|
|
|
|
|
|
|
A3 B3 C3 D3 E3 |
1660
|
|
|
|
|
|
|
|
1661
|
|
|
|
|
|
|
using C<pivot> will return the sheet data as |
1662
|
|
|
|
|
|
|
|
1663
|
|
|
|
|
|
|
A1 A2 A3 |
1664
|
|
|
|
|
|
|
B1 B3 |
1665
|
|
|
|
|
|
|
C1 C2 C3 |
1666
|
|
|
|
|
|
|
D2 D3 |
1667
|
|
|
|
|
|
|
E1 E3 |
1668
|
|
|
|
|
|
|
|
1669
|
|
|
|
|
|
|
=item sep |
1670
|
|
|
|
|
|
|
|
1671
|
|
|
|
|
|
|
Set separator for CSV. Default is comma C<,>. |
1672
|
|
|
|
|
|
|
|
1673
|
|
|
|
|
|
|
=item quote |
1674
|
|
|
|
|
|
|
|
1675
|
|
|
|
|
|
|
Set quote character for CSV. Default is C<">. |
1676
|
|
|
|
|
|
|
|
1677
|
|
|
|
|
|
|
=item dtfmt |
1678
|
|
|
|
|
|
|
|
1679
|
|
|
|
|
|
|
Set the format for MS-Excel date fields that are set to use the default |
1680
|
|
|
|
|
|
|
date format. The default format in Excel is "C<m-d-yy>", which is both |
1681
|
|
|
|
|
|
|
not year 2000 safe and not very useful. The default is now "C<yyyy-mm-dd>", |
1682
|
|
|
|
|
|
|
which is more ISO-like. |
1683
|
|
|
|
|
|
|
|
1684
|
|
|
|
|
|
|
Note that date formatting in MS-Excel is not reliable at all, as it will |
1685
|
|
|
|
|
|
|
store/replace/change the date field separator in already stored formats |
1686
|
|
|
|
|
|
|
if you change your locale settings. So the above mentioned default can |
1687
|
|
|
|
|
|
|
be either "C<m-d-yy>" OR "C<m/d/yy>" depending on what that specific |
1688
|
|
|
|
|
|
|
character happened to be at the time the user saved the file. |
1689
|
|
|
|
|
|
|
|
1690
|
|
|
|
|
|
|
=item merge |
1691
|
|
|
|
|
|
|
|
1692
|
|
|
|
|
|
|
Copy content to all cells in merged areas. |
1693
|
|
|
|
|
|
|
|
1694
|
|
|
|
|
|
|
If supported, this will copy formatted and unformatted values from the |
1695
|
|
|
|
|
|
|
top-left cell of a merged area to all other cells in the area. |
1696
|
|
|
|
|
|
|
|
1697
|
|
|
|
|
|
|
=item debug |
1698
|
|
|
|
|
|
|
|
1699
|
|
|
|
|
|
|
Enable some diagnostic messages to STDERR. |
1700
|
|
|
|
|
|
|
|
1701
|
|
|
|
|
|
|
The value determines how much diagnostic output is dumped (using |
1702
|
|
|
|
|
|
|
L<Data::Peek|https://metacpan.org/release/Data-Peek>). A value of C<9> |
1703
|
|
|
|
|
|
|
and higher will dump the entire structure from the back-end parser. |
1704
|
|
|
|
|
|
|
|
1705
|
|
|
|
|
|
|
=item passwd |
1706
|
|
|
|
|
|
|
|
1707
|
|
|
|
|
|
|
Use this password to decrypt a password-protected spreadsheet. |
1708
|
|
|
|
|
|
|
|
1709
|
|
|
|
|
|
|
Currently only supports Excel. |
1710
|
|
|
|
|
|
|
|
1711
|
|
|
|
|
|
|
=back |
1712
|
|
|
|
|
|
|
|
1713
|
|
|
|
|
|
|
All other attributes/options will be passed to the underlying parser if |
1714
|
|
|
|
|
|
|
that parser supports attributes. |
1715
|
|
|
|
|
|
|
|
1716
|
|
|
|
|
|
|
=head3 col2label |
1717
|
|
|
|
|
|
|
|
1718
|
|
|
|
|
|
|
my $col_id = col2label (col); |
1719
|
|
|
|
|
|
|
|
1720
|
|
|
|
|
|
|
my $col_id = $book->col2label (col); # OO |
1721
|
|
|
|
|
|
|
|
1722
|
|
|
|
|
|
|
C<col2label ()> converts a C<(column)> (1 based) to the letters used in the |
1723
|
|
|
|
|
|
|
traditional cell notation: |
1724
|
|
|
|
|
|
|
|
1725
|
|
|
|
|
|
|
my $id = col2label ( 4); # $id now "D" |
1726
|
|
|
|
|
|
|
my $id = col2label (28); # $id now "AB" |
1727
|
|
|
|
|
|
|
|
1728
|
|
|
|
|
|
|
=head3 cr2cell |
1729
|
|
|
|
|
|
|
|
1730
|
|
|
|
|
|
|
my $cell = cr2cell (col, row); |
1731
|
|
|
|
|
|
|
|
1732
|
|
|
|
|
|
|
my $cell = $book->cr2cell (col, row); # OO |
1733
|
|
|
|
|
|
|
|
1734
|
|
|
|
|
|
|
C<cr2cell ()> converts a C<(column, row)> pair (1 based) to the |
1735
|
|
|
|
|
|
|
traditional cell notation: |
1736
|
|
|
|
|
|
|
|
1737
|
|
|
|
|
|
|
my $cell = cr2cell ( 4, 14); # $cell now "D14" |
1738
|
|
|
|
|
|
|
my $cell = cr2cell (28, 4); # $cell now "AB4" |
1739
|
|
|
|
|
|
|
|
1740
|
|
|
|
|
|
|
=head3 cell2cr |
1741
|
|
|
|
|
|
|
|
1742
|
|
|
|
|
|
|
my ($col, $row) = cell2cr ($cell); |
1743
|
|
|
|
|
|
|
|
1744
|
|
|
|
|
|
|
my ($col, $row) = $book->cell2cr ($cell); # OO |
1745
|
|
|
|
|
|
|
|
1746
|
|
|
|
|
|
|
C<cell2cr ()> converts traditional cell notation to a C<(column, row)> |
1747
|
|
|
|
|
|
|
pair (1 based): |
1748
|
|
|
|
|
|
|
|
1749
|
|
|
|
|
|
|
my ($col, $row) = cell2cr ("D14"); # returns ( 4, 14) |
1750
|
|
|
|
|
|
|
my ($col, $row) = cell2cr ("AB4"); # returns (28, 4) |
1751
|
|
|
|
|
|
|
|
1752
|
|
|
|
|
|
|
=head3 row |
1753
|
|
|
|
|
|
|
|
1754
|
|
|
|
|
|
|
my @row = row ($sheet, $row) |
1755
|
|
|
|
|
|
|
|
1756
|
|
|
|
|
|
|
my @row = Spreadsheet::Read::row ($book->[1], 3); |
1757
|
|
|
|
|
|
|
|
1758
|
|
|
|
|
|
|
my @row = $book->row ($sheet, $row); # OO |
1759
|
|
|
|
|
|
|
|
1760
|
|
|
|
|
|
|
Get full row of formatted values (like C<< $sheet->{A3} .. $sheet->{G3} >>) |
1761
|
|
|
|
|
|
|
|
1762
|
|
|
|
|
|
|
Note that the indexes in the returned list are 0-based. |
1763
|
|
|
|
|
|
|
|
1764
|
|
|
|
|
|
|
C<row ()> is not imported by default, so either specify it in the |
1765
|
|
|
|
|
|
|
use argument list, or call it fully qualified. |
1766
|
|
|
|
|
|
|
|
1767
|
|
|
|
|
|
|
See also the C<row ()> method on sheets. |
1768
|
|
|
|
|
|
|
|
1769
|
|
|
|
|
|
|
=head3 cellrow |
1770
|
|
|
|
|
|
|
|
1771
|
|
|
|
|
|
|
my @row = cellrow ($sheet, $row); |
1772
|
|
|
|
|
|
|
|
1773
|
|
|
|
|
|
|
my @row = Spreadsheet::Read::cellrow ($book->[1], 3); |
1774
|
|
|
|
|
|
|
|
1775
|
|
|
|
|
|
|
my @row = $book->cellrow ($sheet, $row); # OO |
1776
|
|
|
|
|
|
|
|
1777
|
|
|
|
|
|
|
Get full row of unformatted values (like C<< $sheet->{cell}[1][3] .. $sheet->{cell}[7][3] >>) |
1778
|
|
|
|
|
|
|
|
1779
|
|
|
|
|
|
|
Note that the indexes in the returned list are 0-based. |
1780
|
|
|
|
|
|
|
|
1781
|
|
|
|
|
|
|
C<cellrow ()> is not imported by default, so either specify it in the |
1782
|
|
|
|
|
|
|
use argument list, or call it fully qualified or as method call. |
1783
|
|
|
|
|
|
|
|
1784
|
|
|
|
|
|
|
See also the C<cellrow ()> method on sheets. |
1785
|
|
|
|
|
|
|
|
1786
|
|
|
|
|
|
|
=head3 rows |
1787
|
|
|
|
|
|
|
|
1788
|
|
|
|
|
|
|
my @rows = rows ($sheet); |
1789
|
|
|
|
|
|
|
|
1790
|
|
|
|
|
|
|
my @rows = Spreadsheet::Read::rows ($book->[1]); |
1791
|
|
|
|
|
|
|
|
1792
|
|
|
|
|
|
|
my @rows = $book->rows (1); # OO |
1793
|
|
|
|
|
|
|
|
1794
|
|
|
|
|
|
|
Convert C<{cell}>'s C<[column][row]> to a C<[row][column]> list. |
1795
|
|
|
|
|
|
|
|
1796
|
|
|
|
|
|
|
Note that the indexes in the returned list are 0-based, where the |
1797
|
|
|
|
|
|
|
index in the C<{cell}> entry is 1-based. |
1798
|
|
|
|
|
|
|
|
1799
|
|
|
|
|
|
|
C<rows ()> is not imported by default, so either specify it in the |
1800
|
|
|
|
|
|
|
use argument list, or call it fully qualified. |
1801
|
|
|
|
|
|
|
|
1802
|
|
|
|
|
|
|
=head3 parses |
1803
|
|
|
|
|
|
|
|
1804
|
|
|
|
|
|
|
parses ($format); |
1805
|
|
|
|
|
|
|
|
1806
|
|
|
|
|
|
|
Spreadsheet::Read::parses ("CSV"); |
1807
|
|
|
|
|
|
|
|
1808
|
|
|
|
|
|
|
$book->parses ("CSV"); # OO |
1809
|
|
|
|
|
|
|
|
1810
|
|
|
|
|
|
|
C<parses ()> returns Spreadsheet::Read's capability to parse the |
1811
|
|
|
|
|
|
|
required format. L<C<ReadData>|/ReadData> will pick its preferred parser |
1812
|
|
|
|
|
|
|
for that format unless overruled. See L<C<parser>|/parser>. |
1813
|
|
|
|
|
|
|
|
1814
|
|
|
|
|
|
|
C<parses ()> is not imported by default, so either specify it in the |
1815
|
|
|
|
|
|
|
use argument list, or call it fully qualified. |
1816
|
|
|
|
|
|
|
|
1817
|
|
|
|
|
|
|
=head3 Version |
1818
|
|
|
|
|
|
|
|
1819
|
|
|
|
|
|
|
my $v = Version () |
1820
|
|
|
|
|
|
|
|
1821
|
|
|
|
|
|
|
my $v = Spreadsheet::Read::Version () |
1822
|
|
|
|
|
|
|
|
1823
|
|
|
|
|
|
|
my $v = Spreadsheet::Read->VERSION; |
1824
|
|
|
|
|
|
|
|
1825
|
|
|
|
|
|
|
my $v = $book->Version (); # OO |
1826
|
|
|
|
|
|
|
|
1827
|
|
|
|
|
|
|
Returns the current version of Spreadsheet::Read. |
1828
|
|
|
|
|
|
|
|
1829
|
|
|
|
|
|
|
C<Version ()> is not imported by default, so either specify it in the |
1830
|
|
|
|
|
|
|
use argument list, or call it fully qualified. |
1831
|
|
|
|
|
|
|
|
1832
|
|
|
|
|
|
|
This function returns exactly the same as C<< Spreadsheet::Read->VERSION >> |
1833
|
|
|
|
|
|
|
returns and is only kept for backward compatibility reasons. |
1834
|
|
|
|
|
|
|
|
1835
|
|
|
|
|
|
|
=head3 sheets |
1836
|
|
|
|
|
|
|
|
1837
|
|
|
|
|
|
|
my $sheets = $book->sheets; # OO |
1838
|
|
|
|
|
|
|
my @sheets = $book->sheets; # OO |
1839
|
|
|
|
|
|
|
|
1840
|
|
|
|
|
|
|
In scalar context return the number of sheets in the book. |
1841
|
|
|
|
|
|
|
In list context return the labels of the sheets in the book. |
1842
|
|
|
|
|
|
|
|
1843
|
|
|
|
|
|
|
=head3 sheet |
1844
|
|
|
|
|
|
|
|
1845
|
|
|
|
|
|
|
my $sheet = $book->sheet (1); # OO |
1846
|
|
|
|
|
|
|
my $sheet = $book->sheet ("Foo"); # OO |
1847
|
|
|
|
|
|
|
|
1848
|
|
|
|
|
|
|
Return the numbered or named sheet out of the book. Will return C<undef> if |
1849
|
|
|
|
|
|
|
there is no match. Will not work for sheets I<named> with a number between 1 |
1850
|
|
|
|
|
|
|
and the number of sheets in the book. |
1851
|
|
|
|
|
|
|
|
1852
|
|
|
|
|
|
|
With named sheets will first try to use the list of sheet-labels as stored in |
1853
|
|
|
|
|
|
|
the control structure. If no match is found, it will scan the actual labels |
1854
|
|
|
|
|
|
|
of the sheets. In that case, it will return the first matching sheet. |
1855
|
|
|
|
|
|
|
|
1856
|
|
|
|
|
|
|
If defined, the returned sheet will be of class C<Spreadsheet::Read::Sheet>. |
1857
|
|
|
|
|
|
|
|
1858
|
|
|
|
|
|
|
=head3 add |
1859
|
|
|
|
|
|
|
|
1860
|
|
|
|
|
|
|
my $book = ReadData ("file.csv"); |
1861
|
|
|
|
|
|
|
Spreadsheet::Read::add ($book, "file.xlsx"); |
1862
|
|
|
|
|
|
|
|
1863
|
|
|
|
|
|
|
my $book = Spreadsheet::Read->new ("file.csv"); |
1864
|
|
|
|
|
|
|
$book->add ("file.xlsx"); # OO |
1865
|
|
|
|
|
|
|
|
1866
|
|
|
|
|
|
|
=head2 Methods on sheets |
1867
|
|
|
|
|
|
|
|
1868
|
|
|
|
|
|
|
=head3 maxcol |
1869
|
|
|
|
|
|
|
|
1870
|
|
|
|
|
|
|
my $col = $sheet->maxcol; |
1871
|
|
|
|
|
|
|
|
1872
|
|
|
|
|
|
|
Return the index of the last in-use column in the sheet. This index is 1-based. |
1873
|
|
|
|
|
|
|
|
1874
|
|
|
|
|
|
|
=head3 maxrow |
1875
|
|
|
|
|
|
|
|
1876
|
|
|
|
|
|
|
my $row = $sheet->maxrow; |
1877
|
|
|
|
|
|
|
|
1878
|
|
|
|
|
|
|
Return the index of the last in-use row in the sheet. This index is 1-based. |
1879
|
|
|
|
|
|
|
|
1880
|
|
|
|
|
|
|
=head3 cell |
1881
|
|
|
|
|
|
|
|
1882
|
|
|
|
|
|
|
my $cell = $sheet->cell ("A3"); |
1883
|
|
|
|
|
|
|
my $cell = $sheet->cell (1, 3); |
1884
|
|
|
|
|
|
|
|
1885
|
|
|
|
|
|
|
Return the value for a cell. Using tags will return the formatted value, |
1886
|
|
|
|
|
|
|
using column and row will return unformatted value. |
1887
|
|
|
|
|
|
|
|
1888
|
|
|
|
|
|
|
=head3 attr |
1889
|
|
|
|
|
|
|
|
1890
|
|
|
|
|
|
|
my $cell = $sheet->attr ("A3"); |
1891
|
|
|
|
|
|
|
my $cell = $sheet->attr (1, 3); |
1892
|
|
|
|
|
|
|
|
1893
|
|
|
|
|
|
|
Return the attributes of a cell. Only valid if attributes are enabled through |
1894
|
|
|
|
|
|
|
option C<attr>. |
1895
|
|
|
|
|
|
|
|
1896
|
|
|
|
|
|
|
=head3 col2label |
1897
|
|
|
|
|
|
|
|
1898
|
|
|
|
|
|
|
my $col_id = $sheet->col2label (col); |
1899
|
|
|
|
|
|
|
|
1900
|
|
|
|
|
|
|
C<col2label ()> converts a C<(column)> (1 based) to the letters used in the |
1901
|
|
|
|
|
|
|
traditional cell notation: |
1902
|
|
|
|
|
|
|
|
1903
|
|
|
|
|
|
|
my $id = $sheet->col2label ( 4); # $id now "D" |
1904
|
|
|
|
|
|
|
my $id = $sheet->col2label (28); # $id now "AB" |
1905
|
|
|
|
|
|
|
|
1906
|
|
|
|
|
|
|
=head3 cr2cell |
1907
|
|
|
|
|
|
|
|
1908
|
|
|
|
|
|
|
my $cell = $sheet->cr2cell (col, row); |
1909
|
|
|
|
|
|
|
|
1910
|
|
|
|
|
|
|
C<cr2cell ()> converts a C<(column, row)> pair (1 based) to the |
1911
|
|
|
|
|
|
|
traditional cell notation: |
1912
|
|
|
|
|
|
|
|
1913
|
|
|
|
|
|
|
my $cell = $sheet->cr2cell ( 4, 14); # $cell now "D14" |
1914
|
|
|
|
|
|
|
my $cell = $sheet->cr2cell (28, 4); # $cell now "AB4" |
1915
|
|
|
|
|
|
|
|
1916
|
|
|
|
|
|
|
=head3 cell2cr |
1917
|
|
|
|
|
|
|
|
1918
|
|
|
|
|
|
|
my ($col, $row) = $sheet->cell2cr ($cell); |
1919
|
|
|
|
|
|
|
|
1920
|
|
|
|
|
|
|
C<cell2cr ()> converts traditional cell notation to a C<(column, row)> |
1921
|
|
|
|
|
|
|
pair (1 based): |
1922
|
|
|
|
|
|
|
|
1923
|
|
|
|
|
|
|
my ($col, $row) = $sheet->cell2cr ("D14"); # returns ( 4, 14) |
1924
|
|
|
|
|
|
|
my ($col, $row) = $sheet->cell2cr ("AB4"); # returns (28, 4) |
1925
|
|
|
|
|
|
|
|
1926
|
|
|
|
|
|
|
=head3 column |
1927
|
|
|
|
|
|
|
|
1928
|
|
|
|
|
|
|
my @col = $sheet->column ($col); |
1929
|
|
|
|
|
|
|
|
1930
|
|
|
|
|
|
|
Get full column of formatted values (like C<< $sheet->{C1} .. $sheet->{C9} >>) |
1931
|
|
|
|
|
|
|
|
1932
|
|
|
|
|
|
|
Note that the indexes in the returned list are 0-based. |
1933
|
|
|
|
|
|
|
|
1934
|
|
|
|
|
|
|
=head3 cellcolumn |
1935
|
|
|
|
|
|
|
|
1936
|
|
|
|
|
|
|
my @col = $sheet->cellcolumn ($col); |
1937
|
|
|
|
|
|
|
|
1938
|
|
|
|
|
|
|
Get full column of unformatted values (like C<< $sheet->{cell}[3][1] .. $sheet->{cell}[3][9] >>) |
1939
|
|
|
|
|
|
|
|
1940
|
|
|
|
|
|
|
Note that the indexes in the returned list are 0-based. |
1941
|
|
|
|
|
|
|
|
1942
|
|
|
|
|
|
|
=head3 row |
1943
|
|
|
|
|
|
|
|
1944
|
|
|
|
|
|
|
my @row = $sheet->row ($row); |
1945
|
|
|
|
|
|
|
|
1946
|
|
|
|
|
|
|
Get full row of formatted values (like C<< $sheet->{A3} .. $sheet->{G3} >>) |
1947
|
|
|
|
|
|
|
|
1948
|
|
|
|
|
|
|
Note that the indexes in the returned list are 0-based. |
1949
|
|
|
|
|
|
|
|
1950
|
|
|
|
|
|
|
=head3 cellrow |
1951
|
|
|
|
|
|
|
|
1952
|
|
|
|
|
|
|
my @row = $sheet->cellrow ($row); |
1953
|
|
|
|
|
|
|
|
1954
|
|
|
|
|
|
|
Get full row of unformatted values (like C<< $sheet->{cell}[1][3] .. $sheet->{cell}[7][3] >>) |
1955
|
|
|
|
|
|
|
|
1956
|
|
|
|
|
|
|
Note that the indexes in the returned list are 0-based. |
1957
|
|
|
|
|
|
|
|
1958
|
|
|
|
|
|
|
=head3 rows |
1959
|
|
|
|
|
|
|
|
1960
|
|
|
|
|
|
|
my @rows = $sheet->rows (); |
1961
|
|
|
|
|
|
|
|
1962
|
|
|
|
|
|
|
Convert C<{cell}>'s C<[column][row]> to a C<[row][column]> list. |
1963
|
|
|
|
|
|
|
|
1964
|
|
|
|
|
|
|
Note that the indexes in the returned list are 0-based, where the |
1965
|
|
|
|
|
|
|
index in the C<{cell}> entry is 1-based. |
1966
|
|
|
|
|
|
|
|
1967
|
|
|
|
|
|
|
=head3 merged_from |
1968
|
|
|
|
|
|
|
|
1969
|
|
|
|
|
|
|
my $top_left = $sheet->merged_from ("C2"); |
1970
|
|
|
|
|
|
|
my $top_left = $sheet->merged_from (3, 2); |
1971
|
|
|
|
|
|
|
|
1972
|
|
|
|
|
|
|
If the parser supports merged areas, this method will return the label of the |
1973
|
|
|
|
|
|
|
top-left cell in the merged area the requested cell is part of. |
1974
|
|
|
|
|
|
|
|
1975
|
|
|
|
|
|
|
If the requested ID is valid and within the sheet cell range, but not part of |
1976
|
|
|
|
|
|
|
a merged area, it will return C<"">. |
1977
|
|
|
|
|
|
|
|
1978
|
|
|
|
|
|
|
If the ID is not valid or out of range, it returns C<undef>. |
1979
|
|
|
|
|
|
|
|
1980
|
|
|
|
|
|
|
See L<Merged cells|/merged> for more details. |
1981
|
|
|
|
|
|
|
|
1982
|
|
|
|
|
|
|
=head3 label |
1983
|
|
|
|
|
|
|
|
1984
|
|
|
|
|
|
|
my $label = $sheet->label; |
1985
|
|
|
|
|
|
|
$sheet->label ("New sheet label"); |
1986
|
|
|
|
|
|
|
|
1987
|
|
|
|
|
|
|
Set a new label to a sheet. Note that the index in the control structure will |
1988
|
|
|
|
|
|
|
I<NOT> be updated. |
1989
|
|
|
|
|
|
|
|
1990
|
|
|
|
|
|
|
=head3 active |
1991
|
|
|
|
|
|
|
|
1992
|
|
|
|
|
|
|
my $sheet_is_active = $sheet->active; |
1993
|
|
|
|
|
|
|
|
1994
|
|
|
|
|
|
|
Returns 1 if the selected sheet is active, otherwise returns 0. |
1995
|
|
|
|
|
|
|
|
1996
|
|
|
|
|
|
|
Currently only works on XLS (as of Spreadsheet::ParseExcel-0.61). |
1997
|
|
|
|
|
|
|
CSV is always active. |
1998
|
|
|
|
|
|
|
|
1999
|
|
|
|
|
|
|
=head2 Using CSV |
2000
|
|
|
|
|
|
|
|
2001
|
|
|
|
|
|
|
In case of CSV parsing, L<C<ReadData>|/ReadData> will use the first line of |
2002
|
|
|
|
|
|
|
the file to auto-detect the separation character if the first argument is a |
2003
|
|
|
|
|
|
|
file and both C<sep> and C<quote> are not passed as attributes. |
2004
|
|
|
|
|
|
|
L<Text::CSV_XS|https://metacpan.org/release/Text-CSV_XS> (or |
2005
|
|
|
|
|
|
|
L<Text::CSV_PP|https://metacpan.org/release/Text-CSV>) is able to |
2006
|
|
|
|
|
|
|
automatically detect and use C<\r> line endings. |
2007
|
|
|
|
|
|
|
|
2008
|
|
|
|
|
|
|
CSV can parse streams too, but be sure to pass C<sep> and/or C<quote> if |
2009
|
|
|
|
|
|
|
these do not match the default C<,> and C<">. |
2010
|
|
|
|
|
|
|
|
2011
|
|
|
|
|
|
|
When an error is found in the CSV, it is automatically reported (to STDERR). |
2012
|
|
|
|
|
|
|
The structure will store the error in C<< $ss->[0]{error} >> as anonymous |
2013
|
|
|
|
|
|
|
list returned by |
2014
|
|
|
|
|
|
|
L<C<< $csv->error_diag >>|https://metacpan.org/pod/Text::CSV_XS#error_diag>. |
2015
|
|
|
|
|
|
|
See L<Text::CSV_XS|https://metacpan.org/pod/Text::CSV_XS> for documentation. |
2016
|
|
|
|
|
|
|
|
2017
|
|
|
|
|
|
|
my $ss = ReadData ("bad.csv"); |
2018
|
|
|
|
|
|
|
$ss->[0]{error} and say $ss->[0]{error}[1]; |
2019
|
|
|
|
|
|
|
|
2020
|
|
|
|
|
|
|
As CSV has no sheet labels, the default label for a CSV sheet is its filename. |
2021
|
|
|
|
|
|
|
For CSV, this can be overruled using the I<label> attribute: |
2022
|
|
|
|
|
|
|
|
2023
|
|
|
|
|
|
|
my $ss = Spreadsheet::Read->new ("/some/place/test.csv", label => "Test"); |
2024
|
|
|
|
|
|
|
|
2025
|
|
|
|
|
|
|
=head2 Cell Attributes |
2026
|
|
|
|
|
|
|
X<attr> |
2027
|
|
|
|
|
|
|
|
2028
|
|
|
|
|
|
|
If the constructor was called with C<attr> having a true value, |
2029
|
|
|
|
|
|
|
|
2030
|
|
|
|
|
|
|
my $book = ReadData ("book.xls", attr => 1); |
2031
|
|
|
|
|
|
|
my $book = Spreadsheet::Read->new ("book.xlsx", attr => 1); |
2032
|
|
|
|
|
|
|
|
2033
|
|
|
|
|
|
|
effort is made to analyze and store field attributes like this: |
2034
|
|
|
|
|
|
|
|
2035
|
|
|
|
|
|
|
{ label => "Sheet 1", |
2036
|
|
|
|
|
|
|
maxrow => 5, |
2037
|
|
|
|
|
|
|
maxcol => 2, |
2038
|
|
|
|
|
|
|
cell => [ undef, |
2039
|
|
|
|
|
|
|
[ undef, 1 ], |
2040
|
|
|
|
|
|
|
[ undef, undef, undef, undef, undef, "Nugget" ], |
2041
|
|
|
|
|
|
|
], |
2042
|
|
|
|
|
|
|
attr => [ undef, |
2043
|
|
|
|
|
|
|
[ undef, { |
2044
|
|
|
|
|
|
|
type => "numeric", |
2045
|
|
|
|
|
|
|
fgcolor => "#ff0000", |
2046
|
|
|
|
|
|
|
bgcolor => undef, |
2047
|
|
|
|
|
|
|
font => "Arial", |
2048
|
|
|
|
|
|
|
size => undef, |
2049
|
|
|
|
|
|
|
format => "## ##0.00", |
2050
|
|
|
|
|
|
|
halign => "right", |
2051
|
|
|
|
|
|
|
valign => "top", |
2052
|
|
|
|
|
|
|
uline => 0, |
2053
|
|
|
|
|
|
|
bold => 0, |
2054
|
|
|
|
|
|
|
italic => 0, |
2055
|
|
|
|
|
|
|
wrap => 0, |
2056
|
|
|
|
|
|
|
merged => 0, |
2057
|
|
|
|
|
|
|
hidden => 0, |
2058
|
|
|
|
|
|
|
locked => 0, |
2059
|
|
|
|
|
|
|
enc => "utf-8", |
2060
|
|
|
|
|
|
|
}, ], |
2061
|
|
|
|
|
|
|
[ undef, undef, undef, undef, undef, { |
2062
|
|
|
|
|
|
|
type => "text", |
2063
|
|
|
|
|
|
|
fgcolor => "#e2e2e2", |
2064
|
|
|
|
|
|
|
bgcolor => undef, |
2065
|
|
|
|
|
|
|
font => "Letter Gothic", |
2066
|
|
|
|
|
|
|
size => 15, |
2067
|
|
|
|
|
|
|
format => undef, |
2068
|
|
|
|
|
|
|
halign => "left", |
2069
|
|
|
|
|
|
|
valign => "top", |
2070
|
|
|
|
|
|
|
uline => 0, |
2071
|
|
|
|
|
|
|
bold => 0, |
2072
|
|
|
|
|
|
|
italic => 0, |
2073
|
|
|
|
|
|
|
wrap => 0, |
2074
|
|
|
|
|
|
|
merged => 0, |
2075
|
|
|
|
|
|
|
hidden => 0, |
2076
|
|
|
|
|
|
|
locked => 0, |
2077
|
|
|
|
|
|
|
enc => "iso8859-1", |
2078
|
|
|
|
|
|
|
}, ], |
2079
|
|
|
|
|
|
|
], |
2080
|
|
|
|
|
|
|
merged => [], |
2081
|
|
|
|
|
|
|
A1 => 1, |
2082
|
|
|
|
|
|
|
B5 => "Nugget", |
2083
|
|
|
|
|
|
|
}, |
2084
|
|
|
|
|
|
|
|
2085
|
|
|
|
|
|
|
The entries C<maxrow> and C<maxcol> are 1-based. |
2086
|
|
|
|
|
|
|
|
2087
|
|
|
|
|
|
|
This has now been partially implemented, mainly for Excel, as the other |
2088
|
|
|
|
|
|
|
parsers do not (yet) support all of that. YMMV. |
2089
|
|
|
|
|
|
|
|
2090
|
|
|
|
|
|
|
If a cell itself is not hidden, but the parser holds the information that |
2091
|
|
|
|
|
|
|
either the row or the column (or both) the field is in is hidden, the flag |
2092
|
|
|
|
|
|
|
is inherited into the cell attributes. |
2093
|
|
|
|
|
|
|
|
2094
|
|
|
|
|
|
|
You can get the attributes of a cell (as a hash-ref) like this: |
2095
|
|
|
|
|
|
|
|
2096
|
|
|
|
|
|
|
my $attr = $book->[1]{attr}[1][3]; # Direct structure |
2097
|
|
|
|
|
|
|
my $attr = $book->sheet (1)->attr (1, 3); # Same using OO |
2098
|
|
|
|
|
|
|
my $attr = $book->sheet (1)->attr ("A3"); # Same using OO |
2099
|
|
|
|
|
|
|
|
2100
|
|
|
|
|
|
|
To get to the C<font> attribute, use any of these: |
2101
|
|
|
|
|
|
|
|
2102
|
|
|
|
|
|
|
my $font = $book->[1]{attr}[1][3]{font}; |
2103
|
|
|
|
|
|
|
my $font = $book->sheet (1)->attr (1, 3)->{font}; |
2104
|
|
|
|
|
|
|
my $font = $book->sheet (1)->attr ("A3")->font; |
2105
|
|
|
|
|
|
|
|
2106
|
|
|
|
|
|
|
=head3 Merged cells |
2107
|
|
|
|
|
|
|
X<merged> |
2108
|
|
|
|
|
|
|
|
2109
|
|
|
|
|
|
|
Note that only |
2110
|
|
|
|
|
|
|
L<Spreadsheet::ReadSXC|https://metacpan.org/release/Spreadsheet-ReadSXC> |
2111
|
|
|
|
|
|
|
documents the use of merged cells, and not in a way useful for the spreadsheet |
2112
|
|
|
|
|
|
|
consumer. |
2113
|
|
|
|
|
|
|
|
2114
|
|
|
|
|
|
|
CSV does not support merged cells (though future implementations of CSV |
2115
|
|
|
|
|
|
|
for the web might). |
2116
|
|
|
|
|
|
|
|
2117
|
|
|
|
|
|
|
The documentation of merged areas in |
2118
|
|
|
|
|
|
|
L<Spreadsheet::ParseExcel|https://metacpan.org/release/Spreadsheet-ParseExcel> and |
2119
|
|
|
|
|
|
|
L<Spreadsheet::ParseXLSX|https://metacpan.org/release/Spreadsheet-ParseXLSX> can |
2120
|
|
|
|
|
|
|
be found in |
2121
|
|
|
|
|
|
|
L<Spreadsheet::ParseExcel::Worksheet|https://metacpan.org/pod/Spreadsheet::ParseExcel::Worksheet> |
2122
|
|
|
|
|
|
|
and L<Spreadsheet::ParseExcel::Cell|https://metacpan.org/pod/Spreadsheet::ParseExcel::Cell>. |
2123
|
|
|
|
|
|
|
|
2124
|
|
|
|
|
|
|
None of the basic L<Spreadsheet::XLSX|https://metacpan.org/release/Spreadsheet-XLSX>, |
2125
|
|
|
|
|
|
|
L<Spreadsheet::ParseExcel|https://metacpan.org/release/Spreadsheet-ParseExcel>, and |
2126
|
|
|
|
|
|
|
L<Spreadsheet::ParseXLSX|https://metacpan.org/release/Spreadsheet-ParseXLSX> manual |
2127
|
|
|
|
|
|
|
pages mention merged cells at all. |
2128
|
|
|
|
|
|
|
|
2129
|
|
|
|
|
|
|
This module just tries to return the information in a generic way. |
2130
|
|
|
|
|
|
|
|
2131
|
|
|
|
|
|
|
Given this spreadsheet as an example |
2132
|
|
|
|
|
|
|
|
2133
|
|
|
|
|
|
|
merged.xlsx: |
2134
|
|
|
|
|
|
|
|
2135
|
|
|
|
|
|
|
A B C |
2136
|
|
|
|
|
|
|
+-----+-----------+ |
2137
|
|
|
|
|
|
|
1| | foo | |
2138
|
|
|
|
|
|
|
+-----+ + |
2139
|
|
|
|
|
|
|
2| bar | | |
2140
|
|
|
|
|
|
|
| +-----+-----+ |
2141
|
|
|
|
|
|
|
3| | urg | orc | |
2142
|
|
|
|
|
|
|
+-----+-----+-----+ |
2143
|
|
|
|
|
|
|
|
2144
|
|
|
|
|
|
|
the information extracted from that undocumented information is |
2145
|
|
|
|
|
|
|
returned in the C<merged> entry of the sheet's hash as a list of |
2146
|
|
|
|
|
|
|
top-left, bottom-right coordinate pairs (col, row, col, row). For the |
2147
|
|
|
|
|
|
|
given example, that would be: |
2148
|
|
|
|
|
|
|
|
2149
|
|
|
|
|
|
|
$ss->{merged} = [ |
2150
|
|
|
|
|
|
|
[ 1, 2, 1, 3 ], # A2-A3 |
2151
|
|
|
|
|
|
|
[ 2, 1, 3, 2 ], # B1-C2 |
2152
|
|
|
|
|
|
|
]; |
2153
|
|
|
|
|
|
|
|
2154
|
|
|
|
|
|
|
To find the label of the top-left cell in a merged area, use the |
2155
|
|
|
|
|
|
|
L<C<merged_from>|/merged_from> method. |
2156
|
|
|
|
|
|
|
|
2157
|
|
|
|
|
|
|
$ss->merged_from ("C2"); # will return "B1" |
2158
|
|
|
|
|
|
|
|
2159
|
|
|
|
|
|
|
When the attributes are also enabled, there is some merge information |
2160
|
|
|
|
|
|
|
copied directly from the cell information, but again, that stems from |
2161
|
|
|
|
|
|
|
code analysis and not from documentation: |
2162
|
|
|
|
|
|
|
|
2163
|
|
|
|
|
|
|
my $ss = ReadData ("merged.xlsx", attr => 1)->[1]; |
2164
|
|
|
|
|
|
|
foreach my $row (1 .. $ss->{maxrow}) { |
2165
|
|
|
|
|
|
|
foreach my $col (1 .. $ss->{maxcol}) { |
2166
|
|
|
|
|
|
|
my $cell = cr2cell ($col, $row); |
2167
|
|
|
|
|
|
|
printf "%s %-3s %s ", $cell, $ss->{$cell}, |
2168
|
|
|
|
|
|
|
$ss->{attr}[$col][$row]{merged}; |
2169
|
|
|
|
|
|
|
} |
2170
|
|
|
|
|
|
|
print "\n"; |
2171
|
|
|
|
|
|
|
} |
2172
|
|
|
|
|
|
|
|
2173
|
|
|
|
|
|
|
A1 0 B1 foo 1 C1 1 |
2174
|
|
|
|
|
|
|
A2 bar 1 B2 1 C2 1 |
2175
|
|
|
|
|
|
|
A3 1 B3 urg 0 C3 orc 0 |
2176
|
|
|
|
|
|
|
|
2177
|
|
|
|
|
|
|
In this example, there is no way to see if C<B2> is merged to C<A2> or |
2178
|
|
|
|
|
|
|
to C<B1> without analyzing all surrounding cells. This could as well |
2179
|
|
|
|
|
|
|
mean C<A2:A3>, C<B1:C1>, C<B2:C2>, as C<A2:A3>, C<B1:B2>, C<C1:C2>, as |
2180
|
|
|
|
|
|
|
C<A2:A3>, C<B1:C2>. |
2181
|
|
|
|
|
|
|
|
2182
|
|
|
|
|
|
|
Use the L<C<merged>|/merged> entry described above to find out what |
2183
|
|
|
|
|
|
|
fields are merged to what other fields or use C<merge>: |
2184
|
|
|
|
|
|
|
|
2185
|
|
|
|
|
|
|
my $ss = ReadData ("merged.xlsx", attr => 1, merge => 1)->[1]; |
2186
|
|
|
|
|
|
|
foreach my $row (1 .. $ss->{maxrow}) { |
2187
|
|
|
|
|
|
|
foreach my $col (1 .. $ss->{maxcol}) { |
2188
|
|
|
|
|
|
|
my $cell = cr2cell ($col, $row); |
2189
|
|
|
|
|
|
|
printf "%s %-3s %s ", $cell, $ss->{$cell}, |
2190
|
|
|
|
|
|
|
$ss->{attr}[$col][$row]{merged}; |
2191
|
|
|
|
|
|
|
} |
2192
|
|
|
|
|
|
|
print "\n"; |
2193
|
|
|
|
|
|
|
} |
2194
|
|
|
|
|
|
|
|
2195
|
|
|
|
|
|
|
A1 0 B1 foo B1 C1 foo B1 |
2196
|
|
|
|
|
|
|
A2 bar A2 B2 foo B1 C2 foo B1 |
2197
|
|
|
|
|
|
|
A3 bar A2 B3 urg 0 C3 orc 0 |
2198
|
|
|
|
|
|
|
|
2199
|
|
|
|
|
|
|
=head2 Streams from web-resources |
2200
|
|
|
|
|
|
|
|
2201
|
|
|
|
|
|
|
If you want to stream a web-resource, and the underlying parser supports it, |
2202
|
|
|
|
|
|
|
you could use a helper function like this (thanks Corion): |
2203
|
|
|
|
|
|
|
|
2204
|
|
|
|
|
|
|
use HTTP::Tiny; |
2205
|
|
|
|
|
|
|
use Spreadsheet::Read; |
2206
|
|
|
|
|
|
|
|
2207
|
|
|
|
|
|
|
# Fetch data and return a filehandle to that data |
2208
|
|
|
|
|
|
|
sub fh_from_url { |
2209
|
|
|
|
|
|
|
my $url = shift; |
2210
|
|
|
|
|
|
|
my $ua = HTTP::Tiny->new; |
2211
|
|
|
|
|
|
|
my $res = $ua->get ($url); |
2212
|
|
|
|
|
|
|
open my $fh, "<", \$res->{content}; |
2213
|
|
|
|
|
|
|
return $fh |
2214
|
|
|
|
|
|
|
} # fh_from_url |
2215
|
|
|
|
|
|
|
|
2216
|
|
|
|
|
|
|
my $fh = fh_from_url ("http://example.com/example.csv"); |
2217
|
|
|
|
|
|
|
my $sheet = Spreadsheet::Read->new ($fh, parser => "csv"); |
2218
|
|
|
|
|
|
|
|
2219
|
|
|
|
|
|
|
=head1 TOOLS |
2220
|
|
|
|
|
|
|
|
2221
|
|
|
|
|
|
|
This module comes with a few tools that perform tasks from the FAQ, like |
2222
|
|
|
|
|
|
|
"How do I select only column D through F from sheet 2 into a CSV file?" |
2223
|
|
|
|
|
|
|
|
2224
|
|
|
|
|
|
|
If the module was installed without the tools, you can find them here: |
2225
|
|
|
|
|
|
|
https://github.com/Tux/Spreadsheet-Read/tree/master/examples |
2226
|
|
|
|
|
|
|
|
2227
|
|
|
|
|
|
|
=head2 C<xlscat> |
2228
|
|
|
|
|
|
|
|
2229
|
|
|
|
|
|
|
Show (parts of) a spreadsheet in plain text, CSV, or HTML |
2230
|
|
|
|
|
|
|
|
2231
|
|
|
|
|
|
|
usage: xlscat [-s <sep>] [-L] [-n] [-A] [-u] [Selection] file.xls |
2232
|
|
|
|
|
|
|
[-c | -m] [-u] [Selection] file.xls |
2233
|
|
|
|
|
|
|
-i [-S sheets] file.xls |
2234
|
|
|
|
|
|
|
Generic options: |
2235
|
|
|
|
|
|
|
-v[#] Set verbose level (xlscat/xlsgrep) |
2236
|
|
|
|
|
|
|
-d[#] Set debug level (Spreadsheet::Read) |
2237
|
|
|
|
|
|
|
-u Use unformatted values |
2238
|
|
|
|
|
|
|
--noclip Do not strip empty sheets and |
2239
|
|
|
|
|
|
|
trailing empty rows and columns |
2240
|
|
|
|
|
|
|
-e <enc> Set encoding for input and output |
2241
|
|
|
|
|
|
|
-b <enc> Set encoding for input |
2242
|
|
|
|
|
|
|
-a <enc> Set encoding for output |
2243
|
|
|
|
|
|
|
Input CSV: |
2244
|
|
|
|
|
|
|
--in-sep=c Set input sep_char for CSV |
2245
|
|
|
|
|
|
|
Input XLS: |
2246
|
|
|
|
|
|
|
--dtfmt=fmt Specify the default date format to replace 'm-d-yy' |
2247
|
|
|
|
|
|
|
the default replacement is 'yyyy-mm-dd' |
2248
|
|
|
|
|
|
|
Output Text (default): |
2249
|
|
|
|
|
|
|
-s <sep> Use separator <sep>. Default '|', \n allowed |
2250
|
|
|
|
|
|
|
-L Line up the columns |
2251
|
|
|
|
|
|
|
-n [skip] Number lines (prefix with column number) |
2252
|
|
|
|
|
|
|
optionally skip <skip> (header) lines |
2253
|
|
|
|
|
|
|
-A Show field attributes in ANSI escapes |
2254
|
|
|
|
|
|
|
-h[#] Show # header lines |
2255
|
|
|
|
|
|
|
Output Index only: |
2256
|
|
|
|
|
|
|
-i Show sheet names and size only |
2257
|
|
|
|
|
|
|
Output CSV: |
2258
|
|
|
|
|
|
|
-c Output CSV, separator = ',' |
2259
|
|
|
|
|
|
|
-m Output CSV, separator = ';' |
2260
|
|
|
|
|
|
|
Output HTML: |
2261
|
|
|
|
|
|
|
-H Output HTML |
2262
|
|
|
|
|
|
|
Selection: |
2263
|
|
|
|
|
|
|
-S <sheets> Only print sheets <sheets>. 'all' is a valid set |
2264
|
|
|
|
|
|
|
Default only prints the first sheet |
2265
|
|
|
|
|
|
|
-R <rows> Only print rows <rows>. Default is 'all' |
2266
|
|
|
|
|
|
|
-C <cols> Only print columns <cols>. Default is 'all' |
2267
|
|
|
|
|
|
|
-F <flds> Only fields <flds> e.g. -FA3,B16 |
2268
|
|
|
|
|
|
|
Ordering (column numbers in result set *after* selection): |
2269
|
|
|
|
|
|
|
--sort=spec Sort output (e.g. --sort=3,2r,5n,1rn+2) |
2270
|
|
|
|
|
|
|
+# - first # lines do not sort (header) |
2271
|
|
|
|
|
|
|
# - order on column # lexical ascending |
2272
|
|
|
|
|
|
|
#n - order on column # numeric ascending |
2273
|
|
|
|
|
|
|
#r - order on column # lexical descending |
2274
|
|
|
|
|
|
|
#rn - order on column # numeric descending |
2275
|
|
|
|
|
|
|
|
2276
|
|
|
|
|
|
|
=head2 C<xlsgrep> |
2277
|
|
|
|
|
|
|
|
2278
|
|
|
|
|
|
|
Show (parts of) a spreadsheet that match a pattern in plain text, CSV, or HTML |
2279
|
|
|
|
|
|
|
|
2280
|
|
|
|
|
|
|
usage: xlsgrep [-s <sep>] [-L] [-n] [-A] [-u] [Selection] pattern file.xls |
2281
|
|
|
|
|
|
|
[-c | -m] [-u] [Selection] pattern file.xls |
2282
|
|
|
|
|
|
|
-i [-S sheets] pattern file.xls |
2283
|
|
|
|
|
|
|
Generic options: |
2284
|
|
|
|
|
|
|
-v[#] Set verbose level (xlscat/xlsgrep) |
2285
|
|
|
|
|
|
|
-d[#] Set debug level (Spreadsheet::Read) |
2286
|
|
|
|
|
|
|
-u Use unformatted values |
2287
|
|
|
|
|
|
|
--noclip Do not strip empty sheets and |
2288
|
|
|
|
|
|
|
trailing empty rows and columns |
2289
|
|
|
|
|
|
|
-e <enc> Set encoding for input and output |
2290
|
|
|
|
|
|
|
-b <enc> Set encoding for input |
2291
|
|
|
|
|
|
|
-a <enc> Set encoding for output |
2292
|
|
|
|
|
|
|
Input CSV: |
2293
|
|
|
|
|
|
|
--in-sep=c Set input sep_char for CSV |
2294
|
|
|
|
|
|
|
Input XLS: |
2295
|
|
|
|
|
|
|
--dtfmt=fmt Specify the default date format to replace 'm-d-yy' |
2296
|
|
|
|
|
|
|
the default replacement is 'yyyy-mm-dd' |
2297
|
|
|
|
|
|
|
Output Text (default): |
2298
|
|
|
|
|
|
|
-s <sep> Use separator <sep>. Default '|', \n allowed |
2299
|
|
|
|
|
|
|
-L Line up the columns |
2300
|
|
|
|
|
|
|
-n [skip] Number lines (prefix with column number) |
2301
|
|
|
|
|
|
|
optionally skip <skip> (header) lines |
2302
|
|
|
|
|
|
|
-A Show field attributes in ANSI escapes |
2303
|
|
|
|
|
|
|
-h[#] Show # header lines |
2304
|
|
|
|
|
|
|
Grep options: |
2305
|
|
|
|
|
|
|
-i Ignore case |
2306
|
|
|
|
|
|
|
-w Match whole words only |
2307
|
|
|
|
|
|
|
Output CSV: |
2308
|
|
|
|
|
|
|
-c Output CSV, separator = ',' |
2309
|
|
|
|
|
|
|
-m Output CSV, separator = ';' |
2310
|
|
|
|
|
|
|
Output HTML: |
2311
|
|
|
|
|
|
|
-H Output HTML |
2312
|
|
|
|
|
|
|
Selection: |
2313
|
|
|
|
|
|
|
-S <sheets> Only print sheets <sheets>. 'all' is a valid set |
2314
|
|
|
|
|
|
|
Default only prints the first sheet |
2315
|
|
|
|
|
|
|
-R <rows> Only print rows <rows>. Default is 'all' |
2316
|
|
|
|
|
|
|
-C <cols> Only print columns <cols>. Default is 'all' |
2317
|
|
|
|
|
|
|
-F <flds> Only fields <flds> e.g. -FA3,B16 |
2318
|
|
|
|
|
|
|
Ordering (column numbers in result set *after* selection): |
2319
|
|
|
|
|
|
|
--sort=spec Sort output (e.g. --sort=3,2r,5n,1rn+2) |
2320
|
|
|
|
|
|
|
+# - first # lines do not sort (header) |
2321
|
|
|
|
|
|
|
# - order on column # lexical ascending |
2322
|
|
|
|
|
|
|
#n - order on column # numeric ascending |
2323
|
|
|
|
|
|
|
#r - order on column # lexical descending |
2324
|
|
|
|
|
|
|
#rn - order on column # numeric descending |
2325
|
|
|
|
|
|
|
|
2326
|
|
|
|
|
|
|
=head2 C<xls2csv> |
2327
|
|
|
|
|
|
|
|
2328
|
|
|
|
|
|
|
Convert a spreadsheet to CSV. This is just a small wrapper over C<xlscat>. |
2329
|
|
|
|
|
|
|
|
2330
|
|
|
|
|
|
|
usage: xls2csv [ -o file.csv ] file.xls |
2331
|
|
|
|
|
|
|
|
2332
|
|
|
|
|
|
|
=head2 C<ss2tk> |
2333
|
|
|
|
|
|
|
|
2334
|
|
|
|
|
|
|
Show a spreadsheet in a perl/Tk spreadsheet widget |
2335
|
|
|
|
|
|
|
|
2336
|
|
|
|
|
|
|
usage: ss2tk [-w <width>] [X11 options] file.xls [<pattern>] |
2337
|
|
|
|
|
|
|
-w <width> use <width> as default column width (4) |
2338
|
|
|
|
|
|
|
|
2339
|
|
|
|
|
|
|
=head2 C<ssdiff> |
2340
|
|
|
|
|
|
|
|
2341
|
|
|
|
|
|
|
Show the differences between two spreadsheets. |
2342
|
|
|
|
|
|
|
|
2343
|
|
|
|
|
|
|
usage: examples/ssdiff [--verbose[=1]] file.xls file.xlsx |
2344
|
|
|
|
|
|
|
|
2345
|
|
|
|
|
|
|
=head1 TODO |
2346
|
|
|
|
|
|
|
|
2347
|
|
|
|
|
|
|
=over 4 |
2348
|
|
|
|
|
|
|
|
2349
|
|
|
|
|
|
|
=item Options |
2350
|
|
|
|
|
|
|
|
2351
|
|
|
|
|
|
|
=over 2 |
2352
|
|
|
|
|
|
|
|
2353
|
|
|
|
|
|
|
=item Module Options |
2354
|
|
|
|
|
|
|
|
2355
|
|
|
|
|
|
|
New Spreadsheet::Read options are bound to happen. I'm thinking of an |
2356
|
|
|
|
|
|
|
option that disables the reading of the data entirely to speed up an |
2357
|
|
|
|
|
|
|
index request (how many sheets/fields/columns). See C<xlscat -i>. |
2358
|
|
|
|
|
|
|
|
2359
|
|
|
|
|
|
|
=item Parser options |
2360
|
|
|
|
|
|
|
|
2361
|
|
|
|
|
|
|
Try to transparently support as many options as the encapsulated modules |
2362
|
|
|
|
|
|
|
support regarding (un)formatted values, (date) formats, hidden columns |
2363
|
|
|
|
|
|
|
rows or fields etc. These could be implemented like C<attr> above but |
2364
|
|
|
|
|
|
|
names C<meta>, or just be new values in the C<attr> hashes. |
2365
|
|
|
|
|
|
|
|
2366
|
|
|
|
|
|
|
=back |
2367
|
|
|
|
|
|
|
|
2368
|
|
|
|
|
|
|
=item Other parsers |
2369
|
|
|
|
|
|
|
|
2370
|
|
|
|
|
|
|
Add support for new(er) parsers for already supported formats, like |
2371
|
|
|
|
|
|
|
|
2372
|
|
|
|
|
|
|
=over 2 |
2373
|
|
|
|
|
|
|
|
2374
|
|
|
|
|
|
|
=item Data::XLSX::Parser |
2375
|
|
|
|
|
|
|
|
2376
|
|
|
|
|
|
|
Data::XLSX::Parser provides a faster way to parse Microsoft Excel's .xlsx |
2377
|
|
|
|
|
|
|
files. The implementation of this module is highly inspired from Python's |
2378
|
|
|
|
|
|
|
FastXLSX library. |
2379
|
|
|
|
|
|
|
|
2380
|
|
|
|
|
|
|
This is a SAX-based parser, so you can parse very large XLSX files with |
2381
|
|
|
|
|
|
|
lower memory usage. |
2382
|
|
|
|
|
|
|
|
2383
|
|
|
|
|
|
|
=back |
2384
|
|
|
|
|
|
|
|
2385
|
|
|
|
|
|
|
=item Other spreadsheet formats |
2386
|
|
|
|
|
|
|
|
2387
|
|
|
|
|
|
|
I consider adding any spreadsheet interface that offers a usable API. |
2388
|
|
|
|
|
|
|
|
2389
|
|
|
|
|
|
|
Under investigation: |
2390
|
|
|
|
|
|
|
|
2391
|
|
|
|
|
|
|
=over 2 |
2392
|
|
|
|
|
|
|
|
2393
|
|
|
|
|
|
|
=item Gnumeric (.gnumeric) |
2394
|
|
|
|
|
|
|
|
2395
|
|
|
|
|
|
|
I have seen no existing CPAN module yet. |
2396
|
|
|
|
|
|
|
|
2397
|
|
|
|
|
|
|
It is gzip'ed XML |
2398
|
|
|
|
|
|
|
|
2399
|
|
|
|
|
|
|
=item Kspread (.ksp) |
2400
|
|
|
|
|
|
|
|
2401
|
|
|
|
|
|
|
Now known as Calligra Sheets. |
2402
|
|
|
|
|
|
|
|
2403
|
|
|
|
|
|
|
I have seen no existing CPAN module yet. |
2404
|
|
|
|
|
|
|
|
2405
|
|
|
|
|
|
|
It is XML in ZIP |
2406
|
|
|
|
|
|
|
|
2407
|
|
|
|
|
|
|
=back |
2408
|
|
|
|
|
|
|
|
2409
|
|
|
|
|
|
|
=item Alternative parsers for existing formats |
2410
|
|
|
|
|
|
|
|
2411
|
|
|
|
|
|
|
As long as the alternative has a good reason for its existence, and the |
2412
|
|
|
|
|
|
|
API of that parser reasonably fits in my approach, I will consider to |
2413
|
|
|
|
|
|
|
implement the glue layer, or apply patches to do so as long as these |
2414
|
|
|
|
|
|
|
match what F<CONTRIBUTING.md> describes. |
2415
|
|
|
|
|
|
|
|
2416
|
|
|
|
|
|
|
=back |
2417
|
|
|
|
|
|
|
|
2418
|
|
|
|
|
|
|
=head1 SEE ALSO |
2419
|
|
|
|
|
|
|
|
2420
|
|
|
|
|
|
|
=over 2 |
2421
|
|
|
|
|
|
|
|
2422
|
|
|
|
|
|
|
=item Text::CSV_XS, Text::CSV_PP |
2423
|
|
|
|
|
|
|
|
2424
|
|
|
|
|
|
|
See L<Text::CSV_XS|https://metacpan.org/release/Text-CSV_XS> , |
2425
|
|
|
|
|
|
|
L<Text::CSV_PP|https://metacpan.org/release/Text-CSV> , and |
2426
|
|
|
|
|
|
|
L<Text::CSV|https://metacpan.org/release/Text-CSV> documentation. |
2427
|
|
|
|
|
|
|
|
2428
|
|
|
|
|
|
|
L<Text::CSV|https://metacpan.org/release/Text-CSV> is a wrapper over Text::CSV_XS (the fast XS version) and/or |
2429
|
|
|
|
|
|
|
L<Text::CSV_PP|https://metacpan.org/release/Text-CSV> (the pure perl version). |
2430
|
|
|
|
|
|
|
|
2431
|
|
|
|
|
|
|
=item Spreadsheet::ParseExcel |
2432
|
|
|
|
|
|
|
|
2433
|
|
|
|
|
|
|
L<Spreadsheet::ParseExcel|https://metacpan.org/release/Spreadsheet-ParseExcel> is |
2434
|
|
|
|
|
|
|
the best parser for old-style Microsoft Excel (.xls) files. |
2435
|
|
|
|
|
|
|
|
2436
|
|
|
|
|
|
|
=item Spreadsheet::ParseXLSX |
2437
|
|
|
|
|
|
|
|
2438
|
|
|
|
|
|
|
L<Spreadsheet::ParseXLSX|https://metacpan.org/release/Spreadsheet-ParseXLSX> is |
2439
|
|
|
|
|
|
|
like L<Spreadsheet::ParseExcel|https://metacpan.org/release/Spreadsheet-ParseExcel>, |
2440
|
|
|
|
|
|
|
but for new Microsoft Excel 2007+ files (.xlsx). They have the same API. |
2441
|
|
|
|
|
|
|
|
2442
|
|
|
|
|
|
|
This module uses L<XML::Twig|https://metacpan.org/release/XML-Twig> to parse the |
2443
|
|
|
|
|
|
|
internal XML. |
2444
|
|
|
|
|
|
|
|
2445
|
|
|
|
|
|
|
=item Spreadsheet::XLSX |
2446
|
|
|
|
|
|
|
|
2447
|
|
|
|
|
|
|
See L<Spreadsheet::XLSX|https://metacpan.org/release/Spreadsheet-XLSX> |
2448
|
|
|
|
|
|
|
documentation. |
2449
|
|
|
|
|
|
|
|
2450
|
|
|
|
|
|
|
This module is dead and deprecated. It is B<buggy and unmaintained>. I<Please> |
2451
|
|
|
|
|
|
|
use L<Spreadsheet::ParseXLSX|https://metacpan.org/release/Spreadsheet-ParseXLSX> |
2452
|
|
|
|
|
|
|
instead. |
2453
|
|
|
|
|
|
|
|
2454
|
|
|
|
|
|
|
=item Spreadsheet::ParseODS |
2455
|
|
|
|
|
|
|
|
2456
|
|
|
|
|
|
|
L<Spreadsheet::ParseODS|https://metacpan.org/release/Spreadsheet-ParseODS> is a |
2457
|
|
|
|
|
|
|
parser for OpenOffice/LibreOffice (.sxc and .ods) spreadsheet files. It is the |
2458
|
|
|
|
|
|
|
successor of L<Spreadsheet::ReadSXC|https://metacpan.org/release/Spreadsheet-ReadSXC>. |
2459
|
|
|
|
|
|
|
|
2460
|
|
|
|
|
|
|
=item Spreadsheet::ReadSXC |
2461
|
|
|
|
|
|
|
|
2462
|
|
|
|
|
|
|
L<Spreadsheet::ReadSXC|https://metacpan.org/release/Spreadsheet-ReadSXC> is a |
2463
|
|
|
|
|
|
|
parser for OpenOffice/LibreOffice (.sxc and .ods) spreadsheet files. |
2464
|
|
|
|
|
|
|
|
2465
|
|
|
|
|
|
|
=item Spreadsheet::BasicRead |
2466
|
|
|
|
|
|
|
|
2467
|
|
|
|
|
|
|
See L<Spreadsheet::BasicRead|https://metacpan.org/release/Spreadsheet-BasicRead> |
2468
|
|
|
|
|
|
|
for xlscat-like functionality (Excel only) |
2469
|
|
|
|
|
|
|
|
2470
|
|
|
|
|
|
|
=item Spreadsheet::ConvertAA |
2471
|
|
|
|
|
|
|
|
2472
|
|
|
|
|
|
|
See L<Spreadsheet::ConvertAA|https://metacpan.org/release/Spreadsheet-ConvertAA> |
2473
|
|
|
|
|
|
|
for an alternative set of L</cell2cr>/L</cr2cell> pair. |
2474
|
|
|
|
|
|
|
|
2475
|
|
|
|
|
|
|
=item Spreadsheet::Perl |
2476
|
|
|
|
|
|
|
|
2477
|
|
|
|
|
|
|
L<Spreadsheet::Perl|https://metacpan.org/release/Spreadsheet-Perl> offers a Pure |
2478
|
|
|
|
|
|
|
Perl implementation of a spreadsheet engine. Users that want this format to be |
2479
|
|
|
|
|
|
|
supported in Spreadsheet::Read are hereby motivated to offer patches. It is |
2480
|
|
|
|
|
|
|
not high on my TODO-list. |
2481
|
|
|
|
|
|
|
|
2482
|
|
|
|
|
|
|
=item Spreadsheet::CSV |
2483
|
|
|
|
|
|
|
|
2484
|
|
|
|
|
|
|
L<Spreadsheet::CSV|https://metacpan.org/release/Spreadsheet-CSV> offers the |
2485
|
|
|
|
|
|
|
interesting approach of seeing all supported spreadsheet formats as if they were |
2486
|
|
|
|
|
|
|
CSV, mimicking the L<Text::CSV_XS|https://metacpan.org/release/Text-CSV_XS> |
2487
|
|
|
|
|
|
|
interface. |
2488
|
|
|
|
|
|
|
|
2489
|
|
|
|
|
|
|
=item xls2csv |
2490
|
|
|
|
|
|
|
|
2491
|
|
|
|
|
|
|
L<xls2csv|https://github.com/Tux/Spreadsheet-Read/blob/master/examples/xls2csv> |
2492
|
|
|
|
|
|
|
offers an alternative for my C<xlscat -c>, in the xls2csv tool, but this tool |
2493
|
|
|
|
|
|
|
focuses on character encoding transparency, and requires some other modules. |
2494
|
|
|
|
|
|
|
|
2495
|
|
|
|
|
|
|
=back |
2496
|
|
|
|
|
|
|
|
2497
|
|
|
|
|
|
|
=head1 AUTHOR |
2498
|
|
|
|
|
|
|
|
2499
|
|
|
|
|
|
|
H.Merijn Brand, <h.m.brand@xs4all.nl> |
2500
|
|
|
|
|
|
|
|
2501
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
2502
|
|
|
|
|
|
|
|
2503
|
|
|
|
|
|
|
Copyright (C) 2005-2020 H.Merijn Brand |
2504
|
|
|
|
|
|
|
|
2505
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
2506
|
|
|
|
|
|
|
it under the same terms as Perl itself. |
2507
|
|
|
|
|
|
|
|
2508
|
|
|
|
|
|
|
=cut |