File Coverage

blib/lib/Spreadsheet/Read.pm

Criterion	Covered	Total	%
statement	489	690	70.8
branch	423	652	64.8
condition	183	315	58.1
subroutine	47	50	94.0
pod	13	13	100.0
total	1155	1720	67.1

line	stmt	bran	cond	sub	pod	time	code
1							#!/pro/bin/perl
2
3							package Spreadsheet::Read;
4
5							=head1 NAME
6
7							Spreadsheet::Read - Read the data from a spreadsheet
8
9							=head1 SYNOPSIS
10
11							use Spreadsheet::Read;
12							my $book = ReadData ("test.csv", sep => ";");
13							my $book = ReadData ("test.sxc");
14							my $book = ReadData ("test.ods");
15							my $book = ReadData ("test.xls");
16							my $book = ReadData ("test.xlsx");
17							my $book = ReadData ("test.xlsm");
18							my $book = ReadData ($fh, parser => "xls");
19
20							Spreadsheet::Read::add ($book, "sheet.csv");
21
22							my $sheet = $book->[1]; # first datasheet
23							my $cell = $book->[1]{A3}; # content of field A3 of sheet 1
24							my $cell = $book->[1]{cell}[1][3]; # same, unformatted
25
26							# OO API
27							my $book = Spreadsheet::Read->new ("file.csv");
28							my $sheet = $book->sheet (1);
29							my $cell = $sheet->cell ("A3");
30							my $cell = $sheet->cell (1, 3);
31
32							$book->add ("test.xls");
33
34							=cut
35
36	58			58		5939642	use 5.8.1;
	58					792
37	58			58		386	use strict;
	58					131
	58					1419
38	58			58		320	use warnings;
	58					121
	58					3663
39
40							our $VERSION = "0.82";
41	1			1	1	137	sub Version { $VERSION }
42
43	58			58		394	use Carp;
	58					133
	58					3835
44	58			58		397	use Exporter;
	58					124
	58					5881
45							our @ISA = qw( Exporter );
46							our @EXPORT = qw( ReadData cell2cr cr2cell );
47							our @EXPORT_OK = qw( parses rows cellrow row add );
48
49	58			58		32974	use Encode qw( decode );
	58					903436
	58					4416
50	58			58		52982	use File::Temp qw( );
	58					1251489
	58					1782
51	58			58		35635	use Data::Dumper;
	58					379932
	58					70239
52
53							my @parsers = (
54							[ csv => "Text::CSV_XS", "0.71" ],
55							[ csv => "Text::CSV_PP", "1.17" ],
56							[ csv => "Text::CSV", "1.17" ],
57							[ ods => "Spreadsheet::ParseODS", "0.25" ],
58							[ ods => "Spreadsheet::ReadSXC", "0.20" ],
59							[ sxc => "Spreadsheet::ParseODS", "0.25" ],
60							[ sxc => "Spreadsheet::ReadSXC", "0.20" ],
61							[ xls => "Spreadsheet::ParseExcel", "0.34" ],
62							[ xlsx => "Spreadsheet::ParseXLSX", "0.24" ],
63							[ xlsx => "Spreadsheet::XLSX", "0.13" ],
64							[ prl => "Spreadsheet::Perl", "" ],
65
66							# Helper modules
67							[ ios => "IO::Scalar", "" ],
68							[ dmp => "Data::Peek", "" ],
69							);
70							my %can = ( supports => { map { $_->[1] => $_->[2] } @parsers });
71							foreach my $p (@parsers) {
72							my $format = $p->[0];
73							$can{$format} and next;
74							$can{$format} = "";
75							my $preset = $ENV{"SPREADSHEET_READ_\U$format"} or next;
76							my $min_version = $can{supports}{$preset};
77							unless ($min_version) {
78							# Catch weirdness like $SPREADSHEET_READ_XLSX = "DBD::Oracle"
79							$can{$format} = "!$preset is not supported for the $format format";
80							next;
81							}
82							if (eval "local \$_; require $preset" and not $@) {
83							# forcing a parser should still check the version
84							my $ok;
85							my $has = $preset->VERSION;
86							$has =~ s/_[0-9]+$//; # Remove beta-part
87							if ($min_version =~ m/^v([0-9.]+)/) { # clumsy versions
88							my @min = split m/\./ => $1;
89							$has =~ s/^v//;
90							my @has = split m/\./ => $has;
91							$ok = (($has[0] * 1000 + $has[1]) * 1000 + $has[2]) >=
92							(($min[0] * 1000 + $min[1]) * 1000 + $min[2]);
93							}
94							else { # normal versions
95							$ok = $has >= $min_version;
96							}
97							$ok or $preset = "!$preset";
98							}
99							else {
100							$preset = "!$preset";
101							}
102							$can{$format} = $preset;
103							}
104							delete $can{supports};
105							for (@parsers) {
106							my ($flag, $mod, $vsn) = @$_;
107							$can{$flag} and next;
108							eval "require $mod; \$vsn and ${mod}->VERSION (\$vsn); \$can{\$flag} = '$mod'" or
109							$_->[0] = "! Cannot use $mod version $vsn: $@";
110							}
111							$can{sc} = __PACKAGE__; # SquirelCalc is built-in
112
113							defined $Spreadsheet::ParseExcel::VERSION && $Spreadsheet::ParseExcel::VERSION < 0.61 and
114							*Spreadsheet::ParseExcel::Workbook::get_active_sheet = sub { undef; };
115							defined $Spreadsheet::ParseODS::VERSION && $Spreadsheet::ParseODS::VERSION < 0.25 and
116							*Spreadsheet::ParseODS::Workbook::get_active_sheet = sub { undef; };
117
118							my $debug = 0;
119							my %def_opts = (
120							rc => 1,
121							cells => 1,
122							attr => 0,
123							clip => undef, # $opt{cells};
124							strip => 0,
125							pivot => 0,
126							dtfmt => "yyyy-mm-dd", # Format 14
127							debug => 0,
128							passwd => undef,
129							parser => undef,
130							sep => undef,
131							quote => undef,
132							label => undef,
133							merge => 0,
134							);
135							my @def_attr = (
136							type => "text",
137							fgcolor => undef,
138							bgcolor => undef,
139							font => undef,
140							size => undef,
141							format => undef,
142							halign => "left",
143							valign => "top",
144							bold => 0,
145							italic => 0,
146							uline => 0,
147							wrap => 0,
148							merged => 0,
149							hidden => 0,
150							locked => 0,
151							enc => "utf-8", # $ENV{LC_ALL} // $ENV{LANG} // ...
152							formula => undef,
153							);
154
155							# Helper functions
156
157							sub _dump {
158	7			7		21	my ($label, $ref) = @_;
159	7	50				26	if ($can{dmp}) {
160	7					46	print STDERR Data::Peek::DDumper ({ $label => $ref });
161							}
162							else {
163	0					0	print STDERR Data::Dumper->Dump ([$ref], [$label]);
164							}
165	7					1881	my @c = caller (1);
166	7					51	print STDERR "<<- $c[1]:$c[2]\|$c[3]\n";
167							} # _dump
168
169							sub _parser {
170	420	100		420		1720	my $type = shift or return "";
171	177					532	$type = lc $type;
172	177	100				702	my $ods = $can{ods} ? "ods" : "sxc";
173							# Aliases and fullnames
174	177	100				560	$type eq "excel" and return "xls";
175	176	50				527	$type eq "excel2007" and return "xlsx";
176	176	50				483	$type eq "xlsm" and return "xlsx";
177	176	100				466	$type eq "oo" and return $ods;
178							# $type eq "sxc" and return $ods;
179	175	100				440	$type eq "openoffice" and return $ods;
180	174	50				413	$type eq "libreoffice" and return $ods;
181	174	100				524	$type eq "perl" and return "prl";
182	173	50				450	$type eq "squirelcalc" and return "sc";
183	173	100				1243	return exists $can{$type} ? $type : "";
184							} # _parser
185
186							sub new {
187	13			13	1	7331	my $class = shift;
188	13					69	my $r = ReadData (@_);
189	13	100				51	unless ($r) {
190	2	50				14	@_ and return; # new with arguments failed to open resource
191	0					0	$r = [{
192							parsers => [],
193							error => undef,
194							sheets => 0,
195							sheet => { },
196							}];
197							}
198	11					225	bless $r => $class;
199							} # new
200
201							# Spreadsheet::Read::parses ("csv") or die "Cannot parse CSV"
202							sub parses {
203	68	100		68	1	161862	ref $_[0] eq __PACKAGE__ and shift;
204	68	100				307	my $type = _parser (shift) or return 0;
205	66	100				625	if ($can{$type} =~ m/^!\s(.)/) {
206	17					73	$@ = $1;
207	17					61	return 0;
208							}
209	49					162	return $can{$type};
210							} # parses
211
212							sub sheets {
213	4			4	1	14	my $ctrl = shift->[0];
214	4					6	my %s = %{$ctrl->{sheet}};
	4					19
215	4	100				48	wantarray ? sort { $s{$a} <=> $s{$b} } keys %s : $ctrl->{sheets};
	3					27
216							} # sheets
217
218							# col2label (4) => "D"
219							sub col2label {
220	15739	100		15739	1	33822	ref $_[0] eq __PACKAGE__ and shift;
221	15739					20470	my $c = shift;
222	15739	100	100			39898	defined $c && $c > 0 or return "";
223	15735					21038	my $cell = "";
224	15735					25414	while ($c) {
225	58			58		31758	use integer;
	58					922
	58					380
226
227	15752					29069	substr $cell, 0, 0, chr (--$c % 26 + ord "A");
228	15752					27170	$c /= 26;
229							}
230	15735					41770	$cell;
231							} # col2label
232
233							# cr2cell (4, 18) => "D18"
234							# No prototype to allow 'cr2cell (@rowcol)'
235							sub cr2cell {
236	15537	100		15537	1	225094	ref $_[0] eq __PACKAGE__ and shift;
237	15537					24494	my ($c, $r) = @_;
238	15537	100	100			71221	defined $c && defined $r && $c > 0 && $r > 0 or return "";
			100
			100
239	15531					23927	col2label ($c) . $r;
240							} # cr2cell
241
242							# cell2cr ("D18") => (4, 18)
243							sub cell2cr {
244	620	100		620	1	690191	ref $_[0] eq __PACKAGE__ and shift;
245	620	100	100			6048	my ($cc, $r) = (uc ($_[0]\|\|"") =~ m/^([A-Z]+)([0-9]+)$/) or return (0, 0);
246	616					1298	my $c = 0;
247	616					2537	while ($cc =~ s/^([A-Z])//) {
248	624					2497	$c = 26 * $c + 1 + ord ($1) - ord ("A");
249							}
250	616					2221	($c, $r);
251							} # cell2cr
252
253							# my @row = cellrow ($book->[1], 1);
254							# my @row = $book->cellrow (1, 1);
255							sub cellrow {
256	10	100		10	1	70	my $sheet = ref $_[0] eq __PACKAGE__ ? (shift)->[shift] : shift or return;
		100
257	9	100	66			95	ref $sheet eq "HASH" && exists $sheet->{cell} or return;
258	7	50	33			45	exists $sheet->{maxcol} && exists $sheet->{maxrow} or return;
259	7	100				35	my $row = shift or return;
260	6	100	100			51	$row > 0 && $row <= $sheet->{maxrow} or return;
261	4					11	my $s = $sheet->{cell};
262	4					16	map { $s->[$_][$row] } 1..$sheet->{maxcol};
	76					211
263							} # cellrow
264
265							# my @row = row ($book->[1], 1);
266							# my @row = $book->row (1, 1);
267							sub row {
268	10	100		10	1	3820	my $sheet = ref $_[0] eq __PACKAGE__ ? (shift)->[shift] : shift or return;
		100
269	9	100	66			74	ref $sheet eq "HASH" && exists $sheet->{cell} or return;
270	7	50	33			37	exists $sheet->{maxcol} && exists $sheet->{maxrow} or return;
271	7	100				32	my $row = shift or return;
272	6	100	100			41	$row > 0 && $row <= $sheet->{maxrow} or return;
273	4					18	map { $sheet->{cr2cell ($_, $row)} } 1..$sheet->{maxcol};
	76					134
274							} # row
275
276							# Convert {cell}'s [column][row] to a [row][column] list
277							# my @rows = rows ($book->[1]);
278							sub rows {
279	33	50		33	1	22403	my $sheet = ref $_[0] eq __PACKAGE__ ? (shift)->[shift] : shift or return;
		100
280	30	100	100			169	ref $sheet eq "HASH" && exists $sheet->{cell} or return;
281	26	100	100			149	exists $sheet->{maxcol} && exists $sheet->{maxrow} or return;
282	5					16	my $s = $sheet->{cell};
283
284							map {
285	20					32	my $r = $_;
286	20					34	[ map { $s->[$_][$r] } 1..$sheet->{maxcol} ];
	80					181
287	5					25	} 1..$sheet->{maxrow};
288							} # rows
289
290							sub sheet {
291	24			24	1	612	my ($book, $sheet) = @_;
292	24	100	100			124	$book && $sheet or return;
293	22					39	my $class = "Spreadsheet::Read::Sheet";
294							$sheet =~ m/^[0-9]+$/ && $sheet >= 1 && $sheet <= $book->[0]{sheets} and
295	22	100	66			332	return bless $book->[$sheet] => $class;
			100
296							exists $book->[0]{sheet}{$sheet} and
297	6	100				24	return bless $book->[$book->[0]{sheet}{$sheet}] => $class;
298	5					16	foreach my $idx (1 .. $book->[0]{sheets}) {
299	6	100				25	$book->[$idx]{label} eq $sheet and
300							return bless $book->[$idx] => $class;
301							}
302	4					21	return;
303							} # sheet
304
305							# If option "clip" is set, remove the trailing rows and
306							# columns in each sheet that contain no visible data
307							sub _clipsheets {
308	228			228		631	my ($opt, $ref) = @_;
309
310	228	50				736	$ref->[0]{sheets} or return $ref;
311
312	228					644	my ($rc, $cl) = ($opt->{rc}, $opt->{cells});
313	228					685	my ($oc, $os, $oa) = ($opt->{clip}, $opt->{strip}, $opt->{attr});
314
315							# Strip leading/trailing spaces
316	228	100	100			937	if ($os \|\| $oc) {
317	213					653	foreach my $sheet (1 .. $ref->[0]{sheets}) {
318	255					573	$ref->[$sheet]{indx} = $sheet;
319	255					460	my $ss = $ref->[$sheet];
320	255	100	66			1109	$ss->{maxrow} && $ss->{maxcol} or next;
321	248					563	my ($mc, $mr) = (0, 0);
322	248					593	foreach my $row (1 .. $ss->{maxrow}) {
323	1958					3308	foreach my $col (1 .. $ss->{maxcol}) {
324	16652	100				25905	if ($rc) {
325	14754	100				27221	defined $ss->{cell}[$col][$row] or next;
326	5897	100				10078	$os & 2 and $ss->{cell}[$col][$row] =~ s/\s+$//;
327	5897	100				9547	$os & 1 and $ss->{cell}[$col][$row] =~ s/^\s+//;
328	5897	100				10590	if (length $ss->{cell}[$col][$row]) {
329	4432	100				7185	$col > $mc and $mc = $col;
330	4432	100				7530	$row > $mr and $mr = $row;
331							}
332							}
333	7795	100				12164	if ($cl) {
334	7576					11599	my $cell = cr2cell ($col, $row);
335	7576	100				15528	defined $ss->{$cell} or next;
336	6249	100				11234	$os & 2 and $ss->{$cell} =~ s/\s+$//;
337	6249	100				10114	$os & 1 and $ss->{$cell} =~ s/^\s+//;
338	6249	100				11479	if (length $ss->{$cell}) {
339	4772	100				7695	$col > $mc and $mc = $col;
340	4772	100				8905	$row > $mr and $mr = $row;
341							}
342							}
343							}
344							}
345
346	248	100	66			1540	$oc && ($mc < $ss->{maxcol} \|\| $mr < $ss->{maxrow}) or next;
			66
347
348							# Remove trailing empty columns
349	106					254	foreach my $col (($mc + 1) .. $ss->{maxcol}) {
350	243	100				679	$rc and undef $ss->{cell}[$col];
351	243	100				568	$oa and undef $ss->{attr}[$col];
352	243	100				445	$cl or next;
353	193					337	my $c = col2label ($col);
354	193					1438	delete $ss->{"$c$_"} for 1 .. $ss->{maxrow};
355							}
356
357							# Remove trailing empty rows
358	106					269	foreach my $row (($mr + 1) .. $ss->{maxrow}) {
359	139					265	foreach my $col (1 .. $mc) {
360	64	50				159	$cl and delete $ss->{cr2cell ($col, $row)};
361	64	50				213	$rc and undef $ss->{cell} [$col][$row];
362	64	50				1024	$oa and undef $ss->{attr} [$col][$row];
363							}
364							}
365
366	106					312	($ss->{maxrow}, $ss->{maxcol}) = ($mr, $mc);
367							}
368							}
369
370	228	100				708	if ($opt->{pivot}) {
371	1					4	foreach my $sheet (1 .. $ref->[0]{sheets}) {
372	1					4	my $ss = $ref->[$sheet];
373	1	0	33			4	$ss->{maxrow} \|\| $ss->{maxcol} or next;
374	1	50				5	my $mx = $ss->{maxrow} > $ss->{maxcol} ? $ss->{maxrow} : $ss->{maxcol};
375	1					3	foreach my $row (2 .. $mx) {
376	18					38	foreach my $col (1 .. ($row - 1)) {
377							$opt->{rc} and
378							($ss->{cell}[$col][$row], $ss->{cell}[$row][$col]) =
379	171	50				520	($ss->{cell}[$row][$col], $ss->{cell}[$col][$row]);
380							$opt->{cells} and
381							($ss->{cr2cell ($col, $row)}, $ss->{cr2cell ($row, $col)}) =
382	171	50				349	($ss->{cr2cell ($row, $col)}, $ss->{cr2cell ($col, $row)});
383							}
384							}
385	1					4	($ss->{maxcol}, $ss->{maxrow}) = ($ss->{maxrow}, $ss->{maxcol});
386							}
387							}
388
389	228					5183	$ref;
390							} # _clipsheets
391
392							# Convert a single color (index) to a color
393							sub _xls_color {
394	1758			1758		3249	my $clr = shift;
395	1758	100				3493	defined $clr or return undef;
396	866	50				1459	$clr eq "#000000" and return undef;
397	866	50				1719	$clr =~ m/^#[0-9a-fA-F]+$/ and return lc $clr;
398	866	100	66			2434	$clr == 0 \|\| $clr == 32767 and return undef; # Default fg color
399	484					1093	return "#" . lc Spreadsheet::ParseExcel->ColorIdxToRGB ($clr);
400							} # _xls_color
401
402							# Convert a fill [ $pattern, $front_color, $back_color ] to a single background
403							sub _xls_fill {
404	1542			1542		3011	my ($p, $fg, $bg) = @_;
405	1542	50				2656	defined $p or return undef;
406	1542	50				2580	$p == 32767 and return undef; # Default fg color
407	1542	100	100			14827	$p == 0 && !defined $bg and return undef; # No fill bg color
408	650	100				1104	$p == 1 and return _xls_color ($fg);
409	434	50	33			6146	$bg < 8 \|\| $bg > 63 and return undef; # see Workbook.pm#106
410	0					0	return _xls_color ($bg);
411							} # _xls_fill
412
413							sub ReadData {
414	280	100		280	1	342784	my $txt = shift or return;
415
416	277					573	my %opt;
417	277	100				774	if (@_) {
418	222	100				1117	if (ref $_[0] eq "HASH") { %opt = %{shift @_} }
	20	50				31
	20					79
419	202					776	elsif (@_ % 2 == 0) { %opt = @_ }
420							}
421
422	277	100				1075	exists $opt{rc} or $opt{rc} = $def_opts{rc};
423	277	100				1367	exists $opt{cells} or $opt{cells} = $def_opts{cells};
424	277	100				849	exists $opt{attr} or $opt{attr} = $def_opts{attr};
425	277	100				951	exists $opt{clip} or $opt{clip} = $opt{cells};
426	277	100				748	exists $opt{strip} or $opt{strip} = $def_opts{strip};
427	277	100				888	exists $opt{dtfmt} or $opt{dtfmt} = $def_opts{dtfmt};
428	277	100				771	exists $opt{merge} or $opt{merge} = $def_opts{merge};
429
430							# $debug = $opt{debug} \|\| 0;
431	277	100				783	$debug = defined $opt{debug} ? $opt{debug} : $def_opts{debug};
432	277	100				702	$debug > 4 and _dump (Options => \%opt);
433
434	31					97	my %parser_opts = map { $_ => $opt{$_} }
435	277					1036	grep { !exists $def_opts{$_} }
	2047					3971
436							keys %opt;
437
438	277					1216	my $_parser = _parser ($opt{parser});
439
440	277	100				1172	my $io_ref = ref ($txt) =~ m/GLOB\|IO/ ? $txt : undef;
441							my $io_fil = $io_ref ? 0 : $txt =~ m/\0/ ? 0
442	58	100		58		129661	: do { no warnings "newline"; -f $txt };
	58	100				168
	58					458040
	277					1053
	256					5473
443	277	100	100			1792	my $io_txt = $io_ref \|\| $io_fil ? 0 : 1;
444
445	277	100	100			4128	$io_fil && ! -s $txt and do { $@ = "$txt is empty"; return };
	11					63
	11					68
446	266	100	100			1445	$io_ref && eof $txt and do { $@ = "Empty stream"; return };
	5					22
	5					88
447
448	261	100	100			2023	if ($opt{parser} ? $_parser eq "csv" : ($io_fil && $txt =~ m/\.(csv)$/i)) {
		100
449	106	50				358	$can{csv} or croak "CSV parser not installed";
450
451	106	100				357	my $label = defined $opt{label} ? $opt{label} : $io_fil ? $txt : "IO";
		100
452
453	106	50				286	$debug and print STDERR "Opening CSV $label using $can{csv}-", $can{csv}->VERSION, "\n";
454
455							my @data = (
456							{ type => "csv",
457							parser => $can{csv},
458							version => $can{csv}->VERSION,
459							parsers => [ {
460							type => "csv",
461							parser => $can{csv},
462							version => $can{csv}->VERSION,
463	106					3215	}],
464							error => undef,
465							quote => '"',
466							sepchar => ',',
467							sheets => 1,
468							sheet => { $label => 1 },
469							},
470							{ parser => 0,
471							label => $label,
472							maxrow => 0,
473							maxcol => 0,
474							cell => [],
475							attr => [],
476							merged => [],
477							active => 1,
478							},
479							);
480
481	106					558	my ($sep, $quo, $in) = (",", '"');
482	106	100				323	defined $opt{sep} and $sep = $opt{sep};
483	106	100				248	defined $opt{quote} and $quo = $opt{quote};
484	106	50				282	$debug > 8 and _dump (debug => {
485							data => \@data, txt => $txt, io_ref => $io_ref, io_fil => $io_fil });
486	106	100				275	if ($io_fil) {
		100
		100
		50
487	94	100	100			331	unless (defined $opt{quote} && defined $opt{sep}) {
488	87	50				3475	open $in, "<", $txt or return;
489	87					2121	my $l1 = <$in>;
490
491	87	100				430	$quo = defined $opt{quote} ? $opt{quote} : '"';
492							$sep = # If explicitly set, use it
493							defined $opt{sep} ? $opt{sep} :
494							# otherwise start auto-detect with quoted strings
495	87	50				964	$l1 =~ m/["0-9];["0-9;]/ ? ";" :
		100
		100
		50
		50
		50
		100
		100
		50
496							$l1 =~ m/["0-9],["0-9,]/ ? "," :
497							$l1 =~ m/["0-9]\t["0-9,]/ ? "\t" :
498							$l1 =~ m/["0-9]\\|["0-9,]/ ? "\|" :
499							# If neither, then for unquoted strings
500							$l1 =~ m/\w;[\w;]/ ? ";" :
501							$l1 =~ m/\w,[\w,]/ ? "," :
502							$l1 =~ m/\w\t[\w,]/ ? "\t" :
503							$l1 =~ m/\w\\|[\w,]/ ? "\|" :
504							"," ;
505	87					1308	close $in;
506							}
507	94	50				2858	open $in, "<", $txt or return;
508							}
509							elsif ($io_ref) {
510	10					23	$in = $txt;
511							}
512							elsif (ref $txt eq "SCALAR") {
513	1	50				14	open $in, "<", $txt or croak "Cannot open input: $!";
514							}
515							elsif ($txt =~ m/[\r\n,;]/) {
516	1	50		2		94	open $in, "<", \$txt or croak "Cannot open input: $!";
	2					26
	2					4
	2					21
517							}
518							else {
519	0					0	warn "Input type ", ref $txt,
520							" might not be supported. Please file a ticket\n";
521	0					0	$in = $txt; # Now pray ...
522							}
523	106	50				1398	$debug > 1 and print STDERR "CSV sep_char '$sep', quote_char '$quo'\n";
524							my $csv = $can{csv}->new ({
525							%parser_opts,
526
527							sep_char => ($data[0]{sepchar} = $sep),
528	106	50				1403	quote_char => ($data[0]{quote} = $quo),
529							keep_meta_info => 1,
530							binary => 1,
531							auto_diag => 1,
532							}) or croak "Cannot create a csv ('$sep', '$quo') parser!";
533
534	106					23294	while (my $row = $csv->getline ($in)) {
535	413	50				72606	my @row = @$row or last;
536
537	413					920	my $r = ++$data[1]{maxrow};
538	413	100				1042	@row > $data[1]{maxcol} and $data[1]{maxcol} = @row;
539	413					1153	foreach my $c (0 .. $#row) {
540	2427					3555	my $val = $row[$c];
541	2427					4306	my $cell = cr2cell ($c + 1, $r);
542	2427	100				7000	$opt{rc} and $data[1]{cell}[$c + 1][$r] = $val;
543	2427	100				6482	$opt{cells} and $data[1]{$cell} = $val;
544	2427	50				12226	$opt{attr} and $data[1]{attr}[$c + 1][$r] = { @def_attr };
545							}
546							}
547	106	50				15169	$csv->eof () or $data[0]{error} = [ $csv->error_diag ];
548	106					1601	close $in;
549
550	106					313	for (@{$data[1]{cell}}) {
	106					536
551	734	100				1624	defined or $_ = [];
552							}
553	106					437	return _clipsheets \%opt, [ @data ];
554							}
555
556	155	100				399	if ($io_txt) { # && $_parser !~ m/^xlsx?$/) {
557	29	100	66			475	if ( # /etc/magic: Microsoft Office Document
		50	66
		50	66
		100
558							$txt =~ m{\A(\376\067\0\043
559							\|\320\317\021\340\241\261\032\341
560							\|\333\245-\0\0\0)}x
561							# /usr/share/misc/magic
562							\|\| $txt =~ m{\A.{2080}Microsoft Excel 5.0 Worksheet}
563							\|\| $txt =~ m{\A\x09\x04\x06\x00\x00\x00\x10\x00}
564							) {
565	1	50				6	$can{xls} or croak "Spreadsheet::ParseExcel not installed";
566	1					4	my $tmpfile;
567	1	50				5	if ($can{ios}) { # Do not use a temp file if IO::Scalar is available
568	1					3	$tmpfile = \$txt;
569							}
570							else {
571	0					0	$tmpfile = File::Temp->new (SUFFIX => ".xls", UNLINK => 1);
572	0					0	binmode $tmpfile;
573	0					0	print $tmpfile $txt;
574	0					0	close $tmpfile;
575							}
576	1	50				53	open $io_ref, "<", $tmpfile or do { $@ = $!; return };
	0					0
	0					0
577	1					1007	$io_txt = 0;
578	1					9	$_parser = _parser ($opt{parser} = "xls");
579							}
580							elsif ( # /usr/share/misc/magic
581							$txt =~ m{\APK\003\004.{4,30}(?:\[Content_Types\]\.xml\|_rels/\.rels)}
582							) {
583	0	0				0	$can{xlsx} or croak "XLSX parser not installed";
584	0					0	my $tmpfile;
585	0	0				0	if ($can{ios}) { # Do not use a temp file if IO::Scalar is available
586	0					0	$tmpfile = \$txt;
587							}
588							else {
589	0					0	$tmpfile = File::Temp->new (SUFFIX => ".xlsx", UNLINK => 1);
590	0					0	binmode $tmpfile;
591	0					0	print $tmpfile $txt;
592	0					0	close $tmpfile;
593							}
594	0	0				0	open $io_ref, "<", $tmpfile or do { $@ = $!; return };
	0					0
	0					0
595	0					0	$io_txt = 0;
596	0					0	$_parser = _parser ($opt{parser} = "xlsx");
597							}
598							elsif ( # /usr/share/misc/magic
599							$txt =~ m{\APK\003\004.{9,30}\Qmimetypeapplication/vnd.oasis.opendocument.spreadsheet}
600							) {
601	0	0				0	$can{ods} or croak "ODS parser not installed";
602	0					0	my $tmpfile;
603	0	0				0	if ($can{ios}) { # Do not use a temp file if IO::Scalar is available
604	0					0	$tmpfile = \$txt;
605							}
606							else {
607	0					0	$tmpfile = File::Temp->new (SUFFIX => ".ods", UNLINK => 1);
608	0					0	binmode $tmpfile;
609	0					0	print $tmpfile $txt;
610	0					0	close $tmpfile;
611							}
612	0	0				0	open $io_ref, "<", $tmpfile or do { $@ = $!; return };
	0					0
	0					0
613	0					0	$io_txt = 0;
614	0					0	$_parser = _parser ($opt{parser} = "ods");
615							}
616							elsif (!$io_ref && $txt =~ m/\.xls[xm]?$/i) {
617	3					19	$@ = "Cannot open $txt as file";
618	3					20	return;
619							}
620							}
621	152	100	66			1178	if ($opt{parser} ? $_parser =~ m/^(?:xlsx?)$/
		100
622							: ($io_fil && $txt =~ m/\.(xls[xm]?)$/i &&
623							($_parser = _parser ($1)))) {
624	76	100				380	my $parse_type = $_parser =~ m/x$/i ? "XLSX" : "XLS";
625	76	50				326	my $parser = $can{lc $parse_type} or
626							croak "Parser for $parse_type is not installed";
627							#$debug and print STDERR __FILE__, "#", __LINE__, " \| $_parser \| $parser \| $parse_type\n";
628							$debug and print STDERR "Opening $parse_type ", $io_ref ? "<REF>" : $txt,
629	76	50				360	" using $parser-", $can{lc $parse_type}->VERSION, "\n";
		100
630	76	50				267	$opt{passwd} and $parser_opts{Password} = $opt{passwd};
631	76					152	my $oBook = eval {
632							$io_ref
633							? $parse_type eq "XLSX"
634							? $can{xlsx} =~ m/::XLSX$/
635							? $parser->new ($io_ref)
636							: $parser->new (%parser_opts)->parse ($io_ref)
637							: $parser->new (%parser_opts)->Parse ($io_ref)
638							: $parse_type eq "XLSX"
639	76	0				866	? $can{xlsx} =~ m/::XLSX$/
		50
		50
		100
		100
640							? $parser->new ($txt)
641							: $parser->new (%parser_opts)->parse ($txt)
642							: $parser->new (%parser_opts)->Parse ($txt);
643							};
644	76	50				2249905	unless ($oBook) {
645							# cleanup will fail on folders with spaces.
646	0					0	(my $msg = $@) =~ s/ at \S+ line \d+.*//s;
647	0					0	croak "$parse_type parser cannot parse data: $msg";
648							}
649	76	50				284	$debug > 8 and _dump (oBook => $oBook);
650
651							# WorkBook keys:
652							# aColor _CurSheet Format SheetCount
653							# ActiveSheet _CurSheet_ FormatStr _skip_chart
654							# Author File NotSetCell _string_contin
655							# BIFFVersion Flg1904 Object Version
656							# _buffer FmtClass PkgStr Worksheet
657							# CellHandler Font _previous_info
658
659							my @data = ( {
660							type => lc $parse_type,
661							parser => $can{lc $parse_type},
662							version => $can{lc $parse_type}->VERSION,
663							parsers => [{
664							type => lc $parse_type,
665							parser => $can{lc $parse_type},
666							version => $can{lc $parse_type}->VERSION,
667							}],
668							error => undef,
669	76		50			2469	sheets => $oBook->{SheetCount} \|\| 0,
670							sheet => {},
671							} );
672							# Overrule the default date format strings
673							my %def_fmt = (
674							0x0E => lc $opt{dtfmt}, # m-d-yy
675	76					652	0x0F => "d-mmm-yyyy", # d-mmm-yy
676							0x11 => "mmm-yyyy", # mmm-yy
677							0x16 => "yyyy-mm-dd hh:mm", # m-d-yy h:mm
678							);
679	76					586	$oBook->{FormatStr}{$_} = $def_fmt{$_} for keys %def_fmt;
680							my $oFmt = $parse_type eq "XLSX"
681	76	50				742	? $can{xlsx} =~ m/::XLSX$/
		100
682							? Spreadsheet::XLSX::Fmt2007->new
683							: Spreadsheet::ParseExcel::FmtDefault->new
684							: Spreadsheet::ParseExcel::FmtDefault->new;
685
686	76	50				916	$debug > 20 and _dump ("oBook before conversion", $oBook);
687	76	50	33			306	if ($parse_type eq "ODS" and !exists $oBook->{SheetCount}) {
688	0					0	my $styles = delete $oBook->{_styles};
689	0					0	my $sheets = delete $oBook->{_sheets};
690	0	0	0			0	if ($sheets && ref $sheets eq "ARRAY") {
691	0		0			0	$styles = ($styles \|\| {})->{styles} \|\| {};
692	0					0	$data[0]{sheets} = $oBook->{SheetCount} = scalar @{$sheets};
	0					0
693	0					0	$oBook->{Worksheet} = [];
694	0			0		0	*S::R::Sheet::get_merged_areas = sub { [] };
	0					0
695	0					0	my $x = 0;
696	0					0	foreach my $sh (@{$sheets}) {
	0					0
697	0					0	push @{$oBook->{Worksheet}} => bless {
698							Name => $sh->{label},
699							Cells => [],
700							MinRow => $sh->{col_min},
701							MaxRow => $sh->{row_max},
702							MinCol => $sh->{col_min},
703							MaxCol => $sh->{row_max},
704							RowHidden => $sh->{hidden_rows},
705							ColHidden => $sh->{hidden_cols},
706	0					0	_SheetNo => $x++,
707							} => "S::R::Sheet";
708							# header_cols
709							# header_rows
710							# print_areas
711							# sheet_hidden
712							# tab_color
713	0			0		0	*S::R::Cell::Value = sub { $_[0]{Raw} };
	0					0
714	0			0		0	*S::R::Cell::is_merged = sub { 0 };
	0					0
715	0					0	my $r = 0;
716	0					0	foreach my $row (@{$sh->{data}}) {
	0					0
717							$#$row > $oBook->{Worksheet}[-1]{MaxCol} and
718	0	0				0	$oBook->{Worksheet}[-1]{MaxCol} = $#$row;
719							$oBook->{Worksheet}[-1]{Cells}[$r++] = [ map { bless {
720							Code => undef,
721							Format => $_->{format},
722							Formula => $_->{formula},
723							Hidden => undef,
724							Merged => undef,
725							# use \|\| instead of // for now
726							# even though it is undesirable
727							Type => $_->{type} \|\| "",
728							Val => $_->{value} \|\| $_->{unformatted},
729							Raw => $_->{unformatted} \|\| $_->{value},
730							_Style => $styles->{$_->{style} \|\| ""}
731							\|\| $_->{style},
732							# hyperlink
733	0		0			0	} => "S::R::Cell" } @{$row} ];
	0		0			0
	0		0			0
			0
734							}
735							--$r > $oBook->{Worksheet}[-1]{MaxRow} and
736	0	0				0	$oBook->{Worksheet}[-1]{MaxRow} = $r;
737							}
738							}
739							}
740
741	76	100				347	$debug and print STDERR "\t$data[0]{sheets} sheets\n";
742							my $active_sheet = $oBook->get_active_sheet
743							\|\| $oBook->{ActiveSheet}
744	76		33			413	\|\| $oBook->{SelectedSheet};
745	76					939	my $current_sheet = 0;
746	76					156	foreach my $oWkS (@{$oBook->{Worksheet}}) {
	76					270
747	122	50				354	$debug > 8 and _dump ("oWkS", $oWkS);
748	122					198	$current_sheet++;
749	122	100	100			548	$opt{clip} and !defined $oWkS->{Cells} and next; # Skip empty sheets
750							my %sheet = (
751							parser => 0,
752							label => $oWkS->{Name},
753	116					808	maxrow => 0,
754							maxcol => 0,
755							cell => [],
756							attr => [],
757							merged => [],
758							active => 0,
759							);
760							# $debug and $sheet{_parser} = $oWkS;
761	116	50				377	defined $sheet{label} or $sheet{label} = "-- unlabeled --";
762	116	100				415	exists $oWkS->{MinRow} and $sheet{minrow} = $oWkS->{MinRow} + 1;
763	116	100				365	exists $oWkS->{MaxRow} and $sheet{maxrow} = $oWkS->{MaxRow} + 1;
764	116	100				362	exists $oWkS->{MinCol} and $sheet{mincol} = $oWkS->{MinCol} + 1;
765	116	100				295	exists $oWkS->{MaxCol} and $sheet{maxcol} = $oWkS->{MaxCol} + 1;
766							$sheet{merged} = [
767	14					37	map { $_->[0] }
768	5					30	sort { $a->[1] cmp $b->[1] }
769	14					125	map {[ $_, pack "NNNN", @$_ ]}
770	14					100	map {[ map { $_ + 1 } @{$_}[1,0,3,2] ]}
	56					111
	14					48
771	116	100				231	@{$oWkS->get_merged_areas \|\| []}];
	116					523
772	116					1112	my $sheet_idx = 1 + @data;
773	116	100				390	$debug and print STDERR "\tSheet $sheet_idx '$sheet{label}' $sheet{maxrow} x $sheet{maxcol}\n";
774	116	100				289	if (defined $active_sheet) {
775							# _SheetNo is 0-based
776	7	50				21	my $sheet_no = defined $oWkS->{_SheetNo} ? $oWkS->{_SheetNo} : $current_sheet - 1;
777	7	100				23	$sheet_no eq $active_sheet and $sheet{active} = 1;
778							}
779							# Sheet keys:
780							# _Book FooterMargin MinCol RightMargin
781							# BottomMargin FooterMergin MinRow RightMergin
782							# BottomMergin HCenter Name RowHeight
783							# Cells Header NoColor RowHidden
784							# ColFmtNo HeaderMargin NoOrient Scale
785							# ColHidden HeaderMergin NoPls SheetHidden
786							# ColWidth Kind Notes _SheetNo
787							# Copis Landscape PageFit SheetType
788							# DefColWidth LeftMargin PageStart SheetVersion
789							# DefRowHeight LeftMergin PaperSize TopMargin
790							# Draft LeftToRight _Pos TopMergin
791							# FitHeight MaxCol PrintGrid UsePage
792							# FitWidth MaxRow PrintHeaders VCenter
793							# Footer MergedArea Res VRes
794	116	100				309	if (exists $oWkS->{MinRow}) {
795	111		50			550	my $hiddenRows = $oWkS->{RowHidden} \|\| [];
796	111		50			467	my $hiddenCols = $oWkS->{ColHidden} \|\| [];
797	111	100				346	if ($opt{clip}) {
798	92					250	my ($mr, $mc) = (-1, -1);
799	92					304	foreach my $r ($oWkS->{MinRow} .. $sheet{maxrow}) {
800	522					902	foreach my $c ($oWkS->{MinCol} .. $sheet{maxcol}) {
801	2938	100				5485	my $oWkC = $oWkS->{Cells}[$r][$c] or next;
802	1717	50				3279	defined (my $val = $oWkC->{Val}) or next;
803	1717	100				3171	$val eq "" and next;
804	1690	100				2831	$r > $mr and $mr = $r;
805	1690	100				2952	$c > $mc and $mc = $c;
806							}
807							}
808	92					317	($sheet{maxrow}, $sheet{maxcol}) = ($mr + 1, $mc + 1);
809							}
810	111					294	foreach my $r ($oWkS->{MinRow} .. $sheet{maxrow}) {
811	602					1154	foreach my $c ($oWkS->{MinCol} .. $sheet{maxcol}) {
812	3157	100				6913	my $oWkC = $oWkS->{Cells}[$r][$c] or next;
813							#defined (my $val = $oWkC->{Val}) or next;
814	1818					2808	my $val = $oWkC->{Val};
815	1818	50	33			5564	if (defined $val and my $enc = $oWkC->{Code}) {
816	0	0				0	$enc eq "ucs2" and $val = decode ("utf-16be", $val);
817							}
818	1818					3883	my $cell = cr2cell ($c + 1, $r + 1);
819	1818	100				5312	$opt{rc} and $sheet{cell}[$c + 1][$r + 1] = $val; # Original
820
821	1818					2461	my $fmt;
822	1818					2669	my $FmT = $oWkC->{Format};
823	1818	100				3087	if ($FmT) {
824	968	100				1888	unless (ref $FmT) {
825	130					186	$fmt = $FmT;
826	130					202	$FmT = {};
827							}
828							}
829							else {
830	850					1343	$FmT = {};
831							}
832	1818					3034	foreach my $attr (qw( AlignH AlignV FmtIdx Hidden Lock
833							Wrap )) {
834	10908	100				20618	exists $FmT->{$attr} or $FmT->{$attr} = 0;
835							}
836	1818	100				3764	exists $FmT->{Fill} or $FmT->{Fill} = [ 0 ];
837	1818	100				3943	exists $FmT->{Font} or $FmT->{Font} = undef;
838
839	1818	100				3252	unless (defined $fmt) {
840							$fmt = $FmT->{FmtIdx}
841							? $oBook->{FormatStr}{$FmT->{FmtIdx}}
842	1688	100				3178	: undef;
843							}
844							lc $oWkC->{Type} eq "float" and
845	1818	50				3745	$oWkC->{Type} = "Numeric";
846	1818	100				3223	if ($oWkC->{Type} eq "Numeric") {
847							# Fixed in 0.33 and up
848							# see Spreadsheet/ParseExcel/FmtDefault.pm
849							$FmT->{FmtIdx} == 0x0e \|\|
850							$FmT->{FmtIdx} == 0x0f \|\|
851							$FmT->{FmtIdx} == 0x10 \|\|
852							$FmT->{FmtIdx} == 0x11 \|\|
853							$FmT->{FmtIdx} == 0x16 \|\|
854							(defined $fmt && $fmt =~ m{^[dmy][-\\/dmy]*$}) and
855	370	100	33			3321	$oWkC->{Type} = "Date";
			33
			33
			33
			100
			66
856							$FmT->{FmtIdx} == 0x09 \|\|
857							$FmT->{FmtIdx} == 0x0a \|\|
858							(defined $fmt && $fmt =~ m{^0+\.0+%$}) and
859	370	100	100			2217	$oWkC->{Type} = "Percentage";
			100
			100
860							}
861	1818	100				3332	defined $fmt and $fmt =~ s/\\//g;
862							$opt{cells} and # Formatted value
863							$sheet{$cell} = defined $val ? $FmT && exists $def_fmt{$FmT->{FmtIdx}}
864	1818	100	66			9515	? $oFmt->ValFmt ($oWkC, $oBook)
		50
		100
865							: $oWkC->Value : undef;
866	1818	100				15533	if ($opt{attr}) {
867	1542					2221	my $FnT = $FmT->{Font};
868							my $fmi = $FmT->{FmtIdx}
869							? $oBook->{FormatStr}{$FmT->{FmtIdx}}
870	1542	100				2671	: undef;
871	1542	100				2605	$fmi and $fmi =~ s/\\//g;
872	1542		100			3663	my $merged = (defined $oWkC->{Merged} ? $oWkC->{Merged} : $oWkC->is_merged) \|\| 0;
873							$sheet{attr}[$c + 1][$r + 1] = {
874							@def_attr,
875
876							type => lc $oWkC->{Type},
877							enc => $oWkC->{Code},
878							merged => $merged,
879							hidden => ($hiddenRows->[$r] \|\| $hiddenCols->[$c] ? 1 :
880							defined $oWkC->{Hidden} ? $oWkC->{Hidden} : $FmT->{Hidden}) \|\| 0,
881							locked => $FmT->{Lock} \|\| 0,
882							format => $fmi,
883							halign => [ undef, qw( left center right
884							fill justify ), undef,
885							"equal_space" ]->[$FmT->{AlignH}],
886							valign => [ qw( top center bottom justify
887							equal_space )]->[$FmT->{AlignV}],
888							wrap => $FmT->{Wrap},
889							font => $FnT->{Name},
890							size => $FnT->{Height},
891							bold => $FnT->{Bold},
892							italic => $FnT->{Italic},
893							uline => $FnT->{Underline},
894							fgcolor => _xls_color ($FnT->{Color}),
895	1542					6211	bgcolor => _xls_fill (@{$FmT->{Fill}}),
896							formula => $oWkC->{Formula},
897	1542		100			21283	};
			100
898							#_dump "cell", $sheet{attr}[$c + 1][$r + 1];
899	1542	100	100			11792	if ($opt{merge} && $merged and
			66
900							my $p_cell = Spreadsheet::Read::Sheet::merged_from (\%sheet, $c + 1, $r + 1)) {
901	6					64	warn $p_cell;
902	6					2928	$sheet{attr}[$c + 1][$r + 1]{merged} = $p_cell;
903	6	100				19	if ($cell ne $p_cell) {
904	4					10	my ($C, $R) = cell2cr ($p_cell);
905							$sheet{cell}[$c + 1][$r + 1] =
906	4					14	$sheet{cell}[$C][$R];
907	4					10	$sheet{$cell} = $sheet{$p_cell};
908							}
909							}
910							}
911							}
912							}
913							}
914	116					221	for (@{$sheet{cell}}) {
	116					374
915	414	100				1023	defined or $_ = [];
916							}
917	116					1962	push @data, { %sheet };
918							# $data[0]{sheets}++;
919	116	50				507	if ($sheet{label} eq "-- unlabeled --") {
920	0					0	$sheet{label} = "";
921							}
922							else {
923	116					738	$data[0]{sheet}{$sheet{label}} = $#data;
924							}
925							}
926	76					407	return _clipsheets \%opt, [ @data ];
927							}
928	76	100	33			487	if ($opt{parser} ? $_parser =~ m/^(ods)$/
		50	0
			33
929							: ($io_fil && $txt =~ m/(ods)$/i &&
930							($_parser = _parser ($1)))
931							and ($can{$_parser} \|\| "") !~ m/sxc/i) {
932	0					0	my $parse_type = "ODS";
933	0	0				0	my $parser = $can{lc $parse_type} or
934							croak "Parser for $parse_type is not installed";
935							#$debug and print STDERR __FILE__, "#", __LINE__, " \| $_parser \| $parser \| $parse_type\n";
936							$debug and print STDERR "Opening $parse_type ", $io_ref ? "<REF>" : $txt,
937	0	0				0	" using $parser-", $can{lc $parse_type}->VERSION, "\n";
		0
938	0	0				0	$opt{passwd} and $parser_opts{Password} = $opt{passwd};
939	0					0	my $oBook = eval {
940	0	0				0	$io_ref
941							? $parser->new (readonly => 1, %parser_opts)->parse ($io_ref)
942							: $parser->new (readonly => 1, %parser_opts)->parse ($txt)
943							};
944	0	0				0	unless ($oBook) {
945							# cleanup will fail on folders with spaces.
946	0					0	(my $msg = $@) =~ s/ at \S+ line \d+.*//s;
947	0					0	croak "$parse_type parser cannot parse data: $msg";
948							}
949	0	0				0	$debug > 8 and _dump (oBook => $oBook);
950
951							my @data = ( {
952							type => lc $parse_type,
953							parser => $can{lc $parse_type},
954							version => $can{lc $parse_type}->VERSION,
955							parsers => [{
956							type => lc $parse_type,
957							parser => $can{lc $parse_type},
958	0					0	version => $can{lc $parse_type}->VERSION,
959							}],
960							error => undef,
961							sheets => scalar $oBook->worksheets,
962							sheet => {},
963							} );
964							# $debug and $data[0]{_parser} = $oBook;
965
966	0	0				0	$debug and print STDERR "\t$data[0]{sheets} sheets\n";
967	0					0	my $active_sheet = $oBook->get_active_sheet;
968	0					0	my $current_sheet = 0;
969	0					0	foreach my $oWkS ($oBook->worksheets) {
970	0					0	$current_sheet++;
971	0	0	0			0	$opt{clip} && $oWkS->row_max < $oWkS->row_min
			0
972							&& $oWkS->col_max < $oWkS->col_min and next; # Skip empty sheets
973	0					0	my %sheet = (
974							parser => 0,
975							label => $oWkS->label,
976							maxrow => $oWkS->row_max+1,
977							maxcol => $oWkS->col_max+1,
978							cell => [],
979							attr => [],
980							merged => [],
981							active => 0,
982							);
983							# $debug and $sheet{_parser} = $oWkS;
984	0	0				0	defined $sheet{label} or $sheet{label} = "-- unlabeled --";
985							$sheet{merged} = [
986	0					0	map { $_->[0] }
987	0					0	sort { $a->[1] cmp $b->[1] }
988	0					0	map {[ $_, pack "NNNN", @$_ ]}
989	0					0	map {[ map { $_ + 1 } @{$_}[1,0,3,2] ]}
	0					0
	0					0
990	0	0				0	@{$oWkS->get_merged_areas \|\| []}];
	0					0
991	0					0	my $sheet_idx = 1 + @data;
992	0	0				0	$debug and print STDERR "\tSheet $sheet_idx '$sheet{label}' $sheet{maxrow} x $sheet{maxcol}\n";
993	0	0				0	if (defined $active_sheet) {
994	0					0	my $sheet_no = $current_sheet - 1;
995	0	0				0	$sheet_no eq $active_sheet and $sheet{active} = 1;
996							}
997	0		0			0	my $hiddenRows = $oWkS->hidden_rows \|\| [];
998	0		0			0	my $hiddenCols = $oWkS->hidden_cols \|\| [];
999	0	0				0	if ($opt{clip}) {
1000	0					0	my ($mr, $mc) = (-1, -1);
1001	0					0	foreach my $r ($oWkS->row_min .. $sheet{maxrow}-1) {
1002	0					0	foreach my $c ($oWkS->col_min .. $sheet{maxcol}-1) {
1003	0	0				0	my $oWkC = $oWkS->get_cell($r, $c) or next;
1004	0	0				0	defined (my $val = $oWkC->value) or next;
1005	0	0				0	$val eq "" and next;
1006	0	0				0	$r > $mr and $mr = $r;
1007	0	0				0	$c > $mc and $mc = $c;
1008							}
1009							}
1010	0					0	($sheet{maxrow}, $sheet{maxcol}) = ($mr + 1, $mc + 1);
1011							}
1012	0					0	foreach my $r ($oWkS->row_min .. $sheet{maxrow}) {
1013	0					0	foreach my $c ($oWkS->col_min .. $sheet{maxcol}) {
1014	0	0				0	my $oWkC = $oWkS->get_cell($r, $c) or next;
1015	0					0	my $val = $oWkC->unformatted;
1016							#if (defined $val and my $enc = $oWkC->{Code}) {
1017							# $enc eq "ucs2" and $val = decode ("utf-16be", $val);
1018							# }
1019	0					0	my $cell = cr2cell ($c + 1, $r + 1);
1020	0	0				0	$opt{rc} and $sheet{cell}[$c + 1][$r + 1] = $val; # Original
1021
1022	0					0	my $fmt;
1023	0					0	my $styleName = $oWkC->style;
1024	0					0	my $FmT;
1025	0	0	0			0	if ($styleName && defined (my $s = $oBook->_styles->{$styleName})) {
1026	0					0	$fmt = $s;
1027							}
1028
1029	0	0				0	defined $fmt and $fmt =~ s/\\//g;
1030							$opt{cells} and # Formatted value
1031	0	0				0	$sheet{$cell} = defined $val ? $oWkC->value : undef;
		0
1032	0	0				0	if ($opt{attr}) {
1033	0	0				0	my $FnT = $FmT ? $FmT->{font_face} : undef;
1034	0					0	my $fmi;
1035							#my $fmi = $FmT ? $FmT->{FmtIdx}
1036							# ? $oBook->{FormatStr}{$FmT->{FmtIdx}}
1037							# : undef;
1038							#$fmi and $fmi =~ s/\\//g;
1039	0		0			0	my $type = $oWkC->type \|\| '';
1040	0	0				0	$type eq "float" and $type = "numeric";
1041
1042	0		0			0	my $merged = $oWkC->is_merged \|\| 0;
1043	0		0			0	$sheet{attr}[$c + 1][$r + 1] = {
1044							@def_attr,
1045
1046							type => $type,
1047							# enc => $oWkC->{Code},
1048							merged => $merged,
1049							hidden => ($hiddenRows->[$r] \|\| $hiddenCols->[$c] ? 1 :
1050							$oWkC->is_hidden ? $oWkC->is_hidden : undef) \|\| 0,
1051							# locked => $FmT->{Lock} \|\| 0,
1052							format => $fmi,
1053							# halign => [ undef, qw( left center right
1054							# fill justify ), undef,
1055							# "equal_space" ]->[$FmT->{AlignH}],
1056							# valign => [ qw( top center bottom justify
1057							# equal_space )]->[$FmT->{AlignV}],
1058							# wrap => $FmT->{Wrap},
1059							# font => $FnT->{Name},
1060							# size => $FnT->{Height},
1061							# bold => $FnT->{Bold},
1062							# italic => $FnT->{Italic},
1063							# uline => $FnT->{Underline},
1064							# fgcolor => _xls_color ($FnT->{Color}),
1065							# bgcolor => _xls_fill (@{$FmT->{Fill}}),
1066							formula => $oWkC->formula,
1067							};
1068							#_dump "cell", $sheet{attr}[$c + 1][$r + 1];
1069	0	0	0			0	if ($opt{merge} && $merged and
			0
1070							my $p_cell = Spreadsheet::Read::Sheet::merged_from(\%sheet, $c + 1, $r + 1)) {
1071	0					0	$sheet{attr}[$c + 1][$r + 1]{merged} = $p_cell;
1072	0	0				0	if ($cell ne $p_cell) {
1073	0					0	my ($C, $R) = cell2cr ($p_cell);
1074							$sheet{cell}[$c + 1][$r + 1] =
1075	0					0	$sheet{cell}[$C][$R];
1076	0					0	$sheet{$cell} = $sheet{$p_cell};
1077							}
1078							}
1079							}
1080							}
1081							}
1082	0					0	for (@{$sheet{cell}}) {
	0					0
1083	0	0				0	defined or $_ = [];
1084							}
1085	0					0	push @data, { %sheet };
1086	0	0				0	if ($sheet{label} eq "-- unlabeled --") {
1087	0					0	$sheet{label} = "";
1088							}
1089							else {
1090	0					0	$data[0]{sheet}{$sheet{label}} = $#data;
1091							}
1092							}
1093	0					0	return _clipsheets \%opt, [ @data ];
1094							}
1095
1096	76	100				418	if ($opt{parser} ? _parser ($opt{parser}) eq "sc"
		100
		100
1097							: $io_fil
1098							? $txt =~ m/\.sc$/
1099							: $txt =~ m/^# .*SquirrelCalc/) {
1100	46	100				111	if ($io_ref) {
		100
1101	2					8	local $/;
1102	2					35	my $x = <$txt>;
1103	2					10	$txt = $x;
1104							}
1105							elsif ($io_fil) {
1106	42					197	local $/;
1107	42	50				1546	open my $sc, "<", $txt or return;
1108	42					1839	$txt = <$sc>;
1109	42					750	close $sc;
1110							}
1111	46	50				297	$txt =~ m/\S/ or return;
1112	46	50				158	my $label = defined $opt{label} ? $opt{label} : "sheet";
1113	46					593	my @data = (
1114							{ type => "sc",
1115							parser => "Spreadsheet::Read",
1116							version => $VERSION,
1117							parsers => [{
1118							type => "sc",
1119							parser => "Spreadsheet::Read",
1120							version => $VERSION,
1121							}],
1122							error => undef,
1123							sheets => 1,
1124							sheet => { $label => 1 },
1125							},
1126							{ parser => 0,
1127							label => $label,
1128							maxrow => 0,
1129							maxcol => 0,
1130							cell => [],
1131							attr => [],
1132							merged => [],
1133							active => 1,
1134							},
1135							);
1136
1137	46					9671	for (split m/\s[\r\n]\s/, $txt) {
1138	13478	100				21913	if (m/^dimension.of ([0-9]+) rows.of ([0-9]+) columns/i) {
1139	46					77	@{$data[1]}{qw(maxrow maxcol)} = ($1, $2);
	46					232
1140	46					112	next;
1141							}
1142	13432	100				32829	s/^r([0-9]+)c([0-9]+)\s=\s// or next;
1143	3542					6394	my ($c, $r) = map { $_ + 1 } $2, $1;
	7084					15274
1144	3542	100	66			15102	if (m/.* \{(.)}$/ or m/"(.)"/) {
1145	2714					5077	my $cell = cr2cell ($c, $r);
1146	2714	100				8367	$opt{rc} and $data[1]{cell}[$c][$r] = $1;
1147	2714	50				7592	$opt{cells} and $data[1]{$cell} = $1;
1148	2714	100				6552	$opt{attr} and $data[1]{attr}[$c + 1][$r] = { @def_attr };
1149	2714					4821	next;
1150							}
1151							# Now only formula's remain. Ignore for now
1152							# r67c7 = [P2L] 2(1000r67c5-60)
1153							}
1154	46					770	for (@{$data[1]{cell}}) {
	46					171
1155	360	100				713	defined or $_ = [];
1156							}
1157	46					155	return _clipsheets \%opt, [ @data ];
1158							}
1159
1160	30	50	66			408	if ($opt{parser} ? _parser ($opt{parser}) eq "sxc"
		100
1161							: ($txt =~ m/^<\?xml/ or -f $txt)) {
1162	7	50				997	$can{sxc} or croak "Spreadsheet::ReadSXC not installed";
1163
1164	0	0	0			0	ref $txt && $can{sxc}->VERSION <= 0.23 and
1165							croak ("Sorry, references as input are not supported by Spreadsheet::ReadSXC before 0.23");
1166
1167	0					0	my $using = "using $can{sxc}-" . $can{sxc}->VERSION;
1168	0					0	my $sxc_options = { %parser_opts, OrderBySheet => 1 }; # New interface 0.20 and up
1169	0					0	my $sxc;
1170	0	0	0			0	if ($txt =~ m/\.(sxc\|ods)$/i) {
		0
		0
		0
1171	0	0				0	$debug and print STDERR "Opening \U$1\E $txt $using\n";
1172	0	0				0	$debug and print STDERR __FILE__, "#", __LINE__, "\n";
1173	0	0				0	$sxc = Spreadsheet::ReadSXC::read_sxc ($txt, $sxc_options) or return;
1174							}
1175							# treat all refs as a filehandle
1176							elsif (ref $txt) {
1177	0	0				0	$debug and print STDERR "Opening SXC filehandle\n";
1178	0	0				0	$sxc = Spreadsheet::ReadSXC::read_sxc_fh ($txt, $sxc_options) or return;
1179							}
1180							elsif ($txt =~ m/\.xml$/i) {
1181	0	0				0	$debug and print STDERR "Opening XML $txt $using\n";
1182	0	0				0	$sxc = Spreadsheet::ReadSXC::read_xml_file ($txt, $sxc_options) or return;
1183							}
1184							# need to test on pattern to prevent stat warning
1185							# on filename with newline
1186							elsif ($txt !~ m/^<\?xml/i and -f $txt) {
1187	0	0				0	$debug and print STDERR "Opening XML $txt $using\n";
1188	0	0				0	open my $f, "<", $txt or return;
1189	0					0	local $/;
1190	0					0	$txt = <$f>;
1191	0					0	close $f;
1192							}
1193	0	0	0			0	!$sxc && $txt =~ m/^<\?xml/i and
1194							$sxc = Spreadsheet::ReadSXC::read_xml_string ($txt, $sxc_options);
1195	0	0				0	$debug > 8 and _dump (sxc => $sxc);
1196	0	0				0	if ($sxc) {
1197	0					0	my @data = ( {
1198							type => "sxc",
1199							parser => "Spreadsheet::ReadSXC",
1200							version => $Spreadsheet::ReadSXC::VERSION,
1201							parsers => [{
1202							type => "sxc",
1203							parser => "Spreadsheet::ReadSXC",
1204							version => $Spreadsheet::ReadSXC::VERSION,
1205							}],
1206							error => undef,
1207							sheets => 0,
1208							sheet => {},
1209							} );
1210							my @sheets = ref $sxc eq "HASH" # < 0.20
1211							? map {
1212							{ label => $_,
1213	0					0	data => $sxc->{$_},
1214							}
1215							} keys %$sxc
1216	0	0				0	: @{$sxc};
	0					0
1217	0					0	foreach my $sheet (@sheets) {
1218	0	0				0	my @sheet = @{$sheet->{data} \|\| []};
	0					0
1219							my %sheet = (
1220							parser => 0,
1221							label => $sheet->{label},
1222	0					0	maxrow => scalar @sheet,
1223							maxcol => 0,
1224							cell => [],
1225							attr => [],
1226							merged => [],
1227							active => 0,
1228							);
1229	0					0	my $sheet_idx = 1 + @data;
1230	0	0				0	$debug and print STDERR "\tSheet $sheet_idx '$sheet{label}' $sheet{maxrow} rows\n";
1231	0					0	foreach my $r (0 .. $#sheet) {
1232	0	0				0	my @row = @{$sheet[$r]} or next;
	0					0
1233	0					0	foreach my $c (0 .. $#row) {
1234	0	0				0	defined (my $val = $row[$c]) or next;
1235	0					0	my $C = $c + 1;
1236	0	0				0	$C > $sheet{maxcol} and $sheet{maxcol} = $C;
1237	0					0	my $cell = cr2cell ($C, $r + 1);
1238	0	0				0	$opt{rc} and $sheet{cell}[$C][$r + 1] = $val;
1239	0	0				0	$opt{cells} and $sheet{$cell} = $val;
1240	0	0				0	$opt{attr} and $sheet{attr}[$C][$r + 1] = { @def_attr };
1241							}
1242							}
1243	0					0	for (@{$sheet{cell}}) {
	0					0
1244	0	0				0	defined or $_ = [];
1245							}
1246	0	0				0	$debug and print STDERR "\tSheet $sheet_idx '$sheet{label}' $sheet{maxrow} x $sheet{maxcol}\n";
1247	0					0	push @data, { %sheet };
1248	0					0	$data[0]{sheets}++;
1249	0					0	$data[0]{sheet}{$sheet->{label}} = $#data;
1250							}
1251	0					0	return _clipsheets \%opt, [ @data ];
1252							}
1253							}
1254
1255	23	100	100			144	if (!ref $txt and $txt =~ m/\.\w+$/) {
1256							# Return (localized) system message
1257	5	50				130	open my $fh, "<", $txt and
1258							croak "I can open file $txt, but I do not know how to parse it\n";
1259
1260	5					93	$@ = $!;
1261							}
1262
1263	23					128	return;
1264							} # ReadData
1265
1266							sub add {
1267	2			2	1	7	my $book = shift;
1268	2	50				10	my $r = ReadData (@_) or return;
1269							$book && (ref $book eq "ARRAY" \|\|
1270	2	50	33			28	ref $book eq __PACKAGE__) && $book->[0]{sheets} or return $r;
			33
			33
1271
1272	2					5	my $c1 = $book->[0];
1273	2					6	my $c2 = $r->[0];
1274
1275	2	50				7	unless ($c1->{parsers}) {
1276	0					0	$c1->{parsers}[0]{$_} = $c1->{$_} for qw( type parser version );
1277	0					0	$book->[$_]{parser} = 0 for 1 .. $c1->{sheets};
1278							}
1279	2					5	my ($pidx) = (grep { my $p = $c1->{parsers}[$_];
1280							$p->{type} eq $c2->{type} &&
1281							$p->{parser} eq $c2->{parser} &&
1282	2	50	33			6	$p->{version} eq $c2->{version} } 0 .. $#{$c1->{parsers}});
	2					26
	2					7
1283	2	50				8	unless (defined $pidx) {
1284	0					0	$pidx = scalar @{$c1->{parsers}};
	0					0
1285	0					0	$c1->{parsers}[$pidx]{$_} = $c2->{$_} for qw( type parser version );
1286							}
1287
1288	2					6	foreach my $sn (sort { $c2->{sheet}{$a} <=> $c2->{sheet}{$b} } keys %{$c2->{sheet}}) {
	0					0
	2					12
1289	2					5	my $s = $sn;
1290	2					4	my $v = 2;
1291	2					9	while (exists $c1->{sheet}{$s}) {
1292	1					6	$s = $sn."[".$v++."]";
1293							}
1294	2					7	$c1->{sheet}{$s} = $c1->{sheets} + $c2->{sheet}{$sn};
1295	2					6	$r->[$c2->{sheet}{$sn}]{parser} = $pidx;
1296	2					9	push @$book, $r->[$c2->{sheet}{$sn}];
1297							}
1298	2					5	$c1->{sheets} += $c2->{sheets};
1299
1300	2					18	return $book;
1301							} # add
1302
1303							package Spreadsheet::Read::Attribute;
1304
1305	58			58		660	use Carp;
	58					153
	58					4960
1306	58			58		492	use vars qw( $AUTOLOAD );
	58					185
	58					85151
1307
1308							sub AUTOLOAD {
1309	33			33		1778	my $self = shift;
1310	33					132	(my $attr = $AUTOLOAD) =~ s/.*:://;
1311	33					463	$self->{$attr};
1312							} # AUTOLOAD
1313
1314							package Spreadsheet::Read::Sheet;
1315
1316							sub cell {
1317	55			55		19960	my ($sheet, @id) = @_;
1318							@id == 2 && $id[0] =~ m/^[0-9]+$/ && $id[1] =~ m/^[0-9]+$/ and
1319	55	50	66			508	return $sheet->{cell}[$id[0]][$id[1]];
			66
1320							@id && $id[0] && exists $sheet->{$id[0]} and
1321	27	50	33			269	return $sheet->{$id[0]};
			33
1322							} # cell
1323
1324							sub attr {
1325	26			26		63	my ($sheet, @id) = @_;
1326	26					36	my $class = "Spreadsheet::Read::Attribute";
1327							@id == 2 && $id[0] =~ m/^[0-9]+$/ && $id[1] =~ m/^[0-9]+$/ and
1328	26	50	66			239	return bless $sheet->{attr}[$id[0]][$id[1]] => $class;
			66
1329	14	50	33			82	if (@id && $id[0] && exists $sheet->{$id[0]}) {
			33
1330	14					38	my ($c, $r) = $sheet->cell2cr ($id[0]);
1331	14					124	return bless $sheet->{attr}[$c][$r] => $class;
1332							}
1333	0					0	undef;
1334							} # attr
1335
1336							sub maxrow {
1337	2			2		4	my $sheet = shift;
1338	2					13	return $sheet->{maxrow};
1339							} # maxrow
1340
1341							sub maxcol {
1342	2			2		5	my $sheet = shift;
1343	2					11	return $sheet->{maxcol};
1344							} # maxrow
1345
1346							sub col2label {
1347	1	50		1		7	$_[0] =~ m/::/ and shift; # class unused
1348	1					6	return Spreadsheet::Read::col2label (@_);
1349							} # col2label
1350
1351							sub cr2cell {
1352	34	100		34		88	$_[0] =~ m/::/ and shift; # class unused
1353	34					82	return Spreadsheet::Read::cr2cell (@_);
1354							} # cr2cell
1355
1356							sub cell2cr {
1357	33	100		33		11908	$_[0] =~ m/::/ and shift; # class unused
1358	33					93	return Spreadsheet::Read::cell2cr (@_);
1359							} # cell2cr
1360
1361							sub label {
1362	2			2		7	my ($sheet, $label) = @_;
1363	2	100				9	defined $label and $sheet->{label} = $label;
1364	2					10	return $sheet->{label};
1365							} # label
1366
1367							sub active {
1368	1			1		4	my $sheet = shift;
1369	1					5	return $sheet->{active};
1370							} # label
1371
1372							# my @row = $sheet->cellrow (1);
1373							sub cellrow {
1374	4			4		10	my ($sheet, $row) = @_;
1375	4	100	66			35	defined $row && $row > 0 && $row <= $sheet->{maxrow} or return;
			100
1376	1					4	my $s = $sheet->{cell};
1377	1					4	map { $s->[$_][$row] } 1..$sheet->{maxcol};
	19					39
1378							} # cellrow
1379
1380							# my @row = $sheet->row (1);
1381							sub row {
1382	4			4		9	my ($sheet, $row) = @_;
1383	4	100	66			38	defined $row && $row > 0 && $row <= $sheet->{maxrow} or return;
			100
1384	1					8	map { $sheet->{$sheet->cr2cell ($_, $row)} } 1..$sheet->{maxcol};
	19					34
1385							} # row
1386
1387							# my @col = $sheet->cellcolumn (1);
1388							sub cellcolumn {
1389	4			4		13	my ($sheet, $col) = @_;
1390	4	100	66			38	defined $col && $col > 0 && $col <= $sheet->{maxcol} or return;
			100
1391	1					3	my $s = $sheet->{cell};
1392	1					4	map { $s->[$col][$_] } 1..$sheet->{maxrow};
	5					31
1393							} # cellcolumn
1394
1395							# my @col = $sheet->column (1);
1396							sub column {
1397	4			4		14	my ($sheet, $col) = @_;
1398	4	100	66			41	defined $col && $col > 0 && $col <= $sheet->{maxcol} or return;
			100
1399	1					6	map { $sheet->{$sheet->cr2cell ($col, $_)} } 1..$sheet->{maxrow};
	5					11
1400							} # column
1401
1402							# Convert {cell}'s [column][row] to a [row][column] list
1403							# my @rows = $sheet->rows ();
1404							sub rows {
1405	1			1		3	my $sheet = shift;
1406	1					3	my $s = $sheet->{cell};
1407
1408							map {
1409	5					10	my $r = $_;
1410	5					10	[ map { $s->[$_][$r] } 1..$sheet->{maxcol} ];
	95					154
1411	1					8	} 1..$sheet->{maxrow};
1412							} # rows
1413
1414							sub merged_from {
1415	17			17		46	my ($sheet, @id, $col, $row) = @_;
1416	17	50				39	my $ma = $sheet->{merged} or return;
1417	17	100	66			148	if (@id == 2 && $id[0] =~ m/^[0-9]+$/ && $id[1] =~ m/^[0-9]+$/) {
		100	66
			33
			66
1418	10					23	($col, $row) = @id;
1419							}
1420							elsif (@id && $id[0] && exists $sheet->{$id[0]}) {
1421	3					10	($col, $row) = cell2cr ($id[0]);
1422							}
1423	17	100	66			87	defined $row && $row > 0 && $row <= $sheet->{maxrow} or return;
			100
1424	12	50	33			48	defined $col && $col > 0 && $col <= $sheet->{maxcol} or return;
			33
1425	12					18	foreach my $range (@{$ma}) {
	12					21
1426	22					26	my ($ctl, $rtl, $cbr, $rbr) = @{$range};
	22					32
1427	22	100	100			66	$col >= $ctl && $col <= $cbr or next;
1428	12	100	66			30	$row >= $rtl && $row <= $rbr or next;
1429	10					25	return cr2cell ($ctl, $rtl);
1430							}
1431							} # cell
1432
1433							1;
1434
1435							__END__
1436							=head1 DESCRIPTION
1437
1438							Spreadsheet::Read tries to transparently read any spreadsheet and
1439							return its content in a universal manner independent of the parsing
1440							module that does the actual spreadsheet scanning.
1441
1442							For OpenOffice and/or LibreOffice this module uses
1443							L<Spreadsheet::ReadSXC\|https://metacpan.org/release/Spreadsheet-ReadSXC>
1444
1445							For Microsoft Excel this module uses
1446							L<Spreadsheet::ParseExcel\|https://metacpan.org/release/Spreadsheet-ParseExcel>,
1447							L<Spreadsheet::ParseXLSX\|https://metacpan.org/release/Spreadsheet-ParseXLSX>, or
1448							L<Spreadsheet::XLSX\|https://metacpan.org/release/Spreadsheet-XLSX> (stronly
1449							discouraged).
1450
1451							For CSV this module uses L<Text::CSV_XS\|https://metacpan.org/release/Text-CSV_XS>
1452							or L<Text::CSV_PP\|https://metacpan.org/release/Text-CSV>.
1453
1454							For SquirrelCalc there is a very simplistic built-in parser
1455
1456							=head2 Data structure
1457
1458							The data is returned as an array reference:
1459
1460							$book = [
1461							# Entry 0 is the overall control hash
1462							{ sheets => 2,
1463							sheet => {
1464							"Sheet 1" => 1,
1465							"Sheet 2" => 2,
1466							},
1467							parsers => [ {
1468							type => "xls",
1469							parser => "Spreadsheet::ParseExcel",
1470							version => 0.59,
1471							}],
1472							error => undef,
1473							},
1474							# Entry 1 is the first sheet
1475							{ parser => 0,
1476							label => "Sheet 1",
1477							maxrow => 2,
1478							maxcol => 4,
1479							cell => [ undef,
1480							[ undef, 1 ],
1481							[ undef, undef, undef, undef, undef, "Nugget" ],
1482							],
1483							attr => [],
1484							merged => [],
1485							active => 1,
1486							A1 => 1,
1487							B5 => "Nugget",
1488							},
1489							# Entry 2 is the second sheet
1490							{ parser => 0,
1491							label => "Sheet 2",
1492							:
1493							:
1494
1495							To keep as close contact to spreadsheet users, row and column 1 have
1496							index 1 too in the C<cell> element of the sheet hash, so cell "A1" is
1497							the same as C<cell> [1, 1] (column first). To switch between the two,
1498							there are helper functions available: C<cell2cr ()>, C<cr2cell ()>,
1499							and C<col2label ()>.
1500
1501							The C<cell> hash entry contains unformatted data, while the hash entries
1502							with the traditional labels contain the formatted values (if applicable).
1503
1504							The control hash (the first entry in the returned array ref), contains
1505							some spreadsheet meta-data. The entry C<sheet> is there to be able to find
1506							the sheets when accessing them by name:
1507
1508							my %sheet2 = %{$book->[$book->[0]{sheet}{"Sheet 2"}]};
1509
1510							=head2 Formatted vs Unformatted
1511
1512							The difference between formatted and unformatted cells is that the (optional)
1513							format is applied to the cell or not. This part is B<completely> implemented
1514							on the parser side. Spreadsheet::Read just makes both available if these are
1515							supported. Options provide means to disable either. If the parser does not
1516							provide formatted cells - like CSV - both values are equal.
1517
1518							To show what this implies:
1519
1520							use Spreadsheet::Read;
1521
1522							my $file = "files/example.xlsx";
1523							my $workbook = Spreadsheet::Read->new ($file);
1524
1525							my $info = $workbook->[0];
1526							say "Parsed $file with $info->{parser}-$info->{version}";
1527
1528							my $sheet = $workbook->sheet (1);
1529
1530							say join "\t" => "Formatted:", $sheet->row (1);
1531							say join "\t" => "Unformatted:", $sheet->cellrow (1);
1532
1533							Might return very different results depending one the underlying parser (and
1534							its version):
1535
1536							Parsed files/example.xlsx with Spreadsheet::ParseXLSX-0.27
1537							Formatted: 8-Aug Foo & Barr < Quux
1538							Unformatted: 39668 Foo & Barr < Quux
1539
1540							Parsed files/example.xlsx with Spreadsheet::XLSX-0.15
1541							Formatted: 39668 Foo & Barr < Quux
1542							Unformatted: 39668 Foo & Barr < Quux
1543
1544							=head2 Functions and methods
1545
1546							=head3 new
1547
1548							my $book = Spreadsheet::Read->new (...) or die $@;
1549
1550							All options accepted by ReadData are accepted by new.
1551
1552							With no arguments at all, $book will be an object where sheets can be added
1553							using C<add>
1554
1555							my $book = Spreadsheet::Read->new ();
1556							$book->add ("file.csv");
1557							$book->add ("file.cslx");
1558
1559							=head3 ReadData
1560
1561							my $book = ReadData ($source [, option => value [, ... ]]);
1562
1563							my $book = ReadData ("file.csv", sep => ',', quote => '"');
1564
1565							my $book = ReadData ("file.xls", dtfmt => "yyyy-mm-dd");
1566
1567							my $book = ReadData ("file.ods");
1568
1569							my $book = ReadData ("file.sxc");
1570
1571							my $book = ReadData ("content.xml");
1572
1573							my $book = ReadData ($content);
1574
1575							my $book = ReadData ($content, parser => "xlsx");
1576
1577							my $book = ReadData ($fh, parser => "xlsx");
1578
1579							my $book = ReadData (\$content, parser => "xlsx");
1580
1581							Tries to convert the given file, string, or stream to the data structure
1582							described above.
1583
1584							Processing Excel data from a stream or content is supported through a
1585							L<File::Temp\|https://metacpan.org/release/File-Temp> temporary file or
1586							L<IO::Scalar\|https://metacpan.org/release/IO-stringy> when available.
1587
1588							L<Spreadsheet::ReadSXC\|https://metacpan.org/release/Spreadsheet-ReadSXC>
1589							does preserve sheet order as of version 0.20.
1590
1591							Choosing between C<$content> and C<\\$content> (with or without passing
1592							the desired C<parser> option) may be depending on trial and terror.
1593							C<ReadData> does try to determine parser type on content if needed, but
1594							not all combinations are checked, and not all signatures are builtin.
1595
1596							Currently supported options are:
1597
1598							=over 2
1599
1600							=item parser
1601							X<parser>
1602
1603							Force the data to be parsed by a specific format. Possible values are
1604							C<csv>, C<prl> (or C<perl>), C<sc> (or C<squirelcalc>), C<sxc> (or C<oo>,
1605							C<ods>, C<openoffice>, C<libreoffice>) C<xls> (or C<excel>), and C<xlsx>
1606							(or C<excel2007>).
1607
1608							When parsing streams, instead of files, it is highly recommended to pass
1609							this option.
1610
1611							Spreadsheet::Read supports several underlying parsers per spreadsheet
1612							type. It will try those from most favored to least favored. When you
1613							have a good reason to prefer a different parser, you can set that in
1614							environment variables. The other options then will not be tested for:
1615
1616							env SPREADSHEET_READ_CSV=Text::CSV_PP ...
1617
1618							=item cells
1619							X<cells>
1620
1621							Control the generation of named cells ("C<A1>" etc). Default is true.
1622
1623							=item rc
1624
1625							Control the generation of the {cell}[c][r] entries. Default is true.
1626
1627							=item attr
1628
1629							Control the generation of the {attr}[c][r] entries. Default is false.
1630							See L</Cell Attributes> below.
1631
1632							=item clip
1633
1634							If set, L<C<ReadData>\|/ReadData> will remove all trailing rows and columns
1635							per sheet that have no data, where no data means only undefined or empty
1636							cells (after optional stripping). If a sheet has no data at all, the sheet
1637							will be skipped entirely when this attribute is true.
1638
1639							=item strip
1640
1641							If set, L<C<ReadData>\|/ReadData> will remove trailing- and/or
1642							leading-whitespace from every field.
1643
1644							strip leading strailing
1645							----- ------- ---------
1646							0 n/a n/a
1647							1 strip n/a
1648							2 n/a strip
1649							3 strip strip
1650
1651							=item pivot
1652
1653							Swap all rows and columns.
1654
1655							When a sheet contains data like
1656
1657							A1 B1 C1 E1
1658							A2 C2 D2
1659							A3 B3 C3 D3 E3
1660
1661							using C<pivot> will return the sheet data as
1662
1663							A1 A2 A3
1664							B1 B3
1665							C1 C2 C3
1666							D2 D3
1667							E1 E3
1668
1669							=item sep
1670
1671							Set separator for CSV. Default is comma C<,>.
1672
1673							=item quote
1674
1675							Set quote character for CSV. Default is C<">.
1676
1677							=item dtfmt
1678
1679							Set the format for MS-Excel date fields that are set to use the default
1680							date format. The default format in Excel is "C<m-d-yy>", which is both
1681							not year 2000 safe, nor very useful. The default is now "C<yyyy-mm-dd>",
1682							which is more ISO-like.
1683
1684							Note that date formatting in MS-Excel is not reliable at all, as it will
1685							store/replace/change the date field separator in already stored formats
1686							if you change your locale settings. So the above mentioned default can
1687							be either "C<m-d-yy>" OR "C<m/d/yy>" depending on what that specific
1688							character happened to be at the time the user saved the file.
1689
1690							=item merge
1691
1692							Copy content to all cells in merged areas.
1693
1694							If supported, this will copy formatted and unformatted values from the
1695							top-left cell of a merged area to all other cells in the area.
1696
1697							=item debug
1698
1699							Enable some diagnostic messages to STDERR.
1700
1701							The value determines how much diagnostics are dumped (using
1702							L<Data::Peek\|https://metacpan.org/release/Data-Peek>). A value of C<9>
1703							and higher will dump the entire structure from the back-end parser.
1704
1705							=item passwd
1706
1707							Use this password to decrypt password protected spreadsheet.
1708
1709							Currently only supports Excel.
1710
1711							=back
1712
1713							All other attributes/options will be passed to the underlying parser if
1714							that parser supports attributes.
1715
1716							=head3 col2label
1717
1718							my $col_id = col2label (col);
1719
1720							my $col_id = $book->col2label (col); # OO
1721
1722							C<col2label ()> converts a C<(column)> (1 based) to the letters used in the
1723							traditional cell notation:
1724
1725							my $id = col2label ( 4); # $id now "D"
1726							my $id = col2label (28); # $id now "AB"
1727
1728							=head3 cr2cell
1729
1730							my $cell = cr2cell (col, row);
1731
1732							my $cell = $book->cr2cell (col, row); # OO
1733
1734							C<cr2cell ()> converts a C<(column, row)> pair (1 based) to the
1735							traditional cell notation:
1736
1737							my $cell = cr2cell ( 4, 14); # $cell now "D14"
1738							my $cell = cr2cell (28, 4); # $cell now "AB4"
1739
1740							=head3 cell2cr
1741
1742							my ($col, $row) = cell2cr ($cell);
1743
1744							my ($col, $row) = $book->cell2cr ($cell); # OO
1745
1746							C<cell2cr ()> converts traditional cell notation to a C<(column, row)>
1747							pair (1 based):
1748
1749							my ($col, $row) = cell2cr ("D14"); # returns ( 4, 14)
1750							my ($col, $row) = cell2cr ("AB4"); # returns (28, 4)
1751
1752							=head3 row
1753
1754							my @row = row ($sheet, $row)
1755
1756							my @row = Spreadsheet::Read::row ($book->[1], 3);
1757
1758							my @row = $book->row ($sheet, $row); # OO
1759
1760							Get full row of formatted values (like C<< $sheet->{A3} .. $sheet->{G3} >>)
1761
1762							Note that the indexes in the returned list are 0-based.
1763
1764							C<row ()> is not imported by default, so either specify it in the
1765							use argument list, or call it fully qualified.
1766
1767							See also the C<row ()> method on sheets.
1768
1769							=head3 cellrow
1770
1771							my @row = cellrow ($sheet, $row);
1772
1773							my @row = Spreadsheet::Read::cellrow ($book->[1], 3);
1774
1775							my @row = $book->cellrow ($sheet, $row); # OO
1776
1777							Get full row of unformatted values (like C<< $sheet->{cell}[1][3] .. $sheet->{cell}[7][3] >>)
1778
1779							Note that the indexes in the returned list are 0-based.
1780
1781							C<cellrow ()> is not imported by default, so either specify it in the
1782							use argument list, or call it fully qualified or as method call.
1783
1784							See also the C<cellrow ()> method on sheets.
1785
1786							=head3 rows
1787
1788							my @rows = rows ($sheet);
1789
1790							my @rows = Spreadsheet::Read::rows ($book->[1]);
1791
1792							my @rows = $book->rows (1); # OO
1793
1794							Convert C<{cell}>'s C<[column][row]> to a C<[row][column]> list.
1795
1796							Note that the indexes in the returned list are 0-based, where the
1797							index in the C<{cell}> entry is 1-based.
1798
1799							C<rows ()> is not imported by default, so either specify it in the
1800							use argument list, or call it fully qualified.
1801
1802							=head3 parses
1803
1804							parses ($format);
1805
1806							Spreadsheet::Read::parses ("CSV");
1807
1808							$book->parses ("CSV"); # OO
1809
1810							C<parses ()> returns Spreadsheet::Read's capability to parse the
1811							required format. L<C<ReadData>\|/ReadData> will pick its preferred parser
1812							for that format unless overruled. See L<C<parser>\|/parser>.
1813
1814							C<parses ()> is not imported by default, so either specify it in the
1815							use argument list, or call it fully qualified.
1816
1817							=head3 Version
1818
1819							my $v = Version ()
1820
1821							my $v = Spreadsheet::Read::Version ()
1822
1823							my $v = Spreadsheet::Read->VERSION;
1824
1825							my $v = $book->Version (); # OO
1826
1827							Returns the current version of Spreadsheet::Read.
1828
1829							C<Version ()> is not imported by default, so either specify it in the
1830							use argument list, or call it fully qualified.
1831
1832							This function returns exactly the same as C<< Spreadsheet::Read->VERSION >>
1833							returns and is only kept for backward compatibility reasons.
1834
1835							=head3 sheets
1836
1837							my $sheets = $book->sheets; # OO
1838							my @sheets = $book->sheets; # OO
1839
1840							In scalar context return the number of sheets in the book.
1841							In list context return the labels of the sheets in the book.
1842
1843							=head3 sheet
1844
1845							my $sheet = $book->sheet (1); # OO
1846							my $sheet = $book->sheet ("Foo"); # OO
1847
1848							Return the numbered or named sheet out of the book. Will return C<undef> if
1849							there is no match. Will not work for sheets I<named> with a number between 1
1850							and the number of sheets in the book.
1851
1852							With named sheets will first try to use the list of sheet-labels as stored in
1853							the control structure. If no match is found, it will scan the actual labels
1854							of the sheets. In that case, it will return the first matching sheet.
1855
1856							If defined, the returned sheet will be of class C<Spreadsheet::Read::Sheet>.
1857
1858							=head3 add
1859
1860							my $book = ReadData ("file.csv");
1861							Spreadsheet::Read::add ($book, "file.xlsx");
1862
1863							my $book = Spreadsheet::Read->new ("file.csv");
1864							$book->add ("file.xlsx"); # OO
1865
1866							=head2 Methods on sheets
1867
1868							=head3 maxcol
1869
1870							my $col = $sheet->maxcol;
1871
1872							Return the index of the last in-use column in the sheet. This index is 1-based.
1873
1874							=head3 maxrow
1875
1876							my $row = $sheet->maxrow;
1877
1878							Return the index of the last in-use row in the sheet. This index is 1-based.
1879
1880							=head3 cell
1881
1882							my $cell = $sheet->cell ("A3");
1883							my $cell = $sheet->cell (1, 3);
1884
1885							Return the value for a cell. Using tags will return the formatted value,
1886							using column and row will return unformatted value.
1887
1888							=head3 attr
1889
1890							my $cell = $sheet->attr ("A3");
1891							my $cell = $sheet->attr (1, 3);
1892
1893							Return the attributes of a cell. Only valid if attributes are enabled through
1894							option C<attr>.
1895
1896							=head3 col2label
1897
1898							my $col_id = $sheet->col2label (col);
1899
1900							C<col2label ()> converts a C<(column)> (1 based) to the letters used in the
1901							traditional cell notation:
1902
1903							my $id = $sheet->col2label ( 4); # $id now "D"
1904							my $id = $sheet->col2label (28); # $id now "AB"
1905
1906							=head3 cr2cell
1907
1908							my $cell = $sheet->cr2cell (col, row);
1909
1910							C<cr2cell ()> converts a C<(column, row)> pair (1 based) to the
1911							traditional cell notation:
1912
1913							my $cell = $sheet->cr2cell ( 4, 14); # $cell now "D14"
1914							my $cell = $sheet->cr2cell (28, 4); # $cell now "AB4"
1915
1916							=head3 cell2cr
1917
1918							my ($col, $row) = $sheet->cell2cr ($cell);
1919
1920							C<cell2cr ()> converts traditional cell notation to a C<(column, row)>
1921							pair (1 based):
1922
1923							my ($col, $row) = $sheet->cell2cr ("D14"); # returns ( 4, 14)
1924							my ($col, $row) = $sheet->cell2cr ("AB4"); # returns (28, 4)
1925
1926							=head3 col
1927
1928							my @col = $sheet->column ($col);
1929
1930							Get full column of formatted values (like C<< $sheet->{C1} .. $sheet->{C9} >>)
1931
1932							Note that the indexes in the returned list are 0-based.
1933
1934							=head3 cellcolumn
1935
1936							my @col = $sheet->cellcolumn ($col);
1937
1938							Get full column of unformatted values (like C<< $sheet->{cell}[3][1] .. $sheet->{cell}[3][9] >>)
1939
1940							Note that the indexes in the returned list are 0-based.
1941
1942							=head3 row
1943
1944							my @row = $sheet->row ($row);
1945
1946							Get full row of formatted values (like C<< $sheet->{A3} .. $sheet->{G3} >>)
1947
1948							Note that the indexes in the returned list are 0-based.
1949
1950							=head3 cellrow
1951
1952							my @row = $sheet->cellrow ($row);
1953
1954							Get full row of unformatted values (like C<< $sheet->{cell}[1][3] .. $sheet->{cell}[7][3] >>)
1955
1956							Note that the indexes in the returned list are 0-based.
1957
1958							=head3 rows
1959
1960							my @rows = $sheet->rows ();
1961
1962							Convert C<{cell}>'s C<[column][row]> to a C<[row][column]> list.
1963
1964							Note that the indexes in the returned list are 0-based, where the
1965							index in the C<{cell}> entry is 1-based.
1966
1967							=head3 merged_from
1968
1969							my $top_left = $sheet->merged_from ("C2");
1970							my $top_left = $sheet->merged_from (3, 2);
1971
1972							If the parser supports merged areas, this method will return the label of the
1973							top-left cell in the merged area the requested cell is part of.
1974
1975							If the requested ID is valid and withing the sheet cell range, but not part of
1976							a merged area, it will return C<"">.
1977
1978							If the ID is not valid or out of range, it returns C<undef>.
1979
1980							See L<Merged cells\|/merged> for more details.
1981
1982							=head3 label
1983
1984							my $label = $sheet->label;
1985							$sheet->label ("New sheet label");
1986
1987							Set a new label to a sheet. Note that the index in the control structure will
1988							I<NOT> be updated.
1989
1990							=head3 active
1991
1992							my $sheet_is_active = $sheet->active;
1993
1994							Returns 1 if the selected sheet is active, otherwise returns 0.
1995
1996							Currently only works on XLS (as of Spreadsheed::ParseExcel-0.61).
1997							CSV is always active.
1998
1999							=head2 Using CSV
2000
2001							In case of CSV parsing, L<C<ReadData>\|/ReadData> will use the first line of
2002							the file to auto-detect the separation character if the first argument is a
2003							file and both C<sep> and C<quote> are not passed as attributes.
2004							L<Text::CSV_XS\|https://metacpan.org/release/Text-CSV_XS> (or
2005							L<Text::CSV_PP\|https://metacpan.org/release/Text-CSV>) is able to
2006							automatically detect and use C<\r> line endings.
2007
2008							CSV can parse streams too, but be sure to pass C<sep> and/or C<quote> if
2009							these do not match the default C<,> and C<">.
2010
2011							When an error is found in the CSV, it is automatically reported (to STDERR).
2012							The structure will store the error in C<< $ss->[0]{error} >> as anonymous
2013							list returned by
2014							L<C<< $csv->error_diag >>\|https://metacpan.org/pod/Text::CSV_XS#error_diag>.
2015							See L<Text::CSV_XS\|https://metacpan.org/pod/Text::CSV_XS> for documentation.
2016
2017							my $ss = ReadData ("bad.csv");
2018							$ss->[0]{error} and say $ss->[0]{error}[1];
2019
2020							As CSV has no sheet labels, the default label for a CSV sheet is its filename.
2021							For CSV, this can be overruled using the I<label> attribute:
2022
2023							my $ss = Spreadsheet::Read->new ("/some/place/test.csv", label => "Test");
2024
2025							=head2 Cell Attributes
2026							X<attr>
2027
2028							If the constructor was called with C<attr> having a true value,
2029
2030							my $book = ReadData ("book.xls", attr => 1);
2031							my $book = Spreadsheet::Read->new ("book.xlsx", attr => 1);
2032
2033							effort is made to analyze and store field attributes like this:
2034
2035							{ label => "Sheet 1",
2036							maxrow => 5,
2037							maxcol => 2,
2038							cell => [ undef,
2039							[ undef, 1 ],
2040							[ undef, undef, undef, undef, undef, "Nugget" ],
2041							],
2042							attr => [ undef,
2043							[ undef, {
2044							type => "numeric",
2045							fgcolor => "#ff0000",
2046							bgcolor => undef,
2047							font => "Arial",
2048							size => undef,
2049							format => "## ##0.00",
2050							halign => "right",
2051							valign => "top",
2052							uline => 0,
2053							bold => 0,
2054							italic => 0,
2055							wrap => 0,
2056							merged => 0,
2057							hidden => 0,
2058							locked => 0,
2059							enc => "utf-8",
2060							}, ],
2061							[ undef, undef, undef, undef, undef, {
2062							type => "text",
2063							fgcolor => "#e2e2e2",
2064							bgcolor => undef,
2065							font => "Letter Gothic",
2066							size => 15,
2067							format => undef,
2068							halign => "left",
2069							valign => "top",
2070							uline => 0,
2071							bold => 0,
2072							italic => 0,
2073							wrap => 0,
2074							merged => 0,
2075							hidden => 0,
2076							locked => 0,
2077							enc => "iso8859-1",
2078							}, ],
2079							],
2080							merged => [],
2081							A1 => 1,
2082							B5 => "Nugget",
2083							},
2084
2085							The entries C<maxrow> and C<maxcol> are 1-based.
2086
2087							This has now been partially implemented, mainly for Excel, as the other
2088							parsers do not (yet) support all of that. YMMV.
2089
2090							If a cell itself is not hidden, but the parser holds the information that
2091							either the row or the column (or both) the field is in is hidden, the flag
2092							is inherited into the cell attributes.
2093
2094							You can get the attributes of a cell (as a hash-ref) like this:
2095
2096							my $attr = $book[1]{attr}[1][3]; # Direct structure
2097							my $attr = $book->sheet (1)->attr (1, 3); # Same using OO
2098							my $attr = $book->sheet (1)->attr ("A3"); # Same using OO
2099
2100							To get to the C<font> attribute, use any of these:
2101
2102							my $font = $book[1]{attr}[1][3]{font};
2103							my $font = $book->sheet (1)->attr (1, 3)->{font};
2104							my $font = $book->sheet (1)->attr ("A3")->font;
2105
2106							=head3 Merged cells
2107							X<merged>
2108
2109							Note that only
2110							L<Spreadsheet::ReadSXC\|https://metacpan.org/release/Spreadsheet-ReadSXC>
2111							documents the use of merged cells, and not in a way useful for the spreadsheet
2112							consumer.
2113
2114							CSV does not support merged cells (though future implementations of CSV
2115							for the web might).
2116
2117							The documentation of merged areas in
2118							L<Spreadsheet::ParseExcel\|https://metacpan.org/release/Spreadsheet-ParseExcel> and
2119							L<Spreadsheet::ParseXLSX\|https://metacpan.org/release/Spreadsheet-ParseXLSX> can
2120							be found in
2121							L<Spreadsheet::ParseExcel::Worksheet\|https://metacpan.org/pod/Spreadsheet::ParseExcel::Worksheet>
2122							and L<Spreadsheet::ParseExcel::Cell\|https://metacpan.org/pod/Spreadsheet::ParseExcel::Cell>.
2123
2124							None of basic L<Spreadsheet::XLSX\|https://metacpan.org/release/Spreadsheet-XLSX>,
2125							L<Spreadsheet::ParseExcel\|https://metacpan.org/release/Spreadsheet-ParseExcel>, and
2126							L<Spreadsheet::ParseXLSX\|https://metacpan.org/release/Spreadsheet-ParseXLSX> manual
2127							pages mention merged cells at all.
2128
2129							This module just tries to return the information in a generic way.
2130
2131							Given this spreadsheet as an example
2132
2133							merged.xlsx:
2134
2135							A B C
2136							+-----+-----------+
2137							1\| \| foo \|
2138							+-----+ +
2139							2\| bar \| \|
2140							\| +-----+-----+
2141							3\| \| urg \| orc \|
2142							+-----+-----+-----+
2143
2144							the information extracted from that undocumented information is
2145							returned in the C<merged> entry of the sheet's hash as a list of
2146							top-left, bottom-right coordinate pars (col, row, col, row). For
2147							given example, that would be:
2148
2149							$ss->{merged} = [
2150							[ 1, 2, 1, 3 ], # A2-A3
2151							[ 2, 1, 3, 2 ], # B1-C2
2152							];
2153
2154							To find the label of the top-left cell in a merged area, use the
2155							L<C<merged_from>\|/merged_from> method.
2156
2157							$ss->merged_from ("C2"); # will return "B1"
2158
2159							When the attributes are also enabled, there is some merge information
2160							copied directly from the cell information, but again, that stems from
2161							code analysis and not from documentation:
2162
2163							my $ss = ReadData ("merged.xlsx", attr => 1)->[1];
2164							foreach my $row (1 .. $ss->{maxrow}) {
2165							foreach my $col (1 .. $ss->{maxcol}) {
2166							my $cell = cr2cell ($col, $row);
2167							printf "%s %-3s %s ", $cell, $ss->{$cell},
2168							$ss->{attr}[$col][$row]{merged};
2169							}
2170							print "\n";
2171							}
2172
2173							A1 0 B1 foo 1 C1 1
2174							A2 bar 1 B2 1 C2 1
2175							A3 1 B3 urg 0 C3 orc 0
2176
2177							In this example, there is no way to see if C<B2> is merged to C<A2> or
2178							to C<B1> without analyzing all surrounding cells. This could as well
2179							mean C<A2:A3>, C<B1:C1>, C<B2:C2>, as C<A2:A3>, C<B1:B2>, C<C1:C2>, as
2180							C<A2:A3>, C<B1:C2>.
2181
2182							Use the L<C<merged>\|/merged> entry described above to find out what
2183							fields are merged to what other fields or use C<merge>:
2184
2185							my $ss = ReadData ("merged.xlsx", attr => 1, merge => 1)->[1];
2186							foreach my $row (1 .. $ss->{maxrow}) {
2187							foreach my $col (1 .. $ss->{maxcol}) {
2188							my $cell = cr2cell ($col, $row);
2189							printf "%s %-3s %s ", $cell, $ss->{$cell},
2190							$ss->{attr}[$col][$row]{merged};
2191							}
2192							print "\n";
2193							}
2194
2195							A1 0 B1 foo B1 C1 foo B1
2196							A2 bar A2 B2 foo B1 C2 foo B1
2197							A3 bar A2 B3 urg 0 C3 orc 0
2198
2199							=head2 Streams from web-resources
2200
2201							If you want to stream a web-resource, and the underlying parser supports it,
2202							you could use a helper function like this (thanks Corion):
2203
2204							use HTTP::Tiny;
2205							use Spreadsheet::Read;
2206
2207							# Fetch data and return a filehandle to that data
2208							sub fh_from_url {
2209							my $url = shift;
2210							my $ua = HTTP::Tiny->new;
2211							my $res = $ua->get ($url);
2212							open my $fh, "<", \$res->{content};
2213							return $fh
2214							} # fh_from_url
2215
2216							my $fh = fh_from_url ("http://example.com/example.csv");
2217							my $sheet = Spreadsheet::Read->new ($fh, parser => "csv");
2218
2219							=head1 TOOLS
2220
2221							This modules comes with a few tools that perform tasks from the FAQ, like
2222							"How do I select only column D through F from sheet 2 into a CSV file?"
2223
2224							If the module was installed without the tools, you can find them here:
2225							https://github.com/Tux/Spreadsheet-Read/tree/master/examples
2226
2227							=head2 C<xlscat>
2228
2229							Show (parts of) a spreadsheet in plain text, CSV, or HTML
2230
2231							usage: xlscat [-s <sep>] [-L] [-n] [-A] [-u] [Selection] file.xls
2232							[-c \| -m] [-u] [Selection] file.xls
2233							-i [-S sheets] file.xls
2234							Generic options:
2235							-v[#] Set verbose level (xlscat/xlsgrep)
2236							-d[#] Set debug level (Spreadsheet::Read)
2237							-u Use unformatted values
2238							--noclip Do not strip empty sheets and
2239							trailing empty rows and columns
2240							-e <enc> Set encoding for input and output
2241							-b <enc> Set encoding for input
2242							-a <enc> Set encoding for output
2243							Input CSV:
2244							--in-sep=c Set input sep_char for CSV
2245							Input XLS:
2246							--dtfmt=fmt Specify the default date format to replace 'm-d-yy'
2247							the default replacement is 'yyyy-mm-dd'
2248							Output Text (default):
2249							-s <sep> Use separator <sep>. Default '\|', \n allowed
2250							-L Line up the columns
2251							-n [skip] Number lines (prefix with column number)
2252							optionally skip <skip> (header) lines
2253							-A Show field attributes in ANSI escapes
2254							-h[#] Show # header lines
2255							Output Index only:
2256							-i Show sheet names and size only
2257							Output CSV:
2258							-c Output CSV, separator = ','
2259							-m Output CSV, separator = ';'
2260							Output HTML:
2261							-H Output HTML
2262							Selection:
2263							-S <sheets> Only print sheets <sheets>. 'all' is a valid set
2264							Default only prints the first sheet
2265							-R <rows> Only print rows <rows>. Default is 'all'
2266							-C <cols> Only print columns <cols>. Default is 'all'
2267							-F <flds> Only fields <flds> e.g. -FA3,B16
2268							Ordering (column numbers in result set after selection):
2269							--sort=spec Sort output (e.g. --sort=3,2r,5n,1rn+2)
2270							+# - first # lines do not sort (header)
2271							# - order on column # lexical ascending
2272							#n - order on column # numeric ascending
2273							#r - order on column # lexical descending
2274							#rn - order on column # numeric descending
2275
2276							=head2 C<xlsgrep>
2277
2278							Show (parts of) a spreadsheet that match a pattern in plain text, CSV, or HTML
2279
2280							usage: xlsgrep [-s <sep>] [-L] [-n] [-A] [-u] [Selection] pattern file.xls
2281							[-c \| -m] [-u] [Selection] pattern file.xls
2282							-i [-S sheets] pattern file.xls
2283							Generic options:
2284							-v[#] Set verbose level (xlscat/xlsgrep)
2285							-d[#] Set debug level (Spreadsheet::Read)
2286							-u Use unformatted values
2287							--noclip Do not strip empty sheets and
2288							trailing empty rows and columns
2289							-e <enc> Set encoding for input and output
2290							-b <enc> Set encoding for input
2291							-a <enc> Set encoding for output
2292							Input CSV:
2293							--in-sep=c Set input sep_char for CSV
2294							Input XLS:
2295							--dtfmt=fmt Specify the default date format to replace 'm-d-yy'
2296							the default replacement is 'yyyy-mm-dd'
2297							Output Text (default):
2298							-s <sep> Use separator <sep>. Default '\|', \n allowed
2299							-L Line up the columns
2300							-n [skip] Number lines (prefix with column number)
2301							optionally skip <skip> (header) lines
2302							-A Show field attributes in ANSI escapes
2303							-h[#] Show # header lines
2304							Grep options:
2305							-i Ignore case
2306							-w Match whole words only
2307							Output CSV:
2308							-c Output CSV, separator = ','
2309							-m Output CSV, separator = ';'
2310							Output HTML:
2311							-H Output HTML
2312							Selection:
2313							-S <sheets> Only print sheets <sheets>. 'all' is a valid set
2314							Default only prints the first sheet
2315							-R <rows> Only print rows <rows>. Default is 'all'
2316							-C <cols> Only print columns <cols>. Default is 'all'
2317							-F <flds> Only fields <flds> e.g. -FA3,B16
2318							Ordering (column numbers in result set after selection):
2319							--sort=spec Sort output (e.g. --sort=3,2r,5n,1rn+2)
2320							+# - first # lines do not sort (header)
2321							# - order on column # lexical ascending
2322							#n - order on column # numeric ascending
2323							#r - order on column # lexical descending
2324							#rn - order on column # numeric descending
2325
2326							=head2 C<xls2csv>
2327
2328							Convert a spreadsheet to CSV. This is just a small wrapper over C<xlscat>.
2329
2330							usage: xls2csv [ -o file.csv ] file.xls
2331
2332							=head2 C<ss2tk>
2333
2334							Show a spreadsheet in a perl/Tk spreadsheet widget
2335
2336							usage: ss2tk [-w <width>] [X11 options] file.xls [<pattern>]
2337							-w <width> use <width> as default column width (4)
2338
2339							=head2 C<ssdiff>
2340
2341							Show the differences between two spreadsheets.
2342
2343							usage: examples/ssdiff [--verbose[=1]] file.xls file.xlsx
2344
2345							=head1 TODO
2346
2347							=over 4
2348
2349							=item Options
2350
2351							=over 2
2352
2353							=item Module Options
2354
2355							New Spreadsheet::Read options are bound to happen. I'm thinking of an
2356							option that disables the reading of the data entirely to speed up an
2357							index request (how many sheets/fields/columns). See C<xlscat -i>.
2358
2359							=item Parser options
2360
2361							Try to transparently support as many options as the encapsulated modules
2362							support regarding (un)formatted values, (date) formats, hidden columns
2363							rows or fields etc. These could be implemented like C<attr> above but
2364							names C<meta>, or just be new values in the C<attr> hashes.
2365
2366							=back
2367
2368							=item Other parsers
2369
2370							Add support for new(er) parsers for already supported formats, like
2371
2372							=over 2
2373
2374							=item Data::XLSX::Parser
2375
2376							Data::XLSX::Parser provides faster way to parse Microsoft Excel's .xlsx
2377							files. The implementation of this module is highly inspired from Python's
2378							FastXLSX library.
2379
2380							This is SAX based parser, so you can parse very large XLSX file with
2381							lower memory usage.
2382
2383							=back
2384
2385							=item Other spreadsheet formats
2386
2387							I consider adding any spreadsheet interface that offers a usable API.
2388
2389							Under investigation:
2390
2391							=over 2
2392
2393							=item Gnumeric (.gnumeric)
2394
2395							I have seen no existing CPAN module yet.
2396
2397							It is gzip'ed XML
2398
2399							=item Kspread (.ksp)
2400
2401							Now knows as Calligra Sheets.
2402
2403							I have seen no existing CPAN module yet.
2404
2405							It is XML in ZIP
2406
2407							=back
2408
2409							=item Alternative parsers for existing formats
2410
2411							As long as the alternative has a good reason for its existence, and the
2412							API of that parser reasonable fits in my approach, I will consider to
2413							implement the glue layer, or apply patches to do so as long as these
2414							match what F<CONTRIBUTING.md> describes.
2415
2416							=back
2417
2418							=head1 SEE ALSO
2419
2420							=over 2
2421
2422							=item Text::CSV_XS, Text::CSV_PP
2423
2424							See L<Text::CSV_XS\|https://metacpan.org/release/Text-CSV_XS> ,
2425							L<Text::CSV_PP\|https://metacpan.org/release/Text-CSV> , and
2426							L<Text::CSV\|https://metacpan.org/release/Text-CSV> documentation.
2427
2428							L<Text::CSV\|https://metacpan.org/release/Text-CSV> is a wrapper over Text::CSV_XS (the fast XS version) and/or
2429							L<Text::CSV_PP\|https://metacpan.org/release/Text-CSV> (the pure perl version).
2430
2431							=item Spreadsheet::ParseExcel
2432
2433							L<Spreadsheet::ParseExcel\|https://metacpan.org/release/Spreadsheet-ParseExcel> is
2434							the best parser for old-style Microsoft Excel (.xls) files.
2435
2436							=item Spreadsheet::ParseXLSX
2437
2438							L<Spreadsheet::ParseXLSX\|https://metacpan.org/release/Spreadsheet-ParseXLSX> is
2439							like L<Spreadsheet::ParseExcel\|https://metacpan.org/release/Spreadsheet-ParseExcel>,
2440							but for new Microsoft Excel 2007+ files (.xlsx). They have the same API.
2441
2442							This module uses L<XML::Twig\|https://metacpan.org/release/XML-Twig> to parse the
2443							internal XML.
2444
2445							=item Spreadsheet::XLSX
2446
2447							See L<Spreadsheet::XLSX\|https://metacpan.org/release/Spreadsheet-XLSX>
2448							documentation.
2449
2450							This module is dead and deprecated. It is B<buggy and unmaintained>. I<Please>
2451							use L<Spreadsheet::ParseXLSX\|https://metacpan.org/release/Spreadsheet-ParseXLSX>
2452							instead.
2453
2454							=item Spreadsheet::ParseODS
2455
2456							L<Spreadsheet::ParseODS\|https://metacpan.org/release/Spreadsheet-ParseODS> is a
2457							parser for OpenOffice/LibreOffice (.sxc and .ods) spreadsheet files. It is the
2458							successor of L<Spreadsheet::ReadSXC\|https://metacpan.org/release/Spreadsheet-ReadSXC>.
2459
2460							=item Spreadsheet::ReadSXC
2461
2462							L<Spreadsheet::ReadSXC\|https://metacpan.org/release/Spreadsheet-ReadSXC> is a
2463							parser for OpenOffice/LibreOffice (.sxc and .ods) spreadsheet files.
2464
2465							=item Spreadsheet::BasicRead
2466
2467							See L<Spreadsheet::BasicRead\|https://metacpan.org/release/Spreadsheet-BasicRead>
2468							for xlscat-like functionality (Excel only)
2469
2470							=item Spreadsheet::ConvertAA
2471
2472							See L<Spreadsheet::ConvertAA\|https://metacpan.org/release/Spreadsheet-ConvertAA>
2473							for an alternative set of L</cell2cr>/L</cr2cell> pair.
2474
2475							=item Spreadsheet::Perl
2476
2477							L<Spreadsheet::Perl\|https://metacpan.org/release/Spreadsheet-Perl> offers a Pure
2478							Perl implementation of a spreadsheet engine. Users that want this format to be
2479							supported in Spreadsheet::Read are hereby motivated to offer patches. It is
2480							not high on my TODO-list.
2481
2482							=item Spreadsheet::CSV
2483
2484							L<Spreadsheet::CSV\|https://metacpan.org/release/Spreadsheet-CSV> offers the
2485							interesting approach of seeing all supported spreadsheet formats as if it were
2486							CSV, mimicking the L<Text::CSV_XS\|https://metacpan.org/release/Text-CSV_XS>
2487							interface.
2488
2489							=item xls2csv
2490
2491							L<xls2csv\|https://github.com/Tux/Spreadsheet-Read/blob/master/examples/xls2csv>
2492							offers an alternative for my C<xlscat -c>, in the xls2csv tool, but this tool
2493							focuses on character encoding transparency, and requires some other modules.
2494
2495							=back
2496
2497							=head1 AUTHOR
2498
2499							H.Merijn Brand, <h.m.brand@xs4all.nl>
2500
2501							=head1 COPYRIGHT AND LICENSE
2502
2503							Copyright (C) 2005-2020 H.Merijn Brand
2504
2505							This library is free software; you can redistribute it and/or modify
2506							it under the same terms as Perl itself.
2507
2508							=cut