File Coverage

blib/lib/Spreadsheet/Read.pm
Criterion Covered Total %
statement 489 690 70.8
branch 423 652 64.8
condition 183 315 58.1
subroutine 47 50 94.0
pod 13 13 100.0
total 1155 1720 67.1


line stmt bran cond sub pod time code
1             #!/pro/bin/perl
2              
3             package Spreadsheet::Read;
4              
5             =head1 NAME
6              
7             Spreadsheet::Read - Read the data from a spreadsheet
8              
9             =head1 SYNOPSIS
10              
11             use Spreadsheet::Read;
12             my $book = ReadData ("test.csv", sep => ";");
13             my $book = ReadData ("test.sxc");
14             my $book = ReadData ("test.ods");
15             my $book = ReadData ("test.xls");
16             my $book = ReadData ("test.xlsx");
17             my $book = ReadData ("test.xlsm");
18             my $book = ReadData ($fh, parser => "xls");
19              
20             Spreadsheet::Read::add ($book, "sheet.csv");
21              
22             my $sheet = $book->[1]; # first datasheet
23             my $cell = $book->[1]{A3}; # content of field A3 of sheet 1
24             my $cell = $book->[1]{cell}[1][3]; # same, unformatted
25              
26             # OO API
27             my $book = Spreadsheet::Read->new ("file.csv");
28             my $sheet = $book->sheet (1);
29             my $cell = $sheet->cell ("A3");
30             my $cell = $sheet->cell (1, 3);
31              
32             $book->add ("test.xls");
33              
34             =cut
35              
36 58     58   5939642 use 5.8.1;
  58         792  
37 58     58   386 use strict;
  58         131  
  58         1419  
38 58     58   320 use warnings;
  58         121  
  58         3663  
39              
40             our $VERSION = "0.82";
41 1     1 1 137 sub Version { $VERSION }
42              
43 58     58   394 use Carp;
  58         133  
  58         3835  
44 58     58   397 use Exporter;
  58         124  
  58         5881  
45             our @ISA = qw( Exporter );
46             our @EXPORT = qw( ReadData cell2cr cr2cell );
47             our @EXPORT_OK = qw( parses rows cellrow row add );
48              
49 58     58   32974 use Encode qw( decode );
  58         903436  
  58         4416  
50 58     58   52982 use File::Temp qw( );
  58         1251489  
  58         1782  
51 58     58   35635 use Data::Dumper;
  58         379932  
  58         70239  
52              
53             my @parsers = (
54             [ csv => "Text::CSV_XS", "0.71" ],
55             [ csv => "Text::CSV_PP", "1.17" ],
56             [ csv => "Text::CSV", "1.17" ],
57             [ ods => "Spreadsheet::ParseODS", "0.25" ],
58             [ ods => "Spreadsheet::ReadSXC", "0.20" ],
59             [ sxc => "Spreadsheet::ParseODS", "0.25" ],
60             [ sxc => "Spreadsheet::ReadSXC", "0.20" ],
61             [ xls => "Spreadsheet::ParseExcel", "0.34" ],
62             [ xlsx => "Spreadsheet::ParseXLSX", "0.24" ],
63             [ xlsx => "Spreadsheet::XLSX", "0.13" ],
64             [ prl => "Spreadsheet::Perl", "" ],
65              
66             # Helper modules
67             [ ios => "IO::Scalar", "" ],
68             [ dmp => "Data::Peek", "" ],
69             );
70             my %can = ( supports => { map { $_->[1] => $_->[2] } @parsers });
71             foreach my $p (@parsers) {
72             my $format = $p->[0];
73             $can{$format} and next;
74             $can{$format} = "";
75             my $preset = $ENV{"SPREADSHEET_READ_\U$format"} or next;
76             my $min_version = $can{supports}{$preset};
77             unless ($min_version) {
78             # Catch weirdness like $SPREADSHEET_READ_XLSX = "DBD::Oracle"
79             $can{$format} = "!$preset is not supported for the $format format";
80             next;
81             }
82             if (eval "local \$_; require $preset" and not $@) {
83             # forcing a parser should still check the version
84             my $ok;
85             my $has = $preset->VERSION;
86             $has =~ s/_[0-9]+$//; # Remove beta-part
87             if ($min_version =~ m/^v([0-9.]+)/) { # clumsy versions
88             my @min = split m/\./ => $1;
89             $has =~ s/^v//;
90             my @has = split m/\./ => $has;
91             $ok = (($has[0] * 1000 + $has[1]) * 1000 + $has[2]) >=
92             (($min[0] * 1000 + $min[1]) * 1000 + $min[2]);
93             }
94             else { # normal versions
95             $ok = $has >= $min_version;
96             }
97             $ok or $preset = "!$preset";
98             }
99             else {
100             $preset = "!$preset";
101             }
102             $can{$format} = $preset;
103             }
104             delete $can{supports};
105             for (@parsers) {
106             my ($flag, $mod, $vsn) = @$_;
107             $can{$flag} and next;
108             eval "require $mod; \$vsn and ${mod}->VERSION (\$vsn); \$can{\$flag} = '$mod'" or
109             $_->[0] = "! Cannot use $mod version $vsn: $@";
110             }
111             $can{sc} = __PACKAGE__; # SquirelCalc is built-in
112              
113             defined $Spreadsheet::ParseExcel::VERSION && $Spreadsheet::ParseExcel::VERSION < 0.61 and
114             *Spreadsheet::ParseExcel::Workbook::get_active_sheet = sub { undef; };
115             defined $Spreadsheet::ParseODS::VERSION && $Spreadsheet::ParseODS::VERSION < 0.25 and
116             *Spreadsheet::ParseODS::Workbook::get_active_sheet = sub { undef; };
117              
118             my $debug = 0;
119             my %def_opts = (
120             rc => 1,
121             cells => 1,
122             attr => 0,
123             clip => undef, # $opt{cells};
124             strip => 0,
125             pivot => 0,
126             dtfmt => "yyyy-mm-dd", # Format 14
127             debug => 0,
128             passwd => undef,
129             parser => undef,
130             sep => undef,
131             quote => undef,
132             label => undef,
133             merge => 0,
134             );
135             my @def_attr = (
136             type => "text",
137             fgcolor => undef,
138             bgcolor => undef,
139             font => undef,
140             size => undef,
141             format => undef,
142             halign => "left",
143             valign => "top",
144             bold => 0,
145             italic => 0,
146             uline => 0,
147             wrap => 0,
148             merged => 0,
149             hidden => 0,
150             locked => 0,
151             enc => "utf-8", # $ENV{LC_ALL} // $ENV{LANG} // ...
152             formula => undef,
153             );
154              
155             # Helper functions
156              
157             sub _dump {
158 7     7   21 my ($label, $ref) = @_;
159 7 50       26 if ($can{dmp}) {
160 7         46 print STDERR Data::Peek::DDumper ({ $label => $ref });
161             }
162             else {
163 0         0 print STDERR Data::Dumper->Dump ([$ref], [$label]);
164             }
165 7         1881 my @c = caller (1);
166 7         51 print STDERR "<<- $c[1]:$c[2]|$c[3]\n";
167             } # _dump
168              
169             sub _parser {
170 420 100   420   1720 my $type = shift or return "";
171 177         532 $type = lc $type;
172 177 100       702 my $ods = $can{ods} ? "ods" : "sxc";
173             # Aliases and fullnames
174 177 100       560 $type eq "excel" and return "xls";
175 176 50       527 $type eq "excel2007" and return "xlsx";
176 176 50       483 $type eq "xlsm" and return "xlsx";
177 176 100       466 $type eq "oo" and return $ods;
178             # $type eq "sxc" and return $ods;
179 175 100       440 $type eq "openoffice" and return $ods;
180 174 50       413 $type eq "libreoffice" and return $ods;
181 174 100       524 $type eq "perl" and return "prl";
182 173 50       450 $type eq "squirelcalc" and return "sc";
183 173 100       1243 return exists $can{$type} ? $type : "";
184             } # _parser
185              
186             sub new {
187 13     13 1 7331 my $class = shift;
188 13         69 my $r = ReadData (@_);
189 13 100       51 unless ($r) {
190 2 50       14 @_ and return; # new with arguments failed to open resource
191 0         0 $r = [{
192             parsers => [],
193             error => undef,
194             sheets => 0,
195             sheet => { },
196             }];
197             }
198 11         225 bless $r => $class;
199             } # new
200              
201             # Spreadsheet::Read::parses ("csv") or die "Cannot parse CSV"
202             sub parses {
203 68 100   68 1 161862 ref $_[0] eq __PACKAGE__ and shift;
204 68 100       307 my $type = _parser (shift) or return 0;
205 66 100       625 if ($can{$type} =~ m/^!\s*(.*)/) {
206 17         73 $@ = $1;
207 17         61 return 0;
208             }
209 49         162 return $can{$type};
210             } # parses
211              
212             sub sheets {
213 4     4 1 14 my $ctrl = shift->[0];
214 4         6 my %s = %{$ctrl->{sheet}};
  4         19  
215 4 100       48 wantarray ? sort { $s{$a} <=> $s{$b} } keys %s : $ctrl->{sheets};
  3         27  
216             } # sheets
217              
218             # col2label (4) => "D"
219             sub col2label {
220 15739 100   15739 1 33822 ref $_[0] eq __PACKAGE__ and shift;
221 15739         20470 my $c = shift;
222 15739 100 100     39898 defined $c && $c > 0 or return "";
223 15735         21038 my $cell = "";
224 15735         25414 while ($c) {
225 58     58   31758 use integer;
  58         922  
  58         380  
226              
227 15752         29069 substr $cell, 0, 0, chr (--$c % 26 + ord "A");
228 15752         27170 $c /= 26;
229             }
230 15735         41770 $cell;
231             } # col2label
232              
233             # cr2cell (4, 18) => "D18"
234             # No prototype to allow 'cr2cell (@rowcol)'
235             sub cr2cell {
236 15537 100   15537 1 225094 ref $_[0] eq __PACKAGE__ and shift;
237 15537         24494 my ($c, $r) = @_;
238 15537 100 100     71221 defined $c && defined $r && $c > 0 && $r > 0 or return "";
      100        
      100        
239 15531         23927 col2label ($c) . $r;
240             } # cr2cell
241              
242             # cell2cr ("D18") => (4, 18)
243             sub cell2cr {
244 620 100   620 1 690191 ref $_[0] eq __PACKAGE__ and shift;
245 620 100 100     6048 my ($cc, $r) = (uc ($_[0]||"") =~ m/^([A-Z]+)([0-9]+)$/) or return (0, 0);
246 616         1298 my $c = 0;
247 616         2537 while ($cc =~ s/^([A-Z])//) {
248 624         2497 $c = 26 * $c + 1 + ord ($1) - ord ("A");
249             }
250 616         2221 ($c, $r);
251             } # cell2cr
252              
253             # my @row = cellrow ($book->[1], 1);
254             # my @row = $book->cellrow (1, 1);
255             sub cellrow {
256 10 100   10 1 70 my $sheet = ref $_[0] eq __PACKAGE__ ? (shift)->[shift] : shift or return;
    100          
257 9 100 66     95 ref $sheet eq "HASH" && exists $sheet->{cell} or return;
258 7 50 33     45 exists $sheet->{maxcol} && exists $sheet->{maxrow} or return;
259 7 100       35 my $row = shift or return;
260 6 100 100     51 $row > 0 && $row <= $sheet->{maxrow} or return;
261 4         11 my $s = $sheet->{cell};
262 4         16 map { $s->[$_][$row] } 1..$sheet->{maxcol};
  76         211  
263             } # cellrow
264              
265             # my @row = row ($book->[1], 1);
266             # my @row = $book->row (1, 1);
267             sub row {
268 10 100   10 1 3820 my $sheet = ref $_[0] eq __PACKAGE__ ? (shift)->[shift] : shift or return;
    100          
269 9 100 66     74 ref $sheet eq "HASH" && exists $sheet->{cell} or return;
270 7 50 33     37 exists $sheet->{maxcol} && exists $sheet->{maxrow} or return;
271 7 100       32 my $row = shift or return;
272 6 100 100     41 $row > 0 && $row <= $sheet->{maxrow} or return;
273 4         18 map { $sheet->{cr2cell ($_, $row)} } 1..$sheet->{maxcol};
  76         134  
274             } # row
275              
276             # Convert {cell}'s [column][row] to a [row][column] list
277             # my @rows = rows ($book->[1]);
278             sub rows {
279 33 50   33 1 22403 my $sheet = ref $_[0] eq __PACKAGE__ ? (shift)->[shift] : shift or return;
    100          
280 30 100 100     169 ref $sheet eq "HASH" && exists $sheet->{cell} or return;
281 26 100 100     149 exists $sheet->{maxcol} && exists $sheet->{maxrow} or return;
282 5         16 my $s = $sheet->{cell};
283              
284             map {
285 20         32 my $r = $_;
286 20         34 [ map { $s->[$_][$r] } 1..$sheet->{maxcol} ];
  80         181  
287 5         25 } 1..$sheet->{maxrow};
288             } # rows
289              
290             sub sheet {
291 24     24 1 612 my ($book, $sheet) = @_;
292 24 100 100     124 $book && $sheet or return;
293 22         39 my $class = "Spreadsheet::Read::Sheet";
294             $sheet =~ m/^[0-9]+$/ && $sheet >= 1 && $sheet <= $book->[0]{sheets} and
295 22 100 66     332 return bless $book->[$sheet] => $class;
      100        
296             exists $book->[0]{sheet}{$sheet} and
297 6 100       24 return bless $book->[$book->[0]{sheet}{$sheet}] => $class;
298 5         16 foreach my $idx (1 .. $book->[0]{sheets}) {
299 6 100       25 $book->[$idx]{label} eq $sheet and
300             return bless $book->[$idx] => $class;
301             }
302 4         21 return;
303             } # sheet
304              
305             # If option "clip" is set, remove the trailing rows and
306             # columns in each sheet that contain no visible data
307             sub _clipsheets {
308 228     228   631 my ($opt, $ref) = @_;
309              
310 228 50       736 $ref->[0]{sheets} or return $ref;
311              
312 228         644 my ($rc, $cl) = ($opt->{rc}, $opt->{cells});
313 228         685 my ($oc, $os, $oa) = ($opt->{clip}, $opt->{strip}, $opt->{attr});
314              
315             # Strip leading/trailing spaces
316 228 100 100     937 if ($os || $oc) {
317 213         653 foreach my $sheet (1 .. $ref->[0]{sheets}) {
318 255         573 $ref->[$sheet]{indx} = $sheet;
319 255         460 my $ss = $ref->[$sheet];
320 255 100 66     1109 $ss->{maxrow} && $ss->{maxcol} or next;
321 248         563 my ($mc, $mr) = (0, 0);
322 248         593 foreach my $row (1 .. $ss->{maxrow}) {
323 1958         3308 foreach my $col (1 .. $ss->{maxcol}) {
324 16652 100       25905 if ($rc) {
325 14754 100       27221 defined $ss->{cell}[$col][$row] or next;
326 5897 100       10078 $os & 2 and $ss->{cell}[$col][$row] =~ s/\s+$//;
327 5897 100       9547 $os & 1 and $ss->{cell}[$col][$row] =~ s/^\s+//;
328 5897 100       10590 if (length $ss->{cell}[$col][$row]) {
329 4432 100       7185 $col > $mc and $mc = $col;
330 4432 100       7530 $row > $mr and $mr = $row;
331             }
332             }
333 7795 100       12164 if ($cl) {
334 7576         11599 my $cell = cr2cell ($col, $row);
335 7576 100       15528 defined $ss->{$cell} or next;
336 6249 100       11234 $os & 2 and $ss->{$cell} =~ s/\s+$//;
337 6249 100       10114 $os & 1 and $ss->{$cell} =~ s/^\s+//;
338 6249 100       11479 if (length $ss->{$cell}) {
339 4772 100       7695 $col > $mc and $mc = $col;
340 4772 100       8905 $row > $mr and $mr = $row;
341             }
342             }
343             }
344             }
345              
346 248 100 66     1540 $oc && ($mc < $ss->{maxcol} || $mr < $ss->{maxrow}) or next;
      66        
347              
348             # Remove trailing empty columns
349 106         254 foreach my $col (($mc + 1) .. $ss->{maxcol}) {
350 243 100       679 $rc and undef $ss->{cell}[$col];
351 243 100       568 $oa and undef $ss->{attr}[$col];
352 243 100       445 $cl or next;
353 193         337 my $c = col2label ($col);
354 193         1438 delete $ss->{"$c$_"} for 1 .. $ss->{maxrow};
355             }
356              
357             # Remove trailing empty rows
358 106         269 foreach my $row (($mr + 1) .. $ss->{maxrow}) {
359 139         265 foreach my $col (1 .. $mc) {
360 64 50       159 $cl and delete $ss->{cr2cell ($col, $row)};
361 64 50       213 $rc and undef $ss->{cell} [$col][$row];
362 64 50       1024 $oa and undef $ss->{attr} [$col][$row];
363             }
364             }
365              
366 106         312 ($ss->{maxrow}, $ss->{maxcol}) = ($mr, $mc);
367             }
368             }
369              
370 228 100       708 if ($opt->{pivot}) {
371 1         4 foreach my $sheet (1 .. $ref->[0]{sheets}) {
372 1         4 my $ss = $ref->[$sheet];
373 1 0 33     4 $ss->{maxrow} || $ss->{maxcol} or next;
374 1 50       5 my $mx = $ss->{maxrow} > $ss->{maxcol} ? $ss->{maxrow} : $ss->{maxcol};
375 1         3 foreach my $row (2 .. $mx) {
376 18         38 foreach my $col (1 .. ($row - 1)) {
377             $opt->{rc} and
378             ($ss->{cell}[$col][$row], $ss->{cell}[$row][$col]) =
379 171 50       520 ($ss->{cell}[$row][$col], $ss->{cell}[$col][$row]);
380             $opt->{cells} and
381             ($ss->{cr2cell ($col, $row)}, $ss->{cr2cell ($row, $col)}) =
382 171 50       349 ($ss->{cr2cell ($row, $col)}, $ss->{cr2cell ($col, $row)});
383             }
384             }
385 1         4 ($ss->{maxcol}, $ss->{maxrow}) = ($ss->{maxrow}, $ss->{maxcol});
386             }
387             }
388              
389 228         5183 $ref;
390             } # _clipsheets
391              
392             # Convert a single color (index) to a color
393             sub _xls_color {
394 1758     1758   3249 my $clr = shift;
395 1758 100       3493 defined $clr or return undef;
396 866 50       1459 $clr eq "#000000" and return undef;
397 866 50       1719 $clr =~ m/^#[0-9a-fA-F]+$/ and return lc $clr;
398 866 100 66     2434 $clr == 0 || $clr == 32767 and return undef; # Default fg color
399 484         1093 return "#" . lc Spreadsheet::ParseExcel->ColorIdxToRGB ($clr);
400             } # _xls_color
401              
402             # Convert a fill [ $pattern, $front_color, $back_color ] to a single background
403             sub _xls_fill {
404 1542     1542   3011 my ($p, $fg, $bg) = @_;
405 1542 50       2656 defined $p or return undef;
406 1542 50       2580 $p == 32767 and return undef; # Default fg color
407 1542 100 100     14827 $p == 0 && !defined $bg and return undef; # No fill bg color
408 650 100       1104 $p == 1 and return _xls_color ($fg);
409 434 50 33     6146 $bg < 8 || $bg > 63 and return undef; # see Workbook.pm#106
410 0         0 return _xls_color ($bg);
411             } # _xls_fill
412              
413             sub ReadData {
414 280 100   280 1 342784 my $txt = shift or return;
415              
416 277         573 my %opt;
417 277 100       774 if (@_) {
418 222 100       1117 if (ref $_[0] eq "HASH") { %opt = %{shift @_} }
  20 50       31  
  20         79  
419 202         776 elsif (@_ % 2 == 0) { %opt = @_ }
420             }
421              
422 277 100       1075 exists $opt{rc} or $opt{rc} = $def_opts{rc};
423 277 100       1367 exists $opt{cells} or $opt{cells} = $def_opts{cells};
424 277 100       849 exists $opt{attr} or $opt{attr} = $def_opts{attr};
425 277 100       951 exists $opt{clip} or $opt{clip} = $opt{cells};
426 277 100       748 exists $opt{strip} or $opt{strip} = $def_opts{strip};
427 277 100       888 exists $opt{dtfmt} or $opt{dtfmt} = $def_opts{dtfmt};
428 277 100       771 exists $opt{merge} or $opt{merge} = $def_opts{merge};
429              
430             # $debug = $opt{debug} || 0;
431 277 100       783 $debug = defined $opt{debug} ? $opt{debug} : $def_opts{debug};
432 277 100       702 $debug > 4 and _dump (Options => \%opt);
433              
434 31         97 my %parser_opts = map { $_ => $opt{$_} }
435 277         1036 grep { !exists $def_opts{$_} }
  2047         3971  
436             keys %opt;
437              
438 277         1216 my $_parser = _parser ($opt{parser});
439              
440 277 100       1172 my $io_ref = ref ($txt) =~ m/GLOB|IO/ ? $txt : undef;
441             my $io_fil = $io_ref ? 0 : $txt =~ m/\0/ ? 0
442 58 100   58   129661 : do { no warnings "newline"; -f $txt };
  58 100       168  
  58         458040  
  277         1053  
  256         5473  
443 277 100 100     1792 my $io_txt = $io_ref || $io_fil ? 0 : 1;
444              
445 277 100 100     4128 $io_fil && ! -s $txt and do { $@ = "$txt is empty"; return };
  11         63  
  11         68  
446 266 100 100     1445 $io_ref && eof $txt and do { $@ = "Empty stream"; return };
  5         22  
  5         88  
447              
448 261 100 100     2023 if ($opt{parser} ? $_parser eq "csv" : ($io_fil && $txt =~ m/\.(csv)$/i)) {
    100          
449 106 50       358 $can{csv} or croak "CSV parser not installed";
450              
451 106 100       357 my $label = defined $opt{label} ? $opt{label} : $io_fil ? $txt : "IO";
    100          
452              
453 106 50       286 $debug and print STDERR "Opening CSV $label using $can{csv}-", $can{csv}->VERSION, "\n";
454              
455             my @data = (
456             { type => "csv",
457             parser => $can{csv},
458             version => $can{csv}->VERSION,
459             parsers => [ {
460             type => "csv",
461             parser => $can{csv},
462             version => $can{csv}->VERSION,
463 106         3215 }],
464             error => undef,
465             quote => '"',
466             sepchar => ',',
467             sheets => 1,
468             sheet => { $label => 1 },
469             },
470             { parser => 0,
471             label => $label,
472             maxrow => 0,
473             maxcol => 0,
474             cell => [],
475             attr => [],
476             merged => [],
477             active => 1,
478             },
479             );
480              
481 106         558 my ($sep, $quo, $in) = (",", '"');
482 106 100       323 defined $opt{sep} and $sep = $opt{sep};
483 106 100       248 defined $opt{quote} and $quo = $opt{quote};
484 106 50       282 $debug > 8 and _dump (debug => {
485             data => \@data, txt => $txt, io_ref => $io_ref, io_fil => $io_fil });
486 106 100       275 if ($io_fil) {
    100          
    100          
    50          
487 94 100 100     331 unless (defined $opt{quote} && defined $opt{sep}) {
488 87 50       3475 open $in, "<", $txt or return;
489 87         2121 my $l1 = <$in>;
490              
491 87 100       430 $quo = defined $opt{quote} ? $opt{quote} : '"';
492             $sep = # If explicitly set, use it
493             defined $opt{sep} ? $opt{sep} :
494             # otherwise start auto-detect with quoted strings
495 87 50       964 $l1 =~ m/["0-9];["0-9;]/ ? ";" :
    100          
    100          
    50          
    50          
    50          
    100          
    100          
    50          
496             $l1 =~ m/["0-9],["0-9,]/ ? "," :
497             $l1 =~ m/["0-9]\t["0-9,]/ ? "\t" :
498             $l1 =~ m/["0-9]\|["0-9,]/ ? "|" :
499             # If neither, then for unquoted strings
500             $l1 =~ m/\w;[\w;]/ ? ";" :
501             $l1 =~ m/\w,[\w,]/ ? "," :
502             $l1 =~ m/\w\t[\w,]/ ? "\t" :
503             $l1 =~ m/\w\|[\w,]/ ? "|" :
504             "," ;
505 87         1308 close $in;
506             }
507 94 50       2858 open $in, "<", $txt or return;
508             }
509             elsif ($io_ref) {
510 10         23 $in = $txt;
511             }
512             elsif (ref $txt eq "SCALAR") {
513 1 50       14 open $in, "<", $txt or croak "Cannot open input: $!";
514             }
515             elsif ($txt =~ m/[\r\n,;]/) {
516 1 50   2   94 open $in, "<", \$txt or croak "Cannot open input: $!";
  2         26  
  2         4  
  2         21  
517             }
518             else {
519 0         0 warn "Input type ", ref $txt,
520             " might not be supported. Please file a ticket\n";
521 0         0 $in = $txt; # Now pray ...
522             }
523 106 50       1398 $debug > 1 and print STDERR "CSV sep_char '$sep', quote_char '$quo'\n";
524             my $csv = $can{csv}->new ({
525             %parser_opts,
526              
527             sep_char => ($data[0]{sepchar} = $sep),
528 106 50       1403 quote_char => ($data[0]{quote} = $quo),
529             keep_meta_info => 1,
530             binary => 1,
531             auto_diag => 1,
532             }) or croak "Cannot create a csv ('$sep', '$quo') parser!";
533              
534 106         23294 while (my $row = $csv->getline ($in)) {
535 413 50       72606 my @row = @$row or last;
536              
537 413         920 my $r = ++$data[1]{maxrow};
538 413 100       1042 @row > $data[1]{maxcol} and $data[1]{maxcol} = @row;
539 413         1153 foreach my $c (0 .. $#row) {
540 2427         3555 my $val = $row[$c];
541 2427         4306 my $cell = cr2cell ($c + 1, $r);
542 2427 100       7000 $opt{rc} and $data[1]{cell}[$c + 1][$r] = $val;
543 2427 100       6482 $opt{cells} and $data[1]{$cell} = $val;
544 2427 50       12226 $opt{attr} and $data[1]{attr}[$c + 1][$r] = { @def_attr };
545             }
546             }
547 106 50       15169 $csv->eof () or $data[0]{error} = [ $csv->error_diag ];
548 106         1601 close $in;
549              
550 106         313 for (@{$data[1]{cell}}) {
  106         536  
551 734 100       1624 defined or $_ = [];
552             }
553 106         437 return _clipsheets \%opt, [ @data ];
554             }
555              
556 155 100       399 if ($io_txt) { # && $_parser !~ m/^xlsx?$/) {
557 29 100 66     475 if ( # /etc/magic: Microsoft Office Document
    50 66        
    50 66        
    100          
558             $txt =~ m{\A(\376\067\0\043
559             |\320\317\021\340\241\261\032\341
560             |\333\245-\0\0\0)}x
561             # /usr/share/misc/magic
562             || $txt =~ m{\A.{2080}Microsoft Excel 5.0 Worksheet}
563             || $txt =~ m{\A\x09\x04\x06\x00\x00\x00\x10\x00}
564             ) {
565 1 50       6 $can{xls} or croak "Spreadsheet::ParseExcel not installed";
566 1         4 my $tmpfile;
567 1 50       5 if ($can{ios}) { # Do not use a temp file if IO::Scalar is available
568 1         3 $tmpfile = \$txt;
569             }
570             else {
571 0         0 $tmpfile = File::Temp->new (SUFFIX => ".xls", UNLINK => 1);
572 0         0 binmode $tmpfile;
573 0         0 print $tmpfile $txt;
574 0         0 close $tmpfile;
575             }
576 1 50       53 open $io_ref, "<", $tmpfile or do { $@ = $!; return };
  0         0  
  0         0  
577 1         1007 $io_txt = 0;
578 1         9 $_parser = _parser ($opt{parser} = "xls");
579             }
580             elsif ( # /usr/share/misc/magic
581             $txt =~ m{\APK\003\004.{4,30}(?:\[Content_Types\]\.xml|_rels/\.rels)}
582             ) {
583 0 0       0 $can{xlsx} or croak "XLSX parser not installed";
584 0         0 my $tmpfile;
585 0 0       0 if ($can{ios}) { # Do not use a temp file if IO::Scalar is available
586 0         0 $tmpfile = \$txt;
587             }
588             else {
589 0         0 $tmpfile = File::Temp->new (SUFFIX => ".xlsx", UNLINK => 1);
590 0         0 binmode $tmpfile;
591 0         0 print $tmpfile $txt;
592 0         0 close $tmpfile;
593             }
594 0 0       0 open $io_ref, "<", $tmpfile or do { $@ = $!; return };
  0         0  
  0         0  
595 0         0 $io_txt = 0;
596 0         0 $_parser = _parser ($opt{parser} = "xlsx");
597             }
598             elsif ( # /usr/share/misc/magic
599             $txt =~ m{\APK\003\004.{9,30}\Qmimetypeapplication/vnd.oasis.opendocument.spreadsheet}
600             ) {
601 0 0       0 $can{ods} or croak "ODS parser not installed";
602 0         0 my $tmpfile;
603 0 0       0 if ($can{ios}) { # Do not use a temp file if IO::Scalar is available
604 0         0 $tmpfile = \$txt;
605             }
606             else {
607 0         0 $tmpfile = File::Temp->new (SUFFIX => ".ods", UNLINK => 1);
608 0         0 binmode $tmpfile;
609 0         0 print $tmpfile $txt;
610 0         0 close $tmpfile;
611             }
612 0 0       0 open $io_ref, "<", $tmpfile or do { $@ = $!; return };
  0         0  
  0         0  
613 0         0 $io_txt = 0;
614 0         0 $_parser = _parser ($opt{parser} = "ods");
615             }
616             elsif (!$io_ref && $txt =~ m/\.xls[xm]?$/i) {
617 3         19 $@ = "Cannot open $txt as file";
618 3         20 return;
619             }
620             }
621 152 100 66     1178 if ($opt{parser} ? $_parser =~ m/^(?:xlsx?)$/
    100          
622             : ($io_fil && $txt =~ m/\.(xls[xm]?)$/i &&
623             ($_parser = _parser ($1)))) {
624 76 100       380 my $parse_type = $_parser =~ m/x$/i ? "XLSX" : "XLS";
625 76 50       326 my $parser = $can{lc $parse_type} or
626             croak "Parser for $parse_type is not installed";
627             #$debug and print STDERR __FILE__, "#", __LINE__, " | $_parser | $parser | $parse_type\n";
628             $debug and print STDERR "Opening $parse_type ", $io_ref ? "<REF>" : $txt,
629 76 50       360 " using $parser-", $can{lc $parse_type}->VERSION, "\n";
    100          
630 76 50       267 $opt{passwd} and $parser_opts{Password} = $opt{passwd};
631 76         152 my $oBook = eval {
632             $io_ref
633             ? $parse_type eq "XLSX"
634             ? $can{xlsx} =~ m/::XLSX$/
635             ? $parser->new ($io_ref)
636             : $parser->new (%parser_opts)->parse ($io_ref)
637             : $parser->new (%parser_opts)->Parse ($io_ref)
638             : $parse_type eq "XLSX"
639 76 0       866 ? $can{xlsx} =~ m/::XLSX$/
    50          
    50          
    100          
    100          
640             ? $parser->new ($txt)
641             : $parser->new (%parser_opts)->parse ($txt)
642             : $parser->new (%parser_opts)->Parse ($txt);
643             };
644 76 50       2249905 unless ($oBook) {
645             # cleanup will fail on folders with spaces.
646 0         0 (my $msg = $@) =~ s/ at \S+ line \d+.*//s;
647 0         0 croak "$parse_type parser cannot parse data: $msg";
648             }
649 76 50       284 $debug > 8 and _dump (oBook => $oBook);
650              
651             # WorkBook keys:
652             # aColor _CurSheet Format SheetCount
653             # ActiveSheet _CurSheet_ FormatStr _skip_chart
654             # Author File NotSetCell _string_contin
655             # BIFFVersion Flg1904 Object Version
656             # _buffer FmtClass PkgStr Worksheet
657             # CellHandler Font _previous_info
658              
659             my @data = ( {
660             type => lc $parse_type,
661             parser => $can{lc $parse_type},
662             version => $can{lc $parse_type}->VERSION,
663             parsers => [{
664             type => lc $parse_type,
665             parser => $can{lc $parse_type},
666             version => $can{lc $parse_type}->VERSION,
667             }],
668             error => undef,
669 76   50     2469 sheets => $oBook->{SheetCount} || 0,
670             sheet => {},
671             } );
672             # Overrule the default date format strings
673             my %def_fmt = (
674             0x0E => lc $opt{dtfmt}, # m-d-yy
675 76         652 0x0F => "d-mmm-yyyy", # d-mmm-yy
676             0x11 => "mmm-yyyy", # mmm-yy
677             0x16 => "yyyy-mm-dd hh:mm", # m-d-yy h:mm
678             );
679 76         586 $oBook->{FormatStr}{$_} = $def_fmt{$_} for keys %def_fmt;
680             my $oFmt = $parse_type eq "XLSX"
681 76 50       742 ? $can{xlsx} =~ m/::XLSX$/
    100          
682             ? Spreadsheet::XLSX::Fmt2007->new
683             : Spreadsheet::ParseExcel::FmtDefault->new
684             : Spreadsheet::ParseExcel::FmtDefault->new;
685              
686 76 50       916 $debug > 20 and _dump ("oBook before conversion", $oBook);
687 76 50 33     306 if ($parse_type eq "ODS" and !exists $oBook->{SheetCount}) {
688 0         0 my $styles = delete $oBook->{_styles};
689 0         0 my $sheets = delete $oBook->{_sheets};
690 0 0 0     0 if ($sheets && ref $sheets eq "ARRAY") {
691 0   0     0 $styles = ($styles || {})->{styles} || {};
692 0         0 $data[0]{sheets} = $oBook->{SheetCount} = scalar @{$sheets};
  0         0  
693 0         0 $oBook->{Worksheet} = [];
694 0     0   0 *S::R::Sheet::get_merged_areas = sub { [] };
  0         0  
695 0         0 my $x = 0;
696 0         0 foreach my $sh (@{$sheets}) {
  0         0  
697 0         0 push @{$oBook->{Worksheet}} => bless {
698             Name => $sh->{label},
699             Cells => [],
700             MinRow => $sh->{col_min},
701             MaxRow => $sh->{row_max},
702             MinCol => $sh->{col_min},
703             MaxCol => $sh->{row_max},
704             RowHidden => $sh->{hidden_rows},
705             ColHidden => $sh->{hidden_cols},
706 0         0 _SheetNo => $x++,
707             } => "S::R::Sheet";
708             # header_cols
709             # header_rows
710             # print_areas
711             # sheet_hidden
712             # tab_color
713 0     0   0 *S::R::Cell::Value = sub { $_[0]{Raw} };
  0         0  
714 0     0   0 *S::R::Cell::is_merged = sub { 0 };
  0         0  
715 0         0 my $r = 0;
716 0         0 foreach my $row (@{$sh->{data}}) {
  0         0  
717             $#$row > $oBook->{Worksheet}[-1]{MaxCol} and
718 0 0       0 $oBook->{Worksheet}[-1]{MaxCol} = $#$row;
719             $oBook->{Worksheet}[-1]{Cells}[$r++] = [ map { bless {
720             Code => undef,
721             Format => $_->{format},
722             Formula => $_->{formula},
723             Hidden => undef,
724             Merged => undef,
725             # use || instead of // for now
726             # even though it is undesirable
727             Type => $_->{type} || "",
728             Val => $_->{value} || $_->{unformatted},
729             Raw => $_->{unformatted} || $_->{value},
730             _Style => $styles->{$_->{style} || ""}
731             || $_->{style},
732             # hyperlink
733 0   0     0 } => "S::R::Cell" } @{$row} ];
  0   0     0  
  0   0     0  
      0        
734             }
735             --$r > $oBook->{Worksheet}[-1]{MaxRow} and
736 0 0       0 $oBook->{Worksheet}[-1]{MaxRow} = $r;
737             }
738             }
739             }
740              
741 76 100       347 $debug and print STDERR "\t$data[0]{sheets} sheets\n";
742             my $active_sheet = $oBook->get_active_sheet
743             || $oBook->{ActiveSheet}
744 76   33     413 || $oBook->{SelectedSheet};
745 76         939 my $current_sheet = 0;
746 76         156 foreach my $oWkS (@{$oBook->{Worksheet}}) {
  76         270  
747 122 50       354 $debug > 8 and _dump ("oWkS", $oWkS);
748 122         198 $current_sheet++;
749 122 100 100     548 $opt{clip} and !defined $oWkS->{Cells} and next; # Skip empty sheets
750             my %sheet = (
751             parser => 0,
752             label => $oWkS->{Name},
753 116         808 maxrow => 0,
754             maxcol => 0,
755             cell => [],
756             attr => [],
757             merged => [],
758             active => 0,
759             );
760             # $debug and $sheet{_parser} = $oWkS;
761 116 50       377 defined $sheet{label} or $sheet{label} = "-- unlabeled --";
762 116 100       415 exists $oWkS->{MinRow} and $sheet{minrow} = $oWkS->{MinRow} + 1;
763 116 100       365 exists $oWkS->{MaxRow} and $sheet{maxrow} = $oWkS->{MaxRow} + 1;
764 116 100       362 exists $oWkS->{MinCol} and $sheet{mincol} = $oWkS->{MinCol} + 1;
765 116 100       295 exists $oWkS->{MaxCol} and $sheet{maxcol} = $oWkS->{MaxCol} + 1;
766             $sheet{merged} = [
767 14         37 map { $_->[0] }
768 5         30 sort { $a->[1] cmp $b->[1] }
769 14         125 map {[ $_, pack "NNNN", @$_ ]}
770 14         100 map {[ map { $_ + 1 } @{$_}[1,0,3,2] ]}
  56         111  
  14         48  
771 116 100       231 @{$oWkS->get_merged_areas || []}];
  116         523  
772 116         1112 my $sheet_idx = 1 + @data;
773 116 100       390 $debug and print STDERR "\tSheet $sheet_idx '$sheet{label}' $sheet{maxrow} x $sheet{maxcol}\n";
774 116 100       289 if (defined $active_sheet) {
775             # _SheetNo is 0-based
776 7 50       21 my $sheet_no = defined $oWkS->{_SheetNo} ? $oWkS->{_SheetNo} : $current_sheet - 1;
777 7 100       23 $sheet_no eq $active_sheet and $sheet{active} = 1;
778             }
779             # Sheet keys:
780             # _Book FooterMargin MinCol RightMargin
781             # BottomMargin FooterMergin MinRow RightMergin
782             # BottomMergin HCenter Name RowHeight
783             # Cells Header NoColor RowHidden
784             # ColFmtNo HeaderMargin NoOrient Scale
785             # ColHidden HeaderMergin NoPls SheetHidden
786             # ColWidth Kind Notes _SheetNo
787             # Copis Landscape PageFit SheetType
788             # DefColWidth LeftMargin PageStart SheetVersion
789             # DefRowHeight LeftMergin PaperSize TopMargin
790             # Draft LeftToRight _Pos TopMergin
791             # FitHeight MaxCol PrintGrid UsePage
792             # FitWidth MaxRow PrintHeaders VCenter
793             # Footer MergedArea Res VRes
794 116 100       309 if (exists $oWkS->{MinRow}) {
795 111   50     550 my $hiddenRows = $oWkS->{RowHidden} || [];
796 111   50     467 my $hiddenCols = $oWkS->{ColHidden} || [];
797 111 100       346 if ($opt{clip}) {
798 92         250 my ($mr, $mc) = (-1, -1);
799 92         304 foreach my $r ($oWkS->{MinRow} .. $sheet{maxrow}) {
800 522         902 foreach my $c ($oWkS->{MinCol} .. $sheet{maxcol}) {
801 2938 100       5485 my $oWkC = $oWkS->{Cells}[$r][$c] or next;
802 1717 50       3279 defined (my $val = $oWkC->{Val}) or next;
803 1717 100       3171 $val eq "" and next;
804 1690 100       2831 $r > $mr and $mr = $r;
805 1690 100       2952 $c > $mc and $mc = $c;
806             }
807             }
808 92         317 ($sheet{maxrow}, $sheet{maxcol}) = ($mr + 1, $mc + 1);
809             }
810 111         294 foreach my $r ($oWkS->{MinRow} .. $sheet{maxrow}) {
811 602         1154 foreach my $c ($oWkS->{MinCol} .. $sheet{maxcol}) {
812 3157 100       6913 my $oWkC = $oWkS->{Cells}[$r][$c] or next;
813             #defined (my $val = $oWkC->{Val}) or next;
814 1818         2808 my $val = $oWkC->{Val};
815 1818 50 33     5564 if (defined $val and my $enc = $oWkC->{Code}) {
816 0 0       0 $enc eq "ucs2" and $val = decode ("utf-16be", $val);
817             }
818 1818         3883 my $cell = cr2cell ($c + 1, $r + 1);
819 1818 100       5312 $opt{rc} and $sheet{cell}[$c + 1][$r + 1] = $val; # Original
820              
821 1818         2461 my $fmt;
822 1818         2669 my $FmT = $oWkC->{Format};
823 1818 100       3087 if ($FmT) {
824 968 100       1888 unless (ref $FmT) {
825 130         186 $fmt = $FmT;
826 130         202 $FmT = {};
827             }
828             }
829             else {
830 850         1343 $FmT = {};
831             }
832 1818         3034 foreach my $attr (qw( AlignH AlignV FmtIdx Hidden Lock
833             Wrap )) {
834 10908 100       20618 exists $FmT->{$attr} or $FmT->{$attr} = 0;
835             }
836 1818 100       3764 exists $FmT->{Fill} or $FmT->{Fill} = [ 0 ];
837 1818 100       3943 exists $FmT->{Font} or $FmT->{Font} = undef;
838              
839 1818 100       3252 unless (defined $fmt) {
840             $fmt = $FmT->{FmtIdx}
841             ? $oBook->{FormatStr}{$FmT->{FmtIdx}}
842 1688 100       3178 : undef;
843             }
844             lc $oWkC->{Type} eq "float" and
845 1818 50       3745 $oWkC->{Type} = "Numeric";
846 1818 100       3223 if ($oWkC->{Type} eq "Numeric") {
847             # Fixed in 0.33 and up
848             # see Spreadsheet/ParseExcel/FmtDefault.pm
849             $FmT->{FmtIdx} == 0x0e ||
850             $FmT->{FmtIdx} == 0x0f ||
851             $FmT->{FmtIdx} == 0x10 ||
852             $FmT->{FmtIdx} == 0x11 ||
853             $FmT->{FmtIdx} == 0x16 ||
854             (defined $fmt && $fmt =~ m{^[dmy][-\\/dmy]*$}) and
855 370 100 33     3321 $oWkC->{Type} = "Date";
      33        
      33        
      33        
      100        
      66        
856             $FmT->{FmtIdx} == 0x09 ||
857             $FmT->{FmtIdx} == 0x0a ||
858             (defined $fmt && $fmt =~ m{^0+\.0+%$}) and
859 370 100 100     2217 $oWkC->{Type} = "Percentage";
      100        
      100        
860             }
861 1818 100       3332 defined $fmt and $fmt =~ s/\\//g;
862             $opt{cells} and # Formatted value
863             $sheet{$cell} = defined $val ? $FmT && exists $def_fmt{$FmT->{FmtIdx}}
864 1818 100 66     9515 ? $oFmt->ValFmt ($oWkC, $oBook)
    50          
    100          
865             : $oWkC->Value : undef;
866 1818 100       15533 if ($opt{attr}) {
867 1542         2221 my $FnT = $FmT->{Font};
868             my $fmi = $FmT->{FmtIdx}
869             ? $oBook->{FormatStr}{$FmT->{FmtIdx}}
870 1542 100       2671 : undef;
871 1542 100       2605 $fmi and $fmi =~ s/\\//g;
872 1542   100     3663 my $merged = (defined $oWkC->{Merged} ? $oWkC->{Merged} : $oWkC->is_merged) || 0;
873             $sheet{attr}[$c + 1][$r + 1] = {
874             @def_attr,
875              
876             type => lc $oWkC->{Type},
877             enc => $oWkC->{Code},
878             merged => $merged,
879             hidden => ($hiddenRows->[$r] || $hiddenCols->[$c] ? 1 :
880             defined $oWkC->{Hidden} ? $oWkC->{Hidden} : $FmT->{Hidden}) || 0,
881             locked => $FmT->{Lock} || 0,
882             format => $fmi,
883             halign => [ undef, qw( left center right
884             fill justify ), undef,
885             "equal_space" ]->[$FmT->{AlignH}],
886             valign => [ qw( top center bottom justify
887             equal_space )]->[$FmT->{AlignV}],
888             wrap => $FmT->{Wrap},
889             font => $FnT->{Name},
890             size => $FnT->{Height},
891             bold => $FnT->{Bold},
892             italic => $FnT->{Italic},
893             uline => $FnT->{Underline},
894             fgcolor => _xls_color ($FnT->{Color}),
895 1542         6211 bgcolor => _xls_fill (@{$FmT->{Fill}}),
896             formula => $oWkC->{Formula},
897 1542   100     21283 };
      100        
898             #_dump "cell", $sheet{attr}[$c + 1][$r + 1];
899 1542 100 100     11792 if ($opt{merge} && $merged and
      66        
900             my $p_cell = Spreadsheet::Read::Sheet::merged_from (\%sheet, $c + 1, $r + 1)) {
901 6         64 warn $p_cell;
902 6         2928 $sheet{attr}[$c + 1][$r + 1]{merged} = $p_cell;
903 6 100       19 if ($cell ne $p_cell) {
904 4         10 my ($C, $R) = cell2cr ($p_cell);
905             $sheet{cell}[$c + 1][$r + 1] =
906 4         14 $sheet{cell}[$C][$R];
907 4         10 $sheet{$cell} = $sheet{$p_cell};
908             }
909             }
910             }
911             }
912             }
913             }
914 116         221 for (@{$sheet{cell}}) {
  116         374  
915 414 100       1023 defined or $_ = [];
916             }
917 116         1962 push @data, { %sheet };
918             # $data[0]{sheets}++;
919 116 50       507 if ($sheet{label} eq "-- unlabeled --") {
920 0         0 $sheet{label} = "";
921             }
922             else {
923 116         738 $data[0]{sheet}{$sheet{label}} = $#data;
924             }
925             }
926 76         407 return _clipsheets \%opt, [ @data ];
927             }
928 76 100 33     487 if ($opt{parser} ? $_parser =~ m/^(ods)$/
    50 0        
      33        
929             : ($io_fil && $txt =~ m/(ods)$/i &&
930             ($_parser = _parser ($1)))
931             and ($can{$_parser} || "") !~ m/sxc/i) {
932 0         0 my $parse_type = "ODS";
933 0 0       0 my $parser = $can{lc $parse_type} or
934             croak "Parser for $parse_type is not installed";
935             #$debug and print STDERR __FILE__, "#", __LINE__, " | $_parser | $parser | $parse_type\n";
936             $debug and print STDERR "Opening $parse_type ", $io_ref ? "<REF>" : $txt,
937 0 0       0 " using $parser-", $can{lc $parse_type}->VERSION, "\n";
    0          
938 0 0       0 $opt{passwd} and $parser_opts{Password} = $opt{passwd};
939 0         0 my $oBook = eval {
940 0 0       0 $io_ref
941             ? $parser->new (readonly => 1, %parser_opts)->parse ($io_ref)
942             : $parser->new (readonly => 1, %parser_opts)->parse ($txt)
943             };
944 0 0       0 unless ($oBook) {
945             # cleanup will fail on folders with spaces.
946 0         0 (my $msg = $@) =~ s/ at \S+ line \d+.*//s;
947 0         0 croak "$parse_type parser cannot parse data: $msg";
948             }
949 0 0       0 $debug > 8 and _dump (oBook => $oBook);
950              
951             my @data = ( {
952             type => lc $parse_type,
953             parser => $can{lc $parse_type},
954             version => $can{lc $parse_type}->VERSION,
955             parsers => [{
956             type => lc $parse_type,
957             parser => $can{lc $parse_type},
958 0         0 version => $can{lc $parse_type}->VERSION,
959             }],
960             error => undef,
961             sheets => scalar $oBook->worksheets,
962             sheet => {},
963             } );
964             # $debug and $data[0]{_parser} = $oBook;
965              
966 0 0       0 $debug and print STDERR "\t$data[0]{sheets} sheets\n";
967 0         0 my $active_sheet = $oBook->get_active_sheet;
968 0         0 my $current_sheet = 0;
969 0         0 foreach my $oWkS ($oBook->worksheets) {
970 0         0 $current_sheet++;
971 0 0 0     0 $opt{clip} && $oWkS->row_max < $oWkS->row_min
      0        
972             && $oWkS->col_max < $oWkS->col_min and next; # Skip empty sheets
973 0         0 my %sheet = (
974             parser => 0,
975             label => $oWkS->label,
976             maxrow => $oWkS->row_max+1,
977             maxcol => $oWkS->col_max+1,
978             cell => [],
979             attr => [],
980             merged => [],
981             active => 0,
982             );
983             # $debug and $sheet{_parser} = $oWkS;
984 0 0       0 defined $sheet{label} or $sheet{label} = "-- unlabeled --";
985             $sheet{merged} = [
986 0         0 map { $_->[0] }
987 0         0 sort { $a->[1] cmp $b->[1] }
988 0         0 map {[ $_, pack "NNNN", @$_ ]}
989 0         0 map {[ map { $_ + 1 } @{$_}[1,0,3,2] ]}
  0         0  
  0         0  
990 0 0       0 @{$oWkS->get_merged_areas || []}];
  0         0  
991 0         0 my $sheet_idx = 1 + @data;
992 0 0       0 $debug and print STDERR "\tSheet $sheet_idx '$sheet{label}' $sheet{maxrow} x $sheet{maxcol}\n";
993 0 0       0 if (defined $active_sheet) {
994 0         0 my $sheet_no = $current_sheet - 1;
995 0 0       0 $sheet_no eq $active_sheet and $sheet{active} = 1;
996             }
997 0   0     0 my $hiddenRows = $oWkS->hidden_rows || [];
998 0   0     0 my $hiddenCols = $oWkS->hidden_cols || [];
999 0 0       0 if ($opt{clip}) {
1000 0         0 my ($mr, $mc) = (-1, -1);
1001 0         0 foreach my $r ($oWkS->row_min .. $sheet{maxrow}-1) {
1002 0         0 foreach my $c ($oWkS->col_min .. $sheet{maxcol}-1) {
1003 0 0       0 my $oWkC = $oWkS->get_cell($r, $c) or next;
1004 0 0       0 defined (my $val = $oWkC->value) or next;
1005 0 0       0 $val eq "" and next;
1006 0 0       0 $r > $mr and $mr = $r;
1007 0 0       0 $c > $mc and $mc = $c;
1008             }
1009             }
1010 0         0 ($sheet{maxrow}, $sheet{maxcol}) = ($mr + 1, $mc + 1);
1011             }
1012 0         0 foreach my $r ($oWkS->row_min .. $sheet{maxrow}) {
1013 0         0 foreach my $c ($oWkS->col_min .. $sheet{maxcol}) {
1014 0 0       0 my $oWkC = $oWkS->get_cell($r, $c) or next;
1015 0         0 my $val = $oWkC->unformatted;
1016             #if (defined $val and my $enc = $oWkC->{Code}) {
1017             # $enc eq "ucs2" and $val = decode ("utf-16be", $val);
1018             # }
1019 0         0 my $cell = cr2cell ($c + 1, $r + 1);
1020 0 0       0 $opt{rc} and $sheet{cell}[$c + 1][$r + 1] = $val; # Original
1021              
1022 0         0 my $fmt;
1023 0         0 my $styleName = $oWkC->style;
1024 0         0 my $FmT;
1025 0 0 0     0 if ($styleName && defined (my $s = $oBook->_styles->{$styleName})) {
1026 0         0 $fmt = $s;
1027             }
1028              
1029 0 0       0 defined $fmt and $fmt =~ s/\\//g;
1030             $opt{cells} and # Formatted value
1031 0 0       0 $sheet{$cell} = defined $val ? $oWkC->value : undef;
    0          
1032 0 0       0 if ($opt{attr}) {
1033 0 0       0 my $FnT = $FmT ? $FmT->{font_face} : undef;
1034 0         0 my $fmi;
1035             #my $fmi = $FmT ? $FmT->{FmtIdx}
1036             # ? $oBook->{FormatStr}{$FmT->{FmtIdx}}
1037             # : undef;
1038             #$fmi and $fmi =~ s/\\//g;
1039 0   0     0 my $type = $oWkC->type || '';
1040 0 0       0 $type eq "float" and $type = "numeric";
1041              
1042 0   0     0 my $merged = $oWkC->is_merged || 0;
1043 0   0     0 $sheet{attr}[$c + 1][$r + 1] = {
1044             @def_attr,
1045              
1046             type => $type,
1047             # enc => $oWkC->{Code},
1048             merged => $merged,
1049             hidden => ($hiddenRows->[$r] || $hiddenCols->[$c] ? 1 :
1050             $oWkC->is_hidden ? $oWkC->is_hidden : undef) || 0,
1051             # locked => $FmT->{Lock} || 0,
1052             format => $fmi,
1053             # halign => [ undef, qw( left center right
1054             # fill justify ), undef,
1055             # "equal_space" ]->[$FmT->{AlignH}],
1056             # valign => [ qw( top center bottom justify
1057             # equal_space )]->[$FmT->{AlignV}],
1058             # wrap => $FmT->{Wrap},
1059             # font => $FnT->{Name},
1060             # size => $FnT->{Height},
1061             # bold => $FnT->{Bold},
1062             # italic => $FnT->{Italic},
1063             # uline => $FnT->{Underline},
1064             # fgcolor => _xls_color ($FnT->{Color}),
1065             # bgcolor => _xls_fill (@{$FmT->{Fill}}),
1066             formula => $oWkC->formula,
1067             };
1068             #_dump "cell", $sheet{attr}[$c + 1][$r + 1];
1069 0 0 0     0 if ($opt{merge} && $merged and
      0        
1070             my $p_cell = Spreadsheet::Read::Sheet::merged_from(\%sheet, $c + 1, $r + 1)) {
1071 0         0 $sheet{attr}[$c + 1][$r + 1]{merged} = $p_cell;
1072 0 0       0 if ($cell ne $p_cell) {
1073 0         0 my ($C, $R) = cell2cr ($p_cell);
1074             $sheet{cell}[$c + 1][$r + 1] =
1075 0         0 $sheet{cell}[$C][$R];
1076 0         0 $sheet{$cell} = $sheet{$p_cell};
1077             }
1078             }
1079             }
1080             }
1081             }
1082 0         0 for (@{$sheet{cell}}) {
  0         0  
1083 0 0       0 defined or $_ = [];
1084             }
1085 0         0 push @data, { %sheet };
1086 0 0       0 if ($sheet{label} eq "-- unlabeled --") {
1087 0         0 $sheet{label} = "";
1088             }
1089             else {
1090 0         0 $data[0]{sheet}{$sheet{label}} = $#data;
1091             }
1092             }
1093 0         0 return _clipsheets \%opt, [ @data ];
1094             }
1095              
1096 76 100       418 if ($opt{parser} ? _parser ($opt{parser}) eq "sc"
    100          
    100          
1097             : $io_fil
1098             ? $txt =~ m/\.sc$/
1099             : $txt =~ m/^# .*SquirrelCalc/) {
1100 46 100       111 if ($io_ref) {
    100          
1101 2         8 local $/;
1102 2         35 my $x = <$txt>;
1103 2         10 $txt = $x;
1104             }
1105             elsif ($io_fil) {
1106 42         197 local $/;
1107 42 50       1546 open my $sc, "<", $txt or return;
1108 42         1839 $txt = <$sc>;
1109 42         750 close $sc;
1110             }
1111 46 50       297 $txt =~ m/\S/ or return;
1112 46 50       158 my $label = defined $opt{label} ? $opt{label} : "sheet";
1113 46         593 my @data = (
1114             { type => "sc",
1115             parser => "Spreadsheet::Read",
1116             version => $VERSION,
1117             parsers => [{
1118             type => "sc",
1119             parser => "Spreadsheet::Read",
1120             version => $VERSION,
1121             }],
1122             error => undef,
1123             sheets => 1,
1124             sheet => { $label => 1 },
1125             },
1126             { parser => 0,
1127             label => $label,
1128             maxrow => 0,
1129             maxcol => 0,
1130             cell => [],
1131             attr => [],
1132             merged => [],
1133             active => 1,
1134             },
1135             );
1136              
1137 46         9671 for (split m/\s*[\r\n]\s*/, $txt) {
1138 13478 100       21913 if (m/^dimension.*of ([0-9]+) rows.*of ([0-9]+) columns/i) {
1139 46         77 @{$data[1]}{qw(maxrow maxcol)} = ($1, $2);
  46         232  
1140 46         112 next;
1141             }
1142 13432 100       32829 s/^r([0-9]+)c([0-9]+)\s*=\s*// or next;
1143 3542         6394 my ($c, $r) = map { $_ + 1 } $2, $1;
  7084         15274  
1144 3542 100 66     15102 if (m/.* \{(.*)}$/ or m/"(.*)"/) {
1145 2714         5077 my $cell = cr2cell ($c, $r);
1146 2714 100       8367 $opt{rc} and $data[1]{cell}[$c][$r] = $1;
1147 2714 50       7592 $opt{cells} and $data[1]{$cell} = $1;
1148 2714 100       6552 $opt{attr} and $data[1]{attr}[$c + 1][$r] = { @def_attr };
1149 2714         4821 next;
1150             }
1151             # Now only formula's remain. Ignore for now
1152             # r67c7 = [P2L] 2*(1000*r67c5-60)
1153             }
1154 46         770 for (@{$data[1]{cell}}) {
  46         171  
1155 360 100       713 defined or $_ = [];
1156             }
1157 46         155 return _clipsheets \%opt, [ @data ];
1158             }
1159              
1160 30 50 66     408 if ($opt{parser} ? _parser ($opt{parser}) eq "sxc"
    100          
1161             : ($txt =~ m/^<\?xml/ or -f $txt)) {
1162 7 50       997 $can{sxc} or croak "Spreadsheet::ReadSXC not installed";
1163              
1164 0 0 0     0 ref $txt && $can{sxc}->VERSION <= 0.23 and
1165             croak ("Sorry, references as input are not supported by Spreadsheet::ReadSXC before 0.23");
1166              
1167 0         0 my $using = "using $can{sxc}-" . $can{sxc}->VERSION;
1168 0         0 my $sxc_options = { %parser_opts, OrderBySheet => 1 }; # New interface 0.20 and up
1169 0         0 my $sxc;
1170 0 0 0     0 if ($txt =~ m/\.(sxc|ods)$/i) {
    0          
    0          
    0          
1171 0 0       0 $debug and print STDERR "Opening \U$1\E $txt $using\n";
1172 0 0       0 $debug and print STDERR __FILE__, "#", __LINE__, "\n";
1173 0 0       0 $sxc = Spreadsheet::ReadSXC::read_sxc ($txt, $sxc_options) or return;
1174             }
1175             # treat all refs as a filehandle
1176             elsif (ref $txt) {
1177 0 0       0 $debug and print STDERR "Opening SXC filehandle\n";
1178 0 0       0 $sxc = Spreadsheet::ReadSXC::read_sxc_fh ($txt, $sxc_options) or return;
1179             }
1180             elsif ($txt =~ m/\.xml$/i) {
1181 0 0       0 $debug and print STDERR "Opening XML $txt $using\n";
1182 0 0       0 $sxc = Spreadsheet::ReadSXC::read_xml_file ($txt, $sxc_options) or return;
1183             }
1184             # need to test on pattern to prevent stat warning
1185             # on filename with newline
1186             elsif ($txt !~ m/^<\?xml/i and -f $txt) {
1187 0 0       0 $debug and print STDERR "Opening XML $txt $using\n";
1188 0 0       0 open my $f, "<", $txt or return;
1189 0         0 local $/;
1190 0         0 $txt = <$f>;
1191 0         0 close $f;
1192             }
1193 0 0 0     0 !$sxc && $txt =~ m/^<\?xml/i and
1194             $sxc = Spreadsheet::ReadSXC::read_xml_string ($txt, $sxc_options);
1195 0 0       0 $debug > 8 and _dump (sxc => $sxc);
1196 0 0       0 if ($sxc) {
1197 0         0 my @data = ( {
1198             type => "sxc",
1199             parser => "Spreadsheet::ReadSXC",
1200             version => $Spreadsheet::ReadSXC::VERSION,
1201             parsers => [{
1202             type => "sxc",
1203             parser => "Spreadsheet::ReadSXC",
1204             version => $Spreadsheet::ReadSXC::VERSION,
1205             }],
1206             error => undef,
1207             sheets => 0,
1208             sheet => {},
1209             } );
1210             my @sheets = ref $sxc eq "HASH" # < 0.20
1211             ? map {
1212             { label => $_,
1213 0         0 data => $sxc->{$_},
1214             }
1215             } keys %$sxc
1216 0 0       0 : @{$sxc};
  0         0  
1217 0         0 foreach my $sheet (@sheets) {
1218 0 0       0 my @sheet = @{$sheet->{data} || []};
  0         0  
1219             my %sheet = (
1220             parser => 0,
1221             label => $sheet->{label},
1222 0         0 maxrow => scalar @sheet,
1223             maxcol => 0,
1224             cell => [],
1225             attr => [],
1226             merged => [],
1227             active => 0,
1228             );
1229 0         0 my $sheet_idx = 1 + @data;
1230 0 0       0 $debug and print STDERR "\tSheet $sheet_idx '$sheet{label}' $sheet{maxrow} rows\n";
1231 0         0 foreach my $r (0 .. $#sheet) {
1232 0 0       0 my @row = @{$sheet[$r]} or next;
  0         0  
1233 0         0 foreach my $c (0 .. $#row) {
1234 0 0       0 defined (my $val = $row[$c]) or next;
1235 0         0 my $C = $c + 1;
1236 0 0       0 $C > $sheet{maxcol} and $sheet{maxcol} = $C;
1237 0         0 my $cell = cr2cell ($C, $r + 1);
1238 0 0       0 $opt{rc} and $sheet{cell}[$C][$r + 1] = $val;
1239 0 0       0 $opt{cells} and $sheet{$cell} = $val;
1240 0 0       0 $opt{attr} and $sheet{attr}[$C][$r + 1] = { @def_attr };
1241             }
1242             }
1243 0         0 for (@{$sheet{cell}}) {
  0         0  
1244 0 0       0 defined or $_ = [];
1245             }
1246 0 0       0 $debug and print STDERR "\tSheet $sheet_idx '$sheet{label}' $sheet{maxrow} x $sheet{maxcol}\n";
1247 0         0 push @data, { %sheet };
1248 0         0 $data[0]{sheets}++;
1249 0         0 $data[0]{sheet}{$sheet->{label}} = $#data;
1250             }
1251 0         0 return _clipsheets \%opt, [ @data ];
1252             }
1253             }
1254              
1255 23 100 100     144 if (!ref $txt and $txt =~ m/\.\w+$/) {
1256             # Return (localized) system message
1257 5 50       130 open my $fh, "<", $txt and
1258             croak "I can open file $txt, but I do not know how to parse it\n";
1259              
1260 5         93 $@ = $!;
1261             }
1262              
1263 23         128 return;
1264             } # ReadData
1265              
1266             sub add {
1267 2     2 1 7 my $book = shift;
1268 2 50       10 my $r = ReadData (@_) or return;
1269             $book && (ref $book eq "ARRAY" ||
1270 2 50 33     28 ref $book eq __PACKAGE__) && $book->[0]{sheets} or return $r;
      33        
      33        
1271              
1272 2         5 my $c1 = $book->[0];
1273 2         6 my $c2 = $r->[0];
1274              
1275 2 50       7 unless ($c1->{parsers}) {
1276 0         0 $c1->{parsers}[0]{$_} = $c1->{$_} for qw( type parser version );
1277 0         0 $book->[$_]{parser} = 0 for 1 .. $c1->{sheets};
1278             }
1279 2         5 my ($pidx) = (grep { my $p = $c1->{parsers}[$_];
1280             $p->{type} eq $c2->{type} &&
1281             $p->{parser} eq $c2->{parser} &&
1282 2 50 33     6 $p->{version} eq $c2->{version} } 0 .. $#{$c1->{parsers}});
  2         26  
  2         7  
1283 2 50       8 unless (defined $pidx) {
1284 0         0 $pidx = scalar @{$c1->{parsers}};
  0         0  
1285 0         0 $c1->{parsers}[$pidx]{$_} = $c2->{$_} for qw( type parser version );
1286             }
1287              
1288 2         6 foreach my $sn (sort { $c2->{sheet}{$a} <=> $c2->{sheet}{$b} } keys %{$c2->{sheet}}) {
  0         0  
  2         12  
1289 2         5 my $s = $sn;
1290 2         4 my $v = 2;
1291 2         9 while (exists $c1->{sheet}{$s}) {
1292 1         6 $s = $sn."[".$v++."]";
1293             }
1294 2         7 $c1->{sheet}{$s} = $c1->{sheets} + $c2->{sheet}{$sn};
1295 2         6 $r->[$c2->{sheet}{$sn}]{parser} = $pidx;
1296 2         9 push @$book, $r->[$c2->{sheet}{$sn}];
1297             }
1298 2         5 $c1->{sheets} += $c2->{sheets};
1299              
1300 2         18 return $book;
1301             } # add
1302              
1303             package Spreadsheet::Read::Attribute;
1304              
1305 58     58   660 use Carp;
  58         153  
  58         4960  
1306 58     58   492 use vars qw( $AUTOLOAD );
  58         185  
  58         85151  
1307              
1308             sub AUTOLOAD {
1309 33     33   1778 my $self = shift;
1310 33         132 (my $attr = $AUTOLOAD) =~ s/.*:://;
1311 33         463 $self->{$attr};
1312             } # AUTOLOAD
1313              
1314             package Spreadsheet::Read::Sheet;
1315              
1316             sub cell {
1317 55     55   19960 my ($sheet, @id) = @_;
1318             @id == 2 && $id[0] =~ m/^[0-9]+$/ && $id[1] =~ m/^[0-9]+$/ and
1319 55 50 66     508 return $sheet->{cell}[$id[0]][$id[1]];
      66        
1320             @id && $id[0] && exists $sheet->{$id[0]} and
1321 27 50 33     269 return $sheet->{$id[0]};
      33        
1322             } # cell
1323              
1324             sub attr {
1325 26     26   63 my ($sheet, @id) = @_;
1326 26         36 my $class = "Spreadsheet::Read::Attribute";
1327             @id == 2 && $id[0] =~ m/^[0-9]+$/ && $id[1] =~ m/^[0-9]+$/ and
1328 26 50 66     239 return bless $sheet->{attr}[$id[0]][$id[1]] => $class;
      66        
1329 14 50 33     82 if (@id && $id[0] && exists $sheet->{$id[0]}) {
      33        
1330 14         38 my ($c, $r) = $sheet->cell2cr ($id[0]);
1331 14         124 return bless $sheet->{attr}[$c][$r] => $class;
1332             }
1333 0         0 undef;
1334             } # attr
1335              
1336             sub maxrow {
1337 2     2   4 my $sheet = shift;
1338 2         13 return $sheet->{maxrow};
1339             } # maxrow
1340              
1341             sub maxcol {
1342 2     2   5 my $sheet = shift;
1343 2         11 return $sheet->{maxcol};
1344             } # maxrow
1345              
1346             sub col2label {
1347 1 50   1   7 $_[0] =~ m/::/ and shift; # class unused
1348 1         6 return Spreadsheet::Read::col2label (@_);
1349             } # col2label
1350              
1351             sub cr2cell {
1352 34 100   34   88 $_[0] =~ m/::/ and shift; # class unused
1353 34         82 return Spreadsheet::Read::cr2cell (@_);
1354             } # cr2cell
1355              
1356             sub cell2cr {
1357 33 100   33   11908 $_[0] =~ m/::/ and shift; # class unused
1358 33         93 return Spreadsheet::Read::cell2cr (@_);
1359             } # cell2cr
1360              
1361             sub label {
1362 2     2   7 my ($sheet, $label) = @_;
1363 2 100       9 defined $label and $sheet->{label} = $label;
1364 2         10 return $sheet->{label};
1365             } # label
1366              
1367             sub active {
1368 1     1   4 my $sheet = shift;
1369 1         5 return $sheet->{active};
1370             } # label
1371              
1372             # my @row = $sheet->cellrow (1);
1373             sub cellrow {
1374 4     4   10 my ($sheet, $row) = @_;
1375 4 100 66     35 defined $row && $row > 0 && $row <= $sheet->{maxrow} or return;
      100        
1376 1         4 my $s = $sheet->{cell};
1377 1         4 map { $s->[$_][$row] } 1..$sheet->{maxcol};
  19         39  
1378             } # cellrow
1379              
1380             # my @row = $sheet->row (1);
1381             sub row {
1382 4     4   9 my ($sheet, $row) = @_;
1383 4 100 66     38 defined $row && $row > 0 && $row <= $sheet->{maxrow} or return;
      100        
1384 1         8 map { $sheet->{$sheet->cr2cell ($_, $row)} } 1..$sheet->{maxcol};
  19         34  
1385             } # row
1386              
1387             # my @col = $sheet->cellcolumn (1);
1388             sub cellcolumn {
1389 4     4   13 my ($sheet, $col) = @_;
1390 4 100 66     38 defined $col && $col > 0 && $col <= $sheet->{maxcol} or return;
      100        
1391 1         3 my $s = $sheet->{cell};
1392 1         4 map { $s->[$col][$_] } 1..$sheet->{maxrow};
  5         31  
1393             } # cellcolumn
1394              
1395             # my @col = $sheet->column (1);
1396             sub column {
1397 4     4   14 my ($sheet, $col) = @_;
1398 4 100 66     41 defined $col && $col > 0 && $col <= $sheet->{maxcol} or return;
      100        
1399 1         6 map { $sheet->{$sheet->cr2cell ($col, $_)} } 1..$sheet->{maxrow};
  5         11  
1400             } # column
1401              
1402             # Convert {cell}'s [column][row] to a [row][column] list
1403             # my @rows = $sheet->rows ();
1404             sub rows {
1405 1     1   3 my $sheet = shift;
1406 1         3 my $s = $sheet->{cell};
1407              
1408             map {
1409 5         10 my $r = $_;
1410 5         10 [ map { $s->[$_][$r] } 1..$sheet->{maxcol} ];
  95         154  
1411 1         8 } 1..$sheet->{maxrow};
1412             } # rows
1413              
1414             sub merged_from {
1415 17     17   46 my ($sheet, @id, $col, $row) = @_;
1416 17 50       39 my $ma = $sheet->{merged} or return;
1417 17 100 66     148 if (@id == 2 && $id[0] =~ m/^[0-9]+$/ && $id[1] =~ m/^[0-9]+$/) {
    100 66        
      33        
      66        
1418 10         23 ($col, $row) = @id;
1419             }
1420             elsif (@id && $id[0] && exists $sheet->{$id[0]}) {
1421 3         10 ($col, $row) = cell2cr ($id[0]);
1422             }
1423 17 100 66     87 defined $row && $row > 0 && $row <= $sheet->{maxrow} or return;
      100        
1424 12 50 33     48 defined $col && $col > 0 && $col <= $sheet->{maxcol} or return;
      33        
1425 12         18 foreach my $range (@{$ma}) {
  12         21  
1426 22         26 my ($ctl, $rtl, $cbr, $rbr) = @{$range};
  22         32  
1427 22 100 100     66 $col >= $ctl && $col <= $cbr or next;
1428 12 100 66     30 $row >= $rtl && $row <= $rbr or next;
1429 10         25 return cr2cell ($ctl, $rtl);
1430             }
1431             } # cell
1432              
1433             1;
1434              
1435             __END__
1436             =head1 DESCRIPTION
1437              
1438             Spreadsheet::Read tries to transparently read *any* spreadsheet and
1439             return its content in a universal manner independent of the parsing
1440             module that does the actual spreadsheet scanning.
1441              
1442             For OpenOffice and/or LibreOffice this module uses
1443             L<Spreadsheet::ReadSXC|https://metacpan.org/release/Spreadsheet-ReadSXC>
1444              
1445             For Microsoft Excel this module uses
1446             L<Spreadsheet::ParseExcel|https://metacpan.org/release/Spreadsheet-ParseExcel>,
1447             L<Spreadsheet::ParseXLSX|https://metacpan.org/release/Spreadsheet-ParseXLSX>, or
1448             L<Spreadsheet::XLSX|https://metacpan.org/release/Spreadsheet-XLSX> (stronly
1449             discouraged).
1450              
1451             For CSV this module uses L<Text::CSV_XS|https://metacpan.org/release/Text-CSV_XS>
1452             or L<Text::CSV_PP|https://metacpan.org/release/Text-CSV>.
1453              
1454             For SquirrelCalc there is a very simplistic built-in parser
1455              
1456             =head2 Data structure
1457              
1458             The data is returned as an array reference:
1459              
1460             $book = [
1461             # Entry 0 is the overall control hash
1462             { sheets => 2,
1463             sheet => {
1464             "Sheet 1" => 1,
1465             "Sheet 2" => 2,
1466             },
1467             parsers => [ {
1468             type => "xls",
1469             parser => "Spreadsheet::ParseExcel",
1470             version => 0.59,
1471             }],
1472             error => undef,
1473             },
1474             # Entry 1 is the first sheet
1475             { parser => 0,
1476             label => "Sheet 1",
1477             maxrow => 2,
1478             maxcol => 4,
1479             cell => [ undef,
1480             [ undef, 1 ],
1481             [ undef, undef, undef, undef, undef, "Nugget" ],
1482             ],
1483             attr => [],
1484             merged => [],
1485             active => 1,
1486             A1 => 1,
1487             B5 => "Nugget",
1488             },
1489             # Entry 2 is the second sheet
1490             { parser => 0,
1491             label => "Sheet 2",
1492             :
1493             :
1494              
1495             To keep as close contact to spreadsheet users, row and column 1 have
1496             index 1 too in the C<cell> element of the sheet hash, so cell "A1" is
1497             the same as C<cell> [1, 1] (column first). To switch between the two,
1498             there are helper functions available: C<cell2cr ()>, C<cr2cell ()>,
1499             and C<col2label ()>.
1500              
1501             The C<cell> hash entry contains unformatted data, while the hash entries
1502             with the traditional labels contain the formatted values (if applicable).
1503              
1504             The control hash (the first entry in the returned array ref), contains
1505             some spreadsheet meta-data. The entry C<sheet> is there to be able to find
1506             the sheets when accessing them by name:
1507              
1508             my %sheet2 = %{$book->[$book->[0]{sheet}{"Sheet 2"}]};
1509              
1510             =head2 Formatted vs Unformatted
1511              
1512             The difference between formatted and unformatted cells is that the (optional)
1513             format is applied to the cell or not. This part is B<completely> implemented
1514             on the parser side. Spreadsheet::Read just makes both available if these are
1515             supported. Options provide means to disable either. If the parser does not
1516             provide formatted cells - like CSV - both values are equal.
1517              
1518             To show what this implies:
1519              
1520             use Spreadsheet::Read;
1521              
1522             my $file = "files/example.xlsx";
1523             my $workbook = Spreadsheet::Read->new ($file);
1524              
1525             my $info = $workbook->[0];
1526             say "Parsed $file with $info->{parser}-$info->{version}";
1527              
1528             my $sheet = $workbook->sheet (1);
1529              
1530             say join "\t" => "Formatted:", $sheet->row (1);
1531             say join "\t" => "Unformatted:", $sheet->cellrow (1);
1532              
1533             Might return very different results depending one the underlying parser (and
1534             its version):
1535              
1536             Parsed files/example.xlsx with Spreadsheet::ParseXLSX-0.27
1537             Formatted: 8-Aug Foo & Barr < Quux
1538             Unformatted: 39668 Foo & Barr < Quux
1539              
1540             Parsed files/example.xlsx with Spreadsheet::XLSX-0.15
1541             Formatted: 39668 Foo &amp; Barr &lt; Quux
1542             Unformatted: 39668 Foo &amp; Barr &lt; Quux
1543              
1544             =head2 Functions and methods
1545              
1546             =head3 new
1547              
1548             my $book = Spreadsheet::Read->new (...) or die $@;
1549              
1550             All options accepted by ReadData are accepted by new.
1551              
1552             With no arguments at all, $book will be an object where sheets can be added
1553             using C<add>
1554              
1555             my $book = Spreadsheet::Read->new ();
1556             $book->add ("file.csv");
1557             $book->add ("file.cslx");
1558              
1559             =head3 ReadData
1560              
1561             my $book = ReadData ($source [, option => value [, ... ]]);
1562              
1563             my $book = ReadData ("file.csv", sep => ',', quote => '"');
1564              
1565             my $book = ReadData ("file.xls", dtfmt => "yyyy-mm-dd");
1566              
1567             my $book = ReadData ("file.ods");
1568              
1569             my $book = ReadData ("file.sxc");
1570              
1571             my $book = ReadData ("content.xml");
1572              
1573             my $book = ReadData ($content);
1574              
1575             my $book = ReadData ($content, parser => "xlsx");
1576              
1577             my $book = ReadData ($fh, parser => "xlsx");
1578              
1579             my $book = ReadData (\$content, parser => "xlsx");
1580              
1581             Tries to convert the given file, string, or stream to the data structure
1582             described above.
1583              
1584             Processing Excel data from a stream or content is supported through a
1585             L<File::Temp|https://metacpan.org/release/File-Temp> temporary file or
1586             L<IO::Scalar|https://metacpan.org/release/IO-stringy> when available.
1587              
1588             L<Spreadsheet::ReadSXC|https://metacpan.org/release/Spreadsheet-ReadSXC>
1589             does preserve sheet order as of version 0.20.
1590              
1591             Choosing between C<$content> and C<\\$content> (with or without passing
1592             the desired C<parser> option) may be depending on trial and terror.
1593             C<ReadData> does try to determine parser type on content if needed, but
1594             not all combinations are checked, and not all signatures are builtin.
1595              
1596             Currently supported options are:
1597              
1598             =over 2
1599              
1600             =item parser
1601             X<parser>
1602              
1603             Force the data to be parsed by a specific format. Possible values are
1604             C<csv>, C<prl> (or C<perl>), C<sc> (or C<squirelcalc>), C<sxc> (or C<oo>,
1605             C<ods>, C<openoffice>, C<libreoffice>) C<xls> (or C<excel>), and C<xlsx>
1606             (or C<excel2007>).
1607              
1608             When parsing streams, instead of files, it is highly recommended to pass
1609             this option.
1610              
1611             Spreadsheet::Read supports several underlying parsers per spreadsheet
1612             type. It will try those from most favored to least favored. When you
1613             have a good reason to prefer a different parser, you can set that in
1614             environment variables. The other options then will not be tested for:
1615              
1616             env SPREADSHEET_READ_CSV=Text::CSV_PP ...
1617              
1618             =item cells
1619             X<cells>
1620              
1621             Control the generation of named cells ("C<A1>" etc). Default is true.
1622              
1623             =item rc
1624              
1625             Control the generation of the {cell}[c][r] entries. Default is true.
1626              
1627             =item attr
1628              
1629             Control the generation of the {attr}[c][r] entries. Default is false.
1630             See L</Cell Attributes> below.
1631              
1632             =item clip
1633              
1634             If set, L<C<ReadData>|/ReadData> will remove all trailing rows and columns
1635             per sheet that have no data, where no data means only undefined or empty
1636             cells (after optional stripping). If a sheet has no data at all, the sheet
1637             will be skipped entirely when this attribute is true.
1638              
1639             =item strip
1640              
1641             If set, L<C<ReadData>|/ReadData> will remove trailing- and/or
1642             leading-whitespace from every field.
1643              
1644             strip leading strailing
1645             ----- ------- ---------
1646             0 n/a n/a
1647             1 strip n/a
1648             2 n/a strip
1649             3 strip strip
1650              
1651             =item pivot
1652              
1653             Swap all rows and columns.
1654              
1655             When a sheet contains data like
1656              
1657             A1 B1 C1 E1
1658             A2 C2 D2
1659             A3 B3 C3 D3 E3
1660              
1661             using C<pivot> will return the sheet data as
1662              
1663             A1 A2 A3
1664             B1 B3
1665             C1 C2 C3
1666             D2 D3
1667             E1 E3
1668              
1669             =item sep
1670              
1671             Set separator for CSV. Default is comma C<,>.
1672              
1673             =item quote
1674              
1675             Set quote character for CSV. Default is C<">.
1676              
1677             =item dtfmt
1678              
1679             Set the format for MS-Excel date fields that are set to use the default
1680             date format. The default format in Excel is "C<m-d-yy>", which is both
1681             not year 2000 safe, nor very useful. The default is now "C<yyyy-mm-dd>",
1682             which is more ISO-like.
1683              
1684             Note that date formatting in MS-Excel is not reliable at all, as it will
1685             store/replace/change the date field separator in already stored formats
1686             if you change your locale settings. So the above mentioned default can
1687             be either "C<m-d-yy>" OR "C<m/d/yy>" depending on what that specific
1688             character happened to be at the time the user saved the file.
1689              
1690             =item merge
1691              
1692             Copy content to all cells in merged areas.
1693              
1694             If supported, this will copy formatted and unformatted values from the
1695             top-left cell of a merged area to all other cells in the area.
1696              
1697             =item debug
1698              
1699             Enable some diagnostic messages to STDERR.
1700              
1701             The value determines how much diagnostics are dumped (using
1702             L<Data::Peek|https://metacpan.org/release/Data-Peek>). A value of C<9>
1703             and higher will dump the entire structure from the back-end parser.
1704              
1705             =item passwd
1706              
1707             Use this password to decrypt password protected spreadsheet.
1708              
1709             Currently only supports Excel.
1710              
1711             =back
1712              
1713             All other attributes/options will be passed to the underlying parser if
1714             that parser supports attributes.
1715              
1716             =head3 col2label
1717              
1718             my $col_id = col2label (col);
1719              
1720             my $col_id = $book->col2label (col); # OO
1721              
1722             C<col2label ()> converts a C<(column)> (1 based) to the letters used in the
1723             traditional cell notation:
1724              
1725             my $id = col2label ( 4); # $id now "D"
1726             my $id = col2label (28); # $id now "AB"
1727              
1728             =head3 cr2cell
1729              
1730             my $cell = cr2cell (col, row);
1731              
1732             my $cell = $book->cr2cell (col, row); # OO
1733              
1734             C<cr2cell ()> converts a C<(column, row)> pair (1 based) to the
1735             traditional cell notation:
1736              
1737             my $cell = cr2cell ( 4, 14); # $cell now "D14"
1738             my $cell = cr2cell (28, 4); # $cell now "AB4"
1739              
1740             =head3 cell2cr
1741              
1742             my ($col, $row) = cell2cr ($cell);
1743              
1744             my ($col, $row) = $book->cell2cr ($cell); # OO
1745              
1746             C<cell2cr ()> converts traditional cell notation to a C<(column, row)>
1747             pair (1 based):
1748              
1749             my ($col, $row) = cell2cr ("D14"); # returns ( 4, 14)
1750             my ($col, $row) = cell2cr ("AB4"); # returns (28, 4)
1751              
1752             =head3 row
1753              
1754             my @row = row ($sheet, $row)
1755              
1756             my @row = Spreadsheet::Read::row ($book->[1], 3);
1757              
1758             my @row = $book->row ($sheet, $row); # OO
1759              
1760             Get full row of formatted values (like C<< $sheet->{A3} .. $sheet->{G3} >>)
1761              
1762             Note that the indexes in the returned list are 0-based.
1763              
1764             C<row ()> is not imported by default, so either specify it in the
1765             use argument list, or call it fully qualified.
1766              
1767             See also the C<row ()> method on sheets.
1768              
1769             =head3 cellrow
1770              
1771             my @row = cellrow ($sheet, $row);
1772              
1773             my @row = Spreadsheet::Read::cellrow ($book->[1], 3);
1774              
1775             my @row = $book->cellrow ($sheet, $row); # OO
1776              
1777             Get full row of unformatted values (like C<< $sheet->{cell}[1][3] .. $sheet->{cell}[7][3] >>)
1778              
1779             Note that the indexes in the returned list are 0-based.
1780              
1781             C<cellrow ()> is not imported by default, so either specify it in the
1782             use argument list, or call it fully qualified or as method call.
1783              
1784             See also the C<cellrow ()> method on sheets.
1785              
1786             =head3 rows
1787              
1788             my @rows = rows ($sheet);
1789              
1790             my @rows = Spreadsheet::Read::rows ($book->[1]);
1791              
1792             my @rows = $book->rows (1); # OO
1793              
1794             Convert C<{cell}>'s C<[column][row]> to a C<[row][column]> list.
1795              
1796             Note that the indexes in the returned list are 0-based, where the
1797             index in the C<{cell}> entry is 1-based.
1798              
1799             C<rows ()> is not imported by default, so either specify it in the
1800             use argument list, or call it fully qualified.
1801              
1802             =head3 parses
1803              
1804             parses ($format);
1805              
1806             Spreadsheet::Read::parses ("CSV");
1807              
1808             $book->parses ("CSV"); # OO
1809              
1810             C<parses ()> returns Spreadsheet::Read's capability to parse the
1811             required format. L<C<ReadData>|/ReadData> will pick its preferred parser
1812             for that format unless overruled. See L<C<parser>|/parser>.
1813              
1814             C<parses ()> is not imported by default, so either specify it in the
1815             use argument list, or call it fully qualified.
1816              
1817             =head3 Version
1818              
1819             my $v = Version ()
1820              
1821             my $v = Spreadsheet::Read::Version ()
1822              
1823             my $v = Spreadsheet::Read->VERSION;
1824              
1825             my $v = $book->Version (); # OO
1826              
1827             Returns the current version of Spreadsheet::Read.
1828              
1829             C<Version ()> is not imported by default, so either specify it in the
1830             use argument list, or call it fully qualified.
1831              
1832             This function returns exactly the same as C<< Spreadsheet::Read->VERSION >>
1833             returns and is only kept for backward compatibility reasons.
1834              
1835             =head3 sheets
1836              
1837             my $sheets = $book->sheets; # OO
1838             my @sheets = $book->sheets; # OO
1839              
1840             In scalar context return the number of sheets in the book.
1841             In list context return the labels of the sheets in the book.
1842              
1843             =head3 sheet
1844              
1845             my $sheet = $book->sheet (1); # OO
1846             my $sheet = $book->sheet ("Foo"); # OO
1847              
1848             Return the numbered or named sheet out of the book. Will return C<undef> if
1849             there is no match. Will not work for sheets I<named> with a number between 1
1850             and the number of sheets in the book.
1851              
1852             With named sheets will first try to use the list of sheet-labels as stored in
1853             the control structure. If no match is found, it will scan the actual labels
1854             of the sheets. In that case, it will return the first matching sheet.
1855              
1856             If defined, the returned sheet will be of class C<Spreadsheet::Read::Sheet>.
1857              
1858             =head3 add
1859              
1860             my $book = ReadData ("file.csv");
1861             Spreadsheet::Read::add ($book, "file.xlsx");
1862              
1863             my $book = Spreadsheet::Read->new ("file.csv");
1864             $book->add ("file.xlsx"); # OO
1865              
1866             =head2 Methods on sheets
1867              
1868             =head3 maxcol
1869              
1870             my $col = $sheet->maxcol;
1871              
1872             Return the index of the last in-use column in the sheet. This index is 1-based.
1873              
1874             =head3 maxrow
1875              
1876             my $row = $sheet->maxrow;
1877              
1878             Return the index of the last in-use row in the sheet. This index is 1-based.
1879              
1880             =head3 cell
1881              
1882             my $cell = $sheet->cell ("A3");
1883             my $cell = $sheet->cell (1, 3);
1884              
1885             Return the value for a cell. Using tags will return the formatted value,
1886             using column and row will return unformatted value.
1887              
1888             =head3 attr
1889              
1890             my $cell = $sheet->attr ("A3");
1891             my $cell = $sheet->attr (1, 3);
1892              
1893             Return the attributes of a cell. Only valid if attributes are enabled through
1894             option C<attr>.
1895              
1896             =head3 col2label
1897              
1898             my $col_id = $sheet->col2label (col);
1899              
1900             C<col2label ()> converts a C<(column)> (1 based) to the letters used in the
1901             traditional cell notation:
1902              
1903             my $id = $sheet->col2label ( 4); # $id now "D"
1904             my $id = $sheet->col2label (28); # $id now "AB"
1905              
1906             =head3 cr2cell
1907              
1908             my $cell = $sheet->cr2cell (col, row);
1909              
1910             C<cr2cell ()> converts a C<(column, row)> pair (1 based) to the
1911             traditional cell notation:
1912              
1913             my $cell = $sheet->cr2cell ( 4, 14); # $cell now "D14"
1914             my $cell = $sheet->cr2cell (28, 4); # $cell now "AB4"
1915              
1916             =head3 cell2cr
1917              
1918             my ($col, $row) = $sheet->cell2cr ($cell);
1919              
1920             C<cell2cr ()> converts traditional cell notation to a C<(column, row)>
1921             pair (1 based):
1922              
1923             my ($col, $row) = $sheet->cell2cr ("D14"); # returns ( 4, 14)
1924             my ($col, $row) = $sheet->cell2cr ("AB4"); # returns (28, 4)
1925              
1926             =head3 col
1927              
1928             my @col = $sheet->column ($col);
1929              
1930             Get full column of formatted values (like C<< $sheet->{C1} .. $sheet->{C9} >>)
1931              
1932             Note that the indexes in the returned list are 0-based.
1933              
1934             =head3 cellcolumn
1935              
1936             my @col = $sheet->cellcolumn ($col);
1937              
1938             Get full column of unformatted values (like C<< $sheet->{cell}[3][1] .. $sheet->{cell}[3][9] >>)
1939              
1940             Note that the indexes in the returned list are 0-based.
1941              
1942             =head3 row
1943              
1944             my @row = $sheet->row ($row);
1945              
1946             Get full row of formatted values (like C<< $sheet->{A3} .. $sheet->{G3} >>)
1947              
1948             Note that the indexes in the returned list are 0-based.
1949              
1950             =head3 cellrow
1951              
1952             my @row = $sheet->cellrow ($row);
1953              
1954             Get full row of unformatted values (like C<< $sheet->{cell}[1][3] .. $sheet->{cell}[7][3] >>)
1955              
1956             Note that the indexes in the returned list are 0-based.
1957              
1958             =head3 rows
1959              
1960             my @rows = $sheet->rows ();
1961              
1962             Convert C<{cell}>'s C<[column][row]> to a C<[row][column]> list.
1963              
1964             Note that the indexes in the returned list are 0-based, where the
1965             index in the C<{cell}> entry is 1-based.
1966              
1967             =head3 merged_from
1968              
1969             my $top_left = $sheet->merged_from ("C2");
1970             my $top_left = $sheet->merged_from (3, 2);
1971              
1972             If the parser supports merged areas, this method will return the label of the
1973             top-left cell in the merged area the requested cell is part of.
1974              
1975             If the requested ID is valid and withing the sheet cell range, but not part of
1976             a merged area, it will return C<"">.
1977              
1978             If the ID is not valid or out of range, it returns C<undef>.
1979              
1980             See L<Merged cells|/merged> for more details.
1981              
1982             =head3 label
1983              
1984             my $label = $sheet->label;
1985             $sheet->label ("New sheet label");
1986              
1987             Set a new label to a sheet. Note that the index in the control structure will
1988             I<NOT> be updated.
1989              
1990             =head3 active
1991              
1992             my $sheet_is_active = $sheet->active;
1993              
1994             Returns 1 if the selected sheet is active, otherwise returns 0.
1995              
1996             Currently only works on XLS (as of Spreadsheed::ParseExcel-0.61).
1997             CSV is always active.
1998              
1999             =head2 Using CSV
2000              
2001             In case of CSV parsing, L<C<ReadData>|/ReadData> will use the first line of
2002             the file to auto-detect the separation character if the first argument is a
2003             file and both C<sep> and C<quote> are not passed as attributes.
2004             L<Text::CSV_XS|https://metacpan.org/release/Text-CSV_XS> (or
2005             L<Text::CSV_PP|https://metacpan.org/release/Text-CSV>) is able to
2006             automatically detect and use C<\r> line endings.
2007              
2008             CSV can parse streams too, but be sure to pass C<sep> and/or C<quote> if
2009             these do not match the default C<,> and C<">.
2010              
2011             When an error is found in the CSV, it is automatically reported (to STDERR).
2012             The structure will store the error in C<< $ss->[0]{error} >> as anonymous
2013             list returned by
2014             L<C<< $csv->error_diag >>|https://metacpan.org/pod/Text::CSV_XS#error_diag>.
2015             See L<Text::CSV_XS|https://metacpan.org/pod/Text::CSV_XS> for documentation.
2016              
2017             my $ss = ReadData ("bad.csv");
2018             $ss->[0]{error} and say $ss->[0]{error}[1];
2019              
2020             As CSV has no sheet labels, the default label for a CSV sheet is its filename.
2021             For CSV, this can be overruled using the I<label> attribute:
2022              
2023             my $ss = Spreadsheet::Read->new ("/some/place/test.csv", label => "Test");
2024              
2025             =head2 Cell Attributes
2026             X<attr>
2027              
2028             If the constructor was called with C<attr> having a true value,
2029              
2030             my $book = ReadData ("book.xls", attr => 1);
2031             my $book = Spreadsheet::Read->new ("book.xlsx", attr => 1);
2032              
2033             effort is made to analyze and store field attributes like this:
2034              
2035             { label => "Sheet 1",
2036             maxrow => 5,
2037             maxcol => 2,
2038             cell => [ undef,
2039             [ undef, 1 ],
2040             [ undef, undef, undef, undef, undef, "Nugget" ],
2041             ],
2042             attr => [ undef,
2043             [ undef, {
2044             type => "numeric",
2045             fgcolor => "#ff0000",
2046             bgcolor => undef,
2047             font => "Arial",
2048             size => undef,
2049             format => "## ##0.00",
2050             halign => "right",
2051             valign => "top",
2052             uline => 0,
2053             bold => 0,
2054             italic => 0,
2055             wrap => 0,
2056             merged => 0,
2057             hidden => 0,
2058             locked => 0,
2059             enc => "utf-8",
2060             }, ],
2061             [ undef, undef, undef, undef, undef, {
2062             type => "text",
2063             fgcolor => "#e2e2e2",
2064             bgcolor => undef,
2065             font => "Letter Gothic",
2066             size => 15,
2067             format => undef,
2068             halign => "left",
2069             valign => "top",
2070             uline => 0,
2071             bold => 0,
2072             italic => 0,
2073             wrap => 0,
2074             merged => 0,
2075             hidden => 0,
2076             locked => 0,
2077             enc => "iso8859-1",
2078             }, ],
2079             ],
2080             merged => [],
2081             A1 => 1,
2082             B5 => "Nugget",
2083             },
2084              
2085             The entries C<maxrow> and C<maxcol> are 1-based.
2086              
2087             This has now been partially implemented, mainly for Excel, as the other
2088             parsers do not (yet) support all of that. YMMV.
2089              
2090             If a cell itself is not hidden, but the parser holds the information that
2091             either the row or the column (or both) the field is in is hidden, the flag
2092             is inherited into the cell attributes.
2093              
2094             You can get the attributes of a cell (as a hash-ref) like this:
2095              
2096             my $attr = $book[1]{attr}[1][3]; # Direct structure
2097             my $attr = $book->sheet (1)->attr (1, 3); # Same using OO
2098             my $attr = $book->sheet (1)->attr ("A3"); # Same using OO
2099              
2100             To get to the C<font> attribute, use any of these:
2101              
2102             my $font = $book[1]{attr}[1][3]{font};
2103             my $font = $book->sheet (1)->attr (1, 3)->{font};
2104             my $font = $book->sheet (1)->attr ("A3")->font;
2105              
2106             =head3 Merged cells
2107             X<merged>
2108              
2109             Note that only
2110             L<Spreadsheet::ReadSXC|https://metacpan.org/release/Spreadsheet-ReadSXC>
2111             documents the use of merged cells, and not in a way useful for the spreadsheet
2112             consumer.
2113              
2114             CSV does not support merged cells (though future implementations of CSV
2115             for the web might).
2116              
2117             The documentation of merged areas in
2118             L<Spreadsheet::ParseExcel|https://metacpan.org/release/Spreadsheet-ParseExcel> and
2119             L<Spreadsheet::ParseXLSX|https://metacpan.org/release/Spreadsheet-ParseXLSX> can
2120             be found in
2121             L<Spreadsheet::ParseExcel::Worksheet|https://metacpan.org/pod/Spreadsheet::ParseExcel::Worksheet>
2122             and L<Spreadsheet::ParseExcel::Cell|https://metacpan.org/pod/Spreadsheet::ParseExcel::Cell>.
2123              
2124             None of basic L<Spreadsheet::XLSX|https://metacpan.org/release/Spreadsheet-XLSX>,
2125             L<Spreadsheet::ParseExcel|https://metacpan.org/release/Spreadsheet-ParseExcel>, and
2126             L<Spreadsheet::ParseXLSX|https://metacpan.org/release/Spreadsheet-ParseXLSX> manual
2127             pages mention merged cells at all.
2128              
2129             This module just tries to return the information in a generic way.
2130              
2131             Given this spreadsheet as an example
2132              
2133             merged.xlsx:
2134              
2135             A B C
2136             +-----+-----------+
2137             1| | foo |
2138             +-----+ +
2139             2| bar | |
2140             | +-----+-----+
2141             3| | urg | orc |
2142             +-----+-----+-----+
2143              
2144             the information extracted from that undocumented information is
2145             returned in the C<merged> entry of the sheet's hash as a list of
2146             top-left, bottom-right coordinate pars (col, row, col, row). For
2147             given example, that would be:
2148              
2149             $ss->{merged} = [
2150             [ 1, 2, 1, 3 ], # A2-A3
2151             [ 2, 1, 3, 2 ], # B1-C2
2152             ];
2153              
2154             To find the label of the top-left cell in a merged area, use the
2155             L<C<merged_from>|/merged_from> method.
2156              
2157             $ss->merged_from ("C2"); # will return "B1"
2158              
2159             When the attributes are also enabled, there is some merge information
2160             copied directly from the cell information, but again, that stems from
2161             code analysis and not from documentation:
2162              
2163             my $ss = ReadData ("merged.xlsx", attr => 1)->[1];
2164             foreach my $row (1 .. $ss->{maxrow}) {
2165             foreach my $col (1 .. $ss->{maxcol}) {
2166             my $cell = cr2cell ($col, $row);
2167             printf "%s %-3s %s ", $cell, $ss->{$cell},
2168             $ss->{attr}[$col][$row]{merged};
2169             }
2170             print "\n";
2171             }
2172              
2173             A1 0 B1 foo 1 C1 1
2174             A2 bar 1 B2 1 C2 1
2175             A3 1 B3 urg 0 C3 orc 0
2176              
2177             In this example, there is no way to see if C<B2> is merged to C<A2> or
2178             to C<B1> without analyzing all surrounding cells. This could as well
2179             mean C<A2:A3>, C<B1:C1>, C<B2:C2>, as C<A2:A3>, C<B1:B2>, C<C1:C2>, as
2180             C<A2:A3>, C<B1:C2>.
2181              
2182             Use the L<C<merged>|/merged> entry described above to find out what
2183             fields are merged to what other fields or use C<merge>:
2184              
2185             my $ss = ReadData ("merged.xlsx", attr => 1, merge => 1)->[1];
2186             foreach my $row (1 .. $ss->{maxrow}) {
2187             foreach my $col (1 .. $ss->{maxcol}) {
2188             my $cell = cr2cell ($col, $row);
2189             printf "%s %-3s %s ", $cell, $ss->{$cell},
2190             $ss->{attr}[$col][$row]{merged};
2191             }
2192             print "\n";
2193             }
2194              
2195             A1 0 B1 foo B1 C1 foo B1
2196             A2 bar A2 B2 foo B1 C2 foo B1
2197             A3 bar A2 B3 urg 0 C3 orc 0
2198              
2199             =head2 Streams from web-resources
2200              
2201             If you want to stream a web-resource, and the underlying parser supports it,
2202             you could use a helper function like this (thanks Corion):
2203              
2204             use HTTP::Tiny;
2205             use Spreadsheet::Read;
2206              
2207             # Fetch data and return a filehandle to that data
2208             sub fh_from_url {
2209             my $url = shift;
2210             my $ua = HTTP::Tiny->new;
2211             my $res = $ua->get ($url);
2212             open my $fh, "<", \$res->{content};
2213             return $fh
2214             } # fh_from_url
2215              
2216             my $fh = fh_from_url ("http://example.com/example.csv");
2217             my $sheet = Spreadsheet::Read->new ($fh, parser => "csv");
2218              
2219             =head1 TOOLS
2220              
2221             This modules comes with a few tools that perform tasks from the FAQ, like
2222             "How do I select only column D through F from sheet 2 into a CSV file?"
2223              
2224             If the module was installed without the tools, you can find them here:
2225             https://github.com/Tux/Spreadsheet-Read/tree/master/examples
2226              
2227             =head2 C<xlscat>
2228              
2229             Show (parts of) a spreadsheet in plain text, CSV, or HTML
2230              
2231             usage: xlscat [-s <sep>] [-L] [-n] [-A] [-u] [Selection] file.xls
2232             [-c | -m] [-u] [Selection] file.xls
2233             -i [-S sheets] file.xls
2234             Generic options:
2235             -v[#] Set verbose level (xlscat/xlsgrep)
2236             -d[#] Set debug level (Spreadsheet::Read)
2237             -u Use unformatted values
2238             --noclip Do not strip empty sheets and
2239             trailing empty rows and columns
2240             -e <enc> Set encoding for input and output
2241             -b <enc> Set encoding for input
2242             -a <enc> Set encoding for output
2243             Input CSV:
2244             --in-sep=c Set input sep_char for CSV
2245             Input XLS:
2246             --dtfmt=fmt Specify the default date format to replace 'm-d-yy'
2247             the default replacement is 'yyyy-mm-dd'
2248             Output Text (default):
2249             -s <sep> Use separator <sep>. Default '|', \n allowed
2250             -L Line up the columns
2251             -n [skip] Number lines (prefix with column number)
2252             optionally skip <skip> (header) lines
2253             -A Show field attributes in ANSI escapes
2254             -h[#] Show # header lines
2255             Output Index only:
2256             -i Show sheet names and size only
2257             Output CSV:
2258             -c Output CSV, separator = ','
2259             -m Output CSV, separator = ';'
2260             Output HTML:
2261             -H Output HTML
2262             Selection:
2263             -S <sheets> Only print sheets <sheets>. 'all' is a valid set
2264             Default only prints the first sheet
2265             -R <rows> Only print rows <rows>. Default is 'all'
2266             -C <cols> Only print columns <cols>. Default is 'all'
2267             -F <flds> Only fields <flds> e.g. -FA3,B16
2268             Ordering (column numbers in result set *after* selection):
2269             --sort=spec Sort output (e.g. --sort=3,2r,5n,1rn+2)
2270             +# - first # lines do not sort (header)
2271             # - order on column # lexical ascending
2272             #n - order on column # numeric ascending
2273             #r - order on column # lexical descending
2274             #rn - order on column # numeric descending
2275              
2276             =head2 C<xlsgrep>
2277              
2278             Show (parts of) a spreadsheet that match a pattern in plain text, CSV, or HTML
2279              
2280             usage: xlsgrep [-s <sep>] [-L] [-n] [-A] [-u] [Selection] pattern file.xls
2281             [-c | -m] [-u] [Selection] pattern file.xls
2282             -i [-S sheets] pattern file.xls
2283             Generic options:
2284             -v[#] Set verbose level (xlscat/xlsgrep)
2285             -d[#] Set debug level (Spreadsheet::Read)
2286             -u Use unformatted values
2287             --noclip Do not strip empty sheets and
2288             trailing empty rows and columns
2289             -e <enc> Set encoding for input and output
2290             -b <enc> Set encoding for input
2291             -a <enc> Set encoding for output
2292             Input CSV:
2293             --in-sep=c Set input sep_char for CSV
2294             Input XLS:
2295             --dtfmt=fmt Specify the default date format to replace 'm-d-yy'
2296             the default replacement is 'yyyy-mm-dd'
2297             Output Text (default):
2298             -s <sep> Use separator <sep>. Default '|', \n allowed
2299             -L Line up the columns
2300             -n [skip] Number lines (prefix with column number)
2301             optionally skip <skip> (header) lines
2302             -A Show field attributes in ANSI escapes
2303             -h[#] Show # header lines
2304             Grep options:
2305             -i Ignore case
2306             -w Match whole words only
2307             Output CSV:
2308             -c Output CSV, separator = ','
2309             -m Output CSV, separator = ';'
2310             Output HTML:
2311             -H Output HTML
2312             Selection:
2313             -S <sheets> Only print sheets <sheets>. 'all' is a valid set
2314             Default only prints the first sheet
2315             -R <rows> Only print rows <rows>. Default is 'all'
2316             -C <cols> Only print columns <cols>. Default is 'all'
2317             -F <flds> Only fields <flds> e.g. -FA3,B16
2318             Ordering (column numbers in result set *after* selection):
2319             --sort=spec Sort output (e.g. --sort=3,2r,5n,1rn+2)
2320             +# - first # lines do not sort (header)
2321             # - order on column # lexical ascending
2322             #n - order on column # numeric ascending
2323             #r - order on column # lexical descending
2324             #rn - order on column # numeric descending
2325              
2326             =head2 C<xls2csv>
2327              
2328             Convert a spreadsheet to CSV. This is just a small wrapper over C<xlscat>.
2329              
2330             usage: xls2csv [ -o file.csv ] file.xls
2331              
2332             =head2 C<ss2tk>
2333              
2334             Show a spreadsheet in a perl/Tk spreadsheet widget
2335              
2336             usage: ss2tk [-w <width>] [X11 options] file.xls [<pattern>]
2337             -w <width> use <width> as default column width (4)
2338              
2339             =head2 C<ssdiff>
2340              
2341             Show the differences between two spreadsheets.
2342              
2343             usage: examples/ssdiff [--verbose[=1]] file.xls file.xlsx
2344              
2345             =head1 TODO
2346              
2347             =over 4
2348              
2349             =item Options
2350              
2351             =over 2
2352              
2353             =item Module Options
2354              
2355             New Spreadsheet::Read options are bound to happen. I'm thinking of an
2356             option that disables the reading of the data entirely to speed up an
2357             index request (how many sheets/fields/columns). See C<xlscat -i>.
2358              
2359             =item Parser options
2360              
2361             Try to transparently support as many options as the encapsulated modules
2362             support regarding (un)formatted values, (date) formats, hidden columns
2363             rows or fields etc. These could be implemented like C<attr> above but
2364             names C<meta>, or just be new values in the C<attr> hashes.
2365              
2366             =back
2367              
2368             =item Other parsers
2369              
2370             Add support for new(er) parsers for already supported formats, like
2371              
2372             =over 2
2373              
2374             =item Data::XLSX::Parser
2375              
2376             Data::XLSX::Parser provides faster way to parse Microsoft Excel's .xlsx
2377             files. The implementation of this module is highly inspired from Python's
2378             FastXLSX library.
2379              
2380             This is SAX based parser, so you can parse very large XLSX file with
2381             lower memory usage.
2382              
2383             =back
2384              
2385             =item Other spreadsheet formats
2386              
2387             I consider adding any spreadsheet interface that offers a usable API.
2388              
2389             Under investigation:
2390              
2391             =over 2
2392              
2393             =item Gnumeric (.gnumeric)
2394              
2395             I have seen no existing CPAN module yet.
2396              
2397             It is gzip'ed XML
2398              
2399             =item Kspread (.ksp)
2400              
2401             Now knows as Calligra Sheets.
2402              
2403             I have seen no existing CPAN module yet.
2404              
2405             It is XML in ZIP
2406              
2407             =back
2408              
2409             =item Alternative parsers for existing formats
2410              
2411             As long as the alternative has a good reason for its existence, and the
2412             API of that parser reasonable fits in my approach, I will consider to
2413             implement the glue layer, or apply patches to do so as long as these
2414             match what F<CONTRIBUTING.md> describes.
2415              
2416             =back
2417              
2418             =head1 SEE ALSO
2419              
2420             =over 2
2421              
2422             =item Text::CSV_XS, Text::CSV_PP
2423              
2424             See L<Text::CSV_XS|https://metacpan.org/release/Text-CSV_XS> ,
2425             L<Text::CSV_PP|https://metacpan.org/release/Text-CSV> , and
2426             L<Text::CSV|https://metacpan.org/release/Text-CSV> documentation.
2427              
2428             L<Text::CSV|https://metacpan.org/release/Text-CSV> is a wrapper over Text::CSV_XS (the fast XS version) and/or
2429             L<Text::CSV_PP|https://metacpan.org/release/Text-CSV> (the pure perl version).
2430              
2431             =item Spreadsheet::ParseExcel
2432              
2433             L<Spreadsheet::ParseExcel|https://metacpan.org/release/Spreadsheet-ParseExcel> is
2434             the best parser for old-style Microsoft Excel (.xls) files.
2435              
2436             =item Spreadsheet::ParseXLSX
2437              
2438             L<Spreadsheet::ParseXLSX|https://metacpan.org/release/Spreadsheet-ParseXLSX> is
2439             like L<Spreadsheet::ParseExcel|https://metacpan.org/release/Spreadsheet-ParseExcel>,
2440             but for new Microsoft Excel 2007+ files (.xlsx). They have the same API.
2441              
2442             This module uses L<XML::Twig|https://metacpan.org/release/XML-Twig> to parse the
2443             internal XML.
2444              
2445             =item Spreadsheet::XLSX
2446              
2447             See L<Spreadsheet::XLSX|https://metacpan.org/release/Spreadsheet-XLSX>
2448             documentation.
2449              
2450             This module is dead and deprecated. It is B<buggy and unmaintained>. I<Please>
2451             use L<Spreadsheet::ParseXLSX|https://metacpan.org/release/Spreadsheet-ParseXLSX>
2452             instead.
2453              
2454             =item Spreadsheet::ParseODS
2455              
2456             L<Spreadsheet::ParseODS|https://metacpan.org/release/Spreadsheet-ParseODS> is a
2457             parser for OpenOffice/LibreOffice (.sxc and .ods) spreadsheet files. It is the
2458             successor of L<Spreadsheet::ReadSXC|https://metacpan.org/release/Spreadsheet-ReadSXC>.
2459              
2460             =item Spreadsheet::ReadSXC
2461              
2462             L<Spreadsheet::ReadSXC|https://metacpan.org/release/Spreadsheet-ReadSXC> is a
2463             parser for OpenOffice/LibreOffice (.sxc and .ods) spreadsheet files.
2464              
2465             =item Spreadsheet::BasicRead
2466              
2467             See L<Spreadsheet::BasicRead|https://metacpan.org/release/Spreadsheet-BasicRead>
2468             for xlscat-like functionality (Excel only)
2469              
2470             =item Spreadsheet::ConvertAA
2471              
2472             See L<Spreadsheet::ConvertAA|https://metacpan.org/release/Spreadsheet-ConvertAA>
2473             for an alternative set of L</cell2cr>/L</cr2cell> pair.
2474              
2475             =item Spreadsheet::Perl
2476              
2477             L<Spreadsheet::Perl|https://metacpan.org/release/Spreadsheet-Perl> offers a Pure
2478             Perl implementation of a spreadsheet engine. Users that want this format to be
2479             supported in Spreadsheet::Read are hereby motivated to offer patches. It is
2480             not high on my TODO-list.
2481              
2482             =item Spreadsheet::CSV
2483              
2484             L<Spreadsheet::CSV|https://metacpan.org/release/Spreadsheet-CSV> offers the
2485             interesting approach of seeing all supported spreadsheet formats as if it were
2486             CSV, mimicking the L<Text::CSV_XS|https://metacpan.org/release/Text-CSV_XS>
2487             interface.
2488              
2489             =item xls2csv
2490              
2491             L<xls2csv|https://github.com/Tux/Spreadsheet-Read/blob/master/examples/xls2csv>
2492             offers an alternative for my C<xlscat -c>, in the xls2csv tool, but this tool
2493             focuses on character encoding transparency, and requires some other modules.
2494              
2495             =back
2496              
2497             =head1 AUTHOR
2498              
2499             H.Merijn Brand, <h.m.brand@xs4all.nl>
2500              
2501             =head1 COPYRIGHT AND LICENSE
2502              
2503             Copyright (C) 2005-2020 H.Merijn Brand
2504              
2505             This library is free software; you can redistribute it and/or modify
2506             it under the same terms as Perl itself.
2507              
2508             =cut