| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Excel::ValueReader::XLSX; | 
| 2 | 1 |  |  | 1 |  | 156168 | use utf8; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 6 |  | 
| 3 | 1 |  |  | 1 |  | 677 | use Moose; | 
|  | 1 |  |  |  |  | 476794 |  | 
|  | 1 |  |  |  |  | 8 |  | 
| 4 | 1 |  |  | 1 |  | 9062 | use Module::Load          qw/load/; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 11 |  | 
| 5 | 1 |  |  | 1 |  | 754 | use Date::Calc            qw/Add_Delta_Days/; | 
|  | 1 |  |  |  |  | 7125 |  | 
|  | 1 |  |  |  |  | 115 |  | 
| 6 | 1 |  |  | 1 |  | 7 | use POSIX                 qw/strftime modf/; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 15 |  | 
| 7 | 1 |  |  | 1 |  | 1758 | use Carp                  qw/croak/; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 51 |  | 
| 8 | 1 |  |  | 1 |  | 6 | use feature 'state'; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 1933 |  | 
| 9 |  |  |  |  |  |  |  | 
| 10 |  |  |  |  |  |  | our $VERSION = '1.10'; | 
| 11 |  |  |  |  |  |  |  | 
| 12 |  |  |  |  |  |  | #====================================================================== | 
| 13 |  |  |  |  |  |  | # ATTRIBUTES | 
| 14 |  |  |  |  |  |  | #====================================================================== | 
| 15 |  |  |  |  |  |  |  | 
| 16 |  |  |  |  |  |  | # PUBLIC ATTRIBUTES | 
| 17 |  |  |  |  |  |  | has 'xlsx'            => (is => 'ro', isa => 'Str', required => 1);      # path of xlsx file | 
| 18 |  |  |  |  |  |  | has 'using'           => (is => 'ro', isa => 'Str', default => 'Regex'); # name of backend class | 
| 19 |  |  |  |  |  |  | has 'date_format'     => (is => 'ro', isa => 'Str', default => '%d.%m.%Y'); | 
| 20 |  |  |  |  |  |  | has 'time_format'     => (is => 'ro', isa => 'Str', default => '%H:%M:%S'); | 
| 21 |  |  |  |  |  |  | has 'datetime_format' => (is => 'ro', isa => 'Str', | 
| 22 |  |  |  |  |  |  | builder => '_datetime_format', lazy => 1); | 
| 23 |  |  |  |  |  |  | has 'date_formatter'  => (is => 'ro',   isa => 'Maybe[CodeRef]', | 
| 24 |  |  |  |  |  |  | builder => '_date_formatter', lazy => 1); | 
| 25 |  |  |  |  |  |  |  | 
| 26 |  |  |  |  |  |  |  | 
| 27 |  |  |  |  |  |  |  | 
| 28 |  |  |  |  |  |  | # ATTRIBUTES USED INTERNALLY, NOT DOCUMENTED | 
| 29 |  |  |  |  |  |  | has 'backend'         => (is => 'ro',   isa => 'Object', init_arg => undef, | 
| 30 |  |  |  |  |  |  | builder => '_backend', lazy => 1, | 
| 31 |  |  |  |  |  |  | handles => [qw/values base_year sheets/]); | 
| 32 |  |  |  |  |  |  |  | 
| 33 |  |  |  |  |  |  | #====================================================================== | 
| 34 |  |  |  |  |  |  | # BUILDING | 
| 35 |  |  |  |  |  |  | #====================================================================== | 
| 36 |  |  |  |  |  |  |  | 
| 37 |  |  |  |  |  |  | # syntactic sugar for supporting ->new($path) instead of ->new(xlsx => $path) | 
| 38 |  |  |  |  |  |  | around BUILDARGS => sub { | 
| 39 |  |  |  |  |  |  | my $orig  = shift; | 
| 40 |  |  |  |  |  |  | my $class = shift; | 
| 41 |  |  |  |  |  |  |  | 
| 42 |  |  |  |  |  |  | if ( @_ == 1 && !ref $_[0] ) { | 
| 43 |  |  |  |  |  |  | return $class->$orig(xlsx => $_[0]); | 
| 44 |  |  |  |  |  |  | } | 
| 45 |  |  |  |  |  |  | else { | 
| 46 |  |  |  |  |  |  | return $class->$orig(@_); | 
| 47 |  |  |  |  |  |  | } | 
| 48 |  |  |  |  |  |  | }; | 
| 49 |  |  |  |  |  |  |  | 
| 50 |  |  |  |  |  |  |  | 
| 51 |  |  |  |  |  |  |  | 
| 52 |  |  |  |  |  |  | #====================================================================== | 
| 53 |  |  |  |  |  |  | # ATTRIBUTE CONSTRUCTORS | 
| 54 |  |  |  |  |  |  | #====================================================================== | 
| 55 |  |  |  |  |  |  |  | 
| 56 |  |  |  |  |  |  |  | 
| 57 |  |  |  |  |  |  | sub _backend { | 
| 58 | 14 |  |  | 14 |  | 34 | my $self = shift; | 
| 59 |  |  |  |  |  |  |  | 
| 60 | 14 |  |  |  |  | 439 | my $backend_class = ref($self) . '::Backend::' . $self->using; | 
| 61 | 14 |  |  |  |  | 71 | load $backend_class; | 
| 62 |  |  |  |  |  |  |  | 
| 63 | 14 |  |  |  |  | 1112 | return $backend_class->new(frontend => $self); | 
| 64 |  |  |  |  |  |  | } | 
| 65 |  |  |  |  |  |  |  | 
| 66 |  |  |  |  |  |  | sub _datetime_format { | 
| 67 | 12 |  |  | 12 |  | 27 | my ($self) = @_; | 
| 68 | 12 |  |  |  |  | 339 | return $self->date_format . ' ' . $self->time_format; | 
| 69 |  |  |  |  |  |  | } | 
| 70 |  |  |  |  |  |  |  | 
| 71 |  |  |  |  |  |  | sub _date_formatter { | 
| 72 | 12 |  |  | 12 |  | 31 | my ($self) = @_; | 
| 73 |  |  |  |  |  |  |  | 
| 74 |  |  |  |  |  |  | # local copies of the various formats so that we can build a closure | 
| 75 | 12 |  |  |  |  | 360 | my @formats = (undef,                     # 0 -- error | 
| 76 |  |  |  |  |  |  | $self->date_format,        # 1 -- just a date | 
| 77 |  |  |  |  |  |  | $self->time_format,        # 2 -- just a time | 
| 78 |  |  |  |  |  |  | $self->datetime_format);   # 3 -- date and time | 
| 79 |  |  |  |  |  |  |  | 
| 80 |  |  |  |  |  |  | my $strftime_formatter = sub { | 
| 81 | 156 |  |  | 156 |  | 353 | my ($xl_date_format, $y, $m, $d, $h, $min, $s, $ms) = @_; | 
| 82 |  |  |  |  |  |  |  | 
| 83 |  |  |  |  |  |  | # choose the proper format for strftime | 
| 84 | 156 |  |  |  |  | 210 | my $ix = 0; # index into the @formats array | 
| 85 | 156 | 100 |  |  |  | 466 | $ix += 1 if $xl_date_format =~ /[dy]/; # the Excel format contains a date portion | 
| 86 | 156 | 100 |  |  |  | 349 | $ix += 2 if $xl_date_format =~ /[hs]/; # the Excel format contains a time portion | 
| 87 | 156 | 50 |  |  |  | 331 | my $strftime_format = $formats[$ix] | 
| 88 |  |  |  |  |  |  | or die "cell with unexpected Excel date format : $xl_date_format"; | 
| 89 |  |  |  |  |  |  |  | 
| 90 |  |  |  |  |  |  | # formatting through strftime | 
| 91 | 156 |  |  |  |  | 5036 | my $formatted_date = strftime($strftime_format, $s, $min, $h, $d, $m-1, $y-1900); | 
| 92 |  |  |  |  |  |  |  | 
| 93 | 156 |  |  |  |  | 630 | return $formatted_date; | 
| 94 | 12 |  |  |  |  | 74 | }; | 
| 95 |  |  |  |  |  |  |  | 
| 96 | 12 |  |  |  |  | 363 | return $strftime_formatter; | 
| 97 |  |  |  |  |  |  | } | 
| 98 |  |  |  |  |  |  |  | 
| 99 |  |  |  |  |  |  |  | 
| 100 |  |  |  |  |  |  | #====================================================================== | 
| 101 |  |  |  |  |  |  | # GENERAL METHODS | 
| 102 |  |  |  |  |  |  | #====================================================================== | 
| 103 |  |  |  |  |  |  |  | 
| 104 |  |  |  |  |  |  |  | 
| 105 |  |  |  |  |  |  | sub sheet_names { | 
| 106 | 2 |  |  | 2 | 1 | 4529 | my ($self) = @_; | 
| 107 |  |  |  |  |  |  |  | 
| 108 | 2 |  |  |  |  | 19 | my $sheets = $self->sheets; # arrayref of shape {$name => $sheet_position} | 
| 109 |  |  |  |  |  |  |  | 
| 110 | 2 |  |  |  |  | 18 | my @sorted_names = sort {$sheets->{$a} <=> $sheets->{$b}} keys %$sheets; | 
|  | 18 |  |  |  |  | 61 |  | 
| 111 | 2 |  |  |  |  | 12 | return @sorted_names; | 
| 112 |  |  |  |  |  |  | } | 
| 113 |  |  |  |  |  |  |  | 
| 114 |  |  |  |  |  |  |  | 
| 115 |  |  |  |  |  |  | sub A1_to_num { # convert Excel A1 reference format to a number | 
| 116 | 2406 |  |  | 2406 | 0 | 22030 | my ($self, $string) = @_; | 
| 117 |  |  |  |  |  |  |  | 
| 118 |  |  |  |  |  |  | # ordinal number for character just before 'A' | 
| 119 | 2406 |  |  |  |  | 3503 | state $base = ord('A') - 1; | 
| 120 |  |  |  |  |  |  |  | 
| 121 |  |  |  |  |  |  | # iterate on 'digits' (letters of the A1 cell reference) | 
| 122 | 2406 |  |  |  |  | 3258 | my $num = 0; | 
| 123 | 2406 |  |  |  |  | 5368 | foreach my $digit (map {ord($_) - $base} split //, $string) { | 
|  | 2406 |  |  |  |  | 6387 |  | 
| 124 | 2406 |  |  |  |  | 4373 | $num = $num*26 + $digit; | 
| 125 |  |  |  |  |  |  | } | 
| 126 |  |  |  |  |  |  |  | 
| 127 | 2406 |  |  |  |  | 6793 | return $num; | 
| 128 |  |  |  |  |  |  | } | 
| 129 |  |  |  |  |  |  |  | 
| 130 |  |  |  |  |  |  |  | 
| 131 |  |  |  |  |  |  | sub formatted_date { | 
| 132 | 156 |  |  | 156 | 0 | 8233 | my ($self, $val, $date_format, $date_formatter) = @_; | 
| 133 |  |  |  |  |  |  |  | 
| 134 |  |  |  |  |  |  | # separate date (integer part) from time (fractional part) | 
| 135 | 156 |  |  |  |  | 524 | my ($time, $n_days) = modf($val); | 
| 136 |  |  |  |  |  |  |  | 
| 137 |  |  |  |  |  |  | # Convert $n_days into a date in Date::Calc format (year, month, day). | 
| 138 |  |  |  |  |  |  | # The algorithm is quite odd because in the 1900 system, 01.01.1900 == 0 while | 
| 139 |  |  |  |  |  |  | # in the 1904 system, 01.01.1904 == 1; furthermore, in the 1900 system, | 
| 140 |  |  |  |  |  |  | # Excel treats 1900 as a leap year. | 
| 141 | 156 |  |  |  |  | 512 | my $base_year  = $self->base_year; | 
| 142 | 156 | 100 |  |  |  | 402 | if ($base_year == 1900) { | 
| 143 | 114 |  |  |  |  | 226 | my $is_after_february_1900 = $n_days > 60; | 
| 144 | 114 | 100 |  |  |  | 245 | $n_days -= $is_after_february_1900 ? 2 : 1; | 
| 145 |  |  |  |  |  |  | } | 
| 146 | 156 |  |  |  |  | 577 | my @d = Add_Delta_Days($base_year, 1, 1, $n_days); | 
| 147 |  |  |  |  |  |  |  | 
| 148 |  |  |  |  |  |  | # decode the fractional part (the time) into hours, minutes, seconds, milliseconds | 
| 149 | 156 |  |  |  |  | 240 | my @t; | 
| 150 | 156 |  |  |  |  | 289 | foreach my $subdivision (24, 60, 60, 1000) { | 
| 151 | 624 |  |  |  |  | 817 | $time                    *= $subdivision; | 
| 152 | 624 |  |  |  |  | 1179 | ($time, my $time_portion) = modf($time); | 
| 153 | 624 |  |  |  |  | 1051 | push @t, $time_portion; | 
| 154 |  |  |  |  |  |  | } | 
| 155 |  |  |  |  |  |  |  | 
| 156 |  |  |  |  |  |  | # dirty hack to deal with float imprecisions : if 999 millisecs, round to the next second | 
| 157 | 156 |  |  |  |  | 325 | my ($h, $m, $s, $ms) = @t; | 
| 158 | 156 | 100 |  |  |  | 319 | if ($ms == 999) { | 
| 159 | 2 |  |  |  |  | 17 | $s += 1, $ms = 0; | 
| 160 | 2 | 50 |  |  |  | 67 | if ($s == 60) { | 
| 161 | 0 |  |  |  |  | 0 | $m += 1, $s = 0; | 
| 162 | 0 | 0 |  |  |  | 0 | if ($m == 60) { | 
| 163 | 0 |  |  |  |  | 0 | $h += 1, $m = 0; | 
| 164 |  |  |  |  |  |  | } | 
| 165 |  |  |  |  |  |  | } | 
| 166 |  |  |  |  |  |  | } | 
| 167 |  |  |  |  |  |  | # NOTE : because of this hack, theoretically we could end up with a value | 
| 168 |  |  |  |  |  |  | # like 01.01.2000 24:00:00, semantically equal to 02.01.2000 00:00:00 but different | 
| 169 |  |  |  |  |  |  | # in its rendering. | 
| 170 |  |  |  |  |  |  |  | 
| 171 |  |  |  |  |  |  | # call the date_formatter subroutine | 
| 172 | 156 | 50 | 33 |  |  | 4891 | $date_formatter //= $self->date_formatter | 
| 173 |  |  |  |  |  |  | or die ref($self) . " has no date_formatter subroutine"; | 
| 174 | 156 |  |  |  |  | 366 | my $formatted_date = $date_formatter->($date_format, @d, $h, $m, $s, $ms); | 
| 175 |  |  |  |  |  |  |  | 
| 176 | 156 |  |  |  |  | 648 | return $formatted_date; | 
| 177 |  |  |  |  |  |  | } | 
| 178 |  |  |  |  |  |  |  | 
| 179 |  |  |  |  |  |  | #====================================================================== | 
| 180 |  |  |  |  |  |  | # METHODS FOR PARSING EXCEL TABLES | 
| 181 |  |  |  |  |  |  | #====================================================================== | 
| 182 |  |  |  |  |  |  |  | 
| 183 |  |  |  |  |  |  |  | 
| 184 |  |  |  |  |  |  | sub table_names { | 
| 185 | 2 |  |  | 2 | 1 | 5625 | my ($self) = @_; | 
| 186 |  |  |  |  |  |  |  | 
| 187 | 2 |  |  |  |  | 82 | my $table_info = $self->backend->table_info; | 
| 188 |  |  |  |  |  |  |  | 
| 189 |  |  |  |  |  |  | # sort on table id (field [1] in table_info arrayrefs) | 
| 190 | 2 |  |  |  |  | 19 | my @table_names = sort {$table_info->{$a}[1] <=> $table_info->{$b}->[1]} keys %$table_info; | 
|  | 13 |  |  |  |  | 35 |  | 
| 191 |  |  |  |  |  |  |  | 
| 192 | 2 |  |  |  |  | 19 | return @table_names; | 
| 193 |  |  |  |  |  |  | } | 
| 194 |  |  |  |  |  |  |  | 
| 195 |  |  |  |  |  |  |  | 
| 196 |  |  |  |  |  |  | # info fields returned from the backend parsing methods | 
| 197 |  |  |  |  |  |  | my @table_info_fields = qw/sheet table_id ref columns no_headers/; | 
| 198 |  |  |  |  |  |  |  | 
| 199 |  |  |  |  |  |  |  | 
| 200 |  |  |  |  |  |  | # the same fields are also the valid args for the method call | 
| 201 |  |  |  |  |  |  | my $is_valid_arg      = "^(" . join("|", @table_info_fields) . ")\$"; | 
| 202 |  |  |  |  |  |  |  | 
| 203 |  |  |  |  |  |  | sub table { | 
| 204 | 10 |  |  | 10 | 1 | 7119 | my $self = shift; | 
| 205 |  |  |  |  |  |  |  | 
| 206 |  |  |  |  |  |  | # syntactic sugar : ->table('foo') is treated as ->table(name => 'foo') | 
| 207 | 10 | 50 |  |  |  | 76 | my %args = @_ == 1 ? (name => $_[0]) : @_; | 
| 208 |  |  |  |  |  |  |  | 
| 209 |  |  |  |  |  |  | # if called with a table name, derive all other args from internal workbook info | 
| 210 | 10 | 50 |  |  |  | 38 | if (my $table_name = delete $args{name}) { | 
| 211 | 10 |  | 50 |  |  | 72 | !$args{$_} or croak "table() : arg '$_' is incompatible with 'name'"  for @table_info_fields; | 
| 212 | 10 | 50 |  |  |  | 18 | @args{@table_info_fields} = @{$self->backend->table_info->{$table_name}} | 
|  | 10 |  |  |  |  | 387 |  | 
| 213 |  |  |  |  |  |  | or croak "no table info for table: $table_name"; | 
| 214 |  |  |  |  |  |  | } | 
| 215 |  |  |  |  |  |  |  | 
| 216 |  |  |  |  |  |  | # check args | 
| 217 | 10 |  |  |  |  | 38 | my @invalid_args = grep {!/$is_valid_arg/} keys %args; | 
|  | 50 |  |  |  |  | 263 |  | 
| 218 | 10 | 50 |  |  |  | 32 | croak "invalid args to table(): " . join ", ", @invalid_args if @invalid_args; | 
| 219 |  |  |  |  |  |  |  | 
| 220 |  |  |  |  |  |  | # get raw values from the sheet | 
| 221 | 10 |  |  |  |  | 46 | my $values = $self->values($args{sheet}); | 
| 222 |  |  |  |  |  |  |  | 
| 223 |  |  |  |  |  |  | # restrict values to the table subrange (if applicable) | 
| 224 | 10 | 50 |  |  |  | 194 | $values = $self->_subrange($values, $args{ref}) if $args{ref}; | 
| 225 |  |  |  |  |  |  |  | 
| 226 |  |  |  |  |  |  | # take headers from first row if not already given in $args{columns} | 
| 227 | 10 |  | 33 |  |  | 26 | $args{columns} //= $values->[0]; | 
| 228 |  |  |  |  |  |  |  | 
| 229 |  |  |  |  |  |  | # if this table has headers (which is almost always the case), drop the header row | 
| 230 | 10 | 100 |  |  |  | 25 | shift @$values unless $args{no_headers}; | 
| 231 |  |  |  |  |  |  |  | 
| 232 |  |  |  |  |  |  | # build a table of hashes. This could be done with a simple map(), but using a loop | 
| 233 |  |  |  |  |  |  | # avoids to store 2 copies of cell values in memory : @$values is shifted when @table is pushed. | 
| 234 | 10 |  |  |  |  | 19 | my @cols = @{$args{columns}}; | 
|  | 10 |  |  |  |  | 32 |  | 
| 235 | 10 | 50 |  |  |  | 20 | croak "table contains undefined columns" if grep {!defined $_} @cols; | 
|  | 32 |  |  |  |  | 70 |  | 
| 236 | 10 |  |  |  |  | 21 | my @rows; | 
| 237 | 10 |  |  |  |  | 29 | while (my $vals = shift @$values) { | 
| 238 | 150 |  |  |  |  | 211 | my %row; | 
| 239 | 150 |  |  |  |  | 517 | @row{@cols} = @$vals; | 
| 240 | 150 |  |  |  |  | 429 | push @rows, \%row; | 
| 241 |  |  |  |  |  |  | } | 
| 242 |  |  |  |  |  |  |  | 
| 243 |  |  |  |  |  |  | # in scalar context, just return the rows. In list context, also return the column names | 
| 244 | 10 | 100 |  |  |  | 83 | return wantarray ? (\@cols, \@rows) : \@rows; | 
| 245 |  |  |  |  |  |  | } | 
| 246 |  |  |  |  |  |  |  | 
| 247 |  |  |  |  |  |  |  | 
| 248 |  |  |  |  |  |  |  | 
| 249 |  |  |  |  |  |  | sub _subrange { | 
| 250 | 10 |  |  | 10 |  | 26 | my ($self, $values, $ref) = @_; | 
| 251 |  |  |  |  |  |  |  | 
| 252 |  |  |  |  |  |  | # parse rows and columns from the $ref string (of shape like for example "A1:D34") | 
| 253 | 10 | 50 |  |  |  | 91 | my ($col1, $row1, $col2, $row2) = $ref =~ /^([A-Z]+)(\d+):([A-Z]+)(\d+)$/ | 
| 254 |  |  |  |  |  |  | or croak "_subrange : invalid ref: $ref"; | 
| 255 |  |  |  |  |  |  |  | 
| 256 |  |  |  |  |  |  | # restrict to the row range | 
| 257 | 10 | 100 | 100 |  |  | 46 | if ($row1 > 1 || $row2 < @$values){ | 
| 258 | 8 |  |  |  |  | 31 | $values = [ @$values[$row1-1 .. $row2-1] ]; | 
| 259 |  |  |  |  |  |  | } | 
| 260 |  |  |  |  |  |  |  | 
| 261 |  |  |  |  |  |  | # restrict to the column range | 
| 262 | 10 |  |  |  |  | 33 | my @col_nums = map {$self->A1_to_num($_) - 1} ($col1, $col2); | 
|  | 20 |  |  |  |  | 41 |  | 
| 263 | 10 | 100 |  |  |  | 36 | if ($col_nums[0] > 1){ # THINK : should check if $colnum2 is smaller that the max row size ?? | 
| 264 | 6 |  |  |  |  | 17 | my @col_range = ($col_nums[0] .. $col_nums[1]); | 
| 265 | 6 |  |  |  |  | 11 | $values = [map { [ @$_[@col_range] ]} @$values]; | 
|  | 16 |  |  |  |  | 49 |  | 
| 266 |  |  |  |  |  |  | } | 
| 267 |  |  |  |  |  |  |  | 
| 268 | 10 |  |  |  |  | 56 | return $values; | 
| 269 |  |  |  |  |  |  | } | 
| 270 |  |  |  |  |  |  |  | 
| 271 |  |  |  |  |  |  |  | 
| 272 |  |  |  |  |  |  | 1; | 
| 273 |  |  |  |  |  |  |  | 
| 274 |  |  |  |  |  |  |  | 
| 275 |  |  |  |  |  |  | __END__ | 
| 276 |  |  |  |  |  |  |  | 
| 277 |  |  |  |  |  |  | =head1 NAME | 
| 278 |  |  |  |  |  |  |  | 
| 279 |  |  |  |  |  |  |  | 
| 280 |  |  |  |  |  |  | Excel::ValueReader::XLSX - extracting values from Excel workbooks in XLSX format, fast | 
| 281 |  |  |  |  |  |  |  | 
| 282 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 283 |  |  |  |  |  |  |  | 
| 284 |  |  |  |  |  |  | my $reader = Excel::ValueReader::XLSX->new(xlsx => $filename); | 
| 285 |  |  |  |  |  |  | # .. or with syntactic sugar : | 
| 286 |  |  |  |  |  |  | my $reader = Excel::ValueReader::XLSX->new($filename); | 
| 287 |  |  |  |  |  |  | # .. or with LibXML backend : | 
| 288 |  |  |  |  |  |  | my $reader = Excel::ValueReader::XLSX->new(xlsx => $filename, | 
| 289 |  |  |  |  |  |  | using => 'LibXML'); | 
| 290 |  |  |  |  |  |  |  | 
| 291 |  |  |  |  |  |  | foreach my $sheet_name ($reader->sheet_names) { | 
| 292 |  |  |  |  |  |  | my $grid = $reader->values($sheet_name); | 
| 293 |  |  |  |  |  |  | my $n_rows = @$grid; | 
| 294 |  |  |  |  |  |  | print "sheet $sheet_name has $n_rows rows; ", | 
| 295 |  |  |  |  |  |  | "first cell contains : ", $grid->[0][0]; | 
| 296 |  |  |  |  |  |  | } | 
| 297 |  |  |  |  |  |  |  | 
| 298 |  |  |  |  |  |  | foreach my $table_name ($reader->table_names) { | 
| 299 |  |  |  |  |  |  | my ($columns, $rows) = $reader->table($table_name); | 
| 300 |  |  |  |  |  |  | my $n_data_rows      = @$rows; | 
| 301 |  |  |  |  |  |  | my $n_columns        = @$columns; | 
| 302 |  |  |  |  |  |  | print "table $table_name has $n_data_rows rows and $n_columns columns; ", | 
| 303 |  |  |  |  |  |  | "column 'foo' in first row contains : ", $rows->[0]{foo}; | 
| 304 |  |  |  |  |  |  | } | 
| 305 |  |  |  |  |  |  |  | 
| 306 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 307 |  |  |  |  |  |  |  | 
| 308 |  |  |  |  |  |  | This module reads the contents of an Excel file in XLSX format. | 
| 309 |  |  |  |  |  |  | Unlike other modules like L<Spreadsheet::ParseXLSX> or L<Spreadsheet::XLSX>, | 
| 310 |  |  |  |  |  |  | there is no support for reading formulas, formats or other Excel internal | 
| 311 |  |  |  |  |  |  | information; all you get are plain values -- but you get them much | 
| 312 |  |  |  |  |  |  | faster ! Besides, this module also has support for parsing Excel tables. | 
| 313 |  |  |  |  |  |  |  | 
| 314 |  |  |  |  |  |  | This front module has two different backends for extracting values : | 
| 315 |  |  |  |  |  |  |  | 
| 316 |  |  |  |  |  |  | =over | 
| 317 |  |  |  |  |  |  |  | 
| 318 |  |  |  |  |  |  | =item Regex (default) | 
| 319 |  |  |  |  |  |  |  | 
| 320 |  |  |  |  |  |  | this backend uses regular expressions to parse the XML content. | 
| 321 |  |  |  |  |  |  |  | 
| 322 |  |  |  |  |  |  | =item LibXML | 
| 323 |  |  |  |  |  |  |  | 
| 324 |  |  |  |  |  |  | this backend uses L<XML::LibXML::Reader> to parse the XML content. | 
| 325 |  |  |  |  |  |  | It is probably safer but about three times slower than the Regex backend | 
| 326 |  |  |  |  |  |  | (but still much faster than L<Spreadsheet::ParseXLSX>). | 
| 327 |  |  |  |  |  |  |  | 
| 328 |  |  |  |  |  |  | =back | 
| 329 |  |  |  |  |  |  |  | 
| 330 |  |  |  |  |  |  |  | 
| 331 |  |  |  |  |  |  | =head1 METHODS | 
| 332 |  |  |  |  |  |  |  | 
| 333 |  |  |  |  |  |  | =head2 new | 
| 334 |  |  |  |  |  |  |  | 
| 335 |  |  |  |  |  |  | my $reader = Excel::ValueReader::XLSX->new(xlsx  => $filename, | 
| 336 |  |  |  |  |  |  | using => $backend, | 
| 337 |  |  |  |  |  |  | %date_formatting_options); | 
| 338 |  |  |  |  |  |  |  | 
| 339 |  |  |  |  |  |  | The C<xlsx> argument is mandatory and points to the C<.xlsx> file to be parsed. | 
| 340 |  |  |  |  |  |  | The C<using> argument is optional; it specifies the backend to be used for parsing; | 
| 341 |  |  |  |  |  |  | default is 'Regex'. | 
| 342 |  |  |  |  |  |  |  | 
| 343 |  |  |  |  |  |  | As syntactic sugar, a shorter form is admitted : | 
| 344 |  |  |  |  |  |  |  | 
| 345 |  |  |  |  |  |  | my $reader = Excel::ValueReader::XLSX->new($filename); | 
| 346 |  |  |  |  |  |  |  | 
| 347 |  |  |  |  |  |  | Optional parameters for formatting date and time values | 
| 348 |  |  |  |  |  |  | are described in the L</DATE AND TIME FORMATS> section below. | 
| 349 |  |  |  |  |  |  |  | 
| 350 |  |  |  |  |  |  |  | 
| 351 |  |  |  |  |  |  | =head2 sheet_names | 
| 352 |  |  |  |  |  |  |  | 
| 353 |  |  |  |  |  |  | my @sheets = $reader->sheet_names; | 
| 354 |  |  |  |  |  |  |  | 
| 355 |  |  |  |  |  |  | Returns the list of worksheet names, in the same order as in the Excel file. | 
| 356 |  |  |  |  |  |  |  | 
| 357 |  |  |  |  |  |  | =head2 values | 
| 358 |  |  |  |  |  |  |  | 
| 359 |  |  |  |  |  |  | my $grid = $reader->values($sheet); | 
| 360 |  |  |  |  |  |  |  | 
| 361 |  |  |  |  |  |  | Returns a bidimensional array of scalars, corresponding to cell | 
| 362 |  |  |  |  |  |  | values in the specified worksheet. The C<$sheet> argument can be either | 
| 363 |  |  |  |  |  |  | a sheet name or a sheet position (starting at 1). | 
| 364 |  |  |  |  |  |  |  | 
| 365 |  |  |  |  |  |  | Unlike the original Excel cells, positions in the grid are zero-based, | 
| 366 |  |  |  |  |  |  | so for example the content of cell B3 is in C<< $grid->[1][2] >>. | 
| 367 |  |  |  |  |  |  | The grid is sparse : the size of each row depends on the | 
| 368 |  |  |  |  |  |  | position of the last non-empty cell in that row. | 
| 369 |  |  |  |  |  |  | Thanks to Perl's auto-vivification mechanism, any attempt to access | 
| 370 |  |  |  |  |  |  | a non-existent cell will automatically create the corresponding cell | 
| 371 |  |  |  |  |  |  | within the grid. The number of rows and columns in the grid can be computed | 
| 372 |  |  |  |  |  |  | like this : | 
| 373 |  |  |  |  |  |  |  | 
| 374 |  |  |  |  |  |  | my $nb_rows = @$grid; | 
| 375 |  |  |  |  |  |  | my $nb_cols = max map {scalar @$_} @$grid; # must import List::Util::max | 
| 376 |  |  |  |  |  |  |  | 
| 377 |  |  |  |  |  |  | =head2 table_names | 
| 378 |  |  |  |  |  |  |  | 
| 379 |  |  |  |  |  |  | my @table_names = $reader->table_names; | 
| 380 |  |  |  |  |  |  |  | 
| 381 |  |  |  |  |  |  | Returns the list of names of tables registered in this workbook. | 
| 382 |  |  |  |  |  |  |  | 
| 383 |  |  |  |  |  |  |  | 
| 384 |  |  |  |  |  |  | =head2 table | 
| 385 |  |  |  |  |  |  |  | 
| 386 |  |  |  |  |  |  | my $rows             = $reader->table(name => $table_name);  # or just : $reader->table($table_name) | 
| 387 |  |  |  |  |  |  | # or | 
| 388 |  |  |  |  |  |  | my ($columns, $rows) = $reader->table(name => $table_name); | 
| 389 |  |  |  |  |  |  | # or | 
| 390 |  |  |  |  |  |  | my ($columns, $rows) = $reader->table(sheet => $sheet [, ref        => $range] | 
| 391 |  |  |  |  |  |  | [, columns    => \@columns] | 
| 392 |  |  |  |  |  |  | [, no_headers => 1] | 
| 393 |  |  |  |  |  |  | ); | 
| 394 |  |  |  |  |  |  |  | 
| 395 |  |  |  |  |  |  | In its simplest form, this method returns the content of an Excel table referenced by its table name | 
| 396 |  |  |  |  |  |  | (in Excel, the table name appears and can be modified through the ribbon tab | 
| 397 |  |  |  |  |  |  | entry "Table tools / Design"). | 
| 398 |  |  |  |  |  |  | The table name is passed either through the named argument C<name>, or positionally as unique argument | 
| 399 |  |  |  |  |  |  | to the method. | 
| 400 |  |  |  |  |  |  |  | 
| 401 |  |  |  |  |  |  | Rows are returned as hashrefs, where keys of the hashes correspond to column names | 
| 402 |  |  |  |  |  |  | in the table. In scalar context, the method just returns an arrayref to the list of rows. In list | 
| 403 |  |  |  |  |  |  | context, the method returns a pair, where the first element is an arrayref of column names, and the | 
| 404 |  |  |  |  |  |  | second element is an arrayref to the list of rows. | 
| 405 |  |  |  |  |  |  |  | 
| 406 |  |  |  |  |  |  | Instead of specifying a table name, it is also possible to give a sheet name or sheet number. | 
| 407 |  |  |  |  |  |  | By default, this considers the whole sheet content as a single table, where column names | 
| 408 |  |  |  |  |  |  | are on the first row. However, additional arguments can be supplied to change the default | 
| 409 |  |  |  |  |  |  | behaviour : | 
| 410 |  |  |  |  |  |  |  | 
| 411 |  |  |  |  |  |  | =over | 
| 412 |  |  |  |  |  |  |  | 
| 413 |  |  |  |  |  |  | =item ref | 
| 414 |  |  |  |  |  |  |  | 
| 415 |  |  |  |  |  |  | a specific range of cells within the sheet that contain the table rows and columns. | 
| 416 |  |  |  |  |  |  | The range must be expressed using traditional Excel notation, | 
| 417 |  |  |  |  |  |  | like for example C<"C9:E23"> (colums 3 to 5, rows 9 to 23). | 
| 418 |  |  |  |  |  |  |  | 
| 419 |  |  |  |  |  |  | =item columns | 
| 420 |  |  |  |  |  |  |  | 
| 421 |  |  |  |  |  |  | an arrayref containing the list of column names. | 
| 422 |  |  |  |  |  |  | If absent, column names will be taken from the first row in the table. | 
| 423 |  |  |  |  |  |  |  | 
| 424 |  |  |  |  |  |  | =item no_headers | 
| 425 |  |  |  |  |  |  |  | 
| 426 |  |  |  |  |  |  | if true, the first row in the table will be treated as a regular data row, instead | 
| 427 |  |  |  |  |  |  | of being treated as a list of column names. In that case, since column names cannot | 
| 428 |  |  |  |  |  |  | be inferred from cell values in the first row, the C<columns> argument to the method | 
| 429 |  |  |  |  |  |  | must be present. | 
| 430 |  |  |  |  |  |  |  | 
| 431 |  |  |  |  |  |  | =back | 
| 432 |  |  |  |  |  |  |  | 
| 433 |  |  |  |  |  |  |  | 
| 434 |  |  |  |  |  |  | =head1 AUXILIARY METHODS | 
| 435 |  |  |  |  |  |  |  | 
| 436 |  |  |  |  |  |  | =head1 A1_to_num | 
| 437 |  |  |  |  |  |  |  | 
| 438 |  |  |  |  |  |  | my $col_num = $reader->A1_to_num('A');    #   1 | 
| 439 |  |  |  |  |  |  | $col_num = $reader->A1_to_num('AZ');   #  52 | 
| 440 |  |  |  |  |  |  | $col_num = $reader->A1_to_num('AA');   #  26 | 
| 441 |  |  |  |  |  |  | $col_num = $reader->A1_to_num('ABC');  # 731 | 
| 442 |  |  |  |  |  |  |  | 
| 443 |  |  |  |  |  |  | Converts a column expressed as a sequence of capital letters (in Excel's "A1" notation) | 
| 444 |  |  |  |  |  |  | into the corresponding numeric value. | 
| 445 |  |  |  |  |  |  |  | 
| 446 |  |  |  |  |  |  |  | 
| 447 |  |  |  |  |  |  | =head1 formatted_date | 
| 448 |  |  |  |  |  |  |  | 
| 449 |  |  |  |  |  |  | my $date = $reader->formatted_date($numeric_date, $excel_date_format); | 
| 450 |  |  |  |  |  |  |  | 
| 451 |  |  |  |  |  |  | Given a numeric date, this method returns a string date formatted according | 
| 452 |  |  |  |  |  |  | to the I<date formatter> routine explained in the next section. The C<$excel_date_format> | 
| 453 |  |  |  |  |  |  | argument should be the Excel format string for that specific cell; it is used | 
| 454 |  |  |  |  |  |  | only for for deciding if the numeric value should be presented as a date, as a time, | 
| 455 |  |  |  |  |  |  | or both. Optionally, a custom date formatter callback could be passed as third argument. | 
| 456 |  |  |  |  |  |  |  | 
| 457 |  |  |  |  |  |  |  | 
| 458 |  |  |  |  |  |  | =head1 DATE AND TIME FORMATS | 
| 459 |  |  |  |  |  |  |  | 
| 460 |  |  |  |  |  |  | =head2 Date and time handling | 
| 461 |  |  |  |  |  |  |  | 
| 462 |  |  |  |  |  |  | In Excel, date and times values are stored as numeric values, where the integer part | 
| 463 |  |  |  |  |  |  | represents the date, and the fractional part represents the time. What distinguishes | 
| 464 |  |  |  |  |  |  | such numbers from ordinary numbers is the I<numeric format> applied to the cells | 
| 465 |  |  |  |  |  |  | where they appear. | 
| 466 |  |  |  |  |  |  |  | 
| 467 |  |  |  |  |  |  | Numeric formats in Excel are complex to reproduce, in particular | 
| 468 |  |  |  |  |  |  | because they are locale-dependent; therefore the present module does not attempt | 
| 469 |  |  |  |  |  |  | to faithfully interpret Excel formats. It just infers from formats which | 
| 470 |  |  |  |  |  |  | cells should be presented as date and/or time values. All such values are then | 
| 471 |  |  |  |  |  |  | presented through the same I<date_formatter> routine. The default formatter | 
| 472 |  |  |  |  |  |  | is based on L<POSIX/strftime>; other behaviours may be specified through the C<date_formatter> | 
| 473 |  |  |  |  |  |  | parameter (explained below). | 
| 474 |  |  |  |  |  |  |  | 
| 475 |  |  |  |  |  |  | =head2 Parameters for the default strftime formatter | 
| 476 |  |  |  |  |  |  |  | 
| 477 |  |  |  |  |  |  | When using the default strftime formatter, the following parameters may be passed | 
| 478 |  |  |  |  |  |  | to the constructor : | 
| 479 |  |  |  |  |  |  |  | 
| 480 |  |  |  |  |  |  | =over | 
| 481 |  |  |  |  |  |  |  | 
| 482 |  |  |  |  |  |  | =item date_format | 
| 483 |  |  |  |  |  |  |  | 
| 484 |  |  |  |  |  |  | The L<POSIX/strftime> format for representing dates. The default is C<%d.%m.%Y>. | 
| 485 |  |  |  |  |  |  |  | 
| 486 |  |  |  |  |  |  | =item time_format | 
| 487 |  |  |  |  |  |  |  | 
| 488 |  |  |  |  |  |  | The L<POSIX/strftime> format for representing times. The default is C<%H:%M:%S>. | 
| 489 |  |  |  |  |  |  |  | 
| 490 |  |  |  |  |  |  | =item datetime_format | 
| 491 |  |  |  |  |  |  |  | 
| 492 |  |  |  |  |  |  | The L<POSIX/strftime> format for representing date and time together. | 
| 493 |  |  |  |  |  |  | The default is the concatenation of C<date_format> and C<time_format>, with | 
| 494 |  |  |  |  |  |  | a space inbetween. | 
| 495 |  |  |  |  |  |  |  | 
| 496 |  |  |  |  |  |  | =back | 
| 497 |  |  |  |  |  |  |  | 
| 498 |  |  |  |  |  |  |  | 
| 499 |  |  |  |  |  |  | =head2 Writing a custom formatter | 
| 500 |  |  |  |  |  |  |  | 
| 501 |  |  |  |  |  |  | A custom algorithm for date formatting can be specified as a parameter to the constructor | 
| 502 |  |  |  |  |  |  |  | 
| 503 |  |  |  |  |  |  | my $reader = Excel::ValueReader::XLSX->new(xlsx           => $filename, | 
| 504 |  |  |  |  |  |  | date_formatter => sub {...}); | 
| 505 |  |  |  |  |  |  |  | 
| 506 |  |  |  |  |  |  | If this parameter is C<undef>, date formatting is canceled and therefore date and | 
| 507 |  |  |  |  |  |  | time values will be presented as plain numbers. | 
| 508 |  |  |  |  |  |  |  | 
| 509 |  |  |  |  |  |  | If not C<undef>, the date formatting routine will we called as : | 
| 510 |  |  |  |  |  |  |  | 
| 511 |  |  |  |  |  |  | $date_formater->($excel_date_format, $year, $month, $day, $hour, $minute, $second, $millisecond); | 
| 512 |  |  |  |  |  |  |  | 
| 513 |  |  |  |  |  |  | where | 
| 514 |  |  |  |  |  |  |  | 
| 515 |  |  |  |  |  |  | =over | 
| 516 |  |  |  |  |  |  |  | 
| 517 |  |  |  |  |  |  | =item * | 
| 518 |  |  |  |  |  |  |  | 
| 519 |  |  |  |  |  |  | C<$excel_date_format> is the Excel numbering format associated to that cell, like for example | 
| 520 |  |  |  |  |  |  | C<mm-dd-yy> or C<h:mm:ss AM/PM>. See the Excel documentation for the syntax description. | 
| 521 |  |  |  |  |  |  | This is useful to decide if the value should be presented as a date, a time, or both. | 
| 522 |  |  |  |  |  |  | The present module uses a simple heuristic : if the format contains C<d> or C<y>, it should | 
| 523 |  |  |  |  |  |  | be presented as a date; if the format contains C<h> or C<s>, it should be presented | 
| 524 |  |  |  |  |  |  | as a time. The letter C<m> is not taken into consideration because it is ambiguous : | 
| 525 |  |  |  |  |  |  | depending on the position in the format string, it may represent either a "month" or a "minute". | 
| 526 |  |  |  |  |  |  |  | 
| 527 |  |  |  |  |  |  | =item * | 
| 528 |  |  |  |  |  |  |  | 
| 529 |  |  |  |  |  |  | C<year> is the full year, such as 1993 or 2021. The date system of the Excel file (either 1900 or 1904, | 
| 530 |  |  |  |  |  |  | see L<https://support.microsoft.com/en-us/office/date-systems-in-excel-e7fe7167-48a9-4b96-bb53-5612a800b487>) is properly taken into account. Excel has no support for dates prior to 1900 or 1904, so the | 
| 531 |  |  |  |  |  |  | C<year> component wil always be above this value. | 
| 532 |  |  |  |  |  |  |  | 
| 533 |  |  |  |  |  |  | =item * | 
| 534 |  |  |  |  |  |  |  | 
| 535 |  |  |  |  |  |  | C<month> is the numeric value of the month, starting at 1 | 
| 536 |  |  |  |  |  |  |  | 
| 537 |  |  |  |  |  |  | =item * | 
| 538 |  |  |  |  |  |  |  | 
| 539 |  |  |  |  |  |  | C<day> is the numeric value of the day in month, starting at 1 | 
| 540 |  |  |  |  |  |  |  | 
| 541 |  |  |  |  |  |  | =item * | 
| 542 |  |  |  |  |  |  |  | 
| 543 |  |  |  |  |  |  | C<$hour>, C<$minute>, C<$second>, C<$millisecond> obviously contain the corresponding | 
| 544 |  |  |  |  |  |  | numeric values. | 
| 545 |  |  |  |  |  |  |  | 
| 546 |  |  |  |  |  |  | =back | 
| 547 |  |  |  |  |  |  |  | 
| 548 |  |  |  |  |  |  |  | 
| 549 |  |  |  |  |  |  | =head1 CAVEATS | 
| 550 |  |  |  |  |  |  |  | 
| 551 |  |  |  |  |  |  | =over | 
| 552 |  |  |  |  |  |  |  | 
| 553 |  |  |  |  |  |  | =item * | 
| 554 |  |  |  |  |  |  |  | 
| 555 |  |  |  |  |  |  | This module was optimized for speed, not for completeness of | 
| 556 |  |  |  |  |  |  | OOXML-SpreadsheetML support; so there may be some edge cases where the | 
| 557 |  |  |  |  |  |  | output is incorrect with respect to the original Excel data. | 
| 558 |  |  |  |  |  |  |  | 
| 559 |  |  |  |  |  |  | =item * | 
| 560 |  |  |  |  |  |  |  | 
| 561 |  |  |  |  |  |  | Embedded newline characters in strings are stored in Excel as C<< \r\n >>, | 
| 562 |  |  |  |  |  |  | following the old Windows convention. When retrieved through the C<Regex> | 
| 563 |  |  |  |  |  |  | backend, the result contains the original C<< \r >> and C<< \n >> characters; | 
| 564 |  |  |  |  |  |  | but when retrieved through the LibXML, C<< \r >> are silently removed by the | 
| 565 |  |  |  |  |  |  | C<XML::LibXML> package. | 
| 566 |  |  |  |  |  |  |  | 
| 567 |  |  |  |  |  |  | =back | 
| 568 |  |  |  |  |  |  |  | 
| 569 |  |  |  |  |  |  | =head1 SEE ALSO | 
| 570 |  |  |  |  |  |  |  | 
| 571 |  |  |  |  |  |  | The official reference for OOXML-SpreadsheetML format is in | 
| 572 |  |  |  |  |  |  | L<https://www.ecma-international.org/publications/standards/Ecma-376.htm>. | 
| 573 |  |  |  |  |  |  |  | 
| 574 |  |  |  |  |  |  | Introductory material on XLSX file structure can be found at | 
| 575 |  |  |  |  |  |  | L<http://officeopenxml.com/anatomyofOOXML-xlsx.php>. | 
| 576 |  |  |  |  |  |  |  | 
| 577 |  |  |  |  |  |  | The CPAN module L<Data::XLSX::Parser> is claimed to be in alpha stage; | 
| 578 |  |  |  |  |  |  | it seems to be working but the documentation is insufficient -- I had | 
| 579 |  |  |  |  |  |  | to inspect the test suite to understand how to use it. | 
| 580 |  |  |  |  |  |  |  | 
| 581 |  |  |  |  |  |  | Another unpublished but working module for parsing Excel files in Perl | 
| 582 |  |  |  |  |  |  | can be found at L<https://github.com/jmcnamara/excel-reader-xlsx>. | 
| 583 |  |  |  |  |  |  | Some test cases were borrowed from that distribution. | 
| 584 |  |  |  |  |  |  |  | 
| 585 |  |  |  |  |  |  | Conversions from and to Excel internal date format can also be performed | 
| 586 |  |  |  |  |  |  | through the L<DateTime::Format::Excel> module. | 
| 587 |  |  |  |  |  |  |  | 
| 588 |  |  |  |  |  |  | =head1 BENCHMARKS | 
| 589 |  |  |  |  |  |  |  | 
| 590 |  |  |  |  |  |  | Below are some benchmarks computed with the program C<benchmark.pl> in | 
| 591 |  |  |  |  |  |  | this distribution. The task was to parse an Excel file of five worksheets | 
| 592 |  |  |  |  |  |  | with about 62600 rows in total, and report the number of rows per sheet. | 
| 593 |  |  |  |  |  |  | Reported figures are in seconds. | 
| 594 |  |  |  |  |  |  |  | 
| 595 |  |  |  |  |  |  | Excel::ValueReader::XLSX::Regex    11 elapsed,  10 cpu, 0 system | 
| 596 |  |  |  |  |  |  | Excel::ValueReader::XLSX::LibXML   35 elapsed,  34 cpu, 0 system | 
| 597 |  |  |  |  |  |  | [unpublished] Excel::Reader::XLSX  39 elapsed,  37 cpu, 0 system | 
| 598 |  |  |  |  |  |  | Spreadsheet::ParseXLSX            244 elapsed, 240 cpu, 1 system | 
| 599 |  |  |  |  |  |  | Data::XLSX::Parser                 37 elapsed,  35 cpu, 0 system | 
| 600 |  |  |  |  |  |  |  | 
| 601 |  |  |  |  |  |  | These figures show that the regex version is about 3 times faster | 
| 602 |  |  |  |  |  |  | than the LibXML version, and about 22 times faster than | 
| 603 |  |  |  |  |  |  | L<Spreadsheet::ParseXLSX>. Tests with a bigger file of about 90000 rows | 
| 604 |  |  |  |  |  |  | showed similar ratios. | 
| 605 |  |  |  |  |  |  |  | 
| 606 |  |  |  |  |  |  | Modules | 
| 607 |  |  |  |  |  |  | C<Excel::Reader::XLSX> (unpublished) and L<Data::XLSX::Parser> | 
| 608 |  |  |  |  |  |  | are based on L<XML::LibXML> like L<Excel::ValueReader::XLSX::Backend::LibXML>; | 
| 609 |  |  |  |  |  |  | execution times for those three modules are very close. | 
| 610 |  |  |  |  |  |  |  | 
| 611 |  |  |  |  |  |  | =head1 ACKNOWLEDGEMENTS | 
| 612 |  |  |  |  |  |  |  | 
| 613 |  |  |  |  |  |  | =over | 
| 614 |  |  |  |  |  |  |  | 
| 615 |  |  |  |  |  |  | =item * | 
| 616 |  |  |  |  |  |  |  | 
| 617 |  |  |  |  |  |  | David Flink signaled (and fixed) a bug about strings with embedded newline characters, and | 
| 618 |  |  |  |  |  |  | signaled that the 'r' attribute in cells is optional. | 
| 619 |  |  |  |  |  |  |  | 
| 620 |  |  |  |  |  |  | =item * | 
| 621 |  |  |  |  |  |  |  | 
| 622 |  |  |  |  |  |  | Ulibuck signaled bugs several minor bugs on the LibXML backend | 
| 623 |  |  |  |  |  |  |  | 
| 624 |  |  |  |  |  |  | =back | 
| 625 |  |  |  |  |  |  |  | 
| 626 |  |  |  |  |  |  |  | 
| 627 |  |  |  |  |  |  | =head1 AUTHOR | 
| 628 |  |  |  |  |  |  |  | 
| 629 |  |  |  |  |  |  | Laurent Dami, E<lt>dami at cpan.orgE<gt> | 
| 630 |  |  |  |  |  |  |  | 
| 631 |  |  |  |  |  |  | =head1 COPYRIGHT AND LICENSE | 
| 632 |  |  |  |  |  |  |  | 
| 633 |  |  |  |  |  |  | Copyright 2020-2023 by Laurent Dami. | 
| 634 |  |  |  |  |  |  |  | 
| 635 |  |  |  |  |  |  | This library is free software; you can redistribute it and/or modify | 
| 636 |  |  |  |  |  |  | it under the same terms as Perl itself. |