line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Spreadsheet::Reader::ExcelXML; |
2
|
|
|
|
|
|
|
our $AUTHORITY = 'cpan:JANDREW'; |
3
|
15
|
|
|
15
|
|
4022159
|
use version 0.77; our $VERSION = version->declare('v0.16.8'); |
|
15
|
|
|
|
|
22357
|
|
|
15
|
|
|
|
|
99
|
|
4
|
|
|
|
|
|
|
###LogSD warn "You uncovered internal logging statements for Spreadsheet::Reader::ExcelXML-$VERSION"; |
5
|
|
|
|
|
|
|
|
6
|
15
|
|
|
15
|
|
1580
|
use 5.010; |
|
15
|
|
|
|
|
68
|
|
7
|
15
|
|
|
15
|
|
6722
|
use Moose; |
|
15
|
|
|
|
|
3009318
|
|
|
15
|
|
|
|
|
89
|
|
8
|
15
|
|
|
15
|
|
80359
|
use MooseX::StrictConstructor; |
|
15
|
|
|
|
|
301509
|
|
|
15
|
|
|
|
|
58
|
|
9
|
15
|
|
|
15
|
|
96343
|
use MooseX::HasDefaults::RO; |
|
15
|
|
|
|
|
72620
|
|
|
15
|
|
|
|
|
47
|
|
10
|
15
|
|
|
15
|
|
98488
|
use Types::Standard qw( is_HashRef is_Object ); |
|
15
|
|
|
|
|
682985
|
|
|
15
|
|
|
|
|
142
|
|
11
|
15
|
|
|
15
|
|
13774
|
use Clone 'clone'; |
|
15
|
|
|
|
|
24485
|
|
|
15
|
|
|
|
|
958
|
|
12
|
|
|
|
|
|
|
###LogSD use B::Deparse; |
13
|
|
|
|
|
|
|
###LogSD my $deparser = B::Deparse->new; |
14
|
|
|
|
|
|
|
|
15
|
15
|
|
|
|
|
90
|
use MooseX::ShortCut::BuildInstance 1.040 qw( |
16
|
|
|
|
|
|
|
build_instance should_re_use_classes set_args_cloning |
17
|
15
|
|
|
15
|
|
5734
|
); |
|
15
|
|
|
|
|
5481521
|
|
18
|
|
|
|
|
|
|
should_re_use_classes( 1 ); |
19
|
|
|
|
|
|
|
set_args_cloning ( 0 ); |
20
|
|
|
|
|
|
|
###LogSD use Log::Shiras::Telephone v0.48.0; |
21
|
15
|
|
|
15
|
|
7434
|
use lib '../../../../lib',; |
|
15
|
|
|
|
|
25
|
|
|
15
|
|
|
|
|
108
|
|
22
|
15
|
|
|
15
|
|
8683
|
use Spreadsheet::Reader::ExcelXML::Error; |
|
15
|
|
|
|
|
68
|
|
|
15
|
|
|
|
|
812
|
|
23
|
15
|
|
|
15
|
|
8952
|
use Spreadsheet::Reader::ExcelXML::Workbook; |
|
15
|
|
|
|
|
55
|
|
|
15
|
|
|
|
|
2551
|
|
24
|
15
|
|
|
15
|
|
10450
|
use Spreadsheet::Reader::Format v0.6.4; |
|
15
|
|
|
|
|
90440
|
|
|
15
|
|
|
|
|
483
|
|
25
|
15
|
|
|
15
|
|
7278
|
use Spreadsheet::Reader::Format::FmtDefault; |
|
15
|
|
|
|
|
780981
|
|
|
15
|
|
|
|
|
566
|
|
26
|
15
|
|
|
15
|
|
12108
|
use Spreadsheet::Reader::Format::ParseExcelFormatStrings; |
|
15
|
|
|
|
|
6605652
|
|
|
15
|
|
|
|
|
707
|
|
27
|
15
|
|
|
15
|
|
117
|
use Spreadsheet::Reader::ExcelXML::Types qw( XLSXFile IOFileType ); |
|
15
|
|
|
|
|
24
|
|
|
15
|
|
|
|
|
169
|
|
28
|
|
|
|
|
|
|
###LogSD with 'Log::Shiras::LogSpace'; |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
#########1 Dispatch Tables and data 4#########5#########6#########7#########8#########9 |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
my $attribute_defaults ={ |
33
|
|
|
|
|
|
|
error_inst =>{ |
34
|
|
|
|
|
|
|
package => 'ErrorInstance', |
35
|
|
|
|
|
|
|
superclasses => ['Spreadsheet::Reader::ExcelXML::Error'], |
36
|
|
|
|
|
|
|
should_warn => 0, |
37
|
|
|
|
|
|
|
}, |
38
|
|
|
|
|
|
|
formatter_inst =>{ |
39
|
|
|
|
|
|
|
package => 'FormatInstance', |
40
|
|
|
|
|
|
|
superclasses => [ 'Spreadsheet::Reader::Format::FmtDefault' ], |
41
|
|
|
|
|
|
|
add_roles_in_sequence =>[qw( |
42
|
|
|
|
|
|
|
Spreadsheet::Reader::Format::ParseExcelFormatStrings |
43
|
|
|
|
|
|
|
Spreadsheet::Reader::Format |
44
|
|
|
|
|
|
|
)], |
45
|
|
|
|
|
|
|
}, |
46
|
|
|
|
|
|
|
count_from_zero => 1, |
47
|
|
|
|
|
|
|
file_boundary_flags => 1, |
48
|
|
|
|
|
|
|
empty_is_end => 0, |
49
|
|
|
|
|
|
|
values_only => 0, |
50
|
|
|
|
|
|
|
from_the_edge => 1, |
51
|
|
|
|
|
|
|
group_return_type => 'instance', |
52
|
|
|
|
|
|
|
empty_return_type => 'empty_string', |
53
|
|
|
|
|
|
|
cache_positions =>{# Test this !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! |
54
|
|
|
|
|
|
|
shared_strings_interface => 5242880,# 5 MB |
55
|
|
|
|
|
|
|
styles_interface => 5242880,# 5 MB |
56
|
|
|
|
|
|
|
worksheet_interface => 2097152,# 2 MB |
57
|
|
|
|
|
|
|
}, |
58
|
|
|
|
|
|
|
show_sub_file_size => 0, |
59
|
|
|
|
|
|
|
spread_merged_values => 0, |
60
|
|
|
|
|
|
|
skip_hidden => 0, |
61
|
|
|
|
|
|
|
spaces_are_empty => 0, |
62
|
|
|
|
|
|
|
merge_data => 1, |
63
|
|
|
|
|
|
|
column_formats => 1, |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
}; |
66
|
|
|
|
|
|
|
my $flag_settings ={ |
67
|
|
|
|
|
|
|
alt_default =>{ |
68
|
|
|
|
|
|
|
values_only => 1, |
69
|
|
|
|
|
|
|
count_from_zero => 0, |
70
|
|
|
|
|
|
|
empty_is_end => 1, |
71
|
|
|
|
|
|
|
}, |
72
|
|
|
|
|
|
|
just_the_data =>{ |
73
|
|
|
|
|
|
|
count_from_zero => 0, |
74
|
|
|
|
|
|
|
values_only => 1, |
75
|
|
|
|
|
|
|
empty_is_end => 1, |
76
|
|
|
|
|
|
|
group_return_type => 'value', |
77
|
|
|
|
|
|
|
from_the_edge => 0, |
78
|
|
|
|
|
|
|
empty_return_type => 'undef_string', |
79
|
|
|
|
|
|
|
spaces_are_empty => 1, |
80
|
|
|
|
|
|
|
merge_data => 0, |
81
|
|
|
|
|
|
|
column_formats => 0, |
82
|
|
|
|
|
|
|
}, |
83
|
|
|
|
|
|
|
just_raw_data =>{ |
84
|
|
|
|
|
|
|
count_from_zero => 0, |
85
|
|
|
|
|
|
|
values_only => 1, |
86
|
|
|
|
|
|
|
empty_is_end => 1, |
87
|
|
|
|
|
|
|
group_return_type => 'unformatted', |
88
|
|
|
|
|
|
|
from_the_edge => 0, |
89
|
|
|
|
|
|
|
empty_return_type => 'undef_string', |
90
|
|
|
|
|
|
|
spaces_are_empty => 1, |
91
|
|
|
|
|
|
|
merge_data => 0, |
92
|
|
|
|
|
|
|
column_formats => 0, |
93
|
|
|
|
|
|
|
}, |
94
|
|
|
|
|
|
|
like_ParseExcel =>{ |
95
|
|
|
|
|
|
|
count_from_zero => 1, |
96
|
|
|
|
|
|
|
group_return_type => 'instance', |
97
|
|
|
|
|
|
|
}, |
98
|
|
|
|
|
|
|
debug =>{ |
99
|
|
|
|
|
|
|
error_inst =>{ |
100
|
|
|
|
|
|
|
superclasses => ['Spreadsheet::Reader::ExcelXML::Error'], |
101
|
|
|
|
|
|
|
package => 'ErrorInstance', |
102
|
|
|
|
|
|
|
should_warn => 1, |
103
|
|
|
|
|
|
|
}, |
104
|
|
|
|
|
|
|
show_sub_file_size => 1, |
105
|
|
|
|
|
|
|
}, |
106
|
|
|
|
|
|
|
lots_of_ram =>{ |
107
|
|
|
|
|
|
|
cache_positions =>{ |
108
|
|
|
|
|
|
|
shared_strings_interface => 209715200,# 200 MB |
109
|
|
|
|
|
|
|
styles_interface => 209715200,# 200 MB |
110
|
|
|
|
|
|
|
worksheet_interface => 209715200,# 200 MB |
111
|
|
|
|
|
|
|
}, |
112
|
|
|
|
|
|
|
}, |
113
|
|
|
|
|
|
|
less_ram =>{ |
114
|
|
|
|
|
|
|
cache_positions =>{ |
115
|
|
|
|
|
|
|
shared_strings_interface => 10240,# 10 KB |
116
|
|
|
|
|
|
|
styles_interface => 10240,# 10 KB |
117
|
|
|
|
|
|
|
worksheet_interface => 1024,# 1 KB |
118
|
|
|
|
|
|
|
}, |
119
|
|
|
|
|
|
|
}, |
120
|
|
|
|
|
|
|
}; |
121
|
|
|
|
|
|
|
my $delay_till_build = [qw( formatter_inst )]; |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
#########1 Public Methods 3#########4#########5#########6#########7#########8#########9 |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
###LogSD sub get_class_space{ 'Top' } |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
sub import{# Flags handled here! |
128
|
20
|
|
|
20
|
|
2274
|
my ( $self, @flag_list ) = @_; |
129
|
|
|
|
|
|
|
#~ print "Made it to import\n"; |
130
|
|
|
|
|
|
|
#~ if( scalar( @flag_list ) ){ |
131
|
20
|
|
|
|
|
51680
|
for my $flag ( @flag_list ){ |
132
|
|
|
|
|
|
|
#~ print "Arrived at import with flag: $flag\n"; |
133
|
9
|
100
|
|
|
|
40
|
if( $flag =~ /^:(\w*)$/ ){# Handle text based flags |
|
|
50
|
|
|
|
|
|
134
|
8
|
|
|
|
|
15
|
my $default_choice = $1; |
135
|
|
|
|
|
|
|
#~ print "Attempting to change the default group type to: $default_choice\n"; |
136
|
8
|
50
|
|
|
|
31
|
if( exists $flag_settings->{$default_choice} ){ |
137
|
8
|
|
|
|
|
11
|
for my $attribute ( keys %{$flag_settings->{$default_choice}} ){ |
|
8
|
|
|
|
|
31
|
|
138
|
|
|
|
|
|
|
#~ print "Changing flag -$attribute- to:" . Dumper( $flag_settings->{$default_choice}->{$attribute} ); |
139
|
30
|
|
|
|
|
8884
|
$attribute_defaults->{$attribute} = $flag_settings->{$default_choice}->{$attribute}; |
140
|
|
|
|
|
|
|
} |
141
|
|
|
|
|
|
|
}else{ |
142
|
0
|
|
|
|
|
0
|
confess "No settings available for the flag: $flag"; |
143
|
|
|
|
|
|
|
} |
144
|
|
|
|
|
|
|
}elsif( $flag =~ /^v?\d+\.?\d*/ ){# Version check may wind up here |
145
|
|
|
|
|
|
|
#~ print "Running version check on version: $flag\n"; |
146
|
0
|
|
|
|
|
0
|
my $result = $VERSION <=> version->parse( $flag ); |
147
|
|
|
|
|
|
|
#~ print "Tested against version -$VERSION- gives result: $result\n"; |
148
|
0
|
0
|
|
|
|
0
|
if( $result < 0 ){ |
149
|
0
|
|
|
|
|
0
|
confess "Version -$flag- required - the installed version is: $VERSION"; |
150
|
|
|
|
|
|
|
} |
151
|
|
|
|
|
|
|
}else{ |
152
|
1
|
|
|
|
|
27
|
confess "Passed attribute default flag -$flag- does not comply with the correct format"; |
153
|
|
|
|
|
|
|
} |
154
|
|
|
|
|
|
|
} |
155
|
|
|
|
|
|
|
#~ } |
156
|
|
|
|
|
|
|
#~ print "Finished import\n"; |
157
|
|
|
|
|
|
|
} |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
sub parse{ |
160
|
|
|
|
|
|
|
|
161
|
11
|
|
|
11
|
1
|
3392
|
my ( $self, $file, $formatter ) = @_; |
162
|
|
|
|
|
|
|
###LogSD my $phone = Log::Shiras::Telephone->new( name_space => |
163
|
|
|
|
|
|
|
###LogSD $self->get_all_space . '::parse', ); |
164
|
|
|
|
|
|
|
###LogSD $phone->talk( level => 'info', message =>[ |
165
|
|
|
|
|
|
|
###LogSD "Arrived at parse for:", $file, |
166
|
|
|
|
|
|
|
###LogSD (($formatter) ? "with formatter: $formatter" : '') ] ); |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
# Test the file |
169
|
11
|
100
|
|
|
|
59
|
if( XLSXFile->check( $file ) ){ |
|
|
50
|
|
|
|
|
|
170
|
|
|
|
|
|
|
###LogSD $phone->talk( level => 'info', message =>[ "This is an xlsx file: $file" ] ); |
171
|
|
|
|
|
|
|
}elsif( IOFileType->check( $file ) ){ |
172
|
|
|
|
|
|
|
###LogSD $phone->talk( level => 'info', message =>[ "This is a file handle:", $file ] ); |
173
|
|
|
|
|
|
|
}else{ |
174
|
2
|
|
|
|
|
50
|
$self->set_error( "Value \"$file\" did not pass type constraint \"IOFileType\"" ); |
175
|
2
|
|
|
|
|
9
|
return undef; |
176
|
|
|
|
|
|
|
} |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
# Load the formatter |
179
|
9
|
50
|
|
|
|
86
|
if( $formatter ){ |
180
|
0
|
|
|
|
|
0
|
$self->set_formatter_inst( $formatter ); |
181
|
|
|
|
|
|
|
###LogSD $phone->talk( level => 'info', message =>[ "Formatter added" ] ); |
182
|
|
|
|
|
|
|
} |
183
|
|
|
|
|
|
|
|
184
|
9
|
|
|
|
|
57
|
$self->set_file( $file ); |
185
|
|
|
|
|
|
|
###LogSD $phone->talk( level => 'info', message =>[ "Build workbook attempt complete", $self->file_opened ] ); |
186
|
9
|
100
|
|
|
|
67
|
return $self->file_opened ? $self : undef; |
187
|
|
|
|
|
|
|
} |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
#########1 Private Attributes 3#########4#########5#########6#########7#########8#########9 |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
has _workbook =>( |
192
|
|
|
|
|
|
|
isa => 'Spreadsheet::Reader::ExcelXML::Workbook', |
193
|
|
|
|
|
|
|
predicate => '_has_workbook', |
194
|
|
|
|
|
|
|
writer => '_set_workbook', |
195
|
|
|
|
|
|
|
clearer => '_clear_the_workbook', |
196
|
|
|
|
|
|
|
handles => [qw( |
197
|
|
|
|
|
|
|
error set_error clear_error set_warnings |
198
|
|
|
|
|
|
|
should_spew_longmess spewing_longmess if_warn has_error |
199
|
|
|
|
|
|
|
get_error_inst has_error_inst set_formatter_inst get_excel_region |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
get_formatter_region has_target_encoding get_target_encoding set_workbook_for_formatter |
202
|
|
|
|
|
|
|
set_target_encoding set_european_first set_date_behavior get_defined_conversion |
203
|
|
|
|
|
|
|
set_defined_excel_formats change_output_encoding |
204
|
|
|
|
|
|
|
set_formatter_cache_behavior parse_excel_format_string |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
set_file counting_from_zero boundary_flag_setting spreading_merged_values |
207
|
|
|
|
|
|
|
is_empty_the_end get_values_only starts_at_the_edge get_group_return_type |
208
|
|
|
|
|
|
|
get_empty_return_type cache_positions get_cache_size has_cache_size |
209
|
|
|
|
|
|
|
should_skip_hidden are_spaces_empty |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
worksheet worksheets build_workbook demolish_the_workbook |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
file_name file_opened get_epoch_year has_epoch_year |
214
|
|
|
|
|
|
|
get_sheet_names get_sheet_name sheet_count get_sheet_info |
215
|
|
|
|
|
|
|
get_rel_info get_id_info get_worksheet_names worksheet_name |
216
|
|
|
|
|
|
|
worksheet_count get_chartsheet_names chartsheet_name chartsheet_count |
217
|
|
|
|
|
|
|
creator modified_by date_created date_modified |
218
|
|
|
|
|
|
|
in_the_list start_at_the_beginning |
219
|
|
|
|
|
|
|
)], |
220
|
|
|
|
|
|
|
); |
221
|
|
|
|
|
|
|
#~ get_shared_string start_the_ss_file_over has_shared_strings_interface |
222
|
|
|
|
|
|
|
#~ has_styles_interface get_format |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
#########1 Private Methods 3#########4#########5#########6#########7#########8#########9 |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
around BUILDARGS => sub { |
227
|
|
|
|
|
|
|
my ( $orig, $class, @args ) = @_; |
228
|
|
|
|
|
|
|
my %args = is_HashRef( $args[0] ) ? %{$args[0]} : @args; |
229
|
|
|
|
|
|
|
###LogSD $args{log_space} //= 'ExcelXML'; |
230
|
|
|
|
|
|
|
###LogSD my $class_space = __PACKAGE__->get_class_space; |
231
|
|
|
|
|
|
|
###LogSD my $log_space = $args{log_space} . "::$class_space" . '::_hidden::BUILDARGS'; |
232
|
|
|
|
|
|
|
###LogSD my $phone = Log::Shiras::Telephone->new( name_space => $log_space, ); |
233
|
|
|
|
|
|
|
###LogSD $phone->talk( level => 'trace', message =>[ |
234
|
|
|
|
|
|
|
###LogSD 'Arrived at BUILDARGS with: ', @args, ] );# caller(3), caller(4), caller(5) |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
# Handle depricated cache_positions |
237
|
|
|
|
|
|
|
#~ print longmess( Dumper( %args ) ); |
238
|
|
|
|
|
|
|
if( exists $args{cache_positions} ){ |
239
|
|
|
|
|
|
|
###LogSD $phone->talk( level => 'trace', message =>[ |
240
|
|
|
|
|
|
|
###LogSD "The user did pass a value to cache_positions as:", $args{cache_positions}] ); |
241
|
|
|
|
|
|
|
if( !is_HashRef( $args{cache_positions} ) ){ |
242
|
|
|
|
|
|
|
warn "Passing a boolean value to the attribute 'cache_positions' is depricated since v0.40.2 - the input will be converted per the documentation"; |
243
|
|
|
|
|
|
|
$args{cache_positions} = !$args{cache_positions} ? |
244
|
|
|
|
|
|
|
$flag_settings->{big_file}->{cache_positions} : |
245
|
|
|
|
|
|
|
$attribute_defaults->{cache_positions}; |
246
|
|
|
|
|
|
|
} |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
#scrub cache_positions |
249
|
|
|
|
|
|
|
for my $passed_key ( keys %{$args{cache_positions}} ){ |
250
|
|
|
|
|
|
|
if( !exists $attribute_defaults->{cache_positions}->{$passed_key} ){ |
251
|
|
|
|
|
|
|
warn "Passing a cache position for '$passed_key' but that is not allowed"; |
252
|
|
|
|
|
|
|
} |
253
|
|
|
|
|
|
|
} |
254
|
|
|
|
|
|
|
for my $stored_key ( keys %{$attribute_defaults->{cache_positions}} ){ |
255
|
|
|
|
|
|
|
if( !exists $args{cache_positions}->{$stored_key} ){ |
256
|
|
|
|
|
|
|
warn "Passed cache positions are missing key => values for key: $stored_key"; |
257
|
|
|
|
|
|
|
} |
258
|
|
|
|
|
|
|
} |
259
|
|
|
|
|
|
|
} |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
# Add any defaults |
262
|
|
|
|
|
|
|
###LogSD $phone->talk( level => 'trace', message =>[ |
263
|
|
|
|
|
|
|
###LogSD "Processing possible default values", $attribute_defaults ] ); |
264
|
|
|
|
|
|
|
for my $key ( keys %$attribute_defaults ){ |
265
|
|
|
|
|
|
|
###LogSD $phone->talk( level => 'trace', message =>[ |
266
|
|
|
|
|
|
|
###LogSD "Processing possible default for -$key- with value:", $attribute_defaults->{$key} ] ); |
267
|
|
|
|
|
|
|
if( exists $args{$key} ){ |
268
|
|
|
|
|
|
|
###LogSD $phone->talk( level => 'trace', message =>[ |
269
|
|
|
|
|
|
|
###LogSD "Found user defined -$key- with value(s): ", $args{$key} ] ); |
270
|
|
|
|
|
|
|
}else{ |
271
|
|
|
|
|
|
|
###LogSD $phone->talk( level => 'trace', message =>[ |
272
|
|
|
|
|
|
|
###LogSD "Setting default -$key- with value(s): ", $attribute_defaults->{$key} ] ); |
273
|
|
|
|
|
|
|
$args{$key} = clone( $attribute_defaults->{$key} ); |
274
|
|
|
|
|
|
|
} |
275
|
|
|
|
|
|
|
} |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
# Build object instances as needed |
278
|
|
|
|
|
|
|
for my $key ( keys %args ){ |
279
|
|
|
|
|
|
|
###LogSD $phone->talk( level => 'trace', message =>[ |
280
|
|
|
|
|
|
|
###LogSD "Checking if an instance needs built for key: $key" ] ); |
281
|
|
|
|
|
|
|
if( $key =~ /_inst$/ and !is_Object( $args{$key} ) and is_HashRef( $args{$key} ) ){ |
282
|
|
|
|
|
|
|
# Import log_space as needed |
283
|
|
|
|
|
|
|
###LogSD if( exists $args{log_space} and $args{log_space} ){ |
284
|
|
|
|
|
|
|
###LogSD $args{$key}->{log_space} = $args{log_space}; |
285
|
|
|
|
|
|
|
###LogSD } |
286
|
|
|
|
|
|
|
###LogSD $phone->talk( level => 'trace', message =>[ |
287
|
|
|
|
|
|
|
###LogSD "Key -$key- requires an instance built from:", $args{$key} ] ); |
288
|
|
|
|
|
|
|
$args{$key} = build_instance( $args{$key} ); |
289
|
|
|
|
|
|
|
} |
290
|
|
|
|
|
|
|
} |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
###LogSD $phone->talk( level => 'trace', message =>[ |
293
|
|
|
|
|
|
|
###LogSD "Final BUILDARGS:", %args ] ); |
294
|
|
|
|
|
|
|
my $workbook = Spreadsheet::Reader::ExcelXML::Workbook->new( %args ); |
295
|
|
|
|
|
|
|
###LogSD $phone->talk( level => 'trace', message =>[ |
296
|
|
|
|
|
|
|
###LogSD "Assigning the built workbook to the _workbook attribute with: ", $deparser->coderef2text( $orig ), |
297
|
|
|
|
|
|
|
###LogSD "..with workboook: ", $workbook->dump ] ); |
298
|
|
|
|
|
|
|
return $class->$orig( |
299
|
|
|
|
|
|
|
_workbook => $workbook, |
300
|
|
|
|
|
|
|
###LogSD log_space => $args{log_space} |
301
|
|
|
|
|
|
|
); |
302
|
|
|
|
|
|
|
}; |
303
|
|
|
|
|
|
|
|
304
|
|
|
|
|
|
|
sub DEMOLISH{ |
305
|
22
|
|
|
22
|
0
|
661
|
my ( $self ) = @_; |
306
|
|
|
|
|
|
|
###LogSD my $phone = Log::Shiras::Telephone->new( name_space => |
307
|
|
|
|
|
|
|
###LogSD $self->get_all_space . '::_hidden::DEMOLISH', ); |
308
|
|
|
|
|
|
|
###LogSD $phone->talk( level => 'debug', message =>[ |
309
|
|
|
|
|
|
|
###LogSD "Forcing Non-recursive garbage collection on recursive stuff" ] ); |
310
|
22
|
100
|
|
|
|
521
|
if( $self->_has_workbook ){ |
311
|
|
|
|
|
|
|
###LogSD $phone->talk( level => 'debug', message =>[ |
312
|
|
|
|
|
|
|
###LogSD "Need to demolish the workbook" ] ); |
313
|
21
|
|
|
|
|
119
|
$self->demolish_the_workbook; |
314
|
|
|
|
|
|
|
###LogSD $phone->talk( level => 'debug', message =>[ |
315
|
|
|
|
|
|
|
###LogSD "Clearing the attribute" ] ); |
316
|
21
|
|
|
|
|
1024
|
$self->_clear_the_workbook; |
317
|
|
|
|
|
|
|
} |
318
|
|
|
|
|
|
|
} |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
#########1 Phinish 3#########4#########5#########6#########7#########8#########9 |
321
|
|
|
|
|
|
|
|
322
|
15
|
|
|
15
|
|
22337
|
no Moose; |
|
15
|
|
|
|
|
23
|
|
|
15
|
|
|
|
|
98
|
|
323
|
|
|
|
|
|
|
__PACKAGE__->meta->make_immutable; |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
1; |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
#########1 Documentation 3#########4#########5#########6#########7#########8#########9 |
328
|
|
|
|
|
|
|
__END__ |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
=head1 NAME |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
Spreadsheet::Reader::ExcelXML - Read xlsx/xlsm/xml extention Excel files |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
=begin html |
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
<a href="https://www.perl.org"> |
337
|
|
|
|
|
|
|
<img src="https://img.shields.io/badge/perl-5.10+-brightgreen.svg" alt="perl version"> |
338
|
|
|
|
|
|
|
</a> |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
<a href="https://travis-ci.org/jandrew/p5-spreadsheet-reader-excelxml"> |
341
|
|
|
|
|
|
|
<img alt="Build Status" src="https://travis-ci.org/jandrew/p5-spreadsheet-reader-excelxml.png?branch=master" alt='Travis Build'/> |
342
|
|
|
|
|
|
|
</a> |
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
<a href='https://coveralls.io/github/jandrew/p5-spreadsheet-reader-excelxml?branch=master'> |
345
|
|
|
|
|
|
|
<img src='https://coveralls.io/repos/github/jandrew/p5-spreadsheet-reader-excelxml/badge.svg?branch=master' alt='Coverage Status' /> |
346
|
|
|
|
|
|
|
</a> |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
<a href='https://github.com/jandrew/p5-spreadsheet-reader-excelxml'> |
349
|
|
|
|
|
|
|
<img src="https://img.shields.io/github/tag/jandrew/p5-spreadsheet-reader-excelxml.svg?label=github version" alt="github version"/> |
350
|
|
|
|
|
|
|
</a> |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
<a href="https://metacpan.org/pod/Spreadsheet::Reader::ExcelXML"> |
353
|
|
|
|
|
|
|
<img src="https://badge.fury.io/pl/Spreadsheet-Reader-ExcelXML.svg?label=cpan version" alt="CPAN version" height="20"> |
354
|
|
|
|
|
|
|
</a> |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
<a href='http://cpants.cpanauthors.org/dist/Spreadsheet-Reader-ExcelXML'> |
357
|
|
|
|
|
|
|
<img src='http://cpants.cpanauthors.org/dist/Spreadsheet-Reader-ExcelXML.png' alt='kwalitee' height="20"/> |
358
|
|
|
|
|
|
|
</a> |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
=end html |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
=encoding UTF-8 |
363
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
=head1 SYNOPSIS |
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
The following uses the 'TestBook.xlsx' file found in the t/test_files/ folder of the package |
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
#!/usr/bin/env perl |
369
|
|
|
|
|
|
|
use strict; |
370
|
|
|
|
|
|
|
use warnings; |
371
|
|
|
|
|
|
|
use Spreadsheet::Reader::ExcelXML; |
372
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
my $parser = Spreadsheet::Reader::ExcelXML->new(); |
374
|
|
|
|
|
|
|
my $workbook = $parser->parse( 'TestBook.xlsx' ); |
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
if ( !defined $workbook ) { |
377
|
|
|
|
|
|
|
die $parser->error(), "\n"; |
378
|
|
|
|
|
|
|
} |
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
for my $worksheet ( $workbook->worksheets() ) { |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
my ( $row_min, $row_max ) = $worksheet->row_range(); |
383
|
|
|
|
|
|
|
my ( $col_min, $col_max ) = $worksheet->col_range(); |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
for my $row ( $row_min .. $row_max ) { |
386
|
|
|
|
|
|
|
for my $col ( $col_min .. $col_max ) { |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
my $cell = $worksheet->get_cell( $row, $col ); |
389
|
|
|
|
|
|
|
next unless $cell; |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
print "Row, Col = ($row, $col)\n"; |
392
|
|
|
|
|
|
|
print "Value = ", $cell->value(), "\n"; |
393
|
|
|
|
|
|
|
print "Unformatted = ", $cell->unformatted(), "\n"; |
394
|
|
|
|
|
|
|
print "\n"; |
395
|
|
|
|
|
|
|
} |
396
|
|
|
|
|
|
|
} |
397
|
|
|
|
|
|
|
last;# In order not to read all sheets |
398
|
|
|
|
|
|
|
} |
399
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
########################### |
401
|
|
|
|
|
|
|
# SYNOPSIS Screen Output |
402
|
|
|
|
|
|
|
# 01: Row, Col = (0, 0) |
403
|
|
|
|
|
|
|
# 02: Value = Category |
404
|
|
|
|
|
|
|
# 03: Unformatted = Category |
405
|
|
|
|
|
|
|
# 04: |
406
|
|
|
|
|
|
|
# 05: Row, Col = (0, 1) |
407
|
|
|
|
|
|
|
# 06: Value = Total |
408
|
|
|
|
|
|
|
# 07: Unformatted = Total |
409
|
|
|
|
|
|
|
# 08: |
410
|
|
|
|
|
|
|
# 09: Row, Col = (0, 2) |
411
|
|
|
|
|
|
|
# 10: Value = Date |
412
|
|
|
|
|
|
|
# 11: Unformatted = Date |
413
|
|
|
|
|
|
|
# 12: |
414
|
|
|
|
|
|
|
# 13: Row, Col = (1, 0) |
415
|
|
|
|
|
|
|
# 14: Value = Red |
416
|
|
|
|
|
|
|
# 16: Unformatted = Red |
417
|
|
|
|
|
|
|
# 17: |
418
|
|
|
|
|
|
|
# 18: Row, Col = (1, 1) |
419
|
|
|
|
|
|
|
# 19: Value = 5 |
420
|
|
|
|
|
|
|
# 20: Unformatted = 5 |
421
|
|
|
|
|
|
|
# 21: |
422
|
|
|
|
|
|
|
# 22: Row, Col = (1, 2) |
423
|
|
|
|
|
|
|
# 23: Value = 2017-2-14 #(shows as 2/14/2017 in the sheet) |
424
|
|
|
|
|
|
|
# 24: Unformatted = 41318 |
425
|
|
|
|
|
|
|
# 25: |
426
|
|
|
|
|
|
|
# More intermediate rows ... |
427
|
|
|
|
|
|
|
# 82: |
428
|
|
|
|
|
|
|
# 83: Row, Col = (6, 2) |
429
|
|
|
|
|
|
|
# 84: Value = 2016-2-6 #(shows as 2/6/2016 in the sheet) |
430
|
|
|
|
|
|
|
# 85: Unformatted = 40944 |
431
|
|
|
|
|
|
|
########################### |
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
=head1 DESCRIPTION |
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
This is an Excel spreadsheet reading package that should parse all excel files with the |
436
|
|
|
|
|
|
|
extentions .xlsx, .xlsm, .xml I<L<Excel 2003 xml |
437
|
|
|
|
|
|
|
|https://en.wikipedia.org/wiki/Microsoft_Office_XML_formats> (L<SpreadsheetML |
438
|
|
|
|
|
|
|
|https://en.wikipedia.org/wiki/SpreadsheetML>)> that can be opened in the Excel 2007+ |
439
|
|
|
|
|
|
|
applications. The quick-start example provided in the SYNOPSIS attempts to follow the |
440
|
|
|
|
|
|
|
example from L<Spreadsheet::ParseExcel> (.xls binary file reader) as close as possible. |
441
|
|
|
|
|
|
|
There are additional methods and other approaches that can be used by this package for |
442
|
|
|
|
|
|
|
spreadsheet reading but the basic access to data from newer xml based Excel files can be |
443
|
|
|
|
|
|
|
as simple as above. |
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
This is L<not the only perl package|/SEE ALSO> able to parse .xlsx files on METACPAN. For |
446
|
|
|
|
|
|
|
now it does appear to be the only package that will parse .xlsm and Excel 2003 .xml |
447
|
|
|
|
|
|
|
workbooks. |
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
There is some documentation throughout this package for users who intend to extend the |
450
|
|
|
|
|
|
|
package but the primary documentation is intended for the person who uses the package as |
451
|
|
|
|
|
|
|
is. Parsing through an Excel workbook is done with three levels of classes; |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
=head2 Workbook level (This doc) |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
=over |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
=item * General L<attribute|/Attributes> settings that affect parsing of the file in general |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
=item * The place to L<set workbook level output formatting|Spreadsheet::Reader::Format> |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
=item * Object L<methods|/Methods> to retreive document level metadata and worksheets |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
=back |
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
=head2 L<Worksheet level|Spreadsheet::Reader::ExcelXML::Worksheet> |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
=over |
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
=item * Object methods to return specific cell instances/L<data|/group_return_type> |
470
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
=item * Access to some worksheet level format information (more access pending) |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
=item * The place to L<customize|Spreadsheet::Reader::ExcelXML::Worksheet/custom_formats> |
474
|
|
|
|
|
|
|
data output formats targeting specific cell ranges |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
=back |
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
=head2 L<Cell level|Spreadsheet::Reader::ExcelXML::Cell> |
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
=over |
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
=item * Access to the cell contents |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
=item * Access to the cell formats (more access pending) |
485
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
=back |
487
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
There are some differences from the L<Spreadsheet::ParseExcel> package. For instance |
489
|
|
|
|
|
|
|
in the L<SYNOPSIS|/SYNOPSIS> the '$parser' and the '$workbook' are actually the same |
490
|
|
|
|
|
|
|
class for this package. You could therefore combine both steps by calling ->new with |
491
|
|
|
|
|
|
|
the 'file' attribute called out. The test for load success would then rely on the |
492
|
|
|
|
|
|
|
method L<file_opened|/file_opened>. Afterward it is still possible to call ->error |
493
|
|
|
|
|
|
|
on the instance. Another difference is the data formatter and specifically date |
494
|
|
|
|
|
|
|
handling. This package leverages L<Spreadsheet::Reader::Format> to allows for a |
495
|
|
|
|
|
|
|
simple pluggable custom output format that is very flexible as well as handling dates |
496
|
|
|
|
|
|
|
in the Excel file older than 1-January-1900. I leveraged coercions from L<Type::Tiny |
497
|
|
|
|
|
|
|
|Type::Tiny::Manual> to do this but anything that follows that general format will work |
498
|
|
|
|
|
|
|
here. |
499
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
The why and nitty gritty of design choices I made are in the L<Architecture Choices |
501
|
|
|
|
|
|
|
|/Architecture Choices> section. Some pitfalls are outlined in the L<Warnings|/Warnings> |
502
|
|
|
|
|
|
|
section. Read the full documentation for all opportunities! |
503
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
=head2 Primary Methods |
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
These are the primary ways to use this class. They can be used to open a workbook, |
507
|
|
|
|
|
|
|
investigate information at the workbook level, and provide ways to access sheets in |
508
|
|
|
|
|
|
|
the workbook. |
509
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
All methods are object methods and should be implemented on the object instance. |
511
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
B<Example:> |
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
my @worksheet_array = $workbook_instance->worksheets; |
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
=head3 parse( $file_name|$file_handle, $formatter ) |
517
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
=over |
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
B<Definition:> This is a convenience method to match L<Spreadsheet::ParseExcel/parse($filename, $formatter)>. |
521
|
|
|
|
|
|
|
It is one way to set the L<file|/file> attribute [and the L<formatter_inst|/formatter_inst> attribute]. |
522
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
B<Accepts:> |
524
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
$file = see the L<file|/file> attribute for valid options (required) (required) |
526
|
|
|
|
|
|
|
[$formatter] = see the L<formatter_inst|/formatter_inst> attribute for valid options (optional) |
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
B<Returns:> an instance of the package (not cloned) when passing with the xlsx file successfully |
529
|
|
|
|
|
|
|
opened or undef for failure. |
530
|
|
|
|
|
|
|
|
531
|
|
|
|
|
|
|
=back |
532
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
=head3 worksheets |
534
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
=over |
536
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
B<Definition:> This method will return an array (I<not an array reference>) containing a list of references |
538
|
|
|
|
|
|
|
to all worksheets in the workbook as objects. This is not a reccomended method because it builds all |
539
|
|
|
|
|
|
|
worksheet instance and returns an array of objects. It is provided for compatibility to |
540
|
|
|
|
|
|
|
Spreadsheet::ParseExcel. For alternatives see the L<get_worksheet_names|/get_worksheet_names> method and |
541
|
|
|
|
|
|
|
the L<worksheet|/worksheet( $name )> methods. B<It also only returns the tabular worksheets in the |
542
|
|
|
|
|
|
|
workbook. All chart sheets are ignored!> |
543
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
B<Accepts:> nothing |
545
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
B<Returns:> an array ref of L<Worksheet|Spreadsheet::Reader::ExcelXML::Worksheet> |
547
|
|
|
|
|
|
|
objects for all worksheets in the workbook. |
548
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
=back |
550
|
|
|
|
|
|
|
|
551
|
|
|
|
|
|
|
=head3 worksheet( $name ) |
552
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
=over |
554
|
|
|
|
|
|
|
|
555
|
|
|
|
|
|
|
B<Definition:> This method will return an object to read values in the identified |
556
|
|
|
|
|
|
|
worksheet. If no value is passed to $name then the 'next' worksheet in physical order |
557
|
|
|
|
|
|
|
is returned. I<'next' will NOT wrap> It also only iterates through the 'worksheets' |
558
|
|
|
|
|
|
|
in the workbook (not the 'chartsheets'). |
559
|
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
B<Accepts:> the $name string representing the name of the worksheet object you |
561
|
|
|
|
|
|
|
want to open. This name is the word visible on the tab when opening the spreadsheet |
562
|
|
|
|
|
|
|
in Excel. (not the underlying zip member file name - which can be different. It will |
563
|
|
|
|
|
|
|
not accept chart tab names.) |
564
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
B<Returns:> a L<Worksheet|Spreadsheet::Reader::ExcelXML::Worksheet> object with the |
566
|
|
|
|
|
|
|
ability to read the worksheet of that name. It returns undef and sets the error attribute |
567
|
|
|
|
|
|
|
if a 'chartsheet' is requested. Or in 'next' mode it returns undef if past the last sheet. |
568
|
|
|
|
|
|
|
|
569
|
|
|
|
|
|
|
B<Example:> using the implied 'next' worksheet; |
570
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
while( my $worksheet = $workbook->worksheet ){ |
572
|
|
|
|
|
|
|
print "Reading: " . $worksheet->name . "\n"; |
573
|
|
|
|
|
|
|
# get the data needed from this worksheet |
574
|
|
|
|
|
|
|
} |
575
|
|
|
|
|
|
|
|
576
|
|
|
|
|
|
|
=back |
577
|
|
|
|
|
|
|
|
578
|
|
|
|
|
|
|
=head3 file_name |
579
|
|
|
|
|
|
|
|
580
|
|
|
|
|
|
|
=over |
581
|
|
|
|
|
|
|
|
582
|
|
|
|
|
|
|
B<Definition:> If you pass a file $location/$name string to the attribute L<file|/file> then before |
583
|
|
|
|
|
|
|
the package converts it to a file handle it will store the string. You can retreive that string |
584
|
|
|
|
|
|
|
with this method. This is true if you pass a string to the L<parse |
585
|
|
|
|
|
|
|
|/parse( $file_name|$file_handle, $formatter )> method as well. |
586
|
|
|
|
|
|
|
|
587
|
|
|
|
|
|
|
B<Accepts:> nothing |
588
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
B<Returns:> the $location/$name file string if available. |
590
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
=back |
592
|
|
|
|
|
|
|
|
593
|
|
|
|
|
|
|
=head3 file_opened |
594
|
|
|
|
|
|
|
|
595
|
|
|
|
|
|
|
=over |
596
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
B<Definition:> This method is the test for success that should be used when opening a workbook |
598
|
|
|
|
|
|
|
using the -E<gt>new method. This allows for the object to store the error without dying |
599
|
|
|
|
|
|
|
entirely. |
600
|
|
|
|
|
|
|
|
601
|
|
|
|
|
|
|
B<Accepts:> nothing |
602
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
B<Returns:> 1 if the workbook file was successfully opened |
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
B<Example:> |
606
|
|
|
|
|
|
|
|
607
|
|
|
|
|
|
|
use Spreadsheet::Reader::ExcelXML qw( :just_the_data ); |
608
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
my $workbook = Spreadsheet::Reader::ExcelXML->new( file => 'TestBook.xlsx' ); |
610
|
|
|
|
|
|
|
|
611
|
|
|
|
|
|
|
if ( !$workbook->file_opened ) { |
612
|
|
|
|
|
|
|
die $workbook->error(), "\n"; |
613
|
|
|
|
|
|
|
} |
614
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
for my $worksheet ( $workbook->worksheets ) { |
616
|
|
|
|
|
|
|
|
617
|
|
|
|
|
|
|
print "Reading worksheet named: " . $worksheet->get_name . "\n"; |
618
|
|
|
|
|
|
|
|
619
|
|
|
|
|
|
|
while( 1 ){ |
620
|
|
|
|
|
|
|
my $cell = $worksheet->get_next_value; |
621
|
|
|
|
|
|
|
print "Cell is: $cell\n"; |
622
|
|
|
|
|
|
|
last if $cell eq 'EOF'; |
623
|
|
|
|
|
|
|
} |
624
|
|
|
|
|
|
|
} |
625
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
=back |
627
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
=head3 get_sheet_names |
629
|
|
|
|
|
|
|
|
630
|
|
|
|
|
|
|
=over |
631
|
|
|
|
|
|
|
|
632
|
|
|
|
|
|
|
B<Definition:> This method returns an array ref of all the sheet names (tabs) in the |
633
|
|
|
|
|
|
|
workbook in order. (It includes chartsheets.) |
634
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
B<Accepts:> nothing |
636
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
B<Returns:> an array ref of strings |
638
|
|
|
|
|
|
|
|
639
|
|
|
|
|
|
|
=back |
640
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
=head3 worksheet_name( $position ) |
642
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
=over |
644
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
B<Definition:> This returns the name of the worksheet in that $position. (counting from zero) |
646
|
|
|
|
|
|
|
interspersed chartsheets in the file are not considered to hold a position by this accounting. |
647
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
B<Accepts:> $position (an integer) |
649
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
B<Returns:> the worksheet name |
651
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
B<Example:> To return only worksheet positions 2 through 4 without building them all at once |
653
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
for $x (2..4){ |
655
|
|
|
|
|
|
|
my $worksheet = $workbook->worksheet( $workbook->worksheet_name( $x ) ); |
656
|
|
|
|
|
|
|
# Read the worksheet here |
657
|
|
|
|
|
|
|
} |
658
|
|
|
|
|
|
|
|
659
|
|
|
|
|
|
|
=back |
660
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
=head3 get_worksheet_names |
662
|
|
|
|
|
|
|
|
663
|
|
|
|
|
|
|
=over |
664
|
|
|
|
|
|
|
|
665
|
|
|
|
|
|
|
B<Definition:> This method returns an array ref of all the worksheet names (tabs) in the |
666
|
|
|
|
|
|
|
workbook in order. (No chartsheets.) |
667
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
B<Accepts:> nothing |
669
|
|
|
|
|
|
|
|
670
|
|
|
|
|
|
|
B<Returns:> an array ref of strings |
671
|
|
|
|
|
|
|
|
672
|
|
|
|
|
|
|
B<Example:> Another way to parse a workbook without building all the sheets at |
673
|
|
|
|
|
|
|
once is; |
674
|
|
|
|
|
|
|
|
675
|
|
|
|
|
|
|
for $sheet_name ( @{$workbook->worksheet_names} ){ |
676
|
|
|
|
|
|
|
my $worksheet = $workbook->worksheet( $sheet_name ); |
677
|
|
|
|
|
|
|
# Read the worksheet here |
678
|
|
|
|
|
|
|
} |
679
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
=back |
681
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
=head3 worksheet_count |
683
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
=over |
685
|
|
|
|
|
|
|
|
686
|
|
|
|
|
|
|
B<Definition:> This returns the total number of recorded worksheets |
687
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
B<Accepts:> nothing |
689
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
B<Returns:> $total - a count of all worksheets (only) |
691
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
=back |
693
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
=head2 Attributes |
695
|
|
|
|
|
|
|
|
696
|
|
|
|
|
|
|
Data passed to new when creating an instance. For modification of these attributes |
697
|
|
|
|
|
|
|
see the listed 'attribute methods'. For general information on attributes see |
698
|
|
|
|
|
|
|
L<Moose::Manual::Attributes>. For additional lesser used workbook options see |
699
|
|
|
|
|
|
|
L<Secondary Methods|/Secondary Methods>. There are several grouped default values |
700
|
|
|
|
|
|
|
for these attributes documented in the L<Flags|/Flags> section. |
701
|
|
|
|
|
|
|
|
702
|
|
|
|
|
|
|
B<Example> |
703
|
|
|
|
|
|
|
|
704
|
|
|
|
|
|
|
$workbook_instance = Spreadsheet::Reader::ExcelXML->new( %attributes ) |
705
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
I<note: if the file information is not included in the initial %attributes then it must be |
707
|
|
|
|
|
|
|
set by one of the attribute setter methods below or the L<parse |
708
|
|
|
|
|
|
|
|parse( $file_nameE<verbar>$file_handle, $formatter )> method before the rest of the package |
709
|
|
|
|
|
|
|
can be used.> |
710
|
|
|
|
|
|
|
|
711
|
|
|
|
|
|
|
=head3 file |
712
|
|
|
|
|
|
|
|
713
|
|
|
|
|
|
|
=over |
714
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
B<Definition:> This attribute holds the file handle for the top level workbook. If a |
716
|
|
|
|
|
|
|
file name is passed it is coerced into an L<IO::File> handle and stored that way. The |
717
|
|
|
|
|
|
|
originaly file name can be retrieved with the method L<file_name|/file_name>. |
718
|
|
|
|
|
|
|
|
719
|
|
|
|
|
|
|
B<Default> no default |
720
|
|
|
|
|
|
|
|
721
|
|
|
|
|
|
|
B<Required:> yes |
722
|
|
|
|
|
|
|
|
723
|
|
|
|
|
|
|
B<Range> any unencrypted xlsx|xlsm|xml file that can be opened in Microsoft Excel 2007+. |
724
|
|
|
|
|
|
|
|
725
|
|
|
|
|
|
|
B<attribute methods> Methods provided to adjust this attribute |
726
|
|
|
|
|
|
|
|
727
|
|
|
|
|
|
|
=over |
728
|
|
|
|
|
|
|
|
729
|
|
|
|
|
|
|
B<set_file( $file|$file_handle )> |
730
|
|
|
|
|
|
|
|
731
|
|
|
|
|
|
|
=over |
732
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
B<Definition:> change the file value in the attribute (this will reboot the workbook instance) |
734
|
|
|
|
|
|
|
|
735
|
|
|
|
|
|
|
=back |
736
|
|
|
|
|
|
|
|
737
|
|
|
|
|
|
|
=back |
738
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
=back |
740
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
=head3 error_inst |
742
|
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
=over |
744
|
|
|
|
|
|
|
|
745
|
|
|
|
|
|
|
B<Definition:> This attribute holds an 'error' object instance. It should have several |
746
|
|
|
|
|
|
|
methods for managing errors. Currently no error codes or error language translation |
747
|
|
|
|
|
|
|
options are available but this should make implementation of that easier. |
748
|
|
|
|
|
|
|
|
749
|
|
|
|
|
|
|
B<Default:> a L<Spreadsheet::Reader::ExcelXML::Error> instance with the attributes set |
750
|
|
|
|
|
|
|
as; |
751
|
|
|
|
|
|
|
|
752
|
|
|
|
|
|
|
( should_warn => 0 ) |
753
|
|
|
|
|
|
|
|
754
|
|
|
|
|
|
|
B<Range:> See the 'Exported methods' section below for methods required by the workbook. |
755
|
|
|
|
|
|
|
The error instance must also be able to extract the error string from a passed error |
756
|
|
|
|
|
|
|
object as well. For now the current implementation will attempt ->as_string first |
757
|
|
|
|
|
|
|
and then ->message if an object is passed. |
758
|
|
|
|
|
|
|
|
759
|
|
|
|
|
|
|
B<attribute methods> Methods provided to manage this attribute |
760
|
|
|
|
|
|
|
|
761
|
|
|
|
|
|
|
=over |
762
|
|
|
|
|
|
|
|
763
|
|
|
|
|
|
|
B<get_error_inst> |
764
|
|
|
|
|
|
|
|
765
|
|
|
|
|
|
|
=over |
766
|
|
|
|
|
|
|
|
767
|
|
|
|
|
|
|
B<Definition:> returns this instance |
768
|
|
|
|
|
|
|
|
769
|
|
|
|
|
|
|
=back |
770
|
|
|
|
|
|
|
|
771
|
|
|
|
|
|
|
B<has_error_inst> |
772
|
|
|
|
|
|
|
|
773
|
|
|
|
|
|
|
=over |
774
|
|
|
|
|
|
|
|
775
|
|
|
|
|
|
|
B<Definition:> indicates in the error instance has been set |
776
|
|
|
|
|
|
|
|
777
|
|
|
|
|
|
|
=back |
778
|
|
|
|
|
|
|
|
779
|
|
|
|
|
|
|
B<Exported methods:> |
780
|
|
|
|
|
|
|
|
781
|
|
|
|
|
|
|
The following methods are exported (delegated) to the workbook level |
782
|
|
|
|
|
|
|
from the stored instance of this class. Links are provided to the default implemenation; |
783
|
|
|
|
|
|
|
|
784
|
|
|
|
|
|
|
=over |
785
|
|
|
|
|
|
|
|
786
|
|
|
|
|
|
|
L<Spreadsheet::Reader::ExcelXML::Error/error> |
787
|
|
|
|
|
|
|
|
788
|
|
|
|
|
|
|
L<Spreadsheet::Reader::ExcelXML::Error/set_error> |
789
|
|
|
|
|
|
|
|
790
|
|
|
|
|
|
|
L<Spreadsheet::Reader::ExcelXML::Error/clear_error> |
791
|
|
|
|
|
|
|
|
792
|
|
|
|
|
|
|
L<Spreadsheet::Reader::ExcelXML::Error/set_warnings> |
793
|
|
|
|
|
|
|
|
794
|
|
|
|
|
|
|
L<Spreadsheet::Reader::ExcelXML::Error/if_warn> |
795
|
|
|
|
|
|
|
|
796
|
|
|
|
|
|
|
L<Spreadsheet::Reader::ExcelXML::Error/should_spew_longmess> |
797
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
L<Spreadsheet::Reader::ExcelXML::Error/spewing_longmess> |
799
|
|
|
|
|
|
|
|
800
|
|
|
|
|
|
|
L<Spreadsheet::Reader::ExcelXML::Error/has_error> |
801
|
|
|
|
|
|
|
|
802
|
|
|
|
|
|
|
=back |
803
|
|
|
|
|
|
|
|
804
|
|
|
|
|
|
|
=back |
805
|
|
|
|
|
|
|
|
806
|
|
|
|
|
|
|
=back |
807
|
|
|
|
|
|
|
|
808
|
|
|
|
|
|
|
=head3 formatter_inst |
809
|
|
|
|
|
|
|
|
810
|
|
|
|
|
|
|
=over |
811
|
|
|
|
|
|
|
|
812
|
|
|
|
|
|
|
B<Definition:> This attribute holds a 'formatter' object instance. This instance does all |
813
|
|
|
|
|
|
|
the heavy lifting to transform raw text into desired output. It does include |
814
|
|
|
|
|
|
|
a role that interprets the excel L<format string |
815
|
|
|
|
|
|
|
|https://support.office.com/en-us/article/Create-or-delete-a-custom-number-format-2d450d95-2630-43b8-bf06-ccee7cbe6864?ui=en-US&rs=en-US&ad=US> |
816
|
|
|
|
|
|
|
into a L<Type::Tiny> coercion. The default case is actually built from a number of |
817
|
|
|
|
|
|
|
different elements using L<MooseX::ShortCut::BuildInstance> on the fly so you can |
818
|
|
|
|
|
|
|
just call out the replacement base class or role rather than fully building |
819
|
|
|
|
|
|
|
the formatter prior to calling new on the workbook. However the naming of the interface |
820
|
|
|
|
|
|
|
|http://www.cs.utah.edu/~germain/PPS/Topics/interfaces.html> is locked and should not be |
821
|
|
|
|
|
|
|
tampered with since it manages the methods to be imported into the workbook; |
822
|
|
|
|
|
|
|
|
823
|
|
|
|
|
|
|
B<Default> An instance built with L<MooseX::ShortCut::BuildInstance> from the following |
824
|
|
|
|
|
|
|
arguments (note the instance itself is not built here) |
825
|
|
|
|
|
|
|
{ |
826
|
|
|
|
|
|
|
superclasses => ['Spreadsheet::Reader::ExcelXML::FmtDefault'], # base class |
827
|
|
|
|
|
|
|
add_roles_in_sequence =>[qw( |
828
|
|
|
|
|
|
|
Spreadsheet::Reader::ExcelXML::ParseExcelFormatStrings # role containing the heavy lifting methods |
829
|
|
|
|
|
|
|
Spreadsheet::Reader::ExcelXML::FormatInterface # the interface |
830
|
|
|
|
|
|
|
)], |
831
|
|
|
|
|
|
|
package => 'FormatInstance', # a formality more than anything |
832
|
|
|
|
|
|
|
} |
833
|
|
|
|
|
|
|
|
834
|
|
|
|
|
|
|
B<Range:> A replacement formatter instance or a set of arguments that will lead to building an acceptable |
835
|
|
|
|
|
|
|
formatter instance. See the 'Exported methods'section below for all methods required methods for the |
836
|
|
|
|
|
|
|
workbook. The FormatInterface is required by name so a replacement of that role requires the same name. |
837
|
|
|
|
|
|
|
|
838
|
|
|
|
|
|
|
B<attribute methods> Methods provided to manage this attribute |
839
|
|
|
|
|
|
|
|
840
|
|
|
|
|
|
|
=over |
841
|
|
|
|
|
|
|
|
842
|
|
|
|
|
|
|
B<get_formatter_inst> |
843
|
|
|
|
|
|
|
|
844
|
|
|
|
|
|
|
=over |
845
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
B<Definition:> returns the stored formatter instance |
847
|
|
|
|
|
|
|
|
848
|
|
|
|
|
|
|
=back |
849
|
|
|
|
|
|
|
|
850
|
|
|
|
|
|
|
B<set_formatter_inst> |
851
|
|
|
|
|
|
|
|
852
|
|
|
|
|
|
|
=over |
853
|
|
|
|
|
|
|
|
854
|
|
|
|
|
|
|
B<Definition:> sets the formatter instance |
855
|
|
|
|
|
|
|
|
856
|
|
|
|
|
|
|
=back |
857
|
|
|
|
|
|
|
|
858
|
|
|
|
|
|
|
B<Exported methods:> |
859
|
|
|
|
|
|
|
|
860
|
|
|
|
|
|
|
Additionally the following methods are exported (delegated) to the workbook level |
861
|
|
|
|
|
|
|
from the stored instance of this class. Links are provided to the default implemenation; |
862
|
|
|
|
|
|
|
|
863
|
|
|
|
|
|
|
=over |
864
|
|
|
|
|
|
|
|
865
|
|
|
|
|
|
|
B<Example:> name_the_workbook_uses_to_access_the_method => B<Link to the default source of the method> |
866
|
|
|
|
|
|
|
|
867
|
|
|
|
|
|
|
get_formatter_region => L<Spreadsheet::Reader::Format::FmtDefault/get_excel_region> |
868
|
|
|
|
|
|
|
|
869
|
|
|
|
|
|
|
has_target_encoding => L<Spreadsheet::Reader::Format::FmtDefault/has_target_encoding> |
870
|
|
|
|
|
|
|
|
871
|
|
|
|
|
|
|
get_target_encoding => L<Spreadsheet::Reader::Format::FmtDefault/get_target_encoding> |
872
|
|
|
|
|
|
|
|
873
|
|
|
|
|
|
|
set_target_encoding => L<Spreadsheet::Reader::Format::FmtDefault/set_target_encoding( $encoding )> |
874
|
|
|
|
|
|
|
|
875
|
|
|
|
|
|
|
change_output_encoding => L<Spreadsheet::Reader::Format::FmtDefault/change_output_encoding( $string )> |
876
|
|
|
|
|
|
|
|
877
|
|
|
|
|
|
|
set_defined_excel_formats => L<Spreadsheet::Reader::Format::FmtDefault/set_defined_excel_formats( %args )> |
878
|
|
|
|
|
|
|
|
879
|
|
|
|
|
|
|
get_defined_conversion => L<Spreadsheet::Reader::Format::ParseExcelFormatStrings/get_defined_conversion( $position )> |
880
|
|
|
|
|
|
|
|
881
|
|
|
|
|
|
|
parse_excel_format_string => L<Spreadsheet::Reader::Format::ParseExcelFormatStrings/parse_excel_format_string( $string, $name )> |
882
|
|
|
|
|
|
|
|
883
|
|
|
|
|
|
|
set_date_behavior => L<Spreadsheet::Reader::Format::ParseExcelFormatStrings/set_date_behavior( $bool )> |
884
|
|
|
|
|
|
|
|
885
|
|
|
|
|
|
|
set_european_first => L<Spreadsheet::Reader::Format::ParseExcelFormatStrings/set_european_first( $bool )> |
886
|
|
|
|
|
|
|
|
887
|
|
|
|
|
|
|
set_formatter_cache_behavior => L<Spreadsheet::Reader::Format::ParseExcelFormatStrings/set_cache_behavior( $bool )> |
888
|
|
|
|
|
|
|
|
889
|
|
|
|
|
|
|
set_workbook_for_formatter => L<Spreadsheet::Reader::Format::ParseExcelFormatStrings/set_workbook_inst( $instance )> |
890
|
|
|
|
|
|
|
|
891
|
|
|
|
|
|
|
=back |
892
|
|
|
|
|
|
|
|
893
|
|
|
|
|
|
|
=back |
894
|
|
|
|
|
|
|
|
895
|
|
|
|
|
|
|
=back |
896
|
|
|
|
|
|
|
|
897
|
|
|
|
|
|
|
=head3 count_from_zero |
898
|
|
|
|
|
|
|
|
899
|
|
|
|
|
|
|
=over |
900
|
|
|
|
|
|
|
|
901
|
|
|
|
|
|
|
B<Definition:> Excel spreadsheets count from 1. L<Spreadsheet::ParseExcel> |
902
|
|
|
|
|
|
|
counts from zero. This allows you to choose either way. |
903
|
|
|
|
|
|
|
|
904
|
|
|
|
|
|
|
B<Default> 1 |
905
|
|
|
|
|
|
|
|
906
|
|
|
|
|
|
|
B<Range> 1 = counting from zero like Spreadsheet::ParseExcel, |
907
|
|
|
|
|
|
|
0 = Counting from 1 like Excel |
908
|
|
|
|
|
|
|
|
909
|
|
|
|
|
|
|
B<attribute methods> Methods provided to adjust this attribute |
910
|
|
|
|
|
|
|
|
911
|
|
|
|
|
|
|
=over |
912
|
|
|
|
|
|
|
|
913
|
|
|
|
|
|
|
B<counting_from_zero> |
914
|
|
|
|
|
|
|
|
915
|
|
|
|
|
|
|
=over |
916
|
|
|
|
|
|
|
|
917
|
|
|
|
|
|
|
B<Definition:> a way to check the current attribute setting |
918
|
|
|
|
|
|
|
|
919
|
|
|
|
|
|
|
=back |
920
|
|
|
|
|
|
|
|
921
|
|
|
|
|
|
|
=back |
922
|
|
|
|
|
|
|
|
923
|
|
|
|
|
|
|
=back |
924
|
|
|
|
|
|
|
|
925
|
|
|
|
|
|
|
=head3 file_boundary_flags |
926
|
|
|
|
|
|
|
|
927
|
|
|
|
|
|
|
=over |
928
|
|
|
|
|
|
|
|
929
|
|
|
|
|
|
|
B<Definition:> When you request data to the right of the last column or below |
930
|
|
|
|
|
|
|
the last row of the data this package can return 'EOR' or 'EOF' to indicate that |
931
|
|
|
|
|
|
|
state. This is especially helpful in 'while' loops. The other option is to |
932
|
|
|
|
|
|
|
return 'undef'. This is problematic if some cells in your table are empty which |
933
|
|
|
|
|
|
|
also returns undef. The determination for what constitues the last column and |
934
|
|
|
|
|
|
|
row is selected with the attributes L<empty_is_end|/empty_is_end>, L<values_only |
935
|
|
|
|
|
|
|
|/values_only>, and L<spaces_are_empty|/spaces_are_empty>. |
936
|
|
|
|
|
|
|
|
937
|
|
|
|
|
|
|
B<Default> 1 |
938
|
|
|
|
|
|
|
|
939
|
|
|
|
|
|
|
B<Range> 1 = return 'EOR' or 'EOF' flags as appropriate, 0 = return undef when |
940
|
|
|
|
|
|
|
requesting a position that is out of bounds |
941
|
|
|
|
|
|
|
|
942
|
|
|
|
|
|
|
B<attribute methods> Methods provided to adjust this attribute |
943
|
|
|
|
|
|
|
|
944
|
|
|
|
|
|
|
=over |
945
|
|
|
|
|
|
|
|
946
|
|
|
|
|
|
|
B<boundary_flag_setting> |
947
|
|
|
|
|
|
|
|
948
|
|
|
|
|
|
|
=over |
949
|
|
|
|
|
|
|
|
950
|
|
|
|
|
|
|
B<Definition:> a way to check the current attribute setting |
951
|
|
|
|
|
|
|
|
952
|
|
|
|
|
|
|
=back |
953
|
|
|
|
|
|
|
|
954
|
|
|
|
|
|
|
=back |
955
|
|
|
|
|
|
|
|
956
|
|
|
|
|
|
|
=back |
957
|
|
|
|
|
|
|
|
958
|
|
|
|
|
|
|
=head3 empty_is_end |
959
|
|
|
|
|
|
|
|
960
|
|
|
|
|
|
|
=over |
961
|
|
|
|
|
|
|
|
962
|
|
|
|
|
|
|
B<Definition:> The excel convention is to read the table left to right and top |
963
|
|
|
|
|
|
|
to bottom. Some tables have an uneven number of columns with real data from row |
964
|
|
|
|
|
|
|
to row. This allows the several methods that excersize a 'next' function to wrap |
965
|
|
|
|
|
|
|
after the last element with data rather than going to the max column. This also |
966
|
|
|
|
|
|
|
can combine with the attribute L<file_boundary_flags|/file_boundary_flags> to |
967
|
|
|
|
|
|
|
trigger 'EOR' flags after the last data element and before the sheet max column |
968
|
|
|
|
|
|
|
when not implementing 'next' functionality. It will also return 'EOF' if the |
969
|
|
|
|
|
|
|
remaining rows are empty even if the max row is farther on. |
970
|
|
|
|
|
|
|
|
971
|
|
|
|
|
|
|
B<Default> 0 |
972
|
|
|
|
|
|
|
|
973
|
|
|
|
|
|
|
B<Range> 0 = treat all columns short of the max column for the sheet as being in |
974
|
|
|
|
|
|
|
the table, 1 = treat all cells after the last cell with data as past the end of |
975
|
|
|
|
|
|
|
the row. This will be most visible when |
976
|
|
|
|
|
|
|
L<boundary flags are turned on|/boundary_flag_setting> or next functionality is |
977
|
|
|
|
|
|
|
used in the context of the attribute L<values_only|/values_only>. |
978
|
|
|
|
|
|
|
|
979
|
|
|
|
|
|
|
B<attribute methods> Methods provided to adjust this attribute |
980
|
|
|
|
|
|
|
|
981
|
|
|
|
|
|
|
=over |
982
|
|
|
|
|
|
|
|
983
|
|
|
|
|
|
|
B<is_empty_the_end> |
984
|
|
|
|
|
|
|
|
985
|
|
|
|
|
|
|
=over |
986
|
|
|
|
|
|
|
|
987
|
|
|
|
|
|
|
B<Definition:> a way to check the current attribute setting |
988
|
|
|
|
|
|
|
|
989
|
|
|
|
|
|
|
=back |
990
|
|
|
|
|
|
|
|
991
|
|
|
|
|
|
|
=back |
992
|
|
|
|
|
|
|
|
993
|
|
|
|
|
|
|
=back |
994
|
|
|
|
|
|
|
|
995
|
|
|
|
|
|
|
=head3 values_only |
996
|
|
|
|
|
|
|
|
997
|
|
|
|
|
|
|
=over |
998
|
|
|
|
|
|
|
|
999
|
|
|
|
|
|
|
B<Definition:> Excel will store information about a cell even if it only contains |
1000
|
|
|
|
|
|
|
formatting data. In many cases you only want to see cells that actually have |
1001
|
|
|
|
|
|
|
values. This attribute will change the package behaviour regarding cells that have |
1002
|
|
|
|
|
|
|
formatting stored against that cell but no actual value. If values in the cells |
1003
|
|
|
|
|
|
|
exist as zero length strings or spaces only you can also set those to empty with |
1004
|
|
|
|
|
|
|
the attribute L<spaces_are_empty|/spaces_are_empty>. |
1005
|
|
|
|
|
|
|
|
1006
|
|
|
|
|
|
|
B<Default> 0 |
1007
|
|
|
|
|
|
|
|
1008
|
|
|
|
|
|
|
B<Range> 1 = return 'undef' for cells with formatting only, |
1009
|
|
|
|
|
|
|
0 = return the result of L<empty_return_type|/empty_return_type> (or cell objects) |
1010
|
|
|
|
|
|
|
for cells that only contain formatting. |
1011
|
|
|
|
|
|
|
|
1012
|
|
|
|
|
|
|
B<attribute methods> Methods provided to adjust this attribute |
1013
|
|
|
|
|
|
|
|
1014
|
|
|
|
|
|
|
=over |
1015
|
|
|
|
|
|
|
|
1016
|
|
|
|
|
|
|
B<get_values_only> |
1017
|
|
|
|
|
|
|
|
1018
|
|
|
|
|
|
|
=over |
1019
|
|
|
|
|
|
|
|
1020
|
|
|
|
|
|
|
B<Definition:> a way to check the current attribute setting |
1021
|
|
|
|
|
|
|
|
1022
|
|
|
|
|
|
|
=back |
1023
|
|
|
|
|
|
|
|
1024
|
|
|
|
|
|
|
=back |
1025
|
|
|
|
|
|
|
|
1026
|
|
|
|
|
|
|
=back |
1027
|
|
|
|
|
|
|
|
1028
|
|
|
|
|
|
|
=head3 from_the_edge |
1029
|
|
|
|
|
|
|
|
1030
|
|
|
|
|
|
|
=over |
1031
|
|
|
|
|
|
|
|
1032
|
|
|
|
|
|
|
B<Definition:> Some data tables start in the top left corner. Others do not. I |
1033
|
|
|
|
|
|
|
don't reccomend that practice but when aquiring data in the wild it is often good |
1034
|
|
|
|
|
|
|
to adapt. This attribute sets whether the file percieves the L<min_col |
1035
|
|
|
|
|
|
|
|/Spreadsheet::Reader::Worksheet/min_col> and L<min_row |
1036
|
|
|
|
|
|
|
|/Spreadsheet::Reader::Worksheet/min_col> as the top left edge of the sheeto or |
1037
|
|
|
|
|
|
|
from the top row with data and starting from the leftmost column with data. |
1038
|
|
|
|
|
|
|
|
1039
|
|
|
|
|
|
|
B<Default> 1 |
1040
|
|
|
|
|
|
|
|
1041
|
|
|
|
|
|
|
B<Range> 1 = treat the top left corner of the sheet as the beginning of rows and |
1042
|
|
|
|
|
|
|
columns even if there is no data in the top row or leftmost column, 0 = Set the |
1043
|
|
|
|
|
|
|
minimum row and minimum columns to be the first row and first column with data |
1044
|
|
|
|
|
|
|
|
1045
|
|
|
|
|
|
|
B<attribute methods> Methods provided to adjust this attribute |
1046
|
|
|
|
|
|
|
|
1047
|
|
|
|
|
|
|
=over |
1048
|
|
|
|
|
|
|
|
1049
|
|
|
|
|
|
|
B<starts_at_the_edge> |
1050
|
|
|
|
|
|
|
|
1051
|
|
|
|
|
|
|
=over |
1052
|
|
|
|
|
|
|
|
1053
|
|
|
|
|
|
|
B<Definition:> returns the attribute state |
1054
|
|
|
|
|
|
|
|
1055
|
|
|
|
|
|
|
=back |
1056
|
|
|
|
|
|
|
|
1057
|
|
|
|
|
|
|
=back |
1058
|
|
|
|
|
|
|
|
1059
|
|
|
|
|
|
|
=back |
1060
|
|
|
|
|
|
|
|
1061
|
|
|
|
|
|
|
=head3 cache_positions |
1062
|
|
|
|
|
|
|
|
1063
|
|
|
|
|
|
|
=over |
1064
|
|
|
|
|
|
|
|
1065
|
|
|
|
|
|
|
B<Definition:> Using the standard architecture this parser would go back and |
1066
|
|
|
|
|
|
|
read the sharedStrings and styles files sequentially from the beginning each |
1067
|
|
|
|
|
|
|
time it had to access a sub elelement. This trade-off is generally not desired |
1068
|
|
|
|
|
|
|
for these two files since the data is generally stored in a less than sequential |
1069
|
|
|
|
|
|
|
fasion. The solution is to cache these files as they are read the first time so |
1070
|
|
|
|
|
|
|
that a second pass through is not necessary to retreive an earlier element. The |
1071
|
|
|
|
|
|
|
only time this doesn't make sence is if either of the files would overwhelm RAM if |
1072
|
|
|
|
|
|
|
cached. The package has file size break points below which the files will cache. |
1073
|
|
|
|
|
|
|
The thinking is that above these points the RAM is at risk of being overwhelmed |
1074
|
|
|
|
|
|
|
and that not crashing and slow is better than a possible out-of-memory state. |
1075
|
|
|
|
|
|
|
This attribute allows you to change those break points based on the target machine |
1076
|
|
|
|
|
|
|
you are running on. The breaks are set on the byte size of the sub file not on the |
1077
|
|
|
|
|
|
|
cached expansion of the sub file. In general the styles file is cached into a hash |
1078
|
|
|
|
|
|
|
and the shared strings file is cached into an array ref. The attribute |
1079
|
|
|
|
|
|
|
L<group_return_type|/group_return_type> also affects the size of the cache for the |
1080
|
|
|
|
|
|
|
sharedStrings file since it will not cache the string formats unless the attribute |
1081
|
|
|
|
|
|
|
is set to 'instance'. There is also a setting for caching worksheet data. Some |
1082
|
|
|
|
|
|
|
worksheet row position settings will always be cached in order to speed up multiple |
1083
|
|
|
|
|
|
|
reads over the same sheet or to query meta data about the rows. However, this |
1084
|
|
|
|
|
|
|
cache level is set lower since the row caching creates much deeper data structures. |
1085
|
|
|
|
|
|
|
|
1086
|
|
|
|
|
|
|
B<Default> |
1087
|
|
|
|
|
|
|
|
1088
|
|
|
|
|
|
|
{ |
1089
|
|
|
|
|
|
|
shared_strings_interface => 5242880,# 5 MB |
1090
|
|
|
|
|
|
|
styles_interface => 5242880,# 5 MB |
1091
|
|
|
|
|
|
|
worksheet_interface => 2097152,# 2 MB |
1092
|
|
|
|
|
|
|
} |
1093
|
|
|
|
|
|
|
|
1094
|
|
|
|
|
|
|
B<attribute methods> Methods provided to adjust this attribute |
1095
|
|
|
|
|
|
|
|
1096
|
|
|
|
|
|
|
=over |
1097
|
|
|
|
|
|
|
|
1098
|
|
|
|
|
|
|
B<cache_positions> |
1099
|
|
|
|
|
|
|
|
1100
|
|
|
|
|
|
|
=over |
1101
|
|
|
|
|
|
|
|
1102
|
|
|
|
|
|
|
B<Definition:> returns the full attribute settings as a hashref |
1103
|
|
|
|
|
|
|
|
1104
|
|
|
|
|
|
|
=back |
1105
|
|
|
|
|
|
|
|
1106
|
|
|
|
|
|
|
B<get_cache_size( (shared_strings_interface|styles_interface|worksheet_interface) )> |
1107
|
|
|
|
|
|
|
|
1108
|
|
|
|
|
|
|
=over |
1109
|
|
|
|
|
|
|
|
1110
|
|
|
|
|
|
|
B<Definition:> return the max file size allowed to cache for the indicated interface |
1111
|
|
|
|
|
|
|
|
1112
|
|
|
|
|
|
|
=back |
1113
|
|
|
|
|
|
|
|
1114
|
|
|
|
|
|
|
B<set_cache_size( $target_interface => $max_file_size )> |
1115
|
|
|
|
|
|
|
|
1116
|
|
|
|
|
|
|
=over |
1117
|
|
|
|
|
|
|
|
1118
|
|
|
|
|
|
|
B<Definition:> set the $max_file_size in bytes to be cached for the indicated $target_interface |
1119
|
|
|
|
|
|
|
|
1120
|
|
|
|
|
|
|
=back |
1121
|
|
|
|
|
|
|
|
1122
|
|
|
|
|
|
|
B<has_cache_size( $target_interface )> |
1123
|
|
|
|
|
|
|
|
1124
|
|
|
|
|
|
|
=over |
1125
|
|
|
|
|
|
|
|
1126
|
|
|
|
|
|
|
B<Definition:> returns true if the $target_interface has a cache size set |
1127
|
|
|
|
|
|
|
|
1128
|
|
|
|
|
|
|
=back |
1129
|
|
|
|
|
|
|
|
1130
|
|
|
|
|
|
|
=back |
1131
|
|
|
|
|
|
|
|
1132
|
|
|
|
|
|
|
=back |
1133
|
|
|
|
|
|
|
|
1134
|
|
|
|
|
|
|
=head3 show_sub_file_size |
1135
|
|
|
|
|
|
|
|
1136
|
|
|
|
|
|
|
=over |
1137
|
|
|
|
|
|
|
|
1138
|
|
|
|
|
|
|
B<Definition:> Especially for zip (xlsx and xlsm) files you may not know how big the |
1139
|
|
|
|
|
|
|
file is and want to the package to tell you what size it thinks the file is. This |
1140
|
|
|
|
|
|
|
attribute turns on a warning statment that prints to STDERR with information on the |
1141
|
|
|
|
|
|
|
size of potientially cached files. |
1142
|
|
|
|
|
|
|
|
1143
|
|
|
|
|
|
|
B<Default> 0 |
1144
|
|
|
|
|
|
|
|
1145
|
|
|
|
|
|
|
B<Range> 0 = don't warn the file size, 1 = send the potentially cached file sizes to |
1146
|
|
|
|
|
|
|
STDERR for review |
1147
|
|
|
|
|
|
|
|
1148
|
|
|
|
|
|
|
=back |
1149
|
|
|
|
|
|
|
|
1150
|
|
|
|
|
|
|
=head3 group_return_type |
1151
|
|
|
|
|
|
|
|
1152
|
|
|
|
|
|
|
=over |
1153
|
|
|
|
|
|
|
|
1154
|
|
|
|
|
|
|
B<Definition:> Traditionally ParseExcel returns a cell object with lots of methods |
1155
|
|
|
|
|
|
|
to reveal information about the cell. In reality the extra information is not used very |
1156
|
|
|
|
|
|
|
much (witness the popularity of L<Spreadsheet::XLSX>). Because many users don't need or |
1157
|
|
|
|
|
|
|
want the extra cell formatting information it is possible to get either the raw xml value, |
1158
|
|
|
|
|
|
|
the raw visible cell value (seen in the Excel format bar), or the formatted cell value |
1159
|
|
|
|
|
|
|
returned either the way the Excel file specified or the L<way you specify |
1160
|
|
|
|
|
|
|
|Spreadsheet::Reader::ExcelXML::Worksheet/custom_formats> instead of a Cell instance with |
1161
|
|
|
|
|
|
|
all the data. All empty cells return undef no matter what. |
1162
|
|
|
|
|
|
|
|
1163
|
|
|
|
|
|
|
B<Default> instance |
1164
|
|
|
|
|
|
|
|
1165
|
|
|
|
|
|
|
B<Range> instance = returns a populated L<Spreadsheet::Reader::ExcelXML::Cell> instance, |
1166
|
|
|
|
|
|
|
xml_value = returns the string stored in the xml file - for xml based sheets this can sometimes |
1167
|
|
|
|
|
|
|
be different thant the visible value in the cell or formula bar. unformatted = returns just the |
1168
|
|
|
|
|
|
|
raw visible value of the cell shown in the Excel formula bar, value = returns just the formatted |
1169
|
|
|
|
|
|
|
value stored in the excel cell |
1170
|
|
|
|
|
|
|
|
1171
|
|
|
|
|
|
|
B<attribute methods> Methods provided to adjust this attribute |
1172
|
|
|
|
|
|
|
|
1173
|
|
|
|
|
|
|
=over |
1174
|
|
|
|
|
|
|
|
1175
|
|
|
|
|
|
|
B<get_group_return_type> |
1176
|
|
|
|
|
|
|
|
1177
|
|
|
|
|
|
|
=over |
1178
|
|
|
|
|
|
|
|
1179
|
|
|
|
|
|
|
B<Definition:> a way to check the current attribute setting |
1180
|
|
|
|
|
|
|
|
1181
|
|
|
|
|
|
|
=back |
1182
|
|
|
|
|
|
|
|
1183
|
|
|
|
|
|
|
=back |
1184
|
|
|
|
|
|
|
|
1185
|
|
|
|
|
|
|
=back |
1186
|
|
|
|
|
|
|
|
1187
|
|
|
|
|
|
|
=head3 empty_return_type |
1188
|
|
|
|
|
|
|
|
1189
|
|
|
|
|
|
|
=over |
1190
|
|
|
|
|
|
|
|
1191
|
|
|
|
|
|
|
B<Definition:> Traditionally L<Spreadsheet::ParseExcel> returns an empty string for cells |
1192
|
|
|
|
|
|
|
with unique formatting but no stored value. It may be that the more accurate way of returning |
1193
|
|
|
|
|
|
|
undef works better for you. This will turn that behaviour on. |
1194
|
|
|
|
|
|
|
|
1195
|
|
|
|
|
|
|
B<Default> empty_string |
1196
|
|
|
|
|
|
|
|
1197
|
|
|
|
|
|
|
B<Range> |
1198
|
|
|
|
|
|
|
empty_string = populates the unformatted value with '' even if it is set to undef |
1199
|
|
|
|
|
|
|
undef_string = if excel stores undef for an unformatted value it will return undef |
1200
|
|
|
|
|
|
|
|
1201
|
|
|
|
|
|
|
B<attribute methods> Methods provided to adjust this attribute |
1202
|
|
|
|
|
|
|
|
1203
|
|
|
|
|
|
|
=over |
1204
|
|
|
|
|
|
|
|
1205
|
|
|
|
|
|
|
B<get_empty_return_type> |
1206
|
|
|
|
|
|
|
|
1207
|
|
|
|
|
|
|
=over |
1208
|
|
|
|
|
|
|
|
1209
|
|
|
|
|
|
|
B<Definition:> a way to check the current attribute setting |
1210
|
|
|
|
|
|
|
|
1211
|
|
|
|
|
|
|
=back |
1212
|
|
|
|
|
|
|
|
1213
|
|
|
|
|
|
|
=back |
1214
|
|
|
|
|
|
|
|
1215
|
|
|
|
|
|
|
=back |
1216
|
|
|
|
|
|
|
|
1217
|
|
|
|
|
|
|
=head3 spread_merged_values |
1218
|
|
|
|
|
|
|
|
1219
|
|
|
|
|
|
|
=over |
1220
|
|
|
|
|
|
|
|
1221
|
|
|
|
|
|
|
B<Definition:> In Excel you visibly see the value of the primary cell in a merged range displayed |
1222
|
|
|
|
|
|
|
in all the cells. This attribute lets the code see the primary value show in each of the merged |
1223
|
|
|
|
|
|
|
cells. There is some mandatory caching to pull this off so it will consume more memory. |
1224
|
|
|
|
|
|
|
|
1225
|
|
|
|
|
|
|
B<Default> 0 (To match the Excel formula bar, VBscript, and Spreadsheet::ParseExcel) |
1226
|
|
|
|
|
|
|
|
1227
|
|
|
|
|
|
|
B<Range> 0 = don't spread the primary value, 1 = spread the primary value |
1228
|
|
|
|
|
|
|
|
1229
|
|
|
|
|
|
|
B<attribute methods> Methods provided to adjust this attribute |
1230
|
|
|
|
|
|
|
|
1231
|
|
|
|
|
|
|
=over |
1232
|
|
|
|
|
|
|
|
1233
|
|
|
|
|
|
|
B<spreading_merged_values> |
1234
|
|
|
|
|
|
|
|
1235
|
|
|
|
|
|
|
=over |
1236
|
|
|
|
|
|
|
|
1237
|
|
|
|
|
|
|
B<Definition:> a way to check the current attribute setting |
1238
|
|
|
|
|
|
|
|
1239
|
|
|
|
|
|
|
=back |
1240
|
|
|
|
|
|
|
|
1241
|
|
|
|
|
|
|
=back |
1242
|
|
|
|
|
|
|
|
1243
|
|
|
|
|
|
|
=back |
1244
|
|
|
|
|
|
|
|
1245
|
|
|
|
|
|
|
=head3 skip_hidden |
1246
|
|
|
|
|
|
|
|
1247
|
|
|
|
|
|
|
=over |
1248
|
|
|
|
|
|
|
|
1249
|
|
|
|
|
|
|
B<Definition:> Like the previous attribute this attempts to match a visual effect in Excel. |
1250
|
|
|
|
|
|
|
Even though hidden cells still contain values you can't see them visibly. This allows |
1251
|
|
|
|
|
|
|
you to skip hidden rows and columns (not hidden sheets). The one gotcha is Excel will |
1252
|
|
|
|
|
|
|
place the primary value in the new primary merged cell (formula bar) if a merge range is |
1253
|
|
|
|
|
|
|
only partially obscured to include the original primary cell. This package can't do that. |
1254
|
|
|
|
|
|
|
Either spread the primary to all cells or none. |
1255
|
|
|
|
|
|
|
|
1256
|
|
|
|
|
|
|
B<Default> 0 (To match VBscript and Spreadsheet::ParseExcel) |
1257
|
|
|
|
|
|
|
|
1258
|
|
|
|
|
|
|
B<Range> 0 = don't skip hidden rows and columns, 1 = skip hidden rows and columns |
1259
|
|
|
|
|
|
|
|
1260
|
|
|
|
|
|
|
B<attribute methods> Methods provided to adjust this attribute |
1261
|
|
|
|
|
|
|
|
1262
|
|
|
|
|
|
|
=over |
1263
|
|
|
|
|
|
|
|
1264
|
|
|
|
|
|
|
B<should_skip_hidden> |
1265
|
|
|
|
|
|
|
|
1266
|
|
|
|
|
|
|
=over |
1267
|
|
|
|
|
|
|
|
1268
|
|
|
|
|
|
|
B<Definition:> a way to check the current attribute setting |
1269
|
|
|
|
|
|
|
|
1270
|
|
|
|
|
|
|
=back |
1271
|
|
|
|
|
|
|
|
1272
|
|
|
|
|
|
|
=back |
1273
|
|
|
|
|
|
|
|
1274
|
|
|
|
|
|
|
=back |
1275
|
|
|
|
|
|
|
|
1276
|
|
|
|
|
|
|
=head3 spaces_are_empty |
1277
|
|
|
|
|
|
|
|
1278
|
|
|
|
|
|
|
=over |
1279
|
|
|
|
|
|
|
|
1280
|
|
|
|
|
|
|
B<Definition:> Some auto file generators tend to add empty strings or strings with spaces to |
1281
|
|
|
|
|
|
|
fill empty cells. There may be some visual value in this but they can slow down parsing scripts. |
1282
|
|
|
|
|
|
|
this attribute allows the sheet to treat spaces as empty or undef instead of cells with values. |
1283
|
|
|
|
|
|
|
|
1284
|
|
|
|
|
|
|
B<Default> 0 (To match Excel and Spreadsheet::ParseExcel) |
1285
|
|
|
|
|
|
|
|
1286
|
|
|
|
|
|
|
B<Range> 0 = cells with zero length strings and spaces are considered to have 'values", 1 = There must |
1287
|
|
|
|
|
|
|
be something other than spaces or a zero length string for the cell to have value. |
1288
|
|
|
|
|
|
|
|
1289
|
|
|
|
|
|
|
B<attribute methods> Methods provided to adjust this attribute |
1290
|
|
|
|
|
|
|
|
1291
|
|
|
|
|
|
|
=over |
1292
|
|
|
|
|
|
|
|
1293
|
|
|
|
|
|
|
B<are_spaces_empty> |
1294
|
|
|
|
|
|
|
|
1295
|
|
|
|
|
|
|
=over |
1296
|
|
|
|
|
|
|
|
1297
|
|
|
|
|
|
|
B<Definition:> a way to check the current attribute setting |
1298
|
|
|
|
|
|
|
|
1299
|
|
|
|
|
|
|
=back |
1300
|
|
|
|
|
|
|
|
1301
|
|
|
|
|
|
|
=back |
1302
|
|
|
|
|
|
|
|
1303
|
|
|
|
|
|
|
=back |
1304
|
|
|
|
|
|
|
|
1305
|
|
|
|
|
|
|
=head3 merge_data |
1306
|
|
|
|
|
|
|
|
1307
|
|
|
|
|
|
|
=over |
1308
|
|
|
|
|
|
|
|
1309
|
|
|
|
|
|
|
B<Definition:> For zip based worksheets the merge data is stored at the end of the file. In order for |
1310
|
|
|
|
|
|
|
the parser to arrive at that point it has to read through the whole sheet first. For big worksheet |
1311
|
|
|
|
|
|
|
files this is very slow. If you are willing to not know or implement cell merge information then turn |
1312
|
|
|
|
|
|
|
this off and the sheet should load much faster. |
1313
|
|
|
|
|
|
|
|
1314
|
|
|
|
|
|
|
B<Default> 1 (collect merge data) |
1315
|
|
|
|
|
|
|
|
1316
|
|
|
|
|
|
|
B<Range> 1 = The merge data is parsed from the worksheet file when it is opened, 0 = No merge data is |
1317
|
|
|
|
|
|
|
parsed. The effect is equal to the cell merges dissapearing. |
1318
|
|
|
|
|
|
|
|
1319
|
|
|
|
|
|
|
B<attribute methods> Methods provided to adjust this attribute |
1320
|
|
|
|
|
|
|
|
1321
|
|
|
|
|
|
|
=over |
1322
|
|
|
|
|
|
|
|
1323
|
|
|
|
|
|
|
B<collecting_merge_data> |
1324
|
|
|
|
|
|
|
|
1325
|
|
|
|
|
|
|
=over |
1326
|
|
|
|
|
|
|
|
1327
|
|
|
|
|
|
|
B<Definition:> a way to check the current attribute setting |
1328
|
|
|
|
|
|
|
|
1329
|
|
|
|
|
|
|
=back |
1330
|
|
|
|
|
|
|
|
1331
|
|
|
|
|
|
|
=back |
1332
|
|
|
|
|
|
|
|
1333
|
|
|
|
|
|
|
=back |
1334
|
|
|
|
|
|
|
|
1335
|
|
|
|
|
|
|
=head2 FLAGS |
1336
|
|
|
|
|
|
|
|
1337
|
|
|
|
|
|
|
The parameter list (Attributes) that are possible to pass to ->new is somewhat long. |
1338
|
|
|
|
|
|
|
Therefore you may want a shortcut that aggregates some set of attribute settings that |
1339
|
|
|
|
|
|
|
are not the defaults but wind up being boilerplate. I have provided possible |
1340
|
|
|
|
|
|
|
alternate sets like this and am open to providing others that are suggested. The |
1341
|
|
|
|
|
|
|
flags will have a : in front of the identifier and will be passed to the class in the |
1342
|
|
|
|
|
|
|
'use' statement for consumption by the import method. The flags can be stacked and |
1343
|
|
|
|
|
|
|
where there is conflict between the flag settings the rightmost passed flag setting is |
1344
|
|
|
|
|
|
|
used. If everything in the flag but one or two settings are desirable still use the flag and |
1345
|
|
|
|
|
|
|
then overwrite those settings when calling new. |
1346
|
|
|
|
|
|
|
|
1347
|
|
|
|
|
|
|
Example; |
1348
|
|
|
|
|
|
|
|
1349
|
|
|
|
|
|
|
use Spreadsheet::Reader::ExcelXML v0.2 qw( :alt_default :debug ); |
1350
|
|
|
|
|
|
|
|
1351
|
|
|
|
|
|
|
=head3 :alt_default |
1352
|
|
|
|
|
|
|
|
1353
|
|
|
|
|
|
|
This is intended for a deep look at data and skip formatting cells. |
1354
|
|
|
|
|
|
|
|
1355
|
|
|
|
|
|
|
=over |
1356
|
|
|
|
|
|
|
|
1357
|
|
|
|
|
|
|
B<Default attribute differences> |
1358
|
|
|
|
|
|
|
|
1359
|
|
|
|
|
|
|
=over |
1360
|
|
|
|
|
|
|
|
1361
|
|
|
|
|
|
|
L<values_only|/values_only> => 1 |
1362
|
|
|
|
|
|
|
|
1363
|
|
|
|
|
|
|
L<count_from_zero|/count_from_zero> => 0 |
1364
|
|
|
|
|
|
|
|
1365
|
|
|
|
|
|
|
L<empty_is_end|/empty_is_end> => 1 |
1366
|
|
|
|
|
|
|
|
1367
|
|
|
|
|
|
|
=back |
1368
|
|
|
|
|
|
|
|
1369
|
|
|
|
|
|
|
=back |
1370
|
|
|
|
|
|
|
|
1371
|
|
|
|
|
|
|
=head3 :just_the_data |
1372
|
|
|
|
|
|
|
|
1373
|
|
|
|
|
|
|
This is intended for a shallow look at data with value formatting implemented |
1374
|
|
|
|
|
|
|
|
1375
|
|
|
|
|
|
|
=over |
1376
|
|
|
|
|
|
|
|
1377
|
|
|
|
|
|
|
B<Default attribute differences> |
1378
|
|
|
|
|
|
|
|
1379
|
|
|
|
|
|
|
=over |
1380
|
|
|
|
|
|
|
|
1381
|
|
|
|
|
|
|
L<count_from_zero|/count_from_zero> => 0 |
1382
|
|
|
|
|
|
|
|
1383
|
|
|
|
|
|
|
L<values_only|/values_only> => 1 |
1384
|
|
|
|
|
|
|
|
1385
|
|
|
|
|
|
|
L<empty_is_end|/empty_is_end> => 1 |
1386
|
|
|
|
|
|
|
|
1387
|
|
|
|
|
|
|
L<group_return_type|/group_return_type> => 'value' |
1388
|
|
|
|
|
|
|
|
1389
|
|
|
|
|
|
|
L<from_the_edge|/from_the_edge> => 0 |
1390
|
|
|
|
|
|
|
|
1391
|
|
|
|
|
|
|
L<empty_return_type|/empty_return_type> => 'undef_string' |
1392
|
|
|
|
|
|
|
|
1393
|
|
|
|
|
|
|
L<spaces_are_empty|/spaces_are_empty> => 1 |
1394
|
|
|
|
|
|
|
|
1395
|
|
|
|
|
|
|
L<merge_data|/merge_data> => 0 |
1396
|
|
|
|
|
|
|
|
1397
|
|
|
|
|
|
|
L<column_formats|/column_formats> => 0 |
1398
|
|
|
|
|
|
|
|
1399
|
|
|
|
|
|
|
=back |
1400
|
|
|
|
|
|
|
|
1401
|
|
|
|
|
|
|
=back |
1402
|
|
|
|
|
|
|
|
1403
|
|
|
|
|
|
|
=head3 :just_raw_data |
1404
|
|
|
|
|
|
|
|
1405
|
|
|
|
|
|
|
This is intended for a shallow look at raw text and skips all formatting including number formats. |
1406
|
|
|
|
|
|
|
|
1407
|
|
|
|
|
|
|
=over |
1408
|
|
|
|
|
|
|
|
1409
|
|
|
|
|
|
|
B<Default attribute differences> |
1410
|
|
|
|
|
|
|
|
1411
|
|
|
|
|
|
|
=over |
1412
|
|
|
|
|
|
|
|
1413
|
|
|
|
|
|
|
L<count_from_zero|/count_from_zero> => 0 |
1414
|
|
|
|
|
|
|
|
1415
|
|
|
|
|
|
|
L<values_only|/values_only> => 1 |
1416
|
|
|
|
|
|
|
|
1417
|
|
|
|
|
|
|
L<empty_is_end|/empty_is_end> => 1 |
1418
|
|
|
|
|
|
|
|
1419
|
|
|
|
|
|
|
L<group_return_type|/group_return_type> => 'xml_value' |
1420
|
|
|
|
|
|
|
|
1421
|
|
|
|
|
|
|
L<from_the_edge|/from_the_edge> => 0, |
1422
|
|
|
|
|
|
|
|
1423
|
|
|
|
|
|
|
L<empty_return_type|/empty_return_type> => 'undef_string' |
1424
|
|
|
|
|
|
|
|
1425
|
|
|
|
|
|
|
L<spaces_are_empty|/spaces_are_empty> => 1 |
1426
|
|
|
|
|
|
|
|
1427
|
|
|
|
|
|
|
L<merge_data|/merge_data> => 0 |
1428
|
|
|
|
|
|
|
|
1429
|
|
|
|
|
|
|
L<column_formats|/column_formats> => 0 |
1430
|
|
|
|
|
|
|
|
1431
|
|
|
|
|
|
|
=back |
1432
|
|
|
|
|
|
|
|
1433
|
|
|
|
|
|
|
=back |
1434
|
|
|
|
|
|
|
|
1435
|
|
|
|
|
|
|
=head3 :like_ParseExcel |
1436
|
|
|
|
|
|
|
|
1437
|
|
|
|
|
|
|
This is a way to force some of the other groups back to instance and count from zero |
1438
|
|
|
|
|
|
|
|
1439
|
|
|
|
|
|
|
=over |
1440
|
|
|
|
|
|
|
|
1441
|
|
|
|
|
|
|
B<Default attribute differences> |
1442
|
|
|
|
|
|
|
|
1443
|
|
|
|
|
|
|
=over |
1444
|
|
|
|
|
|
|
|
1445
|
|
|
|
|
|
|
L<count_from_zero|/count_from_zero> => 1 |
1446
|
|
|
|
|
|
|
|
1447
|
|
|
|
|
|
|
L<group_return_type|/group_return_type> => 'instance' |
1448
|
|
|
|
|
|
|
|
1449
|
|
|
|
|
|
|
=back |
1450
|
|
|
|
|
|
|
|
1451
|
|
|
|
|
|
|
=back |
1452
|
|
|
|
|
|
|
|
1453
|
|
|
|
|
|
|
=head3 :debug |
1454
|
|
|
|
|
|
|
|
1455
|
|
|
|
|
|
|
This is a way to turn on as much reporting as possible |
1456
|
|
|
|
|
|
|
|
1457
|
|
|
|
|
|
|
=over |
1458
|
|
|
|
|
|
|
|
1459
|
|
|
|
|
|
|
B<Default attribute differences> |
1460
|
|
|
|
|
|
|
|
1461
|
|
|
|
|
|
|
=over |
1462
|
|
|
|
|
|
|
|
1463
|
|
|
|
|
|
|
L<error_inst|/error_inst> -> |
1464
|
|
|
|
|
|
|
|
1465
|
|
|
|
|
|
|
error_inst =>{ |
1466
|
|
|
|
|
|
|
superclasses => ['Spreadsheet::Reader::ExcelXML::Error'], |
1467
|
|
|
|
|
|
|
package => 'ErrorInstance', |
1468
|
|
|
|
|
|
|
should_warn => 1, |
1469
|
|
|
|
|
|
|
} |
1470
|
|
|
|
|
|
|
|
1471
|
|
|
|
|
|
|
L<show_sub_file_size|/show_sub_file_size> => 1 |
1472
|
|
|
|
|
|
|
|
1473
|
|
|
|
|
|
|
=back |
1474
|
|
|
|
|
|
|
|
1475
|
|
|
|
|
|
|
=back |
1476
|
|
|
|
|
|
|
|
1477
|
|
|
|
|
|
|
=head3 :lots_of_ram |
1478
|
|
|
|
|
|
|
|
1479
|
|
|
|
|
|
|
This opens the caching size allowances way up |
1480
|
|
|
|
|
|
|
|
1481
|
|
|
|
|
|
|
=over |
1482
|
|
|
|
|
|
|
|
1483
|
|
|
|
|
|
|
B<Default attribute differences> |
1484
|
|
|
|
|
|
|
|
1485
|
|
|
|
|
|
|
=over |
1486
|
|
|
|
|
|
|
|
1487
|
|
|
|
|
|
|
L<cache_positions|/cache_positions> -> |
1488
|
|
|
|
|
|
|
|
1489
|
|
|
|
|
|
|
cache_positions =>{ |
1490
|
|
|
|
|
|
|
shared_strings_interface => 209715200,# 200 MB |
1491
|
|
|
|
|
|
|
styles_interface => 209715200,# 200 MB |
1492
|
|
|
|
|
|
|
worksheet_interface => 209715200,# 200 MB |
1493
|
|
|
|
|
|
|
}, |
1494
|
|
|
|
|
|
|
|
1495
|
|
|
|
|
|
|
=back |
1496
|
|
|
|
|
|
|
|
1497
|
|
|
|
|
|
|
=back |
1498
|
|
|
|
|
|
|
|
1499
|
|
|
|
|
|
|
=head3 :less_ram |
1500
|
|
|
|
|
|
|
|
1501
|
|
|
|
|
|
|
This tightens caching size allowances way down |
1502
|
|
|
|
|
|
|
|
1503
|
|
|
|
|
|
|
=over |
1504
|
|
|
|
|
|
|
|
1505
|
|
|
|
|
|
|
B<Default attribute differences> |
1506
|
|
|
|
|
|
|
|
1507
|
|
|
|
|
|
|
=over |
1508
|
|
|
|
|
|
|
|
1509
|
|
|
|
|
|
|
L<cache_positions|/cache_positions> -> |
1510
|
|
|
|
|
|
|
|
1511
|
|
|
|
|
|
|
cache_positions =>{ |
1512
|
|
|
|
|
|
|
shared_strings_interface => 10240,# 10 KB |
1513
|
|
|
|
|
|
|
styles_interface => 10240,# 10 KB |
1514
|
|
|
|
|
|
|
worksheet_interface => 1024,# 1 KB |
1515
|
|
|
|
|
|
|
}, |
1516
|
|
|
|
|
|
|
|
1517
|
|
|
|
|
|
|
=back |
1518
|
|
|
|
|
|
|
|
1519
|
|
|
|
|
|
|
=back |
1520
|
|
|
|
|
|
|
|
1521
|
|
|
|
|
|
|
=head2 Secondary Methods |
1522
|
|
|
|
|
|
|
|
1523
|
|
|
|
|
|
|
These are additional ways to use this class. They can be used to open an .xlsx workbook. |
1524
|
|
|
|
|
|
|
They are also ways to investigate information at the workbook level. For information on |
1525
|
|
|
|
|
|
|
how to retrieve data from the worksheets see the |
1526
|
|
|
|
|
|
|
L<Worksheet|Spreadsheet::Reader::ExcelXML::Worksheet> and |
1527
|
|
|
|
|
|
|
L<Cell|Spreadsheet::Reader::ExcelXML::Cell> documentation. For additional workbook |
1528
|
|
|
|
|
|
|
options see the L<Secondary Methods|/Secondary Methods> |
1529
|
|
|
|
|
|
|
and the L<Attributes|/Attributes> sections. The attributes section specifically contains |
1530
|
|
|
|
|
|
|
all the methods used to adjust the attributes of this class. |
1531
|
|
|
|
|
|
|
|
1532
|
|
|
|
|
|
|
All methods are object methods and should be implemented on the object instance. |
1533
|
|
|
|
|
|
|
|
1534
|
|
|
|
|
|
|
B<Example:> |
1535
|
|
|
|
|
|
|
|
1536
|
|
|
|
|
|
|
my @worksheet_array = $workbook_instance->worksheets; |
1537
|
|
|
|
|
|
|
|
1538
|
|
|
|
|
|
|
=head3 get_epoch_year |
1539
|
|
|
|
|
|
|
|
1540
|
|
|
|
|
|
|
=over |
1541
|
|
|
|
|
|
|
|
1542
|
|
|
|
|
|
|
B<Definition:> This returns the epoch year defined by the Excel workbook. The epoch year |
1543
|
|
|
|
|
|
|
affects the way dates are processed in the formatter L<Spreadsheet::Reader::Format> |
1544
|
|
|
|
|
|
|
|
1545
|
|
|
|
|
|
|
B<Accepts:> nothing |
1546
|
|
|
|
|
|
|
|
1547
|
|
|
|
|
|
|
B<Returns:> 1900 = Windows Excel or 1904 = Apple Excel |
1548
|
|
|
|
|
|
|
|
1549
|
|
|
|
|
|
|
=back |
1550
|
|
|
|
|
|
|
|
1551
|
|
|
|
|
|
|
=head3 has_epoch_year |
1552
|
|
|
|
|
|
|
|
1553
|
|
|
|
|
|
|
=over |
1554
|
|
|
|
|
|
|
|
1555
|
|
|
|
|
|
|
B<Definition:> This indicates if an epoch_year has been determined for the workbook (yet) |
1556
|
|
|
|
|
|
|
|
1557
|
|
|
|
|
|
|
B<Accepts:> nothing |
1558
|
|
|
|
|
|
|
|
1559
|
|
|
|
|
|
|
B<Returns:> 1 = yes there is one, 0 = nothing (yet) |
1560
|
|
|
|
|
|
|
|
1561
|
|
|
|
|
|
|
=back |
1562
|
|
|
|
|
|
|
|
1563
|
|
|
|
|
|
|
=head3 get_sheet_name( $Int ) |
1564
|
|
|
|
|
|
|
|
1565
|
|
|
|
|
|
|
=over |
1566
|
|
|
|
|
|
|
|
1567
|
|
|
|
|
|
|
B<Definition:> This method returns the sheet name for a given physical position |
1568
|
|
|
|
|
|
|
in the workbook from left to right. It counts from zero even if the workbook is in |
1569
|
|
|
|
|
|
|
'count_from_one' mode. B(It will return chart names but chart tab names cannot currently |
1570
|
|
|
|
|
|
|
be converted to worksheets). You may actually want L<worksheet_name|worksheet_name( $Int )> |
1571
|
|
|
|
|
|
|
instead of this function. |
1572
|
|
|
|
|
|
|
|
1573
|
|
|
|
|
|
|
B<Accepts:> integers |
1574
|
|
|
|
|
|
|
|
1575
|
|
|
|
|
|
|
B<Returns:> the sheet name (both worksheet and chartsheet ) |
1576
|
|
|
|
|
|
|
|
1577
|
|
|
|
|
|
|
=back |
1578
|
|
|
|
|
|
|
|
1579
|
|
|
|
|
|
|
=head3 sheet_count |
1580
|
|
|
|
|
|
|
|
1581
|
|
|
|
|
|
|
=over |
1582
|
|
|
|
|
|
|
|
1583
|
|
|
|
|
|
|
B<Definition:> This returns the total number of recorded sheets |
1584
|
|
|
|
|
|
|
|
1585
|
|
|
|
|
|
|
B<Accepts:> nothing |
1586
|
|
|
|
|
|
|
|
1587
|
|
|
|
|
|
|
B<Returns:> $total - a count of all sheets (including chartsheets and worksheets) |
1588
|
|
|
|
|
|
|
|
1589
|
|
|
|
|
|
|
=back |
1590
|
|
|
|
|
|
|
|
1591
|
|
|
|
|
|
|
=head3 get_sheet_info( $name ) |
1592
|
|
|
|
|
|
|
|
1593
|
|
|
|
|
|
|
=over |
1594
|
|
|
|
|
|
|
|
1595
|
|
|
|
|
|
|
B<Definition:> This returns any stored metadata about the sheet in a hashref |
1596
|
|
|
|
|
|
|
|
1597
|
|
|
|
|
|
|
B<Accepts:> $name |
1598
|
|
|
|
|
|
|
|
1599
|
|
|
|
|
|
|
B<Returns:> a hashref of sheet metadata ( a pretty thin list still ) |
1600
|
|
|
|
|
|
|
|
1601
|
|
|
|
|
|
|
=back |
1602
|
|
|
|
|
|
|
|
1603
|
|
|
|
|
|
|
=head3 get_rel_info( $relId ) |
1604
|
|
|
|
|
|
|
|
1605
|
|
|
|
|
|
|
=over |
1606
|
|
|
|
|
|
|
|
1607
|
|
|
|
|
|
|
B<Definition:> This returns the sheet name for the $relId |
1608
|
|
|
|
|
|
|
|
1609
|
|
|
|
|
|
|
B<Accepts:> $relId ex; 'rId5' |
1610
|
|
|
|
|
|
|
|
1611
|
|
|
|
|
|
|
B<Returns:> The sheet $name associated with that relId |
1612
|
|
|
|
|
|
|
|
1613
|
|
|
|
|
|
|
=back |
1614
|
|
|
|
|
|
|
|
1615
|
|
|
|
|
|
|
=head3 get_id_info( $Id ) |
1616
|
|
|
|
|
|
|
|
1617
|
|
|
|
|
|
|
=over |
1618
|
|
|
|
|
|
|
|
1619
|
|
|
|
|
|
|
B<Definition:> This returns the sheet name for the $Id |
1620
|
|
|
|
|
|
|
|
1621
|
|
|
|
|
|
|
B<Accepts:> $Id (an integer) ex; '2' |
1622
|
|
|
|
|
|
|
|
1623
|
|
|
|
|
|
|
B<Returns:> The sheet $name associated with that Id |
1624
|
|
|
|
|
|
|
|
1625
|
|
|
|
|
|
|
=back |
1626
|
|
|
|
|
|
|
|
1627
|
|
|
|
|
|
|
=head3 get_chartsheet_names |
1628
|
|
|
|
|
|
|
|
1629
|
|
|
|
|
|
|
=over |
1630
|
|
|
|
|
|
|
|
1631
|
|
|
|
|
|
|
B<Definition:> This method returns an array ref of all the chartsheet names (tabs) in the |
1632
|
|
|
|
|
|
|
workbook in order. (No worksheets.) |
1633
|
|
|
|
|
|
|
|
1634
|
|
|
|
|
|
|
B<Accepts:> nothing |
1635
|
|
|
|
|
|
|
|
1636
|
|
|
|
|
|
|
B<Returns:> an array ref of strings |
1637
|
|
|
|
|
|
|
|
1638
|
|
|
|
|
|
|
=back |
1639
|
|
|
|
|
|
|
|
1640
|
|
|
|
|
|
|
=head3 chartsheet_name( $position ) |
1641
|
|
|
|
|
|
|
|
1642
|
|
|
|
|
|
|
=over |
1643
|
|
|
|
|
|
|
|
1644
|
|
|
|
|
|
|
B<Definition:> This returns the name of the chartsheet in that $position. (counting from zero) |
1645
|
|
|
|
|
|
|
interspersed worksheets in the file are not considered to hold a position by this accounting. |
1646
|
|
|
|
|
|
|
|
1647
|
|
|
|
|
|
|
B<Accepts:> $position (an integer) |
1648
|
|
|
|
|
|
|
|
1649
|
|
|
|
|
|
|
B<Returns:> the chartsheet name |
1650
|
|
|
|
|
|
|
|
1651
|
|
|
|
|
|
|
=back |
1652
|
|
|
|
|
|
|
|
1653
|
|
|
|
|
|
|
=head3 chartsheet_count |
1654
|
|
|
|
|
|
|
|
1655
|
|
|
|
|
|
|
=over |
1656
|
|
|
|
|
|
|
|
1657
|
|
|
|
|
|
|
B<Definition:> This returns the total number of recorded chartsheets |
1658
|
|
|
|
|
|
|
|
1659
|
|
|
|
|
|
|
B<Accepts:> nothing |
1660
|
|
|
|
|
|
|
|
1661
|
|
|
|
|
|
|
B<Returns:> $total - a count of all chartsheets (only) |
1662
|
|
|
|
|
|
|
|
1663
|
|
|
|
|
|
|
=back |
1664
|
|
|
|
|
|
|
|
1665
|
|
|
|
|
|
|
=head3 creator |
1666
|
|
|
|
|
|
|
|
1667
|
|
|
|
|
|
|
=over |
1668
|
|
|
|
|
|
|
|
1669
|
|
|
|
|
|
|
B<Definition:> Returns the recorded creator of the file from the parsed metadata |
1670
|
|
|
|
|
|
|
|
1671
|
|
|
|
|
|
|
B<Accepts:> nothing |
1672
|
|
|
|
|
|
|
|
1673
|
|
|
|
|
|
|
B<Returns:> a string |
1674
|
|
|
|
|
|
|
|
1675
|
|
|
|
|
|
|
=back |
1676
|
|
|
|
|
|
|
|
1677
|
|
|
|
|
|
|
=head3 modified_by |
1678
|
|
|
|
|
|
|
|
1679
|
|
|
|
|
|
|
=over |
1680
|
|
|
|
|
|
|
|
1681
|
|
|
|
|
|
|
B<Definition:> Returns the recorded last entity to modify the file from the parsed metadata |
1682
|
|
|
|
|
|
|
|
1683
|
|
|
|
|
|
|
B<Accepts:> nothing |
1684
|
|
|
|
|
|
|
|
1685
|
|
|
|
|
|
|
B<Returns:> a string |
1686
|
|
|
|
|
|
|
|
1687
|
|
|
|
|
|
|
=back |
1688
|
|
|
|
|
|
|
|
1689
|
|
|
|
|
|
|
=head3 date_created |
1690
|
|
|
|
|
|
|
|
1691
|
|
|
|
|
|
|
=over |
1692
|
|
|
|
|
|
|
|
1693
|
|
|
|
|
|
|
B<Definition:> Returns the date that Excel recorded for the file creation |
1694
|
|
|
|
|
|
|
|
1695
|
|
|
|
|
|
|
B<Accepts:> nothing |
1696
|
|
|
|
|
|
|
|
1697
|
|
|
|
|
|
|
B<Returns:> a string (YYYY-MM-DD) |
1698
|
|
|
|
|
|
|
|
1699
|
|
|
|
|
|
|
=back |
1700
|
|
|
|
|
|
|
|
1701
|
|
|
|
|
|
|
=head3 date_modified |
1702
|
|
|
|
|
|
|
|
1703
|
|
|
|
|
|
|
=over |
1704
|
|
|
|
|
|
|
|
1705
|
|
|
|
|
|
|
B<Definition:> Returns the date that Excel recorded for the last file modification |
1706
|
|
|
|
|
|
|
|
1707
|
|
|
|
|
|
|
B<Accepts:> nothing |
1708
|
|
|
|
|
|
|
|
1709
|
|
|
|
|
|
|
B<Returns:> a string (YYYY-MM-DD) |
1710
|
|
|
|
|
|
|
|
1711
|
|
|
|
|
|
|
=back |
1712
|
|
|
|
|
|
|
|
1713
|
|
|
|
|
|
|
=head3 in_the_list |
1714
|
|
|
|
|
|
|
|
1715
|
|
|
|
|
|
|
=over |
1716
|
|
|
|
|
|
|
|
1717
|
|
|
|
|
|
|
B<Definition:> This is a predicate method that indicates if the 'next' |
1718
|
|
|
|
|
|
|
L<worksheet|/worksheet( $name )> function has been implemented at least once. |
1719
|
|
|
|
|
|
|
|
1720
|
|
|
|
|
|
|
B<Accepts:>nothing |
1721
|
|
|
|
|
|
|
|
1722
|
|
|
|
|
|
|
B<Returns:> true = 1, false = 0 |
1723
|
|
|
|
|
|
|
|
1724
|
|
|
|
|
|
|
=back |
1725
|
|
|
|
|
|
|
|
1726
|
|
|
|
|
|
|
=head3 start_at_the_beginning |
1727
|
|
|
|
|
|
|
|
1728
|
|
|
|
|
|
|
=over |
1729
|
|
|
|
|
|
|
|
1730
|
|
|
|
|
|
|
B<Definition:> This restarts the 'next' worksheet at the first worksheet. This |
1731
|
|
|
|
|
|
|
method is only useful in the context of the L<worksheet|/worksheet( $name )> |
1732
|
|
|
|
|
|
|
function. |
1733
|
|
|
|
|
|
|
|
1734
|
|
|
|
|
|
|
B<Accepts:> nothing |
1735
|
|
|
|
|
|
|
|
1736
|
|
|
|
|
|
|
B<Returns:> nothing |
1737
|
|
|
|
|
|
|
|
1738
|
|
|
|
|
|
|
=back |
1739
|
|
|
|
|
|
|
|
1740
|
|
|
|
|
|
|
=head2 Architecture Choices |
1741
|
|
|
|
|
|
|
|
1742
|
|
|
|
|
|
|
This is yet another package for parsing Excel xml or 2007+ (and 2003+ xml) workbooks. |
1743
|
|
|
|
|
|
|
There are two other options for 2007+ XLSX parsing (but not 2003 xml parsing) on CPAN. |
1744
|
|
|
|
|
|
|
(L<Spreadsheet::ParseXLSX> and L<Spreadsheet::XLSX>) In general if either of them |
1745
|
|
|
|
|
|
|
already work for you without issue then there is probably no compelling reason to |
1746
|
|
|
|
|
|
|
switch to this package. However, the goals of this package which may provide |
1747
|
|
|
|
|
|
|
differentiation are five fold. First, as close as possible produce the same output as |
1748
|
|
|
|
|
|
|
is visible in an excel spreadsheet with exposure to underlying settings from Excel. |
1749
|
|
|
|
|
|
|
Second, adhere as close as is reasonable to the L<Spreadsheet::ParseExcel> API (where |
1750
|
|
|
|
|
|
|
it doesn't conflict with the first objective) so that less work would be needed to |
1751
|
|
|
|
|
|
|
integrate ParseExcel and this package. An addendum to the second goal is this package |
1752
|
|
|
|
|
|
|
will not expose elements of the object hash for use by the consuming program. This |
1753
|
|
|
|
|
|
|
package will either return an unblessed hash with the equivalent elements to the |
1754
|
|
|
|
|
|
|
Spreadsheet::ParseExcel output (instead of a class instance) or it will provide methods |
1755
|
|
|
|
|
|
|
to provide these sets of data. The third goal is to read the excel files in a 'just in |
1756
|
|
|
|
|
|
|
time' manner without storing all the data in memory. The intent is to minimize the |
1757
|
|
|
|
|
|
|
footprint of large file reads. Initially I did this using L<XML::LibXML> but it |
1758
|
|
|
|
|
|
|
eventually L<proved to not play well|http://www.perlmonks.org/?node_id=1151609> with |
1759
|
|
|
|
|
|
|
Moose ( or perl? ) garbage collection so this package uses a pure perl xml parser. |
1760
|
|
|
|
|
|
|
In general this means that design decisions will generally sacrifice speed to keep RAM |
1761
|
|
|
|
|
|
|
consumption low. Since the data in the sheet is parsed just in time the information that |
1762
|
|
|
|
|
|
|
is not contained in the primary meta-data headers will not be available for review L<until |
1763
|
|
|
|
|
|
|
the sheet parses to that point|Spreadsheet::Reader::ExcelXML::Worksheet/max_row>. In |
1764
|
|
|
|
|
|
|
cases where the parser has made choices that prioritize speed over RAM savings there will |
1765
|
|
|
|
|
|
|
generally be an L<attribute available to turn that decision off|/set_cache_behavior>. |
1766
|
|
|
|
|
|
|
Fourth, Excel files get abused in the wild. In general the Microsoft (TM) Excel |
1767
|
|
|
|
|
|
|
application handles these mangled files gracefully. The goal is to be able to read any |
1768
|
|
|
|
|
|
|
xml based spreadsheet Excel can read from the supported extention list. Finally, this |
1769
|
|
|
|
|
|
|
parser supports the Excel 2003 xml format. All in all this package solves many of the |
1770
|
|
|
|
|
|
|
issues I found parsing Excel in the wild. I hope it solves some of yours as well. |
1771
|
|
|
|
|
|
|
|
1772
|
|
|
|
|
|
|
=head2 Warnings |
1773
|
|
|
|
|
|
|
|
1774
|
|
|
|
|
|
|
B<1.>This package uses L<Archive::Zip>. Not all versions of Archive::Zip work for everyone. |
1775
|
|
|
|
|
|
|
I have tested this with Archive::Zip 1.30. Please let me know if this does not work with a |
1776
|
|
|
|
|
|
|
sucessfully installed (read passed the full test suit) version of Archive::Zip newer than that. |
1777
|
|
|
|
|
|
|
|
1778
|
|
|
|
|
|
|
B<2.> Not all workbook sheets (tabs) are created equal! Some Excel sheet tabs are only a |
1779
|
|
|
|
|
|
|
chart. These tabs are 'chartsheets'. The methods with 'worksheet' in the name only act on |
1780
|
|
|
|
|
|
|
the sub set of tabs that are worksheets. Future methods with 'chartsheet' in the name will |
1781
|
|
|
|
|
|
|
focus on the subset of sheets that are chartsheets. Methods with just 'sheet' in the name |
1782
|
|
|
|
|
|
|
have the potential to act on both. The documentation for the chartsheet level class is |
1783
|
|
|
|
|
|
|
found in L<Spreadsheet::Reader::ExcelXML::Chartsheet> (still under construction). |
1784
|
|
|
|
|
|
|
All chartsheet classes do not provide access to cells. |
1785
|
|
|
|
|
|
|
|
1786
|
|
|
|
|
|
|
B<3.> This package supports reading xlsm files (Macro enabled Excel 2007+ workbooks). |
1787
|
|
|
|
|
|
|
xlsm files allow for binaries to be embedded that may contain malicious code. However, |
1788
|
|
|
|
|
|
|
other than unzipping the excel file no work is done by this package with the sub-file |
1789
|
|
|
|
|
|
|
'vbaProject.bin' containing the binaries. This package does not provide an API to that |
1790
|
|
|
|
|
|
|
sub-file and I have no intention of doing so. Therefore my research indicates there should |
1791
|
|
|
|
|
|
|
be no risk of virus activation while parsing even an infected xlsm file with this package |
1792
|
|
|
|
|
|
|
but I encourage you to use your own judgement in this area. B<L<caveat utilitor! |
1793
|
|
|
|
|
|
|
|https://en.wiktionary.org/wiki/Appendix:List_of_Latin_phrases>> |
1794
|
|
|
|
|
|
|
|
1795
|
|
|
|
|
|
|
B<4.> This package will read some files with 'broken' xml. In general this should be |
1796
|
|
|
|
|
|
|
transparent but in the case of the maximum row value and the maximum column value for a |
1797
|
|
|
|
|
|
|
worksheet it can cause some surprising problems. This includes the possibility that the |
1798
|
|
|
|
|
|
|
maximum values are initially stored as 'undef' if the sheet does not provide them in the |
1799
|
|
|
|
|
|
|
metadata as expected. I<These values are generally never available in Excel 2003 xml |
1800
|
|
|
|
|
|
|
files.> The answer to the methods L<Spreadsheet::Reader::ExcelXML::Worksheet/row_range> |
1801
|
|
|
|
|
|
|
and L<Spreadsheet::Reader::ExcelXML::Worksheet/col_range> will then change as more |
1802
|
|
|
|
|
|
|
of the sheet is parsed. You can use the attribute L<file_boundary_flags |
1803
|
|
|
|
|
|
|
|/file_boundary_flags> or the methods L<Spreadsheet::Reader::ExcelXML::Worksheet/get_next_value> |
1804
|
|
|
|
|
|
|
or L<Spreadsheet::Reader::ExcelXML::Worksheet/fetchrow_arrayref> as alternates to |
1805
|
|
|
|
|
|
|
pre-testing for boundaries when iterating. The primary cause of these broken XML |
1806
|
|
|
|
|
|
|
elements in Excel 2007+ files are non-XML applications writing to or editing the |
1807
|
|
|
|
|
|
|
underlying xml. If you have an example of other broken xml files readable by the |
1808
|
|
|
|
|
|
|
Excel application that are not parsable by this package please L<submit them |
1809
|
|
|
|
|
|
|
|https://github.com/jandrew/Spreadsheet-XLSX-Reader-LibXML/issues> to my github repo |
1810
|
|
|
|
|
|
|
so I can work to improve this package. If you don't want your test case included |
1811
|
|
|
|
|
|
|
with the distribution I will use it to improve the package without publishing it. |
1812
|
|
|
|
|
|
|
|
1813
|
|
|
|
|
|
|
B<5.> I reserve the right to tweak the sub file L<caching breakpoints|/cache_positions> |
1814
|
|
|
|
|
|
|
over the next few releases. The goal is to have a default that appears to be the |
1815
|
|
|
|
|
|
|
best compromise by 2017-1-1. |
1816
|
|
|
|
|
|
|
|
1817
|
|
|
|
|
|
|
B<6.> This package provides support for L<SpreadsheetML |
1818
|
|
|
|
|
|
|
|https://odieweblog.wordpress.com/2012/02/12/how-to-read-and-write-office-2003-excel-xml-files/> |
1819
|
|
|
|
|
|
|
(Excel 2003) .xml extention documents. These files should include the header; |
1820
|
|
|
|
|
|
|
|
1821
|
|
|
|
|
|
|
<?mso-application progid="Excel.Sheet"?> |
1822
|
|
|
|
|
|
|
|
1823
|
|
|
|
|
|
|
to indicate their intended format. Please L<submit|/SUPPORT> any cases that |
1824
|
|
|
|
|
|
|
appear to behave differently than expected for .xml extention files that are |
1825
|
|
|
|
|
|
|
readable by the Excel application. I am also interested in cases where an out of |
1826
|
|
|
|
|
|
|
memory error occurs with an .xml extension file. This warning will stay till |
1827
|
|
|
|
|
|
|
2017-1-1. |
1828
|
|
|
|
|
|
|
|
1829
|
|
|
|
|
|
|
B<7.> This package uses two classes at the top to L<handle cleanup for some self |
1830
|
|
|
|
|
|
|
referential|http://perldoc.perl.org/5.8.9/perlobj.html#Two-Phased-Garbage-Collection> |
1831
|
|
|
|
|
|
|
object organization that I use. As a result the action taken on this package is |
1832
|
|
|
|
|
|
|
(mostly) implemented in L<Spreadsheet::Reader::ExcelXML::Workbook> code. I documented |
1833
|
|
|
|
|
|
|
most of that code API here. If you want to look at the raw code go there. |
1834
|
|
|
|
|
|
|
|
1835
|
|
|
|
|
|
|
=head1 BUILD / INSTALL from Source |
1836
|
|
|
|
|
|
|
|
1837
|
|
|
|
|
|
|
B<0.> Using L<cpanm|https://metacpan.org/pod/App::cpanminus> is much easier |
1838
|
|
|
|
|
|
|
than a source build! |
1839
|
|
|
|
|
|
|
|
1840
|
|
|
|
|
|
|
cpanm Spreadsheet::Reader::ExcelXML |
1841
|
|
|
|
|
|
|
|
1842
|
|
|
|
|
|
|
And then if you feel kindly L<App::cpanminus::reporter> |
1843
|
|
|
|
|
|
|
|
1844
|
|
|
|
|
|
|
cpanm-reporter |
1845
|
|
|
|
|
|
|
|
1846
|
|
|
|
|
|
|
B<1.> Download a compressed file with this package code from your favorite source |
1847
|
|
|
|
|
|
|
|
1848
|
|
|
|
|
|
|
=over |
1849
|
|
|
|
|
|
|
|
1850
|
|
|
|
|
|
|
L<github|https://github.com/jandrew/p5-spreadsheet-reader-excelxml> |
1851
|
|
|
|
|
|
|
|
1852
|
|
|
|
|
|
|
L<Meta::CPAN|https://metacpan.org/pod/Spreadsheet::Reader::ExcelXML> |
1853
|
|
|
|
|
|
|
|
1854
|
|
|
|
|
|
|
L<CPAN|http://search.cpan.org/~jandrew/Spreadsheet-Reader-ExcelXML/> |
1855
|
|
|
|
|
|
|
|
1856
|
|
|
|
|
|
|
=back |
1857
|
|
|
|
|
|
|
|
1858
|
|
|
|
|
|
|
B<2.> Extract the code from the compressed file. |
1859
|
|
|
|
|
|
|
|
1860
|
|
|
|
|
|
|
=over |
1861
|
|
|
|
|
|
|
|
1862
|
|
|
|
|
|
|
If you are using tar on a .tar.gz file this should work: |
1863
|
|
|
|
|
|
|
|
1864
|
|
|
|
|
|
|
tar -zxvf Spreadsheet-Reader-ExcelXML-v0.xx.tar.gz |
1865
|
|
|
|
|
|
|
|
1866
|
|
|
|
|
|
|
=back |
1867
|
|
|
|
|
|
|
|
1868
|
|
|
|
|
|
|
B<3.> Change (cd) into the extracted directory |
1869
|
|
|
|
|
|
|
|
1870
|
|
|
|
|
|
|
B<4.> Run the following |
1871
|
|
|
|
|
|
|
|
1872
|
|
|
|
|
|
|
=over |
1873
|
|
|
|
|
|
|
|
1874
|
|
|
|
|
|
|
(for Windows find what version of make was used to compile your perl) |
1875
|
|
|
|
|
|
|
|
1876
|
|
|
|
|
|
|
perl -V:make |
1877
|
|
|
|
|
|
|
|
1878
|
|
|
|
|
|
|
(then for Windows substitute the correct make function (s/make/dmake/g)? below) |
1879
|
|
|
|
|
|
|
|
1880
|
|
|
|
|
|
|
=back |
1881
|
|
|
|
|
|
|
|
1882
|
|
|
|
|
|
|
perl Makefile.PL |
1883
|
|
|
|
|
|
|
|
1884
|
|
|
|
|
|
|
make |
1885
|
|
|
|
|
|
|
|
1886
|
|
|
|
|
|
|
make test |
1887
|
|
|
|
|
|
|
|
1888
|
|
|
|
|
|
|
make install # As sudo/root |
1889
|
|
|
|
|
|
|
|
1890
|
|
|
|
|
|
|
make clean |
1891
|
|
|
|
|
|
|
|
1892
|
|
|
|
|
|
|
=head1 SUPPORT |
1893
|
|
|
|
|
|
|
|
1894
|
|
|
|
|
|
|
=over |
1895
|
|
|
|
|
|
|
|
1896
|
|
|
|
|
|
|
L<github Spreadsheet::Reader::Format/issues |
1897
|
|
|
|
|
|
|
|https://github.com/jandrew/p5-spreadsheet-reader-excelxml/issues> |
1898
|
|
|
|
|
|
|
|
1899
|
|
|
|
|
|
|
=back |
1900
|
|
|
|
|
|
|
|
1901
|
|
|
|
|
|
|
=head1 TODO |
1902
|
|
|
|
|
|
|
|
1903
|
|
|
|
|
|
|
=over |
1904
|
|
|
|
|
|
|
|
1905
|
|
|
|
|
|
|
B<1.> Write a chartsheet parser and functions |
1906
|
|
|
|
|
|
|
|
1907
|
|
|
|
|
|
|
B<2.> Add a pivot table reader (Not just read the values from the sheet) |
1908
|
|
|
|
|
|
|
|
1909
|
|
|
|
|
|
|
B<3.> Add calc chain methods |
1910
|
|
|
|
|
|
|
|
1911
|
|
|
|
|
|
|
B<4.> Add more exposure to workbook/worksheet formatting values |
1912
|
|
|
|
|
|
|
|
1913
|
|
|
|
|
|
|
=back |
1914
|
|
|
|
|
|
|
|
1915
|
|
|
|
|
|
|
=head1 AUTHOR |
1916
|
|
|
|
|
|
|
|
1917
|
|
|
|
|
|
|
=over |
1918
|
|
|
|
|
|
|
|
1919
|
|
|
|
|
|
|
Jed Lund |
1920
|
|
|
|
|
|
|
|
1921
|
|
|
|
|
|
|
jandrew@cpan.org |
1922
|
|
|
|
|
|
|
|
1923
|
|
|
|
|
|
|
=back |
1924
|
|
|
|
|
|
|
|
1925
|
|
|
|
|
|
|
=head1 CONTRIBUTORS |
1926
|
|
|
|
|
|
|
|
1927
|
|
|
|
|
|
|
This is the (likely incomplete) list of people who have helped |
1928
|
|
|
|
|
|
|
make this distribution what it is, either via code contributions, |
1929
|
|
|
|
|
|
|
patches, bug reports, help with troubleshooting, etc. A huge |
1930
|
|
|
|
|
|
|
'thank you' to all of them. Most were contributors to |
1931
|
|
|
|
|
|
|
L<Spreadsheet::XLSX::Reader::LibXML> but the contributions have |
1932
|
|
|
|
|
|
|
(hopefully) not been lost. |
1933
|
|
|
|
|
|
|
|
1934
|
|
|
|
|
|
|
=over |
1935
|
|
|
|
|
|
|
|
1936
|
|
|
|
|
|
|
L<Frank Maas|https://github.com/Frank071> |
1937
|
|
|
|
|
|
|
|
1938
|
|
|
|
|
|
|
L<Stuart Watt|https://github.com/morungos> |
1939
|
|
|
|
|
|
|
|
1940
|
|
|
|
|
|
|
L<Toby Inkster|https://github.com/tobyink> |
1941
|
|
|
|
|
|
|
|
1942
|
|
|
|
|
|
|
L<Breno G. de Oliveira|https://github.com/garu> |
1943
|
|
|
|
|
|
|
|
1944
|
|
|
|
|
|
|
L<Bill Baker|https://github.com/wdbaker54> |
1945
|
|
|
|
|
|
|
|
1946
|
|
|
|
|
|
|
L<H.Merijin Brand|https://github.com/Tux> |
1947
|
|
|
|
|
|
|
|
1948
|
|
|
|
|
|
|
L<Todd Eigenschink|mailto:todd@xymmetrix.com> |
1949
|
|
|
|
|
|
|
|
1950
|
|
|
|
|
|
|
L<Slaven ReziÄ|https://metacpan.org/author/SREZIC> |
1951
|
|
|
|
|
|
|
|
1952
|
|
|
|
|
|
|
=back |
1953
|
|
|
|
|
|
|
|
1954
|
|
|
|
|
|
|
=head1 COPYRIGHT |
1955
|
|
|
|
|
|
|
|
1956
|
|
|
|
|
|
|
This program is free software; you can redistribute |
1957
|
|
|
|
|
|
|
it and/or modify it under the same terms as Perl itself. |
1958
|
|
|
|
|
|
|
|
1959
|
|
|
|
|
|
|
The full text of the license can be found in the |
1960
|
|
|
|
|
|
|
LICENSE file included with this module. |
1961
|
|
|
|
|
|
|
|
1962
|
|
|
|
|
|
|
This software is copyrighted (c) 2016, 2017 by Jed Lund |
1963
|
|
|
|
|
|
|
|
1964
|
|
|
|
|
|
|
=head1 DEPENDENCIES |
1965
|
|
|
|
|
|
|
|
1966
|
|
|
|
|
|
|
=over |
1967
|
|
|
|
|
|
|
|
1968
|
|
|
|
|
|
|
L<perl 5.010|perl/5.10.0> |
1969
|
|
|
|
|
|
|
|
1970
|
|
|
|
|
|
|
L<Archive::Zip> |
1971
|
|
|
|
|
|
|
|
1972
|
|
|
|
|
|
|
L<Carp> |
1973
|
|
|
|
|
|
|
|
1974
|
|
|
|
|
|
|
L<Clone> |
1975
|
|
|
|
|
|
|
|
1976
|
|
|
|
|
|
|
L<DateTime::Format::Flexible> |
1977
|
|
|
|
|
|
|
|
1978
|
|
|
|
|
|
|
L<DateTimeX::Format::Excel> |
1979
|
|
|
|
|
|
|
|
1980
|
|
|
|
|
|
|
L<IO::File> |
1981
|
|
|
|
|
|
|
|
1982
|
|
|
|
|
|
|
L<Moose> - 2.1213 |
1983
|
|
|
|
|
|
|
|
1984
|
|
|
|
|
|
|
L<MooseX::HasDefaults::RO> |
1985
|
|
|
|
|
|
|
|
1986
|
|
|
|
|
|
|
L<MooseX::ShortCut::BuildInstance> - 1.032 |
1987
|
|
|
|
|
|
|
|
1988
|
|
|
|
|
|
|
L<MooseX::StrictConstructor> |
1989
|
|
|
|
|
|
|
|
1990
|
|
|
|
|
|
|
L<Type::Tiny> - 1.000 |
1991
|
|
|
|
|
|
|
|
1992
|
|
|
|
|
|
|
L<version> - 0.077 |
1993
|
|
|
|
|
|
|
|
1994
|
|
|
|
|
|
|
=back |
1995
|
|
|
|
|
|
|
|
1996
|
|
|
|
|
|
|
=head1 SEE ALSO |
1997
|
|
|
|
|
|
|
|
1998
|
|
|
|
|
|
|
=over |
1999
|
|
|
|
|
|
|
|
2000
|
|
|
|
|
|
|
L<Spreadsheet::Read> - generic Spreadsheet reader |
2001
|
|
|
|
|
|
|
|
2002
|
|
|
|
|
|
|
L<Spreadsheet::ParseExcel> - Excel binary files from 2003 and earlier |
2003
|
|
|
|
|
|
|
|
2004
|
|
|
|
|
|
|
L<Spreadsheet::ParseXLSX> - Excel version 2007 and later |
2005
|
|
|
|
|
|
|
|
2006
|
|
|
|
|
|
|
L<Spreadsheet::XLSX> - Excel version 2007 and later (Very rough) |
2007
|
|
|
|
|
|
|
|
2008
|
|
|
|
|
|
|
L<Log::Shiras|https://github.com/jandrew/Log-Shiras> |
2009
|
|
|
|
|
|
|
|
2010
|
|
|
|
|
|
|
=over |
2011
|
|
|
|
|
|
|
|
2012
|
|
|
|
|
|
|
All lines in this package that use Log::Shiras are commented out |
2013
|
|
|
|
|
|
|
|
2014
|
|
|
|
|
|
|
=back |
2015
|
|
|
|
|
|
|
|
2016
|
|
|
|
|
|
|
=back |
2017
|
|
|
|
|
|
|
|
2018
|
|
|
|
|
|
|
=begin html |
2019
|
|
|
|
|
|
|
|
2020
|
|
|
|
|
|
|
<a href="http://www.perlmonks.org/?node_id=706986"> |
2021
|
|
|
|
|
|
|
<img src="http://www.perlmonksflair.com/jandrew.jpg" alt="perl monks"> |
2022
|
|
|
|
|
|
|
</a> |
2023
|
|
|
|
|
|
|
|
2024
|
|
|
|
|
|
|
=end html |
2025
|
|
|
|
|
|
|
|
2026
|
|
|
|
|
|
|
=cut |
2027
|
|
|
|
|
|
|
|
2028
|
|
|
|
|
|
|
#########1#########2 main pod documentation end 5#########6#########7#########8#########9 |