File Coverage

lib/Spreadsheet/Reader/ExcelXML/XMLReader/NamedSharedStrings.pm
Criterion Covered Total %
statement 30 36 83.3
branch 2 6 33.3
condition n/a
subroutine 9 10 90.0
pod 2 2 100.0
total 43 54 79.6


line stmt bran cond sub pod time code
1             package Spreadsheet::Reader::ExcelXML::XMLReader::NamedSharedStrings;
2             our $AUTHORITY = 'cpan:JANDREW';
3 3     3   10833 use version; our $VERSION = version->declare('v0.16.8');
  3         6  
  3         24  
4             ###LogSD warn "You uncovered internal logging statements for Spreadsheet::Reader::ExcelXML::XMLReader::NamedSharedStrings-$VERSION";
5              
6 3     3   385 use 5.010;
  3         8  
7 3     3   13 use Moose::Role;
  3         5  
  3         28  
8             requires qw(
9             set_error close_the_file advance_element_position
10             parse_element get_group_return_type squash_node
11             start_the_file_over current_named_node good_load
12              
13             );
14 3         30 use Types::Standard qw(
15             Int Bool HashRef is_HashRef ArrayRef Enum is_Int
16 3     3   11657 );
  3         3  
17 3     3   3170 use Carp qw( confess );
  3         5  
  3         180  
18 3     3   14 use lib '../../../../../../lib';
  3         4  
  3         23  
19             ###LogSD use Log::Shiras::Telephone;
20              
21             #########1 Public Attributes 3#########4#########5#########6#########7#########8#########9
22              
23             has cache_positions =>(
24             isa => Bool,
25             reader => 'should_cache_positions',
26             default => 1,
27             );
28              
29             #########1 Public Methods 3#########4#########5#########6#########7#########8#########9
30              
31             sub get_shared_string{
32 1     1 1 27 my( $self, $name ) = @_;
33             ###LogSD my $phone = Log::Shiras::Telephone->new( name_space =>
34             ###LogSD $self->get_all_space . '::get_shared_string', );
35 1         10 confess "Please post an example of this file to: " .
36             "https://github.com/jandrew/p5-spreadsheet-reader-excelxml/issues\n" .
37             "I don't have a good example of this type of file for parsing yet";
38             }
39              
40             sub load_unique_bits{
41 4     4 1 7 my( $self, ) = @_;
42             ###LogSD my $phone = Log::Shiras::Telephone->new( name_space =>
43             ###LogSD $self->get_all_space . '::load_unique_bits', );
44             ###LogSD $phone->talk( level => 'debug', message => [
45             ###LogSD "Setting the sharedStrings unique bits" ] );
46              
47             # Check for empty node and react (Sub element of SharedStrings is SharedString?)
48 4         7 my( $result, $node_name, $node_level, $result_ref );
49 4         21 my $current_node = $self->current_node_parsed;
50             ###LogSD $phone->talk( level => 'trace', message =>[
51             ###LogSD "The current node is:", $current_node ] );
52 4 50       16 if( (keys %$current_node)[0] eq 'SharedString' ){
53             ###LogSD $phone->talk( level => 'trace', message =>[
54             ###LogSD "Found the core properties node" ] );
55 0         0 $result = 2;
56 0         0 $node_name = 'cp:coreProperties';
57             }else{
58 4         26 ( $result, $node_name, $node_level, $result_ref ) =
59             $self->advance_element_position( 'SharedString' );
60             }
61 4 50       15 if( $result ){
62             ###LogSD $phone->talk( level => 'debug', message => [
63             ###LogSD "The SharedString node has an - Implied 'SharedStrings' node - this is totally incomplete" ] );
64 0         0 $self->start_the_file_over;
65 0         0 $self->good_load( 1 );
66             }else{
67 4         45 $self->set_error( "No 'SharedString' element with content found - can't parse this as a sharedStrings file" );
68 4         17 return undef;
69             }
70             }
71              
72             #########1 Private Attributes 3#########4#########5#########6#########7#########8#########9
73              
74              
75              
76             #########1 Private Methods 3#########4#########5#########6#########7#########8#########9
77              
78             sub _should_block_formats{
79 0     0     my( $self, ) = @_;
80             ###LogSD my $phone = Log::Shiras::Telephone->new( name_space =>
81             ###LogSD $self->get_all_space . '::_should_block_formats', );
82             ###LogSD $phone->talk( level => 'debug', message => [
83             ###LogSD "determining if formats should be blocked: " . $self->get_group_return_type ] );
84 0 0         return ( $self->get_group_return_type =~ /(unformatted|value|xml_value)/) ? 1 : 0 ;
85             }
86              
87             #########1 Phinish 3#########4#########5#########6#########7#########8#########9
88              
89 3     3   1109 no Moose::Role;
  3         4  
  3         16  
90              
91             1;
92              
93             #########1 Documentation 3#########4#########5#########6#########7#########8#########9
94             __END__
95              
96             =head1 NAME
97              
98             Spreadsheet::Reader::ExcelXML::XMLReader::NamedSharedStrings - Name based sharedStrings Reader
99              
100             =head1 SYNOPSIS
101              
102             #!/usr/bin/env perl
103             use Data::Dumper;
104             use MooseX::ShortCut::BuildInstance qw( build_instance );
105             use Spreadsheet::Reader::ExcelXML::XMLReader;
106             use Spreadsheet::Reader::ExcelXML::XMLReader::NamedSharedStrings;
107             use Spreadsheet::Reader::ExcelXML::SharedStrings;
108              
109             my $file_instance = build_instance(
110             package => 'SharedStringsInstance',
111             workbook_inst => Spreadsheet::Reader::ExcelXML::Workbook->new,
112             superclasses =>[
113             'Spreadsheet::Reader::ExcelXML::XMLReader'
114             ],
115             add_roles_in_sequence =>[
116             'Spreadsheet::Reader::ExcelXML::XMLReader::NamedSharedStrings',
117             ],
118             );
119              
120             =head1 DESCRIPTION
121              
122             This documentation is written to explain ways to use this module when writing your
123             own excel parser or extending this package. To use the general package for excel
124             parsing out of the box please review the documentation for L<Workbooks
125             |Spreadsheet::Reader::ExcelXML>, L<Worksheets
126             |Spreadsheet::Reader::ExcelXML::Worksheet>, and
127             L<Cells|Spreadsheet::Reader::ExcelXML::Cell>.
128              
129             This role is written to extend L<Spreadsheet::Reader::ExcelXML::XMLReader>.
130             It adds functionality to read name based sharedStrings files. It presents this
131             functionality in compliance with the top level L<interface
132             |Spreadsheet::Reader::ExcelXML::SharedStrings>. This POD only describes the
133             functionality incrementally provided by this module. For an overview of
134             sharedStrings.xml reading see L<Spreadsheet::Reader::ExcelXML::SharedStrings>.
135              
136             =head1 WARNING
137              
138             If your Excel 2003 xml based file does not include a SharedStrings portion
139             then ignore this warning since it will not matter. I don't have an example of an
140             Excel 2003 xml file that has SharedStrings content. I'm not even sure that
141             any generators build flat SpreadsheetML files with a SharedStrings subsection.
142             As a consequence this role is just a placeholder to allow the rest of the
143             package to work on Excel 2003 xml files. If you are actually parsing an xml
144             file that contains a SharedStrings portion then your parse will die with the
145             request to submit an issue on the L<github repo
146             |https://github.com/jandrew/p5-spreadsheet-reader-excelxml/issues>. Please
147             include the file that is failing. I will need an example in order to
148             complete this section of the parser.
149              
150             =head2 Requires
151              
152             These are the methods required by this role and their default provider. All
153             methods are imported straight across with no re-naming.
154              
155             =over
156              
157             L<Spreadsheet::Reader::ExcelXML::Error/set_error>
158              
159             L<Spreadsheet::Reader::ExcelXML::XMLReader/good_load>
160              
161             L<Spreadsheet::Reader::ExcelXML::XMLReader/close_the_file>
162              
163             L<Spreadsheet::Reader::ExcelXML::XMLReader/advance_element_position>
164              
165             L<Spreadsheet::Reader::ExcelXML::XMLReader/start_the_file_over>
166              
167             L<Spreadsheet::Reader::ExcelXML::XMLReader/parse_element>
168              
169             L<Spreadsheet::Reader::ExcelXML::XMLReader/squash_node>
170              
171             L<Spreadsheet::Reader::ExcelXML::XMLReader/current_named_node>
172              
173             L<Spreadsheet::Reader::ExcelXML::Workbook/get_group_return_type>
174              
175             =back
176              
177             =head2 Methods
178              
179             These are the primary ways to use this class. For additional SharedStrings options
180             see the L<Attributes|/Attributes> section.
181              
182             =head3 get_shared_string( $name)
183              
184             =over
185              
186             B<Definition:> This is the primary method that needs an example for completion.
187              
188             B<Accepts:> $name = the node name of the shared string to be returned
189              
190             B<Returns:> dies with a message to submit the file to my L<github repo
191             |https://github.com/jandrew/p5-spreadsheet-reader-excelxml/issues>
192              
193             =back
194              
195             =head3 load_unique_bits
196              
197             =over
198              
199             B<Definition:> When the xml file first loads this is available to pull customized data.
200             It mostly pulls metadata and stores it in hidden attributes for use later. If all goes
201             according to plan it sets L<Spreadsheet::Reader::ExcelXML::XMLReader/good_load> to 1.
202              
203             B<Accepts:> Nothing
204              
205             B<Returns:> Nothing
206              
207             =back
208              
209             =head2 Attributes
210              
211             Data passed to new when creating an instance of this class. For
212             modification of this(ese) attribute(s) see the listed 'attribute
213             methods'. For more information on attributes see
214             L<Moose::Manual::Attributes>. The easiest way to modify this(ese)
215             attribute(s) is when a class instance is created and before it is
216             passed to the workbook or parser.
217              
218             =head3 cache_positions
219              
220             =over
221              
222             B<Definition:> Especially for sheets with lots of stored text the
223             parser can slow way down when accessing each position. This is
224             because the text is not always stored sequentially and the reader
225             is a JIT linear parser. To go back it must restart and index
226             through each position till it gets to the right place. This is
227             especially true for excel sheets that have experienced any
228             significant level of manual intervention prior to being read.
229             This attribute turns (default) on caching for shared strings so
230             the parser only has to read through the shared strings once. When
231             the read is complete all the way to the end it will also release
232             the shared strings file in order to free up some space.
233             (a small win in exchange for the space taken by the cache). The
234             trade off here is that all intermediate shared strings are
235             L<fully|/get_shared_string( $name)> read
236             before reading the target string. This means early reads will be
237             slower. For sheets that only have numbers stored or at least have
238             very few strings this will likely not be an initial hit (or speed
239             improvement). In order to minimize the physical size of the cache,
240             if there is only a text string stored in the shared strings position
241             then only the string will be stored (not as a value to a raw_text
242             hash key). It will then be reconstituted into a hashref when requested.
243              
244             B<Default:> 1 = caching is on
245              
246             B<Range:> 1|0
247              
248             B<Attribute required:> yes
249              
250             B<attribute methods> Methods provided to adjust this attribute
251              
252             =over
253              
254             none - (will be autoset by L<Spreadsheet::Reader::ExcelXML/cache_positions>)
255              
256             =back
257              
258             =back
259              
260             =head1 SUPPORT
261              
262             =over
263              
264             L<github Spreadsheet::Reader::ExcelXML/issues
265             |https://github.com/jandrew/p5-spreadsheet-reader-excelxml/issues>
266              
267             =back
268              
269             =head1 TODO
270              
271             =over
272              
273             B<1.> Nothing yet
274              
275             =back
276              
277             =head1 AUTHOR
278              
279             =over
280              
281             Jed Lund
282              
283             jandrew@cpan.org
284              
285             =back
286              
287             =head1 COPYRIGHT
288              
289             This program is free software; you can redistribute
290             it and/or modify it under the same terms as Perl itself.
291              
292             The full text of the license can be found in the
293             LICENSE file included with this module.
294              
295             This software is copyrighted (c) 2016 by Jed Lund
296              
297             =head1 DEPENDENCIES
298              
299             =over
300              
301             L<Spreadsheet::Reader::ExcelXML> - the package
302              
303             =back
304              
305             =head1 SEE ALSO
306              
307             =over
308              
309             L<Spreadsheet::Read> - generic Spreadsheet reader
310              
311             L<Spreadsheet::ParseExcel> - Excel binary version 2003 and earlier (.xls files)
312              
313             L<Spreadsheet::XLSX> - Excel version 2007 and later
314              
315             L<Spreadsheet::ParseXLSX> - Excel version 2007 and later
316              
317             L<Log::Shiras|https://github.com/jandrew/Log-Shiras>
318              
319             =over
320              
321             All lines in this package that use Log::Shiras are commented out
322              
323             =back
324              
325             =back
326              
327             =cut
328              
329             #########1#########2 main pod documentation end 5#########6#########7#########8#########9