File Coverage

lib/Spreadsheet/Reader/ExcelXML/SharedStrings.pm
Criterion Covered Total %
statement 14 14 100.0
branch n/a
condition n/a
subroutine 5 5 100.0
pod n/a
total 19 19 100.0


line stmt bran cond sub pod time code
1             package Spreadsheet::Reader::ExcelXML::SharedStrings;
2             our $AUTHORITY = 'cpan:JANDREW';
3 20     20   30000 use version; our $VERSION = version->declare('v0.16.8');
  20         32  
  20         148  
4             ###LogSD warn "You uncovered internal logging statements for Spreadsheet::Reader::ExcelXML::SharedStrings-$VERSION";
5              
6 20     20   2226 use 5.010;
  20         56  
7 20     20   74 use Moose::Role;
  20         37  
  20         146  
8             requires qw( should_cache_positions get_shared_string loaded_correctly );
9              
10 20     20   74850 use lib '../../../../../../lib';
  20         29  
  20         139  
11             ###LogSD use Log::Shiras::Telephone;
12              
13             #########1 Public Attributes 3#########4#########5#########6#########7#########8#########9
14              
15              
16              
17             #########1 Public Methods 3#########4#########5#########6#########7#########8#########9
18              
19             ###LogSD sub get_class_space{ 'SharedStringsInterface' }
20              
21             #########1 Private Attributes 3#########4#########5#########6#########7#########8#########9
22              
23              
24              
25             #########1 Private Methods 3#########4#########5#########6#########7#########8#########9
26              
27              
28              
29             #########1 Phinish 3#########4#########5#########6#########7#########8#########9
30              
31 20     20   2388 no Moose::Role;
  20         28  
  20         83  
32              
33             1;
34              
35             #########1 Documentation 3#########4#########5#########6#########7#########8#########9
36             __END__
37              
38             =head1 NAME
39              
40             Spreadsheet::Reader::ExcelXML::SharedStrings - The sharedStrings interface
41              
42             =head1 SYNOPSIS
43              
44             #!/usr/bin/env perl
45             $|=1;
46             use Data::Dumper;
47             use MooseX::ShortCut::BuildInstance qw( build_instance );
48             use Spreadsheet::Reader::ExcelXML::Workbook;
49             use Spreadsheet::Reader::ExcelXML::XMLReader;
50             use Spreadsheet::Reader::ExcelXML::SharedStrings;
51             use Spreadsheet::Reader::ExcelXML::XMLReader::PositionSharedStrings;
52              
53             # This whole thing is performed under the hood of
54             # Spreadsheet::Reader::ExcelXML
55             my $file_instance = build_instance(
56             package => 'SharedStringsInstance',
57             file => 'sharedStrings.xml',
58             workbook_inst => Spreadsheet::Reader::ExcelXML::Workbook->new,
59             superclasses =>[
60             'Spreadsheet::Reader::ExcelXML::XMLReader'
61             ],
62             add_roles_in_sequence =>[
63             'Spreadsheet::Reader::ExcelXML::XMLReader::PositionSharedStrings',
64             'Spreadsheet::Reader::ExcelXML::SharedStrings',
65             ],
66             );
67              
68             # Demonstrate output
69             print Dumper( $file_instance->get_shared_string( 3 ) );
70             print Dumper( $file_instance->get_shared_string( 12 ) );
71              
72             #######################################
73             # SYNOPSIS Screen Output
74             # 01: $VAR1 = {
75             # 02: 'raw_text' => ' '
76             # 03: };
77             # 04: $VAR1 = {
78             # 05: 'raw_text' => 'Superbowl Audibles'
79             # 06: };
80             #######################################
81              
82             =head1 DESCRIPTION
83              
84             This documentation is written to explain ways to use this module when writing your
85             own excel parser or extending this package. To use the general package for excel
86             parsing out of the box please review the documentation for L<Workbooks
87             |Spreadsheet::Reader::ExcelXML>, L<Worksheets
88             |Spreadsheet::Reader::ExcelXML::Worksheet>, and
89             L<Cells|Spreadsheet::Reader::ExcelXML::Cell>.
90              
91             This class is the interface for reading the sharedStrings file in a standard
92             xml based Excel file. The SYNOPSIS provides an example with a role added to
93             implement that type of reading ~PositionSharedStrings. The other role written
94             for this interface is L<Spreadsheet::Reader::ExcelXML::NamedSharedStrings>. It
95             does not provide connection to other file types or even the elements from other
96             files that are related to this file. This POD documents all functionality required
97             by this interface independent of where it is provided.
98              
99             =head2 Methods
100              
101             These are the primary ways to use this class. For additional SharedStrings
102             options see the L<Attributes|/Attributes> section.
103              
104             =head3 get_shared_string( $positive_int|$name )
105              
106             =over
107              
108             B<Definition:> This returns the data in the shared strings file identified
109             by either the $positive_int position for position based sharedStrings files
110             or $name in name based sharedStrings files. The position implementation is
111             L<Spreadsheet::Reader::ExcelXML::XMLReader::PositionSharedStrings>. The named
112             retrieval is implemented in L<Spreadsheet::Reader::ExcelXML::NamedSharedStrings>.
113              
114             B<Accepts:> $positive_int ( a positive integer ) or $name depending on the
115             associated role
116              
117             B<Returns:> a hash ref with the key 'raw_text' and all collated text for that
118             xml node as the value. If there is associated rich text in the node and
119             L<Spreadsheet::Reader::ExcelXML/group_return_type> is set to 'instance'
120             then it will also have a 'rich_text' key with the value set as an arrayref of
121             pairs (not sub array refs) with the first value being the position of the
122             raw_text from zero that the formatting is applied and the second position as
123             the settings for that format. Ex.
124              
125             {
126             raw_text => 'Hello World',
127             rich_text =>[
128             2,# Starting with the letter 'l' apply the format
129             {
130             'color' => {
131             'rgb' => 'FFFF0000'
132             },
133             'sz' => '11',
134             'b' => undef,
135             'scheme' => 'minor',
136             'rFont' => 'Calibri',
137             'family' => '2'
138             },
139             6,# Starting with the letter 'W' apply the format
140             {
141             'color' => {
142             'rgb' => 'FF0070C0'
143             },
144             'sz' => '20',
145             'b' => undef,
146             'scheme' => 'minor',
147             'rFont' => 'Calibri',
148             'family' => '2'
149             }
150             ]
151             }
152              
153             =back
154              
155             =head3 loaded_correctly
156              
157             =over
158              
159             B<Definition:> This interface will check the sharedStrings file for a
160             global scope of the number of shared strings and store it when the file
161             is opened. If the process was successful then this will return 1.
162              
163             B<Accepts:> nothing
164              
165             B<Returns:> (1|0) depending on if file opened as a shared strings file
166              
167             =back
168              
169             =head2 Attributes
170              
171             Data passed to new when creating an instance with this interface. For
172             modification of this(ese) attribute(s) see the listed 'attribute
173             methods'. For more information on attributes see
174             L<Moose::Manual::Attributes>. The easiest way to modify this(ese)
175             attribute(s) is during instance creation before it is passed to the
176             workbook or parser.
177              
178             =head3 file
179              
180             =over
181              
182             B<Definition:> This attribute holds the file handle for the file being read. If
183             the full file name and path is passed to the attribute the class will coerce that
184             into an L<IO::File> file handle.
185              
186             B<Default:> no default - this must be provided to read a file
187              
188             B<Required:> yes
189              
190             B<Range:> any unencrypted sharedStrings.xml file name and path or IO::File file
191             handle with that content.
192              
193             B<attribute methods> Methods provided to adjust this attribute
194              
195             =over
196              
197             B<set_file>
198              
199             =over
200              
201             B<Definition:> change the file value in the attribute (this will reboot
202             the file instance and lock the file)
203              
204             =back
205              
206             B<get_file>
207              
208             =over
209              
210             B<Definition:> Returns the file handle of the file even if a file name
211             was passed
212              
213             =back
214              
215             B<has_file>
216              
217             =over
218              
219             B<Definition:> this is used to see if the file loaded correctly.
220              
221             =back
222              
223             B<clear_file>
224              
225             =over
226              
227             B<Definition:> this clears (and unlocks) the file handle
228              
229             =back
230              
231             =back
232              
233             =back
234              
235             =head3 cache_positions
236              
237             =over
238              
239             B<Definition:> Especially for sheets with lots of stored text the
240             parser can slow way down when accessing each position. This is
241             because the text is not always stored sequentially and the reader
242             is a JIT linear parser. To go back it must restart and index
243             through each position till it gets to the right place. This is
244             especially true for excel sheets that have experienced any
245             significant level of manual intervention prior to being read.
246             This attribute turns (default) on caching for shared strings so
247             the parser only has to read through the shared strings once. When
248             the read is complete all the way to the end it will also release
249             the shared strings file in order to free up some space.
250             (a small win in exchange for the space taken by the cache). The
251             trade off here is that all intermediate shared strings are
252             L<fully|/get_shared_string( $positive_intE<verbar>$name )> read
253             before reading the target string. This means early reads will be
254             slower. For sheets that only have numbers stored or at least have
255             very few strings this will likely not be an initial hit (or speed
256             improvement). In order to minimize the physical size of the cache,
257             if there is only a text string stored in the shared strings position
258             then only the string will be stored (not as a value to a raw_text
259             hash key). It will then reconstitute into a hashref when requested.
260              
261             B<Default:> 1 = caching is on
262              
263             B<Range:> 1|0
264              
265             B<Attribute required:> yes
266              
267             B<attribute methods> Methods provided to adjust this attribute
268              
269             =over
270              
271             none - (will be autoset by L<Spreadsheet::Reader::ExcelXML/cache_positions>)
272              
273             =back
274              
275             =back
276              
277             =head1 SUPPORT
278              
279             =over
280              
281             L<github Spreadsheet::Reader::ExcelXML/issues
282             |https://github.com/jandrew/p5-spreadsheet-reader-excelxml/issues>
283              
284             =back
285              
286             =head1 TODO
287              
288             =over
289              
290             B<1.> Nothing yet
291              
292             =back
293              
294             =head1 AUTHOR
295              
296             =over
297              
298             Jed Lund
299              
300             jandrew@cpan.org
301              
302             =back
303              
304             =head1 COPYRIGHT
305              
306             This program is free software; you can redistribute
307             it and/or modify it under the same terms as Perl itself.
308              
309             The full text of the license can be found in the
310             LICENSE file included with this module.
311              
312             This software is copyrighted (c) 2016 by Jed Lund
313              
314             =head1 DEPENDENCIES
315              
316             =over
317              
318             L<Spreadsheet::Reader::ExcelXML> - the package
319              
320             =back
321              
322             =head1 SEE ALSO
323              
324             =over
325              
326             L<Spreadsheet::Read> - generic Spreadsheet reader
327              
328             L<Spreadsheet::ParseExcel> - Excel binary version 2003 and earlier (.xls files)
329              
330             L<Spreadsheet::XLSX> - Excel version 2007 and later
331              
332             L<Spreadsheet::ParseXLSX> - Excel version 2007 and later
333              
334             L<Log::Shiras|https://github.com/jandrew/Log-Shiras>
335              
336             =over
337              
338             All lines in this package that use Log::Shiras are commented out
339              
340             =back
341              
342             =back
343              
344             =cut
345              
346             #########1#########2 main pod documentation end 5#########6#########7#########8#########9