File Coverage

blib/lib/Spreadsheet/BasicReadNamedCol.pm
Criterion Covered Total %
statement 64 80 80.0
branch 15 30 50.0
condition 5 12 41.6
subroutine 9 10 90.0
pod 5 6 83.3
total 98 138 71.0


line stmt bran cond sub pod time code
1             #
2             # BasicReadNamedCol.pm
3             #
4             # Synopsis: see POD at end of file
5             # Description: see POD at end of file
6             #
7             #--------------------------------------------------
8            
9             package Spreadsheet::BasicReadNamedCol;
10            
11             our $VERSION = sprintf("%d.%02d", q'$Revision: 1.3 $' =~ /(\d+)\.(\d+)/);
12            
13             #--------------------------------------------------
14             #
15             #
16            
17             #-- Linage
18             #---------
19             our @ISA = ( 'Spreadsheet::BasicRead' );
20            
21            
22             #-- Required Modules
23             #-------------------
24 1     1   9863 use strict;
  1         3  
  1         45  
25 1     1   7 use warnings;
  1         2  
  1         38  
26 1     1   2289 use Spreadsheet::BasicRead;
  1         211689  
  1         445  
27            
28            
29             sub new
30             {
31 1     1 1 184 my $self = shift;
32 1         15 $self = $self->SUPER::new(@_);
33            
34             # Process any arguments specific to this package
35 1 50       23767 my %args = @_ if (@_ > 1);
36            
37 1 50       6 if (defined $args{columns})
38             {
39 0 0       0 unless (ref($args{columns}) eq 'ARRAY')
40             {
41 0         0 $self->logexp("Expected the argument to 'columns' to be an ARRAY reference! BYE\n");
42             }
43            
44 0         0 $self->{columns} = $args{columns};
45             }
46            
47 1 50       6 if (defined $args{skipHeadings})
48             {
49 0         0 $self->{skipHeadings} = $args{skipHeadings};
50             }
51            
52             # By default, data has not been ordered
53 1         3 $self->{isOrdered} = 0;
54            
55 1         9 return $self;
56             }
57            
58            
59            
60             sub setColumns
61             {
62 1     1 1 114 my $self = shift;
63 1         3 my @cols = @_;
64            
65 1 50       5 if (ref($cols[0]) eq 'ARRAY')
66             {
67 0         0 $self->{columns} = $cols[0];
68             }
69             else
70             {
71 1         5 $self->{columns} = \@cols;
72             }
73             }
74            
75            
76             sub setHeadingRow
77             {
78 0     0 1 0 my $self = shift;
79 0         0 my $headingRow = shift;
80            
81 0 0 0     0 $self->{headingRow} = ($headingRow >= $self->{ssSheet}->{MinRow} &&
82             $headingRow <= $self->{ssSheet}->{MaxRow}) ?
83             $headingRow : $self->{ssSheet}->{MinRow};
84             }
85            
86            
87             sub getFirstRow
88             {
89 4     4 1 40 my $self = shift;
90            
91 4 50       13 return undef unless defined($self->{ssSheet});
92            
93             # Do we have the columns defined?
94 4 50       10 unless (defined $self->{columns})
95             {
96 0         0 $self->logexp("Need to define the name of the columns before calling getFirstRow\nDefine the column names either in the call to new() or using setColumns()\n");
97             }
98            
99 4   33     20 my $row = $self->{headingRow} || $self->{ssSheet}->{MinRow};
100 4         7 $self->{ssSheetRow} = $row;
101            
102            
103             # Loop through each column and put into array
104 4         4 my $x = 0;
105 4         7 my @data = ();
106 4         6 my $blank = 0;
107 4         16 for (my $col = $self->{ssSheet}->{MinCol}; $col <= $self->{ssSheet}->{MaxCol}; $x++, $col++)
108             {
109 1     1   14 no warnings qw(uninitialized);
  1         2  
  1         706  
110            
111             # Note that this is the formatted value of the cell (ie what you see, no the real value)
112 12         38 $data[$x] = $self->cellValue($row, $col);
113            
114             # remove leading and trailing whitespace
115 12         198 $data[$x] =~ s/^\s+//;
116 12         32 $data[$x] =~ s/\s+$//;
117 12 50       63 $blank++ unless $data[$x] =~ /^$/;
118             }
119            
120             # Check if this row is blank, if it is keep getting rows until we have some data
121 4 50       21 if ($blank == 0)
122             {
123 0         0 my $currentBlankSetting = $self->{skipBlankRows};
124 0         0 $self->{skipBlankRows} = 1;
125 0         0 my $res = $self->getNextRow();
126 0         0 $self->{skipBlankRows} = $currentBlankSetting;
127 0         0 @data = @$res;
128             }
129            
130             # Determine the correct order to return the rows
131 4         6 my @order;
132 4         5 foreach my $name (@{$self->{columns}})
  4         9  
133             {
134 12 50 33     56 next unless (defined($name) && $name ne ''); # Skip this column
135            
136 12         15 my $colNum = 0;
137 12         11 my $found = 0;
138 12         16 foreach my $realColName (@data)
139             {
140 24 100       208 if ($realColName =~ /^\Q$name/i)
141             {
142 12         16 push @order, $colNum;
143 12         13 $found = 1;
144 12         24 last;
145             }
146            
147 12         17 $colNum++;
148             }
149            
150             # Quit if we can't find a column name
151 12 50       37 unless ($found)
152             {
153 0         0 $self->logexp("Could not find column '$name' on sheet '", $self->currentSheetName(), "', Quitting\n");
154 0         0 return undef;
155             }
156             }
157            
158             # Store the order
159 4         11 $self->{colOrder} = \@order;
160            
161 4 50       17 return $self->{skipHeadings} ? $self->getNextRow() : $self->returnOrdered(\@data);
162             }
163            
164            
165            
166             sub getNextRow
167             {
168 13     13 1 1037 my $self = shift;
169            
170 13         25 $self->{isOrdered} = 0;
171 13         49 my $data = $self->SUPER::getNextRow();
172            
173 13         471 return $self->returnOrdered($data);
174             }
175            
176            
177            
178             sub returnOrdered
179             {
180 17     17 0 28 my ($self, $data) = @_;
181            
182 17 100 100     85 return $data if ($self->{isOrdered} || !defined($data));
183            
184 10         14 my @ordered;
185            
186 10         10 foreach my $col (@{$self->{colOrder}})
  10         24  
187             {
188 30         59 push @ordered, $data->[$col];
189             }
190            
191 10         23 $self->{isOrdered} = 1;
192 10         43 return \@ordered;
193             }
194            
195            
196            
197             #####################################################################
198             # DO NOT REMOVE THE FOLLOWING LINE, IT IS NEEDED TO LOAD THIS LIBRARY
199             1;
200            
201            
202             =head1 NAME
203            
204             Spreadsheet::BasicReadNamedCol - Methods to easily read data from spreadsheets with columns in the order you want based on the names of the column headings
205            
206            
207             =head1 DESCRIPTION
208            
209             Provides methods for simple reading of a Excel spreadsheet, where the columns
210             are returned in the order defined.
211            
212             Assumes a specific format of the spreadsheet where the first row of
213             data defined the names of the columns.
214            
215            
216             =head1 SYNOPSIS
217            
218             use Spreadsheet::BasicReadNamedCol;
219            
220             my $xlsFileName = 'Excel Price Sheet 021203.xls';
221             my @columnHeadings = (
222             'Supplier Part Number',
223             'Customer Price',
224             'Currency Code',
225             'Price UOM',
226             'Short Description',
227             'Long Description',
228             );
229            
230             my $ss = new Spreadsheet::BasicReadNamedCol($xlsFileName) ||
231             die "Could not open '$xlsFileName': $!";
232             $ss->setColumns(@columnHeadings);
233            
234             # Print each row of the spreadsheet in the order defined in
235             # the columnHeadings array
236             my $row = 0;
237             while (my $data = $ss->getNextRow())
238             {
239             $row++;
240             print join('|', $row, @$data), "\n";
241             }
242            
243            
244             =head1 REQUIRED MODULES
245            
246             The following modules are required:
247            
248             Spreadsheet::BasicRead
249             Spreadsheet::ParseExcel
250            
251            
252             =head1 METHODS
253            
254             There are no class methods, the object methods are described below.
255             Private class method start with the underscore character '_' and
256             should be treated as I.
257            
258            
259             =head2 new
260            
261             Called to create a new BasicReadNamedCol object. The arguments can
262             be either a single string (see L<'SYNOPSIS'|"SYNOPSIS">)
263             which is taken as the filename of the spreadsheet of as named arguments.
264            
265             eg. my $ss = Spreadsheet::BasicReadNamedCol->new(
266             columns => \@columnNames,
267             fileName => 'MyExcelSpreadSheet.xls',
268             skipHeadings => 1,
269             skipBlankRows => 1,
270             log => $log,
271             );
272            
273             The following named arguments are available:
274            
275             =over 4
276            
277             =item columns
278            
279             Value expected to be an array reference to a list of column
280             names that appear in the first line of the spreadsheet. The
281             order of the column names defines the order in which the data
282             is returned by the L<'getNextRow'|"getNextRow"> method.
283            
284             This is really useful where spreadsheet files from sources out
285             of your control are not consistant in the ordering of columns.
286            
287             Note that the match on column name uses the following pattern match:
288            
289             if ($realColName =~ /^\Q$name/i)
290            
291             where:
292             realColName - is the actual column name in the spreadsheet and
293             name - is the pattern to match
294            
295            
296             =item skipHeadings
297            
298             Don't output the headings line in the first call to
299             L<'getNextRow'|"getNextRow"> if true.
300            
301            
302             =item skipBlankRows
303            
304             Skip blank lines in the spreadsheet if true.
305            
306            
307             =item setColumns(array or array_ref)
308            
309             Sets the order that columns will be returned in based on the
310             names in the array provided. The names are expected to match
311             the values in the first row of the spreadsheet.
312            
313             =item log
314            
315             Use the File::Log object to log exceptions.
316            
317            
318             =item fileName
319            
320             The name (and optionally path) of the spreadsheet file to process.
321            
322             =back
323            
324             =head2 getNextRow()
325            
326             Get the next row of data from the spreadsheet. The data is
327             returned as an array reference.
328            
329             eg. $rowDataArrayRef = $ss->getNextRow();
330            
331            
332             =head2 numSheets()
333            
334             Returns the number of sheets in the spreadsheet
335            
336            
337             =head2 openSpreadsheet(fileName)
338            
339             Open a new spreadsheet file and set the current sheet to the first
340             sheet. The name and optionally path of the
341             spreadsheet file is a required argument to this method.
342            
343            
344             =head2 currentSheetNum()
345            
346             Returns the current sheet number or undef if there is no current sheet.
347             L<'setCurrentSheetNum'|"setCurrentSheetNum"> can be called to set the
348             current sheet.
349            
350            
351             =head2 currentSheetName()
352            
353             Return the name of the current sheet or undef if the current sheet is
354             not defined. see L<'setCurrentSheetNum'|"setCurrentSheetNum">.
355            
356            
357             =head2 setCurrentSheetNum(num)
358            
359             Sets the current sheet to the integer value 'num' passed as the required
360             argument to this method. Note that this should not be bigger than
361             the value returned by L<'numSheets'|"numSheets">.
362            
363            
364             =head2 getNextSheet()
365            
366             Returns the next sheet "ssBook" object or undef if there are no more sheets
367             to process. If there is no current sheet defined the first sheet
368             is returned.
369            
370            
371             =head2 getFirstSheet()
372            
373             Returns the first sheet "ssBook" object.
374            
375            
376             =head2 cellValue(row, col)
377            
378             Returns the value of the cell defined by (row, col)in the current sheet.
379            
380            
381             =head2 getFirstRow()
382            
383             Returns the first row of data from the spreadsheet (possibly skipping the
384             column headings L<'skipHeadings'|"new">) as an array reference.
385            
386            
387             =head2 setHeadingRow(rowNumber)
388            
389             Sets the effective minimum row for the spreadsheet to 'rowNumber', since it
390             is assumed that the heading is on this row and anything above the heading is
391             not relavent.
392            
393             B the row (and column) numbers are zero indexed.
394            
395            
396             =head2 logexp(message)
397            
398             Logs an exception message (can be a list of strings) using the File::Log
399             object if it was defined and then calls die message.
400            
401            
402             =head2 logmsg(debug, message)
403            
404             If a File::Log object was passed as a named argument L<'new'|"new">) and
405             if 'debug' (integer value) is equal to or greater than the current debug
406             Level (see File::Log) then the message is added to the log file.
407            
408             If a File::Log object was not passed to new then the message is output to
409             STDERR.
410            
411            
412             =head1 KNOWN ISSUES
413            
414             None
415            
416             =head1 SEE ALSO
417            
418             Spreadsheet::BasicRead
419            
420             =head1 AUTHOR
421            
422             Greg George, IT Technology Solutions P/L, Australia
423             Mobile: 0404-892-159, Email: gng@cpan.org
424            
425             =head1 LICENSE
426            
427             Copyright (c) 1999- Greg George. All rights reserved. This
428             program is free software; you can redistribute it and/or modify it under
429             the same terms as Perl itself.
430            
431             =head1 CVS ID
432            
433             $Id: BasicReadNamedCol.pm,v 1.3 2006/04/30 05:57:29 Greg Exp $
434            
435            
436             =head1 UPDATE HISTORY
437            
438             $Log: BasicReadNamedCol.pm,v $
439             Revision 1.3 2006/04/30 05:57:29 Greg
440             - removed tabs from file
441            
442             Revision 1.2 2006/03/07 10:03:26 Greg
443             - minor pod changes
444            
445             Revision 1.1 2006/03/05 03:07:58 Greg
446             - initial CPAN upload
447            
448            
449             Revision 1.0 2003/12/02 23:58:34 gxg6
450             - Initial development, need POD
451            
452             =cut
453            
454            
455             #---< End of File >---#