| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Spreadsheet::Compare::Reader; |
|
2
|
|
|
|
|
|
|
|
|
3
|
4
|
|
|
4
|
|
2907
|
use Mojo::Base -base, -signatures; |
|
|
4
|
|
|
|
|
11
|
|
|
|
4
|
|
|
|
|
29
|
|
|
4
|
4
|
|
|
4
|
|
1178
|
use Spreadsheet::Compare::Common; |
|
|
4
|
|
|
|
|
10
|
|
|
|
4
|
|
|
|
|
36
|
|
|
5
|
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
#<<< |
|
7
|
|
|
|
|
|
|
use Spreadsheet::Compare::Config { |
|
8
|
0
|
|
|
|
|
0
|
identity => sub { [] }, |
|
9
|
36
|
|
|
|
|
283
|
skip => sub { {} }, |
|
10
|
4
|
|
|
|
|
62
|
chunk => undef, |
|
11
|
|
|
|
|
|
|
has_header => undef, |
|
12
|
4
|
|
|
4
|
|
41
|
}, make_attributes => 1; |
|
|
4
|
|
|
|
|
10
|
|
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
# Capability flag, default 0 (false). init() only installs a chunker when this
# is true and otherwise warns if the "chunk" option was given.
has can_chunk => 0, ro => 1; |
|
15
|
|
|
|
|
|
|
# Read-only flag, initially undef. Per the POD it is true once the reader has
# no more records to read; it is not set anywhere in this base class.
has exhausted => undef, ro => 1; |
|
16
|
|
|
|
|
|
|
# Code reference producing the chunk name for a record. init() stores the real
# one in $self->{__ro__chunker} when "chunk" is set and can_chunk is true.
# NOTE(review): the empty sub is presumably a default builder yielding undef --
# confirm against the project's "has" implementation.
has chunker => sub {}, ro => 1; |
|
17
|
|
|
|
|
|
|
# Code reference deciding whether a record has to be skipped. init() stores the
# real one in $self->{__ro__skipper} when the "skip" option is true.
# NOTE(review): the empty sub is presumably a default builder yielding undef --
# confirm against the project's "has" implementation.
has skipper => sub {}, ro => 1; |
|
18
|
|
|
|
|
|
|
# Read-only, initially undef. Per the POD: array reference with the header
# names or, without a named header, the zero based column indexes. h2i is
# derived from it.
has header => undef, ro => 1; |
|
19
|
|
|
|
|
|
|
# Read-only array reference (default: a fresh empty array). Per the POD it
# holds the currently read data after a call to fetch.
has result => sub { [] }, ro => 1; |
|
20
|
|
|
|
|
|
|
# Side of the comparison derived from index: 'right' for a true index,
# 'left' otherwise.
has side => sub { $_[0]->index ? 'right' : 'left' }, ro => 1; |
|
21
|
|
|
|
|
|
|
# Name of the side used for reporting (see POD). The default is computed
# exactly like "side": 'right' for a true index, 'left' otherwise.
has side_name => sub { $_[0]->index ? 'right' : 'left' }, ro => 1; |
|
22
|
|
|
|
|
|
|
# Position of this reader in the comparison (per the POD: 0 = left, 1 = right).
# There is no usable default: the builder croaks when the value was not set.
has index => sub { croak 'Parameter "index" not set' }, ro => 1; |
|
23
|
|
|
|
|
|
|
#>>> |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
# Lookup table from header name to its zero based column position, derived
# from the "header" attribute. When a name occurs more than once the last
# position wins.
has h2i => sub {
    my $names = $_[0]->header;

    my %index_for;
    $index_for{ $names->[$_] } = $_ for 0 .. $#$names;

    return \%index_for;
};
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
|
|
31
|
72
|
|
|
72
|
0
|
119
|
# Prepare the helper closures of a freshly constructed reader.
#
# - installs the skipper when the "skip" option is true
# - installs the chunker when "chunk" is defined and the reader can chunk,
#   and emits a warning when "chunk" is defined but chunking is unsupported
#
# Both closures are written directly into the __ro__* slots of the object
# hash. Returns the reader itself.
sub init ($self) {

    my $skip = $self->skip;
    $self->{__ro__skipper} = _make_skipper($skip) if $skip;

    my $chunk = $self->chunk;
    if ( defined $chunk ) {
        if ( $self->can_chunk ) {
            $self->{__ro__chunker} = _make_chunker($chunk);
        }
        else {
            WARN 'chunking not supported by ', ref($self), "\n";
        }
    }

    return $self;
}
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
|
|
41
|
0
|
|
|
0
|
1
|
0
|
# Abstract method, has to be overridden by the reader subclass; always croaks.
# The nameless slurpy signature accepts the invocant (and any arguments). An
# empty signature would reject a method call with "Too many arguments" before
# the explanatory message below could ever be raised.
sub setup (@) { croak 'Method "setup" not implemented by subclass' }
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
42
|
|
|
|
|
|
|
|
|
43
|
0
|
|
|
0
|
1
|
0
|
# Abstract method, has to be overridden by the reader subclass; always croaks.
# The nameless slurpy signature accepts the invocant plus the documented $size
# argument. An empty signature would reject a method call with "Too many
# arguments" before the explanatory message below could ever be raised.
sub fetch (@) { croak 'Method "fetch" not implemented by subclass' }
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
44
|
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
# Build the record filter for the "skip" option.
#
# $skip is a hash reference mapping a column to a regular expression given as
# string; a leading '!' negates the expression.
# Returns a code reference that takes a record object (it only needs to offer
# a val($column) method) and yields true when at least one rule applies, i.e.
# when the record should be skipped. All rules are evaluated on every call.
#<<<
sub _make_skipper ($skip) {
    my %rule_for;
    for my $column ( keys %$skip ) {
        # separate the optional negation marker from the expression itself
        my( $bang, $pattern ) = $skip->{$column} =~ /^(!?)(.+)$/;
        $rule_for{$column} = {
            negate => $bang ? 1 : 0,
            regex  => qr/$pattern/,
        };
    }

    return sub ($rec) {
        # a rule applies when "value matches" and "rule is negated" differ
        my @applying = pairgrep { ( $rec->val($a) =~ /$b->{regex}/ ) ^ $b->{negate} } %rule_for;
        return any { $_ } @applying;
    };
}
|
62
|
|
|
|
|
|
|
#>>> |
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
# Build the chunk name generator for the "chunk" option.
#
# $chunk is either a plain column or a hash reference of the form
# { column => <column>, regex => <regex> }.
# Returns a code reference that takes a record object (it only needs to offer
# a val($column) method) and returns the chunk name for that record:
#   - plain column: the value of that column (may be undef)
#   - hash form:    the first capture of the regex applied to the column
#                   value, or '' when there is none
#
# The form of $chunk is inspected and the regex compiled once, here, instead
# of once per record inside the returned closure.
sub _make_chunker ( $chunk ) {
    DEBUG "returning chunker";

    if ( ref($chunk) ) {
        my $key   = $chunk->{column};
        my $regex = qr/$chunk->{regex}/;

        return sub ($rec) {
            my( $chunk_name ) = $rec->val($key) =~ $regex;
            $chunk_name //= '';

            DEBUG "Chunk name: $chunk_name";

            return $chunk_name;
        };
    }

    return sub ($rec) {
        my $chunk_name = $rec->val($chunk);

        # the value is returned untouched, only the log message is guarded
        # against an undefined column value
        DEBUG "Chunk name: " . ( $chunk_name // '' );

        return $chunk_name;
    };
}
|
86
|
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
1; |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
=head1 NAME |
|
91
|
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
Spreadsheet::Compare::Reader - Abstract Reader Base Class |
|
93
|
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
95
|
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
package Spreadsheet::Compare::MyReader; |
|
97
|
|
|
|
|
|
|
use Mojo::Base 'Spreadsheet::Compare::Reader'; |
|
98
|
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
sub setup {...} |
|
100
|
|
|
|
|
|
|
sub fetch {...} |
|
101
|
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
103
|
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
Spreadsheet::Compare::Reader is an abstract base class for spreadsheet reader backends. |
|
105
|
|
|
|
|
|
|
Available reader classes in this distribution are |
|
106
|
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
=over 4 |
|
108
|
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
=item * L<Spreadsheet::Compare::Reader::CSV> for CSV files |
|
110
|
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
=item * L<Spreadsheet::Compare::Reader::DB> for Databases |
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
=item * L<Spreadsheet::Compare::Reader::FIX> for fixed size column files |
|
114
|
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=item * L<Spreadsheet::Compare::Reader::WB> for various spreadsheet formats like XLSX, ODS, ... |
|
116
|
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=back |
|
118
|
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
This module defines the methods and attributes that are used by a Spreadsheet::Compare::Reader |
|
120
|
|
|
|
|
|
|
subclass. The methods setup and fetch have to be overridden by the derived class and will |
|
121
|
|
|
|
|
|
|
croak otherwise. |
|
122
|
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
When subclassing consider using L<Spreadsheet::Compare::Common> for convenience. |
|
124
|
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
=head1 ATTRIBUTES |
|
126
|
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
If not stated otherwise, read write attributes can be set as options from the config file |
|
128
|
|
|
|
|
|
|
passed to L<Spreadsheet::Compare> or L<spreadcomp>. |
|
129
|
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=head2 can_chunk |
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
(B<readonly>) Will be set to a true value by the Reader module if the Reader supports |
|
133
|
|
|
|
|
|
|
chunking. |
|
134
|
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
=head2 chunk |
|
136
|
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
possible values: <column> |
|
138
|
|
|
|
|
|
|
or |
|
139
|
|
|
|
|
|
|
{ column => <column>, regex => <regex> }, |
|
140
|
|
|
|
|
|
|
default: undef |
|
141
|
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
Process the input in batches defined by the content of a column. When the |
|
143
|
|
|
|
|
|
|
regex form is used it has to have a capturing expression. The result will |
|
144
|
|
|
|
|
|
|
be used as identifier for the chunk. For example: |
|
145
|
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
chunk: |
|
147
|
|
|
|
|
|
|
column: RECORD_NBR |
|
148
|
|
|
|
|
|
|
regex: '(\d{2})$' |
|
149
|
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
will take the last two digits of the numbers in column RECORD_NBR, resulting |
|
151
|
|
|
|
|
|
|
in up to 100 batches. This is useful for very large files that do not fit |
|
152
|
|
|
|
|
|
|
entirely into memory (see L<Spreadsheet::Compare/LIMITING MEMORY USAGE>). |
|
153
|
|
|
|
|
|
|
Reading for each batch will be handled sequentially to save memory. |
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
All records will be read twice, first for creating the lookup info for the chunks |
|
156
|
|
|
|
|
|
|
and second for the actual data. This will significantly increase execution time. |
|
157
|
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
=head2 chunker |
|
159
|
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
(B<readonly>) A reference to a generated subroutine that returns the chunk name |
|
161
|
|
|
|
|
|
|
for a record based on the settings from L</chunk>. This will be called from the |
|
162
|
|
|
|
|
|
|
Reader subclasses. |
|
163
|
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
=head2 exhausted |
|
165
|
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
(B<readonly>) Will be true if the reader has no more records to read. |
|
167
|
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
=head2 has_header |
|
169
|
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
possible values: bool |
|
171
|
|
|
|
|
|
|
default: undefined |
|
172
|
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
Specify whether the file contains a header line. |
|
174
|
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
=head2 header |
|
176
|
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
(B<readonly>) A reference to an array with the header names or (in case there is no |
|
178
|
|
|
|
|
|
|
named header) the zero based indexes. |
|
179
|
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
=head2 identity |
|
181
|
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
possible values: <array of column numbers or names> |
|
183
|
|
|
|
|
|
|
default: [] |
|
184
|
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
Defines the identity to identify and match a single record. If L</has_header> is |
|
186
|
|
|
|
|
|
|
true, the header names can be used. If not, the column numbers (zero based) will |
|
187
|
|
|
|
|
|
|
be used as header names. |
|
188
|
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
examples for config file entries: |
|
190
|
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
identity: [rec_nbr, rec_type] |
|
192
|
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
identity: |
|
194
|
|
|
|
|
|
|
- rec_nbr |
|
195
|
|
|
|
|
|
|
- rec_type |
|
196
|
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
identity: [3, 4, 17] |
|
198
|
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
=head2 index |
|
200
|
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
(B<readonly>) 0 for the reader on the left and 1 for the reader on the right side of the comparison. |
|
202
|
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
=head2 result |
|
204
|
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
(B<readonly>) A reference to an array with the currently read data after a call to L</fetch>. |
|
206
|
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
=head2 side |
|
208
|
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
(B<readonly>) 'left' for the reader on the left and 'right' for the reader on the right side of the comparison. |
|
210
|
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
=head2 side_name |
|
212
|
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
possible values: <string> |
|
214
|
|
|
|
|
|
|
default: 'left' or 'right' (derived from index) |
|
215
|
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
The name for the side of the comparison used for reporting. |
|
217
|
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
=head2 skip |
|
219
|
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
possible values: <key value pairs> |
|
221
|
|
|
|
|
|
|
default: undef |
|
222
|
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
Skip lines by column content. Keys must be column names (when the input has column |
|
224
|
|
|
|
|
|
|
headers, see L</has_header>) or numbers, the |
|
225
|
|
|
|
|
|
|
values are interpreted as regular expressions. A leading '!' negates the regex. |
|
226
|
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
Example: |
|
228
|
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
skip: |
|
230
|
|
|
|
|
|
|
Name: ^XYZ- |
|
231
|
|
|
|
|
|
|
Price: !\d |
|
232
|
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
=head2 skipper |
|
234
|
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
(B<readonly>) A reference to a generated subroutine that returns true or false |
|
236
|
|
|
|
|
|
|
depending on whether the record should be skipped according to the value of L</skip>. |
|
237
|
|
|
|
|
|
|
This will be called from the Reader subclasses. |
|
238
|
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
=head1 METHODS |
|
240
|
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
The methods L</setup> and L</fetch> have to be overridden by derived classes. |
|
242
|
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
=head2 fetch($size) |
|
244
|
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
Fetch $size records from the source. |
|
246
|
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
=head2 setup() |
|
248
|
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
Will be called by L<Spreadsheet::Compare::Single> at the start of a comparison. |
|
250
|
|
|
|
|
|
|
This is for setup tasks before handling the first fetch (e.g. opening a file, |
|
251
|
|
|
|
|
|
|
reading the header, ...) |
|
252
|
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
=cut |