File Coverage

blib/lib/Spreadsheet/Compare/Single.pm

Criterion	Covered	Total	%
statement	302	334	90.4
branch	112	182	61.5
condition	59	93	63.4
subroutine	14	23	60.8
pod	1	3	33.3
total	488	635	76.8

line	stmt	bran	cond	sub	pod	time	code
1							package Spreadsheet::Compare::Single;
2
3	14			14		108	use Mojo::Base 'Mojo::EventEmitter', -signatures;
	14					33
	14					112
4	14			14		3890	use Spreadsheet::Compare::Common;
	14					176
	14					135
5	14			14		7504	use Spreadsheet::Compare::Record;
	14					42
	14					172
6
7							#<<<
8							use Spreadsheet::Compare::Config {
9							allow_duplicates => 0,
10							below_limit_is_equal => 0,
11							convert_numbers => 0,
12							decimal_separator => '.',
13							diff_relative => [],
14							digital_grouping_symbol => '',
15							fetch_size => 1000,
16							fetch_limit => 0,
17							ignore => [],
18							ignore_strings => 0,
19							is_sorted => 0,
20							left => 'left',
21							limit_abs => 0,
22							limit_rel => 0,
23							_numerical_regex => sub { #<<<
24	18					396	my $ds = $_[0]->_ds;
25	18					104	my $dgs = $_[0]->_dgs;
26	18					470	return qr/
27							^
28							[-+]?
29							[0-9$dgs]*
30							$ds?
31							[0-9]+
32							([eE][-+]?[0-9]+)?
33							$
34							/x;
35							}, #>>>
36	14					411	readers => [],
37							report_all_data => 0,
38							report_diff_row => 0,
39							right => 'right',
40							title => '',
41	14			14		2526	}, protected => 1, make_attributes => 1;
	14					33
42							#>>>
43	14			14	0	92	sub counter_names { return qw/left right same diff limit miss add dup/ }
44
45							has _ds => sub { quotemeta( $_[0]->decimal_separator ) };
46							has _dgs => sub { quotemeta( $_[0]->digital_grouping_symbol ) };
47
48							my( $debug, $trace );
49
50	18			18	0	50	sub init ($self) {
	18					81
	18					75
51
52	18	50				69	croak 'The ignore parameter has to be an array reference'
53							unless ref( my $ignore = $self->ignore ) eq 'ARRAY';
54	18	50				113	croak 'The diff_relative parameter has to be an array reference'
55							unless ref( my $diffr = $self->diff_relative ) eq 'ARRAY';
56
57							# speed up logging
58	18					130	( $trace, $debug ) = get_log_settings();
59
60	18					319	$self->diff_relative( { map { $_ => 1 } @$diffr } );
	0					0
61
62							# make sure certain attributes are evaluated and available as hash values
63	18					188	$self->$_ for qw(
64							convert_numbers diff_relative _dgs _ds ignore
65							limit_abs limit_rel _numerical_regex
66							);
67
68	18					53	return $self;
69							}
70
71
72	18			18	1	40	sub compare ($self) { ## no critic (ProhibitExcessComplexity)
	18					45
	18					32
73
74	18					60	my $readers = $self->readers;
75	18	50	33			181	croak "readers is not an array ref" unless $readers and ref($readers) eq 'ARRAY';
76	18					56	for my $idx ( 0, 1 ) {
77	36					101	my $r = $readers->[$idx];
78	36	50	33			319	croak "invalid reader at index $idx" unless ref($r) and $r->isa('Spreadsheet::Compare::Reader');
79	36					152	$r->setup();
80							}
81
82	18	50		0		62	$debug and DEBUG "Header left :", sub { Dump( $readers->[0]->header ) };
	0					0
83	18	50		0		53	$debug and DEBUG "Header right:", sub { Dump( $readers->[1]->header ) };
	0					0
84
85							# create internal lookups
86	18					110	$self->{_look}{hdr} = $readers->[0]->header;
87	18					166	$self->{_look}{h2i} = my $h2i = $readers->[0]->h2i;
88	18					716	$self->{_look}{i2h} = { reverse %$h2i };
89	18	50				108	$self->{_look}{ign} = { map { defined( $h2i->{$_} ) ? ( $h2i->{$_} => 1 ) : () } $self->ignore->@* };
	25					153
90
91	18					202	$self->emit( '_after_reader_setup', $self->{_look} );
92	18					240	$self->_set_limits( $self->{_look}{hdr} );
93
94							# shortcuts to the result arrays
95	18					97	my $list_l = $readers->[0]->result();
96	18					85	my $list_r = $readers->[1]->result();
97
98	18	50				56	$trace and TRACE "Array ref left:", $list_l;
99	18	50				55	$trace and TRACE "Array ref right:", $list_r;
100
101	18					70	my @streams = qw/Differences Missing Additional Duplicates/;
102	18	50				75	push @streams, 'All' if $self->report_all_data;
103
104							# emit events for each stream, this will be subscribed by reporters
105	18					161	for my $name (@streams) {
106	72					523	$self->emit( 'add_stream', $name );
107	72					508	$self->emit( 'write_header', $name );
108							}
109
110	18					140	my @diff_columns;
111
112							# fetch a configured number of records possibly sorted by the identity column
113							# and compare everything that is alphanumerically smaller than the
114							# last fetched identity value
115	18	100				69	my $size = $self->is_sorted ? $self->fetch_size : ~0;
116	18					131	my $last_pass = 0;
117	18					40	my $fetches = 0;
118	18					170	my %count = qw/left 0 right 0 same 0 diff 0 miss 0 add 0 dup 0 limit 0/;
119	18					108	while ( $last_pass == 0 ) {
120
121	79					2904	my $done = 1;
122	79					252	for my $r (@$readers) {
123	158					1127	my $fnbr = $r->fetch($size);
124	158					862	INFO "Fetched $fnbr records from ", $r->side;
125	158		100			3237	$done &&= $r->exhausted;
126							}
127
128	79		100			452	my $limit_reached =
129							$self->is_sorted && $self->fetch_size && $self->fetch_limit && ( ++$fetches >= $self->fetch_limit );
130	79		100			1150	$last_pass = $done \|\| $limit_reached;
131
132	79					443	$self->emit('after_fetch');
133
134							# TODO: (issue) solve contradiction of chunks <> partial fetches
135							# chunks are always complete but can be unsorted, last_id makes no sense in that case
136							# this will mess up duplicate counting
137							# partial fetches are always sorted
138
139	79	100	66			1246	$last_pass = 1 if $readers->[0]->exhausted and $readers->[1]->exhausted;
140	79					802	INFO "last_pass:$last_pass";
141
142							$trace and TRACE "Ids on the left:", sub {
143	0			0		0	Dump( [ map { $_->id } @$list_l ] );
	0					0
144	79	50				710	};
145							$trace and TRACE "Ids on the right:", sub {
146	0			0		0	Dump( [ map { $_->id } @$list_r ] );
	0					0
147	79	50				272	};
148
149	79	50				462	my $last_id_l = @$list_l ? $list_l->[-1]->id : '';
150	79	50				494	my $last_id_r = @$list_r ? $list_r->[-1]->id : '';
151
152	79	50				375	unless ( $self->allow_duplicates ) {
153							# check for duplicates in the id column and
154							# put them into the 'Duplicates' sheet
155
156	79					746	for my $rec ( $self->_check_duplicates( $list_l, $readers->[0]->side )->@* ) {
157	18					216	$self->emit( 'write_row', 'Duplicates', $rec );
158							}
159
160	79					714	for my $rec ( $self->_check_duplicates( $list_r, $readers->[1]->side )->@* ) {
161	18					265	$self->emit( 'write_row', 'Duplicates', $rec );
162							}
163
164	79					459	$count{dup} = $self->{_dup_sum};
165							}
166
167							# generate lookup hash by identity column
168	79					219	my %look_r;
169	79					372	for my $i ( 0 .. $#$list_r ) {
170	4780					21376	my $r = $list_r->[$i];
171	4780		100			8781	$look_r{ $r->id } //= [];
172	4780					34767	push $look_r{ $r->id }->@*, $i;
173							}
174
175	79	50				609	$debug and DEBUG scalar( keys %look_r ), " unique ids found on the right";
176
177							# pass 1: loop over all records on the left
178							# if a corresponding record on the right is found
179							# ==> calculate the diffs
180							# ==> write to the corresponding output stream
181							# ==> mark record on the right side as DONE
182	79	50				322	$debug and DEBUG "Processing ", scalar(@$list_l), " records on the left";
183	79					320	LEFT_ROW: while ( my $rec = shift @$list_l ) {
184
185	4780	100				129624	$self->emit( 'counters', \%count ) if $count{left} % $self->fetch_size == 0;
186
187	4780					39361	my $id = $rec->id;
188
189	4780	50				32451	$debug and DEBUG "left row with ID >>$id<<";
190	4780	50				12894	$self->emit( 'write_row', 'All', $rec )
191							if $self->report_all_data;
192
193							# compare only those records that are safe and will not
194							# be fetched later
195	4780	100	100			25735	if (
			66
			66
196							$self->is_sorted
197							and not $last_pass
198							and ( $id eq $last_id_l
199							or $id eq $last_id_r )
200							) {
201	9	50				103	$debug and DEBUG "reached end of last fetch on the left side";
202	9					32	unshift @$list_l, $rec;
203	9	50				45	if ( !$self->allow_duplicates ) {
204	9					121	$self->{_dup_seen}{ $readers->[0]->side }{$id}--;
205							}
206	9					89	last LEFT_ROW;
207							}
208
209	4771					30281	$count{left}++;
210
211							# get the corresponding record on the right side
212	4771					20968	my( $cor, $diff_rec ) = $self->_get_match( $rec, $list_r, $look_r{$id} );
213
214							# add to missing sheet if no corresponding record is found
215	4771	50				13578	unless ($cor) {
216
217	0	0				0	$debug and DEBUG "Missing right row for ID >>$id<<";
218
219	0					0	$self->emit( 'write_row', 'Missing', $rec );
220	0					0	$count{miss}++;
221
222	0	0				0	$self->emit(
223							'write_row',
224							'All',
225							Spreadsheet::Compare::Record->new(
226							rec => [],
227							reader => $readers->[1],
228							)
229							) if $self->report_all_data;
230
231	0					0	next LEFT_ROW;
232							}
233
234	4771	50				10369	$debug and DEBUG "found record on the right side";
235
236	4771					11783	$count{right}++;
237
238	4771	50				17662	$self->emit( 'write_row', 'All', $cor )
239							if $self->report_all_data;
240
241	4771					26819	my $diff = $diff_rec->diff_info;
242
243							# skip record if no difference is found
244	4771	100	66			26421	if (
			66
245							$diff->{equal}
246							or ( $diff->{limit}
247							and $self->below_limit_is_equal )
248							) {
249	2087	50				3773	$debug and DEBUG "the records are identical";
250	2087					4176	$count{same}++;
251	2087					37602	next LEFT_ROW;
252							}
253
254	2684	50				15524	$debug and DEBUG "difference found";
255
256	2684					6275	$count{diff}++;
257	2684	100				8079	$count{limit}++ if $diff->{limit};
258
259	2684					6402	for my $i ( 0 .. $diff_rec->limit_mask->$#* ) {
260	182384	100				735806	$diff_columns[$i]++ if $diff_rec->limit_mask->[$i] > 0;
261							}
262
263	2684					19415	$self->emit( 'write_fmt_row', 'Differences', $rec );
264
265	2684					24565	$self->emit( 'write_fmt_row', 'Differences', $cor );
266
267	2684	50				24616	$self->emit( 'write_fmt_row', 'Differences', $diff_rec )
268							if $self->report_diff_row;
269
270							} # pass 1 LEFT_ROW
271
272	79					1850	$self->emit( 'counters', \%count );
273
274	79	50				1085	$debug and DEBUG scalar( keys $self->{_matched_right}->%* ), " matched records on the right";
275
276							# use not already matched records from the right
277	79	100				775	@$list_r = map { $self->{_matched_right}{$_} ? () : $list_r->[$_] } 0 .. $#$list_r;
	4780					110509
278
279	79	50				1066	$debug and DEBUG scalar(@$list_r), " records left after pass 1";
280
281							# pass 2: loop over all remaining entries on the right
282							# and add them to the additional sheet
283	79					356	RIGHT_ROW: while ( my $rec = shift @$list_r ) {
284
285	9					69	my $id = $rec->{id};
286							# compare only those records that are safe and will not
287							# be fetched later
288	9	50	33			92	if (
			33
289							not $last_pass
290							and ( $id eq $last_id_l
291							or $id eq $last_id_r )
292							) {
293	9	50				34	$debug and DEBUG "reached end of last fetch on the right side";
294	9					33	unshift @$list_r, $rec;
295	9	50				61	if ( !$self->allow_duplicates ) {
296	9					117	$self->{_dup_seen}{ $readers->[1]->side }{$id}--;
297							}
298	9					79	last RIGHT_ROW;
299							}
300
301	0	0				0	$debug and DEBUG "found additional record on the right side $id";
302
303	0					0	$count{right}++;
304
305	0	0				0	if ( $self->report_all_data ) {
306	0					0	$self->emit(
307							'write_row',
308							'All',
309							Spreadsheet::Compare::Record->new(
310							rec => [],
311							reader => $readers->[0],
312							)
313							);
314	0					0	$self->emit( 'write_row', 'All', $rec );
315							}
316
317	0					0	$self->emit( 'write_row', 'Additional', $rec );
318
319	0					0	$count{add}++;
320							} # pass 2 RIGHT_ROW
321
322	79					402	$self->emit( 'counters', \%count );
323
324							} # fetch data
325
326	18					1449	$self->emit( 'mark_header', 'Differences', \@diff_columns );
327
328	18					212	$self->emit( 'final_counters', \%count );
329
330	18	50		0		120	$debug and DEBUG "Counters:", sub { Dump( \%count ) };
	0					0
331
332	18					111	return \%count;
333							}
334
335
336							# Check a list of record-hashes for duplicates in the field(s)
337							# specified by the identity. Return all affected records.
338	158			158		1209	sub _check_duplicates ( $self, $list, $side ) {
	158					336
	158					329
	158					263
	158					279
339
340	158	50				468	$debug and DEBUG "checking duplicates";
341
342	158					283	my @dup_list;
343
344	158					454	for my $rec (@$list) {
345
346	9560					20578	my $id = $rec->id;
347
348	9560	50				31045	$trace and TRACE "id:$id";
349	9560	100				20577	if ( $self->{_dup_seen}{$side}{$id} ) {
350	36	50				113	$debug and DEBUG "duplicate for id=$id, side $side";
351	36					121	push @dup_list, $rec;
352							}
353	9560					25794	$self->{_dup_seen}{$side}{$id}++;
354							}
355
356	158					410	for my $side (qw/left right/) {
357	316					976	$self->{_dup_side}{$side} = 0;
358	316					29366	for my $id ( keys $self->{_dup_seen}{$side}->%* ) {
359	103067	100				229073	$self->{_dup_side}{$side}++ if $self->{_dup_seen}{$side}{$id} > 1;
360							}
361							}
362
363	158					1120	$self->{_dup_sum} = max( $self->{_dup_side}{$side}, $self->{_dup_side}{$side} );
364
365	158					774	return ( \@dup_list );
366							}
367
368
369							# compare two records $l and $r:
370							# - ignore fields in the ign lookup
371							# - mark records as different if the absolute or relative limits are exceeded
372							# - convert strings to numbers if configured
373	4786			4786		7452	sub _compare_record ( $self, $l, $r ) { ## no critic (ProhibitExcessComplexity)
	4786					8499
	4786					6782
	4786					7057
	4786					7384
374
375	4786					8409	state $diff_default = {
376							ABS_FIELD => '',
377							ABS_VALUE => '',
378							REL_FIELD => '',
379							REL_VALUE => '',
380							limit => 0,
381							equal => 1,
382							};
383
384	4786					33354	my %diff = %$diff_default;
385	4786					11552	my @limit_mask;
386							my @diff_rec;
387
388							my $drec = Spreadsheet::Compare::Record->new(
389							diff_info => \%diff,
390							limit_mask => \@limit_mask,
391							rec => \@diff_rec,
392							side => 'diff',
393							side_name => 'diff',
394							h2i => $self->{_look}{h2i},
395	4786					30425	);
396
397	4786					15218	$l->diff_info( \%diff );
398	4786					36111	$r->diff_info( \%diff );
399	4786					35044	$l->limit_mask( \@limit_mask );
400	4786					28590	$r->limit_mask( \@limit_mask );
401
402	4786					29173	my $lrec = $l->rec;
403	4786					23917	my $rrec = $r->rec;
404
405	4786	50		0		25384	$trace and TRACE "LREC", sub { Dump($lrec) };
	0					0
406	4786	50		0		10461	$trace and TRACE "RREC", sub { Dump($rrec) };
	0					0
407
408	4786	100				11325	if ( $l->hash eq $r->hash ) {
409	474	50				1607	$debug and DEBUG "the record hashes are identical";
410	474					1060	@diff_rec = map { 'EQ_HASH' } @$lrec;
	36795					51998
411	474					1234	@limit_mask = map { 0 } @$lrec;
	36795					48303
412	474					1762	return $drec;
413							}
414
415	4312					14651	my $i2h = $self->{_look}{i2h};
416	4312					8927	my $all_check = my $all_below = 0;
417	4312					14767	for my $idx ( 0 .. $#$lrec ) {
418
419	271809					478767	my $key = $i2h->{$idx};
420
421	271809					400362	$limit_mask[$idx] = 0;
422
423	271809	100				490670	if ( $self->{_look}{ign}{$idx} ) {
424	14220					24533	$diff_rec[$idx] = 'IGNORED';
425	14220					23944	next;
426							}
427
428	257589		50			507008	my $lorig = my $lval = $lrec->[$idx] // '';
429	257589		50			492011	my $rorig = my $rval = $rrec->[$idx] // '';
430
431	257589	50				434808	if ( $self->{convert_numbers} ) {
432	0					0	$self->_convert_number($lval);
433	0					0	$self->_convert_number($rval);
434	0	0				0	$debug and DEBUG "$key: converted values >>$lval<< >>$rval<<";
435	0					0	$lrec->[$idx] = $lval;
436	0					0	$rrec->[$idx] = $rval;
437							}
438
439	257589	100				446930	if ( $lorig eq $rorig ) {
440	246227	50				394559	$debug and DEBUG "$key: $lorig == $rorig in string comparison";
441	246227					366466	$diff_rec[$idx] = 'EQ_STR';
442	246227					397275	next;
443							}
444
445	11362					19465	my $rxreal = $self->{_numerical_regex};
446	11362	50	33			31909	if ( $self->ignore_strings and $lorig !~ /$rxreal/ and $rorig !~ /$rxreal/ ) {
		100	33
			66
447	0	0				0	$debug and DEBUG "$key: skip string comparison because ignore_strings is set";
448	0					0	$diff_rec[$idx] = 'SKIP_STR';
449	0					0	next;
450							}
451							elsif ( $lorig !~ /$rxreal/ or $rorig !~ /$rxreal/ ) {
452	208	50				1883	$debug and DEBUG "$key: $lorig != $rorig in string comparison";
453	208					401	$diff_rec[$idx] = 'NEQ_STR';
454	208					296	$diff{equal} = 0;
455	208					299	$limit_mask[$idx] = 1;
456	208					400	next;
457							}
458
459	14			14		185	no warnings qw/numeric/; ## no critic (ProhibitNoWarnings)
	14					34
	14					19177
460
461	11154					192856	$self->_convert_number( $lval, 1 );
462	11154					25319	$self->_convert_number( $rval, 1 );
463
464	11154	100				24461	if ( $lval == $rval ) {
465	3893	50				10895	$debug and DEBUG "$key: $lval == $rval in numerical comparison";
466	3893					10798	$diff_rec[$idx] = 'EQ_NUM';
467	3893					10092	next;
468							}
469
470	7261	50				13311	$debug and DEBUG "$key: $lval != $rval in numerical comparison";
471
472	7261					18095	my $limit_abs = $self->{limit_abs}{$key};
473	7261					16130	my $limit_rel = $self->{limit_rel}{$key};
474
475	7261					15002	my $diff = abs( $rval - $lval );
476
477	7261					16746	$diff_rec[$idx] = $diff;
478	7261					12146	$diff{equal} = 0;
479	7261					11695	$limit_mask[$idx] = 1;
480
481	7261					11248	my $below = my $check = 0;
482	7261	50				15697	if ( $limit_abs ne 'none' ) {
483	7261					10209	$check++;
484	7261	100				18828	if ( $diff <= $limit_abs ) {
485	5968	50				11458	$debug and DEBUG "$key: diff $diff is below absolute limit ", $limit_abs;
486	5968					9487	$below++;
487							}
488	7261	100				18909	if ( $diff > $diff{ABS_VALUE} ) {
489	3463					7040	$diff{ABS_FIELD} = $key;
490	3463					7434	$diff{ABS_VALUE} = $diff;
491							}
492							}
493
494	7261	100	100			28821	my $rdiff =
495							( $rval == 0 or $lval == 0 )
496							? 1
497							: ( $diff / abs($lval) );
498
499	7261	50				17186	$diff_rec[$idx] = sprintf( '%.4f', $rdiff ) if $self->{diff_relative}{$key};
500
501	7261	50				14221	if ( $limit_rel ne 'none' ) {
502	7261					12118	$check++;
503	7261	100				17307	if ( $rdiff <= $limit_rel ) {
504	6706	50				12355	$debug and DEBUG "$key: diff $rdiff is below relative limit ", $limit_rel;
505	6706					9510	$below++;
506							}
507	7261	100				16425	if ( $rdiff > $diff{REL_VALUE} ) {
508	4432					7969	$diff{REL_FIELD} = $key;
509	4432					8813	$diff{REL_VALUE} = $rdiff;
510							}
511							}
512
513	7261	100	66			23649	if ( $check && ( $below == $check ) ) {
514	5956	50				11008	$debug and DEBUG "$key: mark as diff but below limit";
515	5956					9027	$limit_mask[$idx] = -1;
516							}
517
518	7261					10660	$all_check += $check;
519	7261					16617	$all_below += $below;
520							}
521
522	4312		100			17237	$diff{limit} = $all_check && ( $all_check == $all_below );
523
524	4312					14754	return $drec;
525							}
526
527
528							# Try to find the best matching record on the right list and
529							# save the result in an internal lookup hash
530
531	4771			4771		8143	sub _get_match ( $self, $lrec, $rrecs, $rindexes ) {
	4771					8436
	4771					6908
	4771					8668
	4771					8385
	4771					7254
532
533	4771					7912	my @found;
534	4771					7800	my $pos = 0;
535	4771					12051	for my $ridx (@$rindexes) {
536	4786					10973	my $rrec = $rrecs->[$ridx];
537
538							# compare the records
539	4786					12941	my $diff_rec = $self->_compare_record( $lrec, $rrec );
540	4786					18429	my $limit_mask = $diff_rec->limit_mask;
541							my %found = (
542							ridx => $ridx,
543							drec => $diff_rec,
544							pos => $pos++,
545	308604					478096	below => scalar( grep { $_ == -1 } @$limit_mask ),
546	4786					31178	above => scalar( grep { $_ == 1 } @$limit_mask ),
	308604					485115
547							);
548
549	4786					13224	my $diff = $diff_rec->diff_info;
550	4786	100	66			35748	if ( $diff->{equal} or ( $diff->{limit} and $self->below_limit_is_equal ) ) {
			66
551	2087	50				4476	$debug and DEBUG "found direct match";
552	2087					4264	@found = \%found;
553	2087					4304	last;
554							}
555							else {
556	2699					22269	push @found, \%found;
557							}
558							}
559
560	4771	50				11680	return unless @found;
561
562	4771	0				14285	my $match = ( sort { $a->{below} <=> $b->{below} \|\| $a->{above} <=> $b->{above} } @found )[0];
	15					138
563	4771					12896	splice( @$rindexes, $match->{pos}, 1 );
564	4771					10607	my $ridx = $match->{ridx};
565	4771					19498	$self->{_matched_right}{$ridx} = 1;
566
567	4771					21231	return $rrecs->[$ridx], $match->{drec};
568							}
569
570							# TODO: (issue) tests for individual limits
571
572							# transform limit configuration into separate limits for each header column
573	18			18		48	sub _set_limits ( $self, $head ) {
	18					41
	18					33
	18					28
574
575	18					59	my $labs = $self->limit_abs;
576	18					121	my $lrel = $self->limit_rel;
577	18					139	my $def = config_defaults();
578
579							my $limit_abs_def =
580							ref($labs)
581							? ( $labs->{__default__} // $def->{limit_abs} )
582	18	100	33			97	: ( $labs // $def->{limit_abs} );
			33
583
584							my $limit_rel_def =
585							ref($lrel)
586							? ( $lrel->{__default__} // $def->{limit_rel} )
587	18	100	33			79	: ( $lrel // $def->{limit_rel} );
			33
588
589	18	100				97	$self->limit_abs( $labs = {} ) if ref($labs) ne 'HASH';
590	18	100				179	$self->limit_rel( $lrel = {} ) if ref($lrel) ne 'HASH';
591	18					107	for my $key (@$head) {
592	521		100			2124	$labs->{$key} //= $limit_abs_def;
593	521		100			1574	$lrel->{$key} //= $limit_rel_def;
594							}
595
596	18	50		0		64	$debug and DEBUG "absolute limits:", sub { Dump($labs) };
	0					0
597	18	50		0		55	$debug and DEBUG "relative limits:", sub { Dump($lrel) };
	0					0
598
599	18					44	return 1;
600							}
601
602
603							# Converts $string to a numerical value. Unless $force is
604							# true conversion will only be performed if $string matches
605							# the internal representation for numerical strings (depending
606							# on the setings for decimal_separator and digital_grouping_symbol)
607	22308			22308		32382	sub _convert_number ( $self, $string, $force = '' ) { ## no critic (RequireArgUnpacking)
	22308					31999
	22308					34973
	22308					31038
	22308					27658
608	22308					34455	my $rir = $self->{_numerical_regex};
609	22308					33300	my $dgs = $self->{_dgs};
610	22308					35054	my $ds = $self->{_ds};
611
612	22308	50	33			46426	return $self unless $force or $string =~ /^$rir$/;
613
614	14			14		145	no warnings qw/numeric/; ## no critic (ProhibitNoWarnings)
	14					36
	14					2542
615	22308	100				39094	$_[1] =~ s/$dgs//g if $dgs;
616	22308	100	66			67216	$_[1] =~ s/$ds/\./ if $ds and $ds ne '\.';
617	22308					49603	$_[1] += 0;
618
619	22308					37367	return $self;
620							}
621
622
623							1;
624
625							=head1 NAME
626
627							Spreadsheet::Compare::Single - Module for comparing two spreadsheet datasets
628
629							=head1 SYNOPSIS
630
631							use Spreadsheet::Compare::Single;
632							my $single = Spreadsheet::Compare::Single->new(%args);
633							my $result = $single->compare();
634
635							=head1 DESCRIPTION
636
637							Spreadsheet::Compare::Single analyses differences between two similar record sets
638							according to a defined configuration set.
639
640							=head1 ATTRIBUTES
641
642							All attributes return the object on setting and the current value if called without parameter.
643
644							$single->attr($value);
645							my $value = $single->attr;
646
647							They will usually be set by L<Spreadsheet::Compare>, after reading the values from a config file.
648
649							=head2 allow_duplicates
650
651							possible values: 0\|1
652							default: 0
653
654							Try to match identical records even when a unique identity cannot be
655							constructed. This can significantly increase compare times on large datasets.
656
657							=head2 below_limit_is_equal
658
659							possible values: 0\|1
660							default: 0
661
662							Normally differences that are inside configured limits will still be counted as
663							differences (only marked visually as low priority). Setting below_limit_is_equal
664							to a true value will result in the record counted as equal.
665
666							=head2 convert_numbers
667
668							possible values: 0\|1
669							default: 0
670
671							Convert content that is treated as a numerical value to an actual numeric
672							value (by simply adding 0). This is e.g. handy for having numerical data in
673							Excel output instead of strings that look like numbers. This will not affect
674							the optional 'All' report that can be created with the L<report_all_data> option.
675
676							=head2 decimal_separator
677
678							possible values: <string>
679							default: '.'
680
681							Decimal separator for numerical values
682
683							=head2 diff_relative
684
685							possible values: <list of column names>
686							default: []
687
688							Report the relative instead of the absolute difference if
689							L<Spreadsheet::Compare::Reporter/report_diff_row> is set to a true value.
690
691							Example (as YAML config):
692
693							diff_relative: [2,3,4]
694
695							or
696
697							diff_relative:
698							- Price
699							- Quantity
700
701							=head2 digital_grouping_symbol
702
703							possible values: <string>
704							default: ','
705
706							Digital grouping symbol for numerical values
707
708							=head2 fetch_size
709
710							possible values: <integer>
711							default: 1000
712
713							When L</is_sorted> is set, L</fetch_size> determines the number of records
714							fetched into memory at a time.
715
716							=head2 fetch_limit
717
718							possible values: <integer>
719							default: 0
720
721							When L</is_sorted> is set, L</fetch_limit> determines the number of fetches
722							(of size L</fetch_size>) before the comparison stops. This is useful during
723							setup with large datasets where you may have columns that are different for
724							every row and that you better add to the ignore list. Just remember to unset
725							this value once you are done.
726
727							=head2 ignore
728
729							possible values: <list of columns>
730							default: empty list
731
732							Columns to ignore while comparing data. If L<Spreadsheet::Compare::Reader/header> is set
733							the column names have to be used. Else use the zero based column number.
734
735							=head2 ignore_strings
736
737							possible values: 0\|1
738							default: 0
739
740							Only compare numerical data. This skips comparisons where both sides are not considered
741							to be numerical values. This depends on the setting for L</decimal_separator> and
742							L</digital_grouping_symbol>
743
744							=head2 is_sorted
745
746							possible values: 0\|1
747							default: 0
748
749							Assume data is sorted by identity. This is needed for fetching data in
750							smaller batches (see L</fetch_size>) to use less memory.
751
752							=head2 left
753
754							possible values: <string>
755							default: 'left'
756
757							Name for the input on the left side of the comparison. Used for reporting.
758
759							=head2 limit_abs
760
761							possible values: <number or key/value pairs>
762							default: 0
763
764							Single value or one entry per column for specifying absolute tolorance intervals.
765							Differences inside the tolerance interval will be counted and reported
766							separately from differences outside of it. The default value of 0 means no tolerance
767							limit, the value B<'none'> skips the limit check with the side effect that the deviation
768							will not be considered in statistics output (column with highest absolute deviation).
769							The special key B<'__default__'> can be used to set a default for all (numerical)
770							columns, and subsequently setting a different limit on selected columns.
771
772							Example (as YAML config):
773
774							limit_abs: 0.01
775
776							or
777
778							limit_abs:
779							__default__: 0.01
780							Price: 0.0001
781							Quantity: 1
782							Size: none
783
784							=head2 limit_rel
785
786							possible values: <number or keys/values>
787							default: undef
788
789							Single value or one entry per column for specifying relative tolerance intervals
790							(decimal value, not a percentage). Differences inside the tolerance
791							interval will be counted and reported separately from differences outside of it.
792							The default value of 0 means no tolerance limit, the value B<'none'> skips the
793							limit check with the side effect that the deviation will not be considered in
794							statistics output (column with highest relative deviation). The special key
795							B<'__default__'> can be used to set a default for all (numerical) columns, and
796							subsequently setting a different limit on selected columns.
797
798							limit_rel: 0.01
799
800							or
801
802							limit_rel:
803							__default__: 0.1
804							Price: 0.01
805							Quantity: 1
806							Size: none
807
808
809							=head2 readers
810
811							This attribute cannot be set from a config file.
812
813							possible values: <list of exactly 2 Reader objects>
814							default: []
815
816							The readers have to be two objects of L<Spreadsheet::Compare::Reader> subclasses
817							representing the left and the right side of the comparison.
818
819							=head2 right
820
821							possible values: <string>
822							default: 'right'
823
824							Name for the input on the right side of the comparison. Used for reporting.
825
826							=head2 title
827
828							possible values: <string>
829							default: ''
830
831							A title for the comparison
832
833							=head1 CONSTRUCTOR
834
835							=head2 new
836
837							my $single = Spreadsheet::Compare::Single->new(%attributes);
838							my $single = Spreadsheet::Compare::Single->new(\%attributes);
839
840							or
841
842							my $single = Spreadsheet::Compare::Single->new
843							->title('Regression Test 1')
844							->readers([$r_left, $r_right]);
845
846							Construct a new Spreadsheet::Compare::Single object. All comparison attributes can be given to the
847							constructor or set individualy via their chainable set methods.
848
849							=head1 METHODS
850
851							=head2 compare
852
853							Run all configured tests of the run configuration.
854							Return a hashref with counters:
855
856							{
857							add => <number of additional records on the right>,
858							diff => <number of found differences>,
859							dup => <number of duplicate rows (maximum of left and right)>,
860							left => <number of records on the left>,
861							limit => <number of record with differences below set ste limits>,
862							miss => <number of records missing on the right>,
863							right => <number of records on the right>,
864							same => <number of identical records>,
865							}
866
867							Before running compare the readers have to be set.
868
869							=head1 EVENTS
870
871							Spreadsheet::Compare::Single is a L<Mojo::EventEmitter>.
872
873							The reporting events correspond to the methods that are implemented by
874							sublasses of L<Spreadsheet::Compare::Reporter>. L<Spreadsheet::Compare>
875							will subscribe to the events and call the methods.
876
877							Spreadsheet::Compare::Single emits the following events
878
879							=head2 add_stream
880
881							$single->on(add_stream => sub ($obj, $name) {
882							say "new stream $name for ", $obj->title;
883							});
884
885							Reporting event. Signaling that a new reporting stream should be created and will be later
886							referenced for reporting data lines.
887
888							The possible stream names are 'Differences', 'Missing', 'Additional', 'Duplicates' and 'All'.
889
890							=head2 after_fetch
891
892							$single->on(after_fetch => sub ($obj) {
893							say "next fetch for ", $obj->title;
894							});
895
896							Emitted directly after a fetch from the readers.
897
898							=head2 counters
899
900							require Data::Dumper;
901							$single->on(counters => sub ($obj, $counters) {
902							say "next fetch for ", $obj->title, ":", Dumper($counters);
903							});
904
905							Emitted for every handled record. Don't rely on the numbers of calls, but on the content
906							of the %$counters hash if you want to know how many lines where actually read from the readers.
907							This can be used for progress reporting.
908
909							=head2 final_counters
910
911							require Data::Dumper;
912							$single->on(final_counters => sub ($obj, $counters) {
913							say "next fetch for $title:", Dumper($counters);
914							});
915
916							Emitted after completing a single comparison.
917
918							=head2 mark_header
919
920							$single->on(mark_header => sub ($obj, 'Differences', $mask) {
921							# mark columns
922							});
923
924							Reporting event. Emitted after completing a single comparison with a mask describing which columns
925							had differences (key:column_index, value:false/true)
926
927							=head2 write_fmt_row
928
929							$single->on(write_fmt_row => sub ($obj, 'Differences', $record) {
930							# write record to stream;
931							});
932
933							Reporting event. Write a formatted record to the 'Differences' output stream
934							The record is an L<Spreadsheet::Compare::Record> and will contain information
935							about the differences found (see L<Spreadsheet::Compare::Record/limit_mask>).
936
937							=head2 write_header
938
939							$single->on(write_header => sub ($obj, $stream) {
940							# write header for stream;
941							});
942
943							Reporting event. Write the header for an output stream.
944
945							=head2 write_row
946
947							$single->on(write_row => sub ($obj, $stream, $record) {
948							# write record to stream;
949							});
950
951							Reporting event. Write a default formatted record to an output stream.
952							The record is an L<Spreadsheet::Compare::Record>
953
954							=cut