File Coverage

blib/lib/Array/Compare.pm
Criterion Covered Total %
statement 107 107 100.0
branch 50 52 96.1
condition 15 18 83.3
subroutine 15 15 100.0
pod 7 7 100.0
total 194 199 97.4


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             Array::Compare - Perl extension for comparing arrays.
4              
5             =head1 SYNOPSIS
6              
7             use Array::Compare;
8              
9             my $comp1 = Array::Compare->new;
10             $comp->Sep('|');
11             $comp->Skip({3 => 1, 4 => 1});
12             $comp->WhiteSpace(0);
13             $comp->Case(1);
14              
15             my $comp2 = Array::Compare->new(Sep => '|',
16             WhiteSpace => 0,
17             Case => 1,
18             Skip => {3 => 1, 4 => 1});
19              
20             my @arr1 = 0 .. 10;
21             my @arr2 = 0 .. 10;
22              
23             $comp1->compare(\@arr1, \@arr2);
24             $comp2->compare(\@arr1, \@arr2);
25              
26             =head1 DESCRIPTION
27              
28             If you have two arrays and you want to know if they are the same or
29             different, then Array::Compare will be useful to you.
30              
31             All comparisons are carried out via a comparator object. In the
32             simplest usage, you can create and use a comparator object like
33             this:
34              
35             my @arr1 = 0 .. 10;
36             my @arr2 = 0 .. 10;
37              
38             my $comp = Array::Compare->new;
39              
40             if ($comp->compare(\@arr1, \@arr2)) {
41             print "Arrays are the same\n";
42             } else {
43             print "Arrays are different\n";
44             }
45              
46             Notice that you pass references to the two arrays to the comparison
47             method.
48              
49             Internally the comparator compares the two arrays by using C
50             to turn both arrays into strings and comparing the strings using
51             C. In the joined strings, the elements of the original arrays
52             are separated with the C<^G> character. This can cause problems if
53             your array data contains C<^G> characters as it is possible that
54             two different arrays can be converted to the same string.
55              
56             To avoid this, it is possible to override the default separator
57             character, either by passing an alternative to the C function
58              
59             my $comp = Array::Compare->new(Sep => '|');
60              
61             or by changing the separator for an existing comparator object
62              
63             $comp->Sep('|');
64              
65             In general you should choose a separator character that won't appear
66             in your data.
67              
68             You can also control whether or not whitespace within the elements of
69             the arrays should be considered significant when making the comparison.
70             The default is that all whitespace is significant. The alternative is
71             for all consecutive white space characters to be converted to a single
72             space for the purposes of the comparison. Again, this can be turned on
73             when creating a comparator object:
74              
75             my $comp = Array::Compare->new(WhiteSpace => 0);
76              
77             or by altering an existing object:
78              
79             $comp->WhiteSpace(0);
80              
81             You can also control whether or not the case of the data is significant
82             in the comparison. The default is that the case of data is taken into
83             account. This can be changed in the standard ways when creating a new
84             comparator object:
85              
86             my $comp = Array::Compare->new(Case => 0);
87              
88             or by altering an existing object:
89              
90             $comp->Case(0);
91              
92             In addition to the simple comparison described above (which returns true
93             if the arrays are the same and false if they're different) there is also
94             a full comparison which returns a list containing the indexes of elements
95             which differ between the two arrays. If the arrays are the same it returns
96             an empty list. In scalar context the full comparison returns the length of
97             this list (i.e. the number of elements that differ). You can access the full
98             comparison in two ways. Firstly, there is a C attribute. If this
99             is C then a full comparison is carried out whenever the C
100             method is called.
101              
102             my $comp = Array::Compare->new(DefFull => 1);
103             $comp->compare(\@arr1, \@arr2); # Full comparison
104              
105             $comp->DefFull(0);
106             $comp->compare(\@arr1, \@arr2); # Simple comparison
107              
108             $comp->DefFull(1);
109             $comp->compare(\@arr1, \@arr2); # Full comparison again
110              
111              
112             Secondly, you can access the full comparison method directly
113              
114             $comp->full_compare(\@arr1, \@arr2);
115              
116             For symmetry, there is also a direct method to use to call the simple
117             comparison.
118              
119             $comp->simple_compare(\@arr1, \@arr2);
120              
121             The final complication is the ability to skip elements in the comparison.
122             If you know that two arrays will always differ in a particular element
123             but want to compare the arrays I this element, you can do it
124             with Array::Compare without taking array slices. To do this, a
125             comparator object has an optional attribute called C which is a
126             reference to a hash. The keys in this hash are the indexes of the array
127             elements and the values should be any true value for elements that should
128             be skipped.
129              
130             For example, if you want to compare two arrays, ignoring the values in
131             elements two and four, you can do something like this:
132              
133             my %skip = (2 => 1, 4 => 1);
134             my @a = (0, 1, 2, 3, 4, 5);
135             my @b = (0, 1, X, 3, X, 5);
136              
137             my $comp = Array::Compare->new(Skip => \%skip);
138              
139             $comp->compare(\@a, \@b);
140              
141             This should return I, as we are explicitly ignoring the columns
142             which differ.
143              
144             Of course, having created a comparator object with no skip hash, it is
145             possible to add one later:
146              
147             $comp->Skip({1 => 1, 2 => 1});
148              
149             or:
150              
151             my %skip = (1 => 1, 2 => 2);
152             $comp->Skip(\%skip);
153              
154             To reset the comparator so that no longer skips elements, call NoSkip().
155              
156             $comp->NoSkip();
157              
158             You can also check to see if one array is a permutation of another, i.e.
159             they contain the same elements but in a different order.
160              
161             if ($comp->perm(\@a, \@b) {
162             print "Arrays are perms\n";
163             } else {
164             print "Nope. Arrays are completely different\n";
165             }
166              
167             In this case the values of C and C are still used,
168             but C is ignored for, hopefully, obvious reasons.
169              
170             =head1 METHODS
171              
172             =cut
173              
174             package Array::Compare;
175              
176             require 5.010_000;
177 1     1   862 use strict;
  1         2  
  1         30  
178 1     1   5 use warnings;
  1         2  
  1         50  
179             our ($VERSION, $AUTOLOAD);
180              
181 1     1   599 use Moo;
  1         11717  
  1         5  
182 1     1   2122 use Types::Standard qw(Str Bool HashRef);
  1         81917  
  1         10  
183 1     1   1143 use Carp;
  1         2  
  1         1154  
184              
185             $VERSION = '3.0.8';
186              
187             has Sep => ( is => 'rw', isa => Str, default => '^G' );
188             has WhiteSpace => ( is => 'rw', isa => Bool, default => 1 );
189             has Case => ( is => 'rw', isa => Bool, default => 1 );
190             has DefFull => ( is => 'rw', isa => Bool, default => 0 );
191             has Skip => ( is => 'rw', isa => HashRef, default => sub { {} } );
192              
193             =head2 new [ %OPTIONS ]
194              
195             Constructs a new comparison object.
196              
197             Takes an optional hash containing various options that control how
198             comparisons are carried out. Any omitted options take useful defaults.
199              
200             =over 4
201              
202             =item Sep
203              
204             This is the value that is used to separate fields when the array is joined
205             into a string. It should be a value which doesn't appear in your data.
206             Default is '^G'.
207              
208             =item WhiteSpace
209              
210             Flag that indicates whether or not whitespace is significant in the
211             comparison. If this value is false then all multiple whitespace characters
212             are changed into a single space before the comparison takes place. Default
213             is 1 (whitespace is significant).
214              
215             =item Case
216              
217             Flag that indicates whther or not the case of the data should be significant
218             in the comparison. Default is 1 (case is significant).
219              
220             =item Skip
221              
222             a reference to a hash which contains the numbers of any columns that should
223             be skipped in the comparison. Default is an empty hash (all columns are
224             significant).
225              
226             =item NoSkip
227              
228             Reset skipped column details. It assigns {} to the attribute C.
229              
230             =cut
231              
232             sub NoSkip {
233 1     1 1 2 my $self = shift;
234              
235 1         26 $self->Skip({});
236             }
237              
238             =item DefFull
239              
240             Flag which indicates whether the default comparison is simple (just returns
241             true if the arrays are the same or false if they're not) or full (returns an
242             array containing the indexes of the columns that differ). Default is 0 (simple
243             comparison).
244              
245             =back
246              
247             =cut
248              
249             #
250             # Utility function to check the arguments to any of the comparison
251             # function. Ensures that there are two arguments and that they are
252             # both arrays.
253             #
254             sub _check_args {
255 61     61   84 my $self = shift;
256              
257 61         76 my @errs;
258              
259 61 100       136 push @errs, 'Must compare two arrays.' unless @_ == 2;
260 61 100       132 push @errs, 'Argument 1 is not an array' unless ref($_[0]) eq 'ARRAY';
261 61 100       115 push @errs, 'Argument 2 is not an array' unless ref($_[1]) eq 'ARRAY';
262              
263 61 100       497 croak join "\n", @errs if @errs;
264              
265 58         89 return;
266             }
267              
268             =head2 compare_len \@ARR1, \@ARR2
269              
270             Very simple comparison. Just checks the lengths of the arrays are
271             the same.
272              
273             =cut
274              
275             sub compare_len {
276 29     29 1 35 my $self = shift;
277              
278 29         68 $self->_check_args(@_);
279              
280 29         35 return @{$_[0]} == @{$_[1]};
  29         43  
  29         103  
281             }
282              
283             =head2 different_len \@ARR1, \@ARR2
284              
285             Passed two arrays and returns true if they are of different lengths.
286              
287             This is just the inverse of C (which is badly named).
288              
289             =cut
290              
291             sub different_len {
292 14     14 1 21 my $self = shift;
293 14         30 return ! $self->compare_len(@_);
294             }
295              
296             =head2 compare \@ARR1, \@ARR2
297              
298             Compare the values in two arrays and return a data indicating whether
299             the arrays are the same. The exact return values differ depending on
300             the comparison method used. See the descriptions of L
301             and L for details.
302              
303             Uses the value of DefFull to determine which comparison routine
304             to use.
305              
306             =cut
307              
308             sub compare {
309 28     28 1 6061 my $self = shift;
310              
311 28 100       538 if ($self->DefFull) {
312 13         99 return $self->full_compare(@_);
313             } else {
314 15         107 return $self->simple_compare(@_);
315             }
316             }
317              
318             =head2 simple_compare \@ARR1, \@ARR2
319              
320             Compare the values in two arrays and return a flag indicating whether or
321             not the arrays are the same.
322              
323             Returns true if the arrays are the same or false if they differ.
324              
325             Uses the values of 'Sep', 'WhiteSpace' and 'Skip' to influence
326             the comparison.
327              
328             =cut
329              
330             sub simple_compare {
331 18     18 1 30 my $self = shift;
332              
333 18         48 $self->_check_args(@_);
334              
335 15         33 my ($row1, $row2) = @_;
336              
337             # No point in continuing if the number of elements is different.
338 15 100       31 return unless $self->compare_len(@_);
339              
340             # @check contains the indexes into the two arrays, i.e. the numbers
341             # from 0 to one less than the number of elements.
342 14         37 my @check = 0 .. $#$row1;
343              
344 14         97 my ($pkg, $caller) = (caller(1))[0, 3];
345 14 50       35 $caller = '' unless defined $caller;
346 14         27 my $perm = $caller eq __PACKAGE__ . "::perm";
347              
348             # Filter @check so it only contains indexes that should be compared.
349             # N.B. Makes no sense to do this if we are called from 'perm'.
350 14 100       27 unless ($perm) {
351 52   100     1103 @check = grep {!(exists $self->Skip->{$_} && $self->Skip->{$_}) } @check
352 11 100       14 if keys %{$self->Skip};
  11         228  
353             }
354              
355             # Build two strings by taking array slices containing only the columns
356             # that we shouldn't skip and joining those array slices using the Sep
357             # character. Hopefully we can then just do a string comparison.
358             # Note: this makes the function liable to errors if your arrays
359             # contain the separator character.
360 14 100       337 my $str1 = join($self->Sep, map { defined $_ ? $_ : '' } @{$row1}[@check]);
  83         173  
  14         109  
361 14 100       222 my $str2 = join($self->Sep, map { defined $_ ? $_ : '' } @{$row2}[@check]);
  83         156  
  14         84  
362              
363             # If whitespace isn't significant, collapse it
364 14 100       225 unless ($self->WhiteSpace) {
365 1         10 $str1 =~ s/\s+/ /g;
366 1         6 $str2 =~ s/\s+/ /g;
367             }
368              
369             # If case isn't significant, change to lower case
370 14 100       266 unless ($self->Case) {
371 2         15 $str1 = lc $str1;
372 2         3 $str2 = lc $str2;
373             }
374              
375 14         140 return $str1 eq $str2;
376             }
377              
378             =head2 full_compare \@ARR1, \@ARR2
379              
380             Do a full comparison between two arrays.
381              
382             Checks each individual column. In scalar context returns the number
383             of columns that differ (zero if the arrays are the same). In list
384             context returns a list containing the indexes of the columns that
385             differ (an empty list if the arrays are the same).
386              
387             Uses the values of 'Sep' and 'WhiteSpace' to influence the comparison.
388              
389             B If the two arrays are of different lengths then this method
390             just returns the indexes of the elements that appear in one array but
391             not the other (i.e. the indexes from the longer array that are beyond
392             the end of the shorter array). This might be a little
393             counter-intuitive.
394              
395             =cut
396              
397             sub full_compare {
398 14     14 1 365 my $self = shift;
399              
400 14         41 $self->_check_args(@_);
401              
402 14         24 my ($row1, $row2) = @_;
403              
404             # No point in continuing if the number of elements is different.
405             # Because of the expected return value from this function we can't
406             # just say 'the arrays are different'. We need to do some work to
407             # calculate a meaningful return value.
408             # If we've been called in array context we return a list containing
409             # the number of the columns that appear in the longer list and aren't
410             # in the shorter list. If we've been called in scalar context we
411             # return the difference in the lengths of the two lists.
412 14 100       45 if ($self->different_len(@_)) {
413 3         8 return $self->_different_len_returns(@_);
414             }
415              
416 11         22 my ($arr1, $arr2) = @_;
417              
418 11         15 my @diffs = ();
419              
420 11         18 foreach (0 .. $#{$arr1}) {
  11         32  
421 69 100 100     180 next if keys %{$self->Skip} && $self->Skip->{$_};
  69         1070  
422              
423 65         1301 my ($val1, $val2) = ($arr1->[$_], $arr2->[$_]);
424              
425 65 100 100     210 if (not defined $val1 or not defined $val2) {
426 4 100       12 push @diffs, $_ if $self->_defined_diff($val1, $val2);
427 4         8 next;
428             }
429              
430 61 100       902 unless ($self->WhiteSpace) {
431 2         18 $val1 =~ s/\s+/ /g;
432 2         8 $val2 =~ s/\s+/ /g;
433             }
434              
435 61 100       1105 unless ($self->Case) {
436 7         41 $val1 = lc $val1;
437 7         11 $val2 = lc $val2;
438             }
439              
440 61 100       370 push @diffs, $_ unless $val1 eq $val2;
441             }
442              
443 11 100       57 return wantarray ? @diffs : scalar @diffs;
444             }
445              
446             sub _different_len_returns {
447 3     3   7 my $self = shift;
448 3         5 my ($row1, $row2) = @_;
449              
450 3 100       8 if (wantarray) {
451 2 100       3 if ($#{$row1} > $#{$row2}) {
  2         5  
  2         5  
452 1         2 return ( $#{$row2} + 1 .. $#{$row1} );
  1         3  
  1         6  
453             } else {
454 1         3 return ( $#{$row1} + 1 .. $#{$row2} );
  1         3  
  1         6  
455             }
456             } else {
457 1         3 return abs(@{$row1} - @{$row2});
  1         2  
  1         5  
458             }
459             }
460              
461             sub _defined_diff {
462 4     4   6 my $self = shift;
463 4         10 my ($val1, $val2) = @_;
464              
465 4 100 100     18 return if not defined $val1 and not defined $val2;
466 2 100 66     9 return 1 if defined $val1 and not defined $val2;
467 1 50 33     11 return 1 if not defined $val1 and defined $val2;
468             }
469              
470             =head2 perm \@ARR1, \@ARR2
471              
472             Check to see if one array is a permutation of the other (i.e. contains
473             the same set of elements, but in a different order).
474              
475             We do this by sorting the arrays and passing references to the assorted
476             versions to simple_compare. There are also some small changes to
477             simple_compare as it should ignore the Skip hash if we are called from
478             perm.
479              
480             =cut
481              
482             sub perm {
483 3     3 1 592 my $self = shift;
484              
485 3         5 return $self->simple_compare([sort @{$_[0]}], [sort @{$_[1]}]);
  3         16  
  3         14  
486             }
487              
488             1;
489             __END__