File Coverage

blib/lib/Array/Compare.pm
Criterion Covered Total %
statement 107 107 100.0
branch 50 52 96.1
condition 15 18 83.3
subroutine 15 15 100.0
pod 7 7 100.0
total 194 199 97.4


line stmt bran cond sub pod time code
1             #
2             # $Id$
3             #
4              
5             =head1 NAME
6              
7             Array::Compare - Perl extension for comparing arrays.
8              
9             =head1 SYNOPSIS
10              
11             use Array::Compare;
12              
13             my $comp1 = Array::Compare->new;
14             $comp->Sep('|');
15             $comp->Skip({3 => 1, 4 => 1});
16             $comp->WhiteSpace(0);
17             $comp->Case(1);
18              
19             my $comp2 = Array::Compare->new(Sep => '|',
20             WhiteSpace => 0,
21             Case => 1,
22             Skip => {3 => 1, 4 => 1});
23              
24             my @arr1 = 0 .. 10;
25             my @arr2 = 0 .. 10;
26              
27             $comp1->compare(\@arr1, \@arr2);
28             $comp2->compare(\@arr1, \@arr2);
29              
30             =head1 DESCRIPTION
31              
32             If you have two arrays and you want to know if they are the same or
33             different, then Array::Compare will be useful to you.
34              
35             All comparisons are carried out via a comparator object. In the
36             simplest usage, you can create and use a comparator object like
37             this:
38              
39             my @arr1 = 0 .. 10;
40             my @arr2 = 0 .. 10;
41              
42             my $comp = Array::Compare->new;
43              
44             if ($comp->compare(\@arr1, \@arr2)) {
45             print "Arrays are the same\n";
46             } else {
47             print "Arrays are different\n";
48             }
49              
50             Notice that you pass references to the two arrays to the comparison
51             method.
52              
53             Internally the comparator compares the two arrays by using C
54             to turn both arrays into strings and comparing the strings using
55             C. In the joined strings, the elements of the original arrays
56             are separated with the C<^G> character. This can cause problems if
57             your array data contains C<^G> characters as it is possible that
58             two different arrays can be converted to the same string.
59              
60             To avoid this, it is possible to override the default separator
61             character, either by passing an alternative to the C function
62              
63             my $comp = Array::Compare->new(Sep => '|');
64              
65             or by changing the separator for an existing comparator object
66              
67             $comp->Sep('|');
68              
69             In general you should choose a separator character that won't appear
70             in your data.
71              
72             You can also control whether or not whitespace within the elements of
73             the arrays should be considered significant when making the comparison.
74             The default is that all whitespace is significant. The alternative is
75             for all consecutive white space characters to be converted to a single
76             space for the purposes of the comparison. Again, this can be turned on
77             when creating a comparator object:
78              
79             my $comp = Array::Compare->new(WhiteSpace => 0);
80              
81             or by altering an existing object:
82              
83             $comp->WhiteSpace(0);
84              
85             You can also control whether or not the case of the data is significant
86             in the comparison. The default is that the case of data is taken into
87             account. This can be changed in the standard ways when creating a new
88             comparator object:
89              
90             my $comp = Array::Compare->new(Case => 0);
91              
92             or by altering an existing object:
93              
94             $comp->Case(0);
95              
96             In addition to the simple comparison described above (which returns true
97             if the arrays are the same and false if they're different) there is also
98             a full comparison which returns a list containing the indexes of elements
99             which differ between the two arrays. If the arrays are the same it returns
100             an empty list. In scalar context the full comparison returns the length of
101             this list (i.e. the number of elements that differ). You can access the full
102             comparison in two ways. Firstly, there is a C attribute. If this
103             is C then a full comparison is carried out whenever the C
104             method is called.
105              
106             my $comp = Array::Compare->new(DefFull => 1);
107             $comp->compare(\@arr1, \@arr2); # Full comparison
108              
109             $comp->DefFull(0);
110             $comp->compare(\@arr1, \@arr2); # Simple comparison
111              
112             $comp->DefFull(1);
113             $comp->compare(\@arr1, \@arr2); # Full comparison again
114              
115              
116             Secondly, you can access the full comparison method directly
117              
118             $comp->full_compare(\@arr1, \@arr2);
119              
120             For symmetry, there is also a direct method to use to call the simple
121             comparison.
122              
123             $comp->simple_compare(\@arr1, \@arr2);
124              
125             The final complication is the ability to skip elements in the comparison.
126             If you know that two arrays will always differ in a particular element
127             but want to compare the arrays I this element, you can do it
128             with Array::Compare without taking array slices. To do this, a
129             comparator object has an optional attribute called C which is a
130             reference to a hash. The keys in this hash are the indexes of the array
131             elements and the values should be any true value for elements that should
132             be skipped.
133              
134             For example, if you want to compare two arrays, ignoring the values in
135             elements two and four, you can do something like this:
136              
137             my %skip = (2 => 1, 4 => 1);
138             my @a = (0, 1, 2, 3, 4, 5);
139             my @b = (0, 1, X, 3, X, 5);
140              
141             my $comp = Array::Compare->new(Skip => \%skip);
142              
143             $comp->compare(\@a, \@b);
144              
145             This should return I, as we are explicitly ignoring the columns
146             which differ.
147              
148             Of course, having created a comparator object with no skip hash, it is
149             possible to add one later:
150              
151             $comp->Skip({1 => 1, 2 => 1});
152              
153             or:
154              
155             my %skip = (1 => 1, 2 => 2);
156             $comp->Skip(\%skip);
157              
158             To reset the comparator so that no longer skips elements, call NoSkip().
159              
160             $comp->NoSkip();
161              
162             You can also check to see if one array is a permutation of another, i.e.
163             they contain the same elements but in a different order.
164              
165             if ($comp->perm(\@a, \@b) {
166             print "Arrays are perms\n";
167             } else {
168             print "Nope. Arrays are completely different\n";
169             }
170              
171             In this case the values of C and C are still used,
172             but C is ignored for, hopefully, obvious reasons.
173              
174             =head1 METHODS
175              
176             =cut
177              
178             package Array::Compare;
179              
180             require 5.010_000;
181 1     1   852 use strict;
  1         2  
  1         28  
182 1     1   5 use warnings;
  1         2  
  1         38  
183             our ($VERSION, $AUTOLOAD);
184              
185 1     1   649 use Moo;
  1         12367  
  1         5  
186 1     1   2270 use Types::Standard qw(Str Bool HashRef);
  1         82217  
  1         9  
187 1     1   1167 use Carp;
  1         2  
  1         1161  
188              
189             $VERSION = '3.0.7';
190              
191             has Sep => ( is => 'rw', isa => Str, default => '^G' );
192             has WhiteSpace => ( is => 'rw', isa => Bool, default => 1 );
193             has Case => ( is => 'rw', isa => Bool, default => 1 );
194             has DefFull => ( is => 'rw', isa => Bool, default => 0 );
195             has Skip => ( is => 'rw', isa => HashRef, default => sub { {} } );
196              
197             =head2 new [ %OPTIONS ]
198              
199             Constructs a new comparison object.
200              
201             Takes an optional hash containing various options that control how
202             comparisons are carried out. Any omitted options take useful defaults.
203              
204             =over 4
205              
206             =item Sep
207              
208             This is the value that is used to separate fields when the array is joined
209             into a string. It should be a value which doesn't appear in your data.
210             Default is '^G'.
211              
212             =item WhiteSpace
213              
214             Flag that indicates whether or not whitespace is significant in the
215             comparison. If this value is false then all multiple whitespace characters
216             are changed into a single space before the comparison takes place. Default
217             is 1 (whitespace is significant).
218              
219             =item Case
220              
221             Flag that indicates whther or not the case of the data should be significant
222             in the comparison. Default is 1 (case is significant).
223              
224             =item Skip
225              
226             a reference to a hash which contains the numbers of any columns that should
227             be skipped in the comparison. Default is an empty hash (all columns are
228             significant).
229              
230             =item NoSkip
231              
232             Reset skipped column details. It assigns {} to the attribute C.
233              
234             =cut
235              
236             sub NoSkip {
237 1     1 1 3 my $self = shift;
238              
239 1         25 $self->Skip({});
240             }
241              
242             =item DefFull
243              
244             Flag which indicates whether the default comparison is simple (just returns
245             true if the arrays are the same or false if they're not) or full (returns an
246             array containing the indexes of the columns that differ). Default is 0 (simple
247             comparison).
248              
249             =back
250              
251             =cut
252              
253             #
254             # Utility function to check the arguments to any of the comparison
255             # function. Ensures that there are two arguments and that they are
256             # both arrays.
257             #
258             sub _check_args {
259 61     61   90 my $self = shift;
260              
261 61         76 my @errs;
262              
263 61 100       153 push @errs, 'Must compare two arrays.' unless @_ == 2;
264 61 100       137 push @errs, 'Argument 1 is not an array' unless ref($_[0]) eq 'ARRAY';
265 61 100       119 push @errs, 'Argument 2 is not an array' unless ref($_[1]) eq 'ARRAY';
266              
267 61 100       467 croak join "\n", @errs if @errs;
268              
269 58         94 return;
270             }
271              
272             =head2 compare_len \@ARR1, \@ARR2
273              
274             Very simple comparison. Just checks the lengths of the arrays are
275             the same.
276              
277             =cut
278              
279             sub compare_len {
280 29     29 1 35 my $self = shift;
281              
282 29         67 $self->_check_args(@_);
283              
284 29         35 return @{$_[0]} == @{$_[1]};
  29         61  
  29         97  
285             }
286              
287             =head2 different_len \@ARR1, \@ARR2
288              
289             Passed two arrays and returns true if they are of different lengths.
290              
291             This is just the inverse of C (which is badly named).
292              
293             =cut
294              
295             sub different_len {
296 14     14 1 23 my $self = shift;
297 14         28 return ! $self->compare_len(@_);
298             }
299              
300             =head2 compare \@ARR1, \@ARR2
301              
302             Compare the values in two arrays and return a data indicating whether
303             the arrays are the same. The exact return values differ depending on
304             the comparison method used. See the descriptions of L
305             and L for details.
306              
307             Uses the value of DefFull to determine which comparison routine
308             to use.
309              
310             =cut
311              
312             sub compare {
313 28     28 1 6209 my $self = shift;
314              
315 28 100       601 if ($self->DefFull) {
316 13         99 return $self->full_compare(@_);
317             } else {
318 15         114 return $self->simple_compare(@_);
319             }
320             }
321              
322             =head2 simple_compare \@ARR1, \@ARR2
323              
324             Compare the values in two arrays and return a flag indicating whether or
325             not the arrays are the same.
326              
327             Returns true if the arrays are the same or false if they differ.
328              
329             Uses the values of 'Sep', 'WhiteSpace' and 'Skip' to influence
330             the comparison.
331              
332             =cut
333              
334             sub simple_compare {
335 18     18 1 32 my $self = shift;
336              
337 18         69 $self->_check_args(@_);
338              
339 15         35 my ($row1, $row2) = @_;
340              
341             # No point in continuing if the number of elements is different.
342 15 100       32 return unless $self->compare_len(@_);
343              
344             # @check contains the indexes into the two arrays, i.e. the numbers
345             # from 0 to one less than the number of elements.
346 14         42 my @check = 0 .. $#$row1;
347              
348 14         97 my ($pkg, $caller) = (caller(1))[0, 3];
349 14 50       39 $caller = '' unless defined $caller;
350 14         24 my $perm = $caller eq __PACKAGE__ . "::perm";
351              
352             # Filter @check so it only contains indexes that should be compared.
353             # N.B. Makes no sense to do this if we are called from 'perm'.
354 14 100       32 unless ($perm) {
355 52   100     1168 @check = grep {!(exists $self->Skip->{$_} && $self->Skip->{$_}) } @check
356 11 100       13 if keys %{$self->Skip};
  11         193  
357             }
358              
359             # Build two strings by taking array slices containing only the columns
360             # that we shouldn't skip and joining those array slices using the Sep
361             # character. Hopefully we can then just do a string comparison.
362             # Note: this makes the function liable to errors if your arrays
363             # contain the separator character.
364 14 100       358 my $str1 = join($self->Sep, map { defined $_ ? $_ : '' } @{$row1}[@check]);
  83         169  
  14         107  
365 14 100       231 my $str2 = join($self->Sep, map { defined $_ ? $_ : '' } @{$row2}[@check]);
  83         150  
  14         79  
366              
367             # If whitespace isn't significant, collapse it
368 14 100       224 unless ($self->WhiteSpace) {
369 1         10 $str1 =~ s/\s+/ /g;
370 1         5 $str2 =~ s/\s+/ /g;
371             }
372              
373             # If case isn't significant, change to lower case
374 14 100       275 unless ($self->Case) {
375 2         20 $str1 = lc $str1;
376 2         5 $str2 = lc $str2;
377             }
378              
379 14         146 return $str1 eq $str2;
380             }
381              
382             =head2 full_compare \@ARR1, \@ARR2
383              
384             Do a full comparison between two arrays.
385              
386             Checks each individual column. In scalar context returns the number
387             of columns that differ (zero if the arrays are the same). In list
388             context returns a list containing the indexes of the columns that
389             differ (an empty list if the arrays are the same).
390              
391             Uses the values of 'Sep' and 'WhiteSpace' to influence the comparison.
392              
393             B If the two arrays are of different lengths then this method
394             just returns the indexes of the elements that appear in one array but
395             not the other (i.e. the indexes from the longer array that are beyond
396             the end of the shorter array). This might be a little
397             counter-intuitive.
398              
399             =cut
400              
401             sub full_compare {
402 14     14 1 367 my $self = shift;
403              
404 14         39 $self->_check_args(@_);
405              
406 14         25 my ($row1, $row2) = @_;
407              
408             # No point in continuing if the number of elements is different.
409             # Because of the expected return value from this function we can't
410             # just say 'the arrays are different'. We need to do some work to
411             # calculate a meaningful return value.
412             # If we've been called in array context we return a list containing
413             # the number of the columns that appear in the longer list and aren't
414             # in the shorter list. If we've been called in scalar context we
415             # return the difference in the lengths of the two lists.
416 14 100       35 if ($self->different_len(@_)) {
417 3         16 return $self->_different_len_returns(@_);
418             }
419              
420 11         22 my ($arr1, $arr2) = @_;
421              
422 11         21 my @diffs = ();
423              
424 11         14 foreach (0 .. $#{$arr1}) {
  11         34  
425 69 100 100     176 next if keys %{$self->Skip} && $self->Skip->{$_};
  69         1080  
426              
427 65         1279 my ($val1, $val2) = ($arr1->[$_], $arr2->[$_]);
428              
429 65 100 100     197 if (not defined $val1 or not defined $val2) {
430 4 100       10 push @diffs, $_ if $self->_defined_diff($val1, $val2);
431 4         11 next;
432             }
433              
434 61 100       997 unless ($self->WhiteSpace) {
435 2         17 $val1 =~ s/\s+/ /g;
436 2         7 $val2 =~ s/\s+/ /g;
437             }
438              
439 61 100       1137 unless ($self->Case) {
440 7         39 $val1 = lc $val1;
441 7         12 $val2 = lc $val2;
442             }
443              
444 61 100       338 push @diffs, $_ unless $val1 eq $val2;
445             }
446              
447 11 100       71 return wantarray ? @diffs : scalar @diffs;
448             }
449              
450             sub _different_len_returns {
451 3     3   9 my $self = shift;
452 3         10 my ($row1, $row2) = @_;
453              
454 3 100       7 if (wantarray) {
455 2 100       3 if ($#{$row1} > $#{$row2}) {
  2         3  
  2         5  
456 1         2 return ( $#{$row2} + 1 .. $#{$row1} );
  1         3  
  1         7  
457             } else {
458 1         3 return ( $#{$row1} + 1 .. $#{$row2} );
  1         2  
  1         5  
459             }
460             } else {
461 1         2 return abs(@{$row1} - @{$row2});
  1         2  
  1         5  
462             }
463             }
464              
465             sub _defined_diff {
466 4     4   5 my $self = shift;
467 4         7 my ($val1, $val2) = @_;
468              
469 4 100 100     16 return if not defined $val1 and not defined $val2;
470 2 100 66     10 return 1 if defined $val1 and not defined $val2;
471 1 50 33     9 return 1 if not defined $val1 and defined $val2;
472             }
473              
474             =head2 perm \@ARR1, \@ARR2
475              
476             Check to see if one array is a permutation of the other (i.e. contains
477             the same set of elements, but in a different order).
478              
479             We do this by sorting the arrays and passing references to the assorted
480             versions to simple_compare. There are also some small changes to
481             simple_compare as it should ignore the Skip hash if we are called from
482             perm.
483              
484             =cut
485              
486             sub perm {
487 3     3 1 538 my $self = shift;
488              
489 3         4 return $self->simple_compare([sort @{$_[0]}], [sort @{$_[1]}]);
  3         16  
  3         14  
490             }
491              
492             1;
493             __END__