File Coverage

blib/lib/Array/Unique.pm
Criterion Covered Total %
statement 66 67 98.5
branch 9 10 90.0
condition 10 12 83.3
subroutine 18 18 100.0
pod 1 1 100.0
total 104 108 96.3


line stmt bran cond sub pod time code
1             package Array::Unique;
2              
3 4     4   150813 use 5.006;
  4         17  
  4         172  
4 4     4   23 use strict;
  4         11  
  4         148  
5 4     4   33 use warnings;
  4         7  
  4         156  
6 4     4   19 use Carp;
  4         16  
  4         3809  
7              
8             our $VERSION = '0.08';
9              
10             # Strips out any duplicate values (leaves the first occurrence
11             # of every duplicated value and drops the later occurrences).
12             # Removes all undef values.
13             sub unique {
14 80     80 1 965 my $self = shift; # self or class
15              
16 80         84 my %seen;
17 80   100     1640 my @unique = grep defined $_ && !$seen{$_}++, @_;
18             # based on the Cookbook 1st edition and on suggestion by Jeff 'japhy' Pinyan
19             # fixed by Werner Weichselberger
20             }
21              
22              
23             sub TIEARRAY {
24 3     3   1723 my $class = shift;
25 3         12 my $self = {
26             array => [],
27             hash => {},
28             };
29 3         18 bless $self, $class;
30             }
31              
32              
33             sub CLEAR {
34 9     9   6895 my $self = shift;
35 9         39 $self->{array} = [];
36 9         50 $self->{hash} = {};
37             }
38              
39 9     9   41 sub EXTEND {}
40              
41             sub STORE {
42 65     65   12500 my ($self, $index, $value) = @_;
43 65         141 $self->SPLICE($index, 1, $value);
44             }
45              
46              
47              
48             sub FETCHSIZE {
49 290     290   22650 my $self = shift;
50 290         303 return scalar @{$self->{array}};
  290         955  
51             }
52              
53             sub FETCH {
54 183     183   1345 my ($self, $index) = @_;
55 183         190 ${$self->{array}}[$index];
  183         637  
56             }
57              
58              
59             sub STORESIZE {
60 3     3   1316 my $self = shift;
61 3         5 my $size = shift;
62              
63             # We cannot enlarge the array as the values would be undef
64              
65             # But we can make it smaller
66             # if ($self->FETCHSIZE > $size) {
67             # $self->{->_splice($size);
68             # }
69              
70 3         5 $#{$self->{array}} = $size-1;
  3         12  
71 3         10 return $size;
72             }
73              
74             sub SPLICE {
75 76     76   1454 my $self = shift;
76 76         83 my $offset = shift;
77 76         77 my $length = shift;
78              
79             # reset length value to positive (this is done by the normal splice too)
80 76 100 100     337 if (defined $length and $length < 0) {
81             #$length = @{$self->{array}} + $length;
82 1         4 $length += $self->FETCHSIZE - $offset;
83             }
84              
85             # reset offset to positive (this is done by the normal splice too)
86 76 100 66     295 if (defined $offset and $offset < 0) {
87 3         6 $offset += $self->FETCHSIZE;
88             }
89              
90 76 100 66     215 if (defined $offset and $offset > $self->FETCHSIZE) {
91 7         15 $offset = $self->FETCHSIZE;
92             # should give a warning like this: splice() offset past end of array
93             # if this was really a splice (and warning set) but no warning if this
94             # was an assignment to a high index.
95             }
96              
97             # my @s = @{$self->{array}}[$offset..$offset+$length]; # the old values to be returned
98 76         88 my @original;
99             # if (defined $length) {
100 76         185 @original = $self->_splice($self->{array}, $offset, $length, @_);
101             # } elsif (defined $offset) {
102             # @original = $self->_splice($self->{array}, $offset);
103             # } else {
104             # @original = $self->_splice($self->{array});
105             # }
106              
107 76         301 return @original;
108             }
109              
110              
111              
112             sub PUSH {
113 3     3   14 my $self = shift;
114              
115 3         11 $self->SPLICE($self->FETCHSIZE, 0, @_);
116             # while (my $value = shift) {
117             # $self->STORE($self->FETCHSIZE+1, $value);
118             # }
119 3         8 return $self->FETCHSIZE;
120             }
121              
122             sub POP {
123 1     1   2 my $self = shift;
124 1         3 ($self->SPLICE(-1))[0];
125             }
126              
127             sub SHIFT {
128 1     1   348 my $self = shift;
129             # #($self->{array})[0];
130 1         4 ($self->SPLICE(0,1))[0];
131             }
132              
133             sub UNSHIFT {
134 1     1   361 my $self = shift;
135 1         3 $self->SPLICE(0,0,@_);
136             }
137              
138              
139             sub _splice {
140 76     76   77 my $self = shift;
141 76         75 my $a = shift;
142 76         80 my $offset = shift;
143 76         108 my $length = shift;
144              
145 76         76 my @original;
146 76 100       139 if (defined $length) {
    50          
147 74         204 @original = splice(@$a, $offset, $length, @_);
148             } elsif (defined $offset) {
149 2         5 @original = splice(@$a, $offset);
150             } else {
151 0         0 @original = splice(@$a);
152             }
153 76         175 @$a = $self->unique(@$a);
154 76         218 return @original;
155             }
156              
157             =head1 NAME
158              
159             Array::Unique - Tie-able array that allows only unique values
160              
161             =head1 SYNOPSIS
162              
163             use Array::Unique;
164             tie @a, 'Array::Unique';
165              
166             Now use @a as a regular array.
167              
168             =head1 DESCRIPTION
169              
170             This package lets you create an array which will allow
171             only one occurrence of any value.
172              
173             In other words no matter how many times you put in 42
174             it will keep only the first occurrence and the rest will
175             be dropped.
176              
177             You use the module via tie and once you tied your array to
178             this module it will behave correctly.
179              
180             Uniqueness is checked with the 'eq' operator so
181             among other things it is case sensitive.
182              
183             As a side effect the module does not allow undef as a value in the array.
184              
185             =head1 EXAMPLES
186              
187             use Array::Unique;
188             tie @a, 'Array::Unique';
189              
190             @a = qw(a b c a d e f);
191             push @a, qw(x b z);
192             print "@a\n"; # a b c d e f x z
193              
194             =head1 DISCUSSION
195              
196             When you are collecting a list of items and you want
197             to make sure there is only one occurrence of each item,
198             you have several option:
199              
200              
201             =over 4
202              
203             =item 1) using an array and extracting the unique elements later
204              
205             You might use a regular array to hold this unique set of values
206             and either remove duplicates on each update by that keeping the array
207             always unique or remove duplicates just before you want to use the
208             uniqueness feature of the array. In either case you might run a
209             function you call @a = unique_value(@a);
210              
211             The problem with this approach is that you have to implement
212             the unique_value function (see later) AND you have to make sure you
213             don't forget to call it. I would say don't rely on remembering this.
214            
215              
216             There is good discussion about it in the 1st edition of the
217             Perl Cookbook of O'Reilly. I have copied the solutions here,
218             you can see further discussion in the book.
219              
220             Extracting Unique Elements from a List (Section 4.6 in the Perl Cookbook 1st ed.)
221              
222             # Straightforward
223              
224             %seen = ();
225             @uniq = ();
226             foreach $item (@list) [
227             unless ($seen{$item}) {
228             # if we get here we have not seen it before
229             $seen{$item} = 1;
230             push (@uniq, $item);
231             }
232             }
233              
234             # Faster
235              
236             %seen = ();
237             foreach $item (@list) {
238             push(@uniq, $item) unless $seen{$item}++;
239             }
240              
241             # Faster but different
242              
243             %seen;
244             foreach $item (@list) {
245             $seen{$item}++;
246             }
247             @uniq = keys %seen;
248              
249             # Faster and even more different
250             %seen;
251             @uniq = grep {! $seen{$_}++} @list;
252              
253              
254             =item 2) using a hash
255              
256             Some people use the keys of a hash to keep the items and
257             put an arbitrary value as the values of the hash:
258              
259             To build such a list:
260              
261             %unique = map { $_ => 1 } qw( one two one two three four! );
262              
263             To print it:
264              
265             print join ", ", sort keys %unique;
266              
267             To add values to it:
268              
269             $unique{$_}=1 foreach qw( one after the nine oh nine );
270              
271             To remove values:
272              
273             delete @unique{ qw(oh nine) };
274              
275             To check if a value is there:
276              
277             $unique{ $value }; # which is why I like to use "1" as my value
278              
279             (thanks to Gaal Yahas for the above examples)
280              
281             There are three drawbacks I see:
282              
283             =over 4
284              
285             =item 1) You type more.
286              
287             =item 2) Your reader might not understand at first why did you use hash
288             and what will be the values.
289              
290             =item 3) You lose the order.
291              
292             =back
293              
294             Usually non of them is critical but when I saw this the 10th time
295             in a code I had to understand with 0 documentation I got frustrated.
296              
297              
298             =item 3) using Array::Unique
299              
300             So I decided to write this module because I got frustrated
301             by my lack of understanding what's going on in that code
302             I mentioned.
303              
304             In addition I thought it might be interesting to write this and
305             then benchmark it.
306              
307             Additionally it is nice to have your name displayed in
308             bright lights all over CPAN ... or at least in a module.
309              
310             Array::Unique lets you tie an array to hmmm, itself (?)
311             and makes sure the values of the array are always unique.
312              
313             Since writing this I am not sure if I really recommend its usage.
314             I would say stick with the hash version and document that the
315             variable is aggregating a unique list of values.
316              
317              
318             =item 4) Using real SET
319              
320             There are modules on CPAN that let you create and maintain SETs.
321             I have not checked any of those but I guess they just as much of
322             an overkill for this functionality as Unique::Array.
323              
324              
325             =back
326              
327             =head1 BUGS
328              
329             use Array::Unique;
330             tie @a, 'Array::Unique';
331              
332             @c = @a = qw(a b c a d e f b);
333            
334             @c will contain the same as @a AND two undefs at the end because
335             @c you get the same length as the right most list.
336              
337             =head1 TODO
338              
339             Test:
340              
341             Change size of the array
342             Elements with false values ('', '0', 0)
343              
344             splice:
345             splice @a;
346             splice @a, 3;
347             splice @a, -3;
348             splice @a, 3, 5;
349             splice @a, 3, -5;
350             splice @a, -3, 5;
351             splice @a, -3, -5;
352             splice @a, ?, ?, @b;
353              
354              
355              
356             Benchmark speed
357              
358             Add faster functions that don't check uniqueness so if I
359             know part of the data that comes from a unique source then
360             I can speed up the process,
361             In short shoot myself in the leg.
362              
363             Enable optional compare with other functions
364              
365             Write even better implementations.
366              
367             =head1 AUTHOR
368              
369             Gabor Szabo <gabor@pti.co.il>
370              
371             =head1 LICENSE
372              
373             Copyright (C) 2002-2008 Gabor Szabo <gabor@pti.co.il>
374             All rights reserved. http://www.pti.co.il/
375              
376             You may distribute under the terms of either the GNU
377             General Public License or the Artistic License, as
378             specified in the Perl README file.
379              
380             No WARRANTY whatsoever.
381              
382             =head1 CREDITS
383              
384             Thanks for suggestions and bug reports to
385             Szabo Balazs (dLux)
386             Shlomo Yona
387             Gaal Yahas
388             Jeff 'japhy' Pinyan
389             Werner Weichselberger
390              
391             =head1 VERSION
392              
393             Version: 0.08
394              
395             Date: 2008 June 04
396              
397             =cut
398              
399             1;
400