File Coverage

blib/lib/Array/Unique.pm

Criterion	Covered	Total	%
statement	66	67	98.5
branch	9	10	90.0
condition	10	12	83.3
subroutine	18	18	100.0
pod	1	1	100.0
total	104	108	96.3

line	stmt	bran	cond	sub	pod	time	code
1							package Array::Unique;
2
3	4			4		150813	use 5.006;
	4					17
	4					172
4	4			4		23	use strict;
	4					11
	4					148
5	4			4		33	use warnings;
	4					7
	4					156
6	4			4		19	use Carp;
	4					16
	4					3809
7
8							our $VERSION = '0.08';
9
10							# Strips out any duplicate values (leaves the first occurrence
11							# of every duplicated value and drops the later occurrences).
12							# Removes all undef values.
13							sub unique {
14	80			80	1	965	my $self = shift; # self or class
15
16	80					84	my %seen;
17	80		100			1640	my @unique = grep defined $_ && !$seen{$_}++, @_;
18							# based on the Cookbook 1st edition and on suggestion by Jeff 'japhy' Pinyan
19							# fixed by Werner Weichselberger
20							}
21
22
23							sub TIEARRAY {
24	3			3		1723	my $class = shift;
25	3					12	my $self = {
26							array => [],
27							hash => {},
28							};
29	3					18	bless $self, $class;
30							}
31
32
33							sub CLEAR {
34	9			9		6895	my $self = shift;
35	9					39	$self->{array} = [];
36	9					50	$self->{hash} = {};
37							}
38
39	9			9		41	sub EXTEND {}
40
41							sub STORE {
42	65			65		12500	my ($self, $index, $value) = @_;
43	65					141	$self->SPLICE($index, 1, $value);
44							}
45
46
47
48							sub FETCHSIZE {
49	290			290		22650	my $self = shift;
50	290					303	return scalar @{$self->{array}};
	290					955
51							}
52
53							sub FETCH {
54	183			183		1345	my ($self, $index) = @_;
55	183					190	${$self->{array}}[$index];
	183					637
56							}
57
58
59							sub STORESIZE {
60	3			3		1316	my $self = shift;
61	3					5	my $size = shift;
62
63							# We cannot enlarge the array as the values would be undef
64
65							# But we can make it smaller
66							# if ($self->FETCHSIZE > $size) {
67							# $self->{->_splice($size);
68							# }
69
70	3					5	$#{$self->{array}} = $size-1;
	3					12
71	3					10	return $size;
72							}
73
74							sub SPLICE {
75	76			76		1454	my $self = shift;
76	76					83	my $offset = shift;
77	76					77	my $length = shift;
78
79							# reset length value to positive (this is done by the normal splice too)
80	76	100	100			337	if (defined $length and $length < 0) {
81							#$length = @{$self->{array}} + $length;
82	1					4	$length += $self->FETCHSIZE - $offset;
83							}
84
85							# reset offset to positive (this is done by the normal splice too)
86	76	100	66			295	if (defined $offset and $offset < 0) {
87	3					6	$offset += $self->FETCHSIZE;
88							}
89
90	76	100	66			215	if (defined $offset and $offset > $self->FETCHSIZE) {
91	7					15	$offset = $self->FETCHSIZE;
92							# should give a warning like this: splice() offset past end of array
93							# if this was really a splice (and warning set) but no warning if this
94							# was an assignment to a high index.
95							}
96
97							# my @s = @{$self->{array}}[$offset..$offset+$length]; # the old values to be returned
98	76					88	my @original;
99							# if (defined $length) {
100	76					185	@original = $self->_splice($self->{array}, $offset, $length, @_);
101							# } elsif (defined $offset) {
102							# @original = $self->_splice($self->{array}, $offset);
103							# } else {
104							# @original = $self->_splice($self->{array});
105							# }
106
107	76					301	return @original;
108							}
109
110
111
112							sub PUSH {
113	3			3		14	my $self = shift;
114
115	3					11	$self->SPLICE($self->FETCHSIZE, 0, @_);
116							# while (my $value = shift) {
117							# $self->STORE($self->FETCHSIZE+1, $value);
118							# }
119	3					8	return $self->FETCHSIZE;
120							}
121
122							sub POP {
123	1			1		2	my $self = shift;
124	1					3	($self->SPLICE(-1))[0];
125							}
126
127							sub SHIFT {
128	1			1		348	my $self = shift;
129							# #($self->{array})[0];
130	1					4	($self->SPLICE(0,1))[0];
131							}
132
133							sub UNSHIFT {
134	1			1		361	my $self = shift;
135	1					3	$self->SPLICE(0,0,@_);
136							}
137
138
139							sub _splice {
140	76			76		77	my $self = shift;
141	76					75	my $a = shift;
142	76					80	my $offset = shift;
143	76					108	my $length = shift;
144
145	76					76	my @original;
146	76	100				139	if (defined $length) {
		50
147	74					204	@original = splice(@$a, $offset, $length, @_);
148							} elsif (defined $offset) {
149	2					5	@original = splice(@$a, $offset);
150							} else {
151	0					0	@original = splice(@$a);
152							}
153	76					175	@$a = $self->unique(@$a);
154	76					218	return @original;
155							}
156
157							=head1 NAME
158
159							Array::Unique - Tie-able array that allows only unique values
160
161							=head1 SYNOPSIS
162
163							use Array::Unique;
164							tie @a, 'Array::Unique';
165
166							Now use @a as a regular array.
167
168							=head1 DESCRIPTION
169
170							This package lets you create an array which will allow
171							only one occurrence of any value.
172
173							In other words no matter how many times you put in 42
174							it will keep only the first occurrence and the rest will
175							be dropped.
176
177							You use the module via tie and once you tied your array to
178							this module it will behave correctly.
179
180							Uniqueness is checked with the 'eq' operator so
181							among other things it is case sensitive.
182
183							As a side effect the module does not allow undef as a value in the array.
184
185							=head1 EXAMPLES
186
187							use Array::Unique;
188							tie @a, 'Array::Unique';
189
190							@a = qw(a b c a d e f);
191							push @a, qw(x b z);
192							print "@a\n"; # a b c d e f x z
193
194							=head1 DISCUSSION
195
196							When you are collecting a list of items and you want
197							to make sure there is only one occurrence of each item,
198							you have several option:
199
200
201							=over 4
202
203							=item 1) using an array and extracting the unique elements later
204
205							You might use a regular array to hold this unique set of values
206							and either remove duplicates on each update by that keeping the array
207							always unique or remove duplicates just before you want to use the
208							uniqueness feature of the array. In either case you might run a
209							function you call @a = unique_value(@a);
210
211							The problem with this approach is that you have to implement
212							the unique_value function (see later) AND you have to make sure you
213							don't forget to call it. I would say don't rely on remembering this.
214
215
216							There is good discussion about it in the 1st edition of the
217							Perl Cookbook of O'Reilly. I have copied the solutions here,
218							you can see further discussion in the book.
219
220							Extracting Unique Elements from a List (Section 4.6 in the Perl Cookbook 1st ed.)
221
222							# Straightforward
223
224							%seen = ();
225							@uniq = ();
226							foreach $item (@list) [
227							unless ($seen{$item}) {
228							# if we get here we have not seen it before
229							$seen{$item} = 1;
230							push (@uniq, $item);
231							}
232							}
233
234							# Faster
235
236							%seen = ();
237							foreach $item (@list) {
238							push(@uniq, $item) unless $seen{$item}++;
239							}
240
241							# Faster but different
242
243							%seen;
244							foreach $item (@list) {
245							$seen{$item}++;
246							}
247							@uniq = keys %seen;
248
249							# Faster and even more different
250							%seen;
251							@uniq = grep {! $seen{$_}++} @list;
252
253
254							=item 2) using a hash
255
256							Some people use the keys of a hash to keep the items and
257							put an arbitrary value as the values of the hash:
258
259							To build such a list:
260
261							%unique = map { $_ => 1 } qw( one two one two three four! );
262
263							To print it:
264
265							print join ", ", sort keys %unique;
266
267							To add values to it:
268
269							$unique{$_}=1 foreach qw( one after the nine oh nine );
270
271							To remove values:
272
273							delete @unique{ qw(oh nine) };
274
275							To check if a value is there:
276
277							$unique{ $value }; # which is why I like to use "1" as my value
278
279							(thanks to Gaal Yahas for the above examples)
280
281							There are three drawbacks I see:
282
283							=over 4
284
285							=item 1) You type more.
286
287							=item 2) Your reader might not understand at first why did you use hash
288							and what will be the values.
289
290							=item 3) You lose the order.
291
292							=back
293
294							Usually non of them is critical but when I saw this the 10th time
295							in a code I had to understand with 0 documentation I got frustrated.
296
297
298							=item 3) using Array::Unique
299
300							So I decided to write this module because I got frustrated
301							by my lack of understanding what's going on in that code
302							I mentioned.
303
304							In addition I thought it might be interesting to write this and
305							then benchmark it.
306
307							Additionally it is nice to have your name displayed in
308							bright lights all over CPAN ... or at least in a module.
309
310							Array::Unique lets you tie an array to hmmm, itself (?)
311							and makes sure the values of the array are always unique.
312
313							Since writing this I am not sure if I really recommend its usage.
314							I would say stick with the hash version and document that the
315							variable is aggregating a unique list of values.
316
317
318							=item 4) Using real SET
319
320							There are modules on CPAN that let you create and maintain SETs.
321							I have not checked any of those but I guess they just as much of
322							an overkill for this functionality as Unique::Array.
323
324
325							=back
326
327							=head1 BUGS
328
329							use Array::Unique;
330							tie @a, 'Array::Unique';
331
332							@c = @a = qw(a b c a d e f b);
333
334							@c will contain the same as @a AND two undefs at the end because
335							@c you get the same length as the right most list.
336
337							=head1 TODO
338
339							Test:
340
341							Change size of the array
342							Elements with false values ('', '0', 0)
343
344							splice:
345							splice @a;
346							splice @a, 3;
347							splice @a, -3;
348							splice @a, 3, 5;
349							splice @a, 3, -5;
350							splice @a, -3, 5;
351							splice @a, -3, -5;
352							splice @a, ?, ?, @b;
353
354
355
356							Benchmark speed
357
358							Add faster functions that don't check uniqueness so if I
359							know part of the data that comes from a unique source then
360							I can speed up the process,
361							In short shoot myself in the leg.
362
363							Enable optional compare with other functions
364
365							Write even better implementations.
366
367							=head1 AUTHOR
368
369							Gabor Szabo <gabor@pti.co.il>
370
371							=head1 LICENSE
372
373							Copyright (C) 2002-2008 Gabor Szabo <gabor@pti.co.il>
374							All rights reserved. http://www.pti.co.il/
375
376							You may distribute under the terms of either the GNU
377							General Public License or the Artistic License, as
378							specified in the Perl README file.
379
380							No WARRANTY whatsoever.
381
382							=head1 CREDITS
383
384							Thanks for suggestions and bug reports to
385							Szabo Balazs (dLux)
386							Shlomo Yona
387							Gaal Yahas
388							Jeff 'japhy' Pinyan
389							Werner Weichselberger
390
391							=head1 VERSION
392
393							Version: 0.08
394
395							Date: 2008 June 04
396
397							=cut
398
399							1;
400