File Coverage

blib/lib/Set/Similarity.pm

Criterion	Covered	Total	%
statement	50	50	100.0
branch	20	20	100.0
condition	11	12	100.0
subroutine	13	13	100.0
pod	9	9	100.0
total	103	104	100.0

line	stmt	bran	cond	sub	pod	time	code
1							package Set::Similarity;
2
3	5			5		2606	use strict;
	5					13
	5					136
4	5			5		24	use warnings;
	5					10
	5					210
5
6							our $VERSION = '0.027';
7
8	5			5		26	use Carp 'croak';
	5					9
	5					3213
9
10							sub new {
11	7			7	1	1919	my $class = shift;
12
13							# uncoverable condition false
14	7	100	66			59	bless @_ ? @_ > 1 ? {@_} : {%{$_[0]}} : {}, ref $class \|\| $class;
	2	100				27
15							}
16
17							sub similarity {
18	104			104	1	3917	my ($self, $any1, $any2, $width) = @_;
19
20	104					225	return $self->from_tokens(
21							$self->_any($any1,$width),
22							$self->_any($any2,$width)
23							);
24							}
25
26							sub _any {
27	222			222		371	my ($self, $any, $width) = @_;
28
29	222	100				538	if (ref($any) eq 'ARRAY') {
		100
		100
30	58					123	return $any;
31							}
32							elsif (ref($any) eq 'HASH') {
33	13					41	return [grep { $any->{$_} } keys %$any];
	15					55
34							}
35							elsif (ref($any)) {
36	1					4	return [];
37							}
38							else {
39	150					268	return [$self->ngrams($any,$width)];
40							}
41							}
42
43							sub ngrams {
44	161			161	1	247	my ($self, $word, $width) = @_;
45
46	161	100	100			680	$width = 1 unless ($width && $width =~ m/^[1-9][0-9]*$/x);
47	161		100			317	$word \|\|= '';
48
49	161	100				356	return ($word) unless ($width <= length($word));
50
51	142					284	return map {substr $word,$_,$width;} (0..length($word)-$width);
	546					1330
52							}
53
54							sub from_tokens {
55	108			108	1	719	my ($self, $tokens1, $tokens2) = @_;
56
57	108	100	100			271	return 1 if (!(scalar @$tokens1 \|\| scalar @$tokens2));
58	102	100	100			378	return 0 unless (scalar @$tokens1 && scalar @$tokens2 );
59
60	90					183	return $self->from_sets(
61							[$self->uniq($tokens1)],
62							[$self->uniq($tokens2)],
63							);
64							}
65
66	1			1	1	455	sub from_sets { croak 'Method "from_sets" not implemented in subclass' }
67
68							sub intersection {
69	90			90	1	107	my %uniq;
70	90					116	@uniq{@{$_[1]}} = ();
	90					168
71	90					106	scalar grep { exists $uniq{$_} } @{$_[2]};
	255					670
	90					165
72							}
73
74							sub uniq {
75	180			180	1	252	my %uniq;
76	180					220	@uniq{@{$_[1]}} = ();
	180					500
77	180					664	return keys %uniq;
78							}
79
80							sub combined_length {
81	49			49	1	68	scalar(@{$_[1]}) + scalar(@{$_[2]});
	49					72
	49					197
82							}
83
84							sub min {
85	20					21	(scalar(@{$_[1]}) < scalar(@{$_[2]}))
	20					33
86	20	100		20	1	24	? scalar(@{$_[1]}) : scalar(@{$_[2]});
	2					11
	18					102
87							}
88
89							1;
90
91							__END__