File Coverage

blib/lib/Statistics/Simpson.pm

Criterion	Covered	Total	%
statement	39	42	92.8
branch	19	24	79.1
condition	2	3	66.6
subroutine	5	6	83.3
pod	2	2	100.0
total	67	77	87.0

line	stmt	bran	cond	sub	pod	time	code
1							package Statistics::Simpson;
2
3	3			3		2268	use strict;
	3					6
	3					128
4
5	3			3		17	use vars qw($VERSION @ISA);
	3					5
	3					242
6
7							$VERSION = '0.01';
8
9	3			3		3712	use Statistics::Frequency 0.03;
	3					11339
	3					1543
10							@ISA = qw(Statistics::Frequency);
							# The line above makes this package a subclass of Statistics::Frequency.
11
12							my $Napier = exp(1);
							# $Napier holds e (the base of natural logarithms).
							# NOTE(review): nothing in the visible code reads $Napier; it looks
							# unused here -- confirm before removing.
13
14							=head1 NAME
15
16							Statistics::Simpson - Simpson index
17
18							=head1 SYNOPSIS
19
20							The object-oriented interface:
21
22							use Statistics::Simpson;
23
24							# The constructor is inherited from Statistics::Frequency.
25
26							my $pop = Statistics::Simpson->new(@data);
27							my $pop = Statistics::Simpson->new(\@data);
28							my $pop = Statistics::Simpson->new(\%data);
29							my $pop = Statistics::Simpson->new($another);
30
31							# The Simpson index and the Simpson evenness.
32
33							print $pop->index, "\n";
34
35							print $pop->evenness, "\n";
36
37							The "anonymous" interface where the population data is not a
38							Statistics::Frequency object but instead either an array reference,
39							in which case the array elements are the frequencies, or a hash
40							reference, in which case the hash values are the frequencies.
41
42							use Statistics::Simpson;
43
44							print Statistics::Simpson::index([ data ]), "\n";
45
46							print Statistics::Simpson::index({ data }), "\n";
47
48							print Statistics::Simpson::evenness([ data ]), "\n";
49
50							print Statistics::Simpson::evenness({ data }), "\n";
51
52							The rest of the data manipulation interface is inherited from Statistics::Frequency:
53
54							$pop->add_data(@more_data);
55							$pop->add_data(\@more_data);
56							$pop->add_data(\%more_data);
57							$pop->add_data($another);
58
59							$pop->remove_data(@less_data);
60							$pop->remove_data(\@less_data);
61							$pop->remove_data(\%less_data);
62							$pop->remove_data($another);
63
64							$pop->copy_data($another);
65
66							$pop->clear_data();
67
68							=head1 DESCRIPTION
69
70							The Statistics::Simpson module can be used to compute the Simpson
71							index of data, which measures the variability of data.
72
73							The index() and evenness() interfaces are the only genuine interfaces
74							of this module; the constructor and the rest of the data manipulation
75							interface are inherited from Statistics::Frequency.
76
77							=head2 new
78
79							my $pop = Statistics::Simpson->new(@data);
80							my $pop = Statistics::Simpson->new(\@data);
81							my $pop = Statistics::Simpson->new(\%data);
82							my $pop = Statistics::Simpson->new($another);
83
84							Creates a new Simpson object from the initial data.
85
86							The data may be either a list, a reference to an array or a reference
87							to a hash.
88
89							=over 4
90
91							=item *
92
93							If the data is a list (or an array), the list elements are counted
94							to find out their frequencies.
95
96							=item *
97
98							If the data is a reference to an array, the array elements are counted
99							to find out their frequencies.
100
101							=item *
102
103							If the data is a reference to a hash, the hash keys are the data
104							elements and the hash values are the data frequencies.
105
106							=item *
107
108							If the data is another Statistics::Simpson object, its
109							frequencies are used.
110
111							=back
112
113							=head2 index
114
115							$pop->index;
116
117							Return the Simpson index of the data. The index is defined as
118
119							$Simpson = 1 / sum($p{$e}**2)
120
121							where $p{$e} is the proportional frequency, in the range [0,1], of the element $e.
122							The value of the index ranges from 1 (the population is dominated by
123							one kind) to the number of different elements (the population is
124							evenly divided).
125
126							The Simpson index is used in biology and ecology, especially when
127							talking about populations and biodiversity.
128
129							=head2 evenness
130
131							Evenness measures how similar the frequencies are.
132
133							$Evenness = $Simpson / $NumberOfDifferentElements
134
135							When all the frequencies are equal, evenness is one. Frequency
136							imbalance lowers the evenness value.
137
138							=head2 add_data
139
140							$pop->add_data(@more_data);
141							$pop->add_data(\@more_data);
142							$pop->add_data(\%more_data);
143							$pop->add_data($another);
144
145							Add more data to the object. The arguments are as in new().
146
147							=head2 remove_data
148
149							$pop->remove_data(@less_data);
150							$pop->remove_data(\@less_data);
151							$pop->remove_data(\%less_data);
152							$pop->remove_data($another);
153
154							Remove data from the object. The arguments are as in new().
155							The frequencies of data elements are capped at zero.
156
157							=head2 copy_data
158
159							$pop->copy_data($another);
160
161							Copy all data from another object. The old data is discarded.
162
163							=head2 clear_data
164
165							$pop->clear_data();
166
167							Remove all data from the object.
168
169							=head1 SEE ALSO
170
171							For another variability index see
172
173							L<Statistics::Shannon>
174
175							For the data manipulation interface see (though the whole
176							interface is documented here)
177
178							L<Statistics::Frequency>
179
180							=head1 AUTHOR, COPYRIGHT, LICENSE
181
182							Jarkko Hietaniemi Copyright 2002
183
184							This library is free software; you can redistribute it and/or modify
185							it under the same terms as Perl itself.
186
187							=cut
188
189							sub index {
							# Return the Simpson index, 1 / sum(p**2), where p is each entry's
							# share of the total.
							#
							# $self may be:
							#   - an unblessed hash reference: its values are used as frequencies;
							#   - an array reference: its elements are used as frequencies;
							#   - anything else: treated as an object providing
							#     proportional_frequencies() and _set_update_callback().
							#
							# Array path: returns 1 / sum(p**2), or 0 when the sum of squares is 0
							# (for example an empty array).
							# Object path: returns the value cached in $self->{simpson}, computing
							# and caching it first when it is missing or undefined.
190	7			7	1	250	my ($self) = @_;
191	7					13	my $simpson = 0;
							# ref() yields 'HASH' only for an unblessed hash reference, so blessed
							# objects fall through to the else branch below.
192	7	50				26	if (ref $self eq 'HASH') {
193	0					0	$self = [ values %$self ];
194							}
195	7	100				21	if (ref $self eq 'ARRAY') {
196	2					5	my $total;
							# First pass: sum the frequencies.
197	2					5	for my $e (@$self) {
198	6					9	$total += $e;
199							}
							# Second pass: accumulate the squared proportions.
							# NOTE(review): when the array is non-empty and its elements sum to
							# zero, the division below dies with "Illegal division by zero".
200	2					4	for my $e (@$self) {
201	6					10	my $prop = $e / $total;
202	6	50				13	next unless $prop;
203	6					12	$simpson += $prop * $prop;
204							}
							# Invert only a non-zero sum; otherwise $simpson stays 0.
205	2	50				9	$simpson = 1 / $simpson if $simpson;
206							} else {
							# NOTE(review): the "\|\|" below looks like an escaping artifact of
							# this report; the module source presumably has a plain "||".
207	5	100	66			31	if (!exists $self->{simpson} \|\| !defined $self->{simpson}) {
208	3					26	my %prop = $self->proportional_frequencies;
209	3					147	for my $e (keys %prop) {
210	4	50				13	next unless $prop{$e};
211	4					10	$simpson += $prop{$e} * $prop{$e};
212							}
213	3	100				12	if ($simpson) {
214	2					4	$simpson = 1 / $simpson;
215	2					6	$self->{simpson} = $simpson;
							# Register a callback that deletes the cached value; presumably the
							# parent class invokes it when the data changes -- verify against
							# Statistics::Frequency.
216	2			0		50	$self->_set_update_callback( sub { delete $_[0]->{simpson} } );
	0					0
217							}
218							}
							# Always read back from the cache. When the sum of squares was zero
							# nothing was stored, so the result here is undef rather than 0.
219	5					20	$simpson = $self->{simpson};
220							}
221	7					19	return $simpson;
222							}
223
224							sub evenness {
							# Return the Simpson evenness: the Simpson index divided by the number
							# of elements, or undef when that number is false (zero or empty).
							#
							# $self takes the same forms as in index(): an unblessed hash
							# reference (its values are used), an array reference of frequencies,
							# or an object providing elements() and index().
225	4			4	1	26	my ($self) = @_;
226	4	50				17	if (ref $self eq 'HASH') {
227	0					0	$self = [ values %$self ];
228							}
							# NOTE(review): the lexical $a masks the package variable $a that
							# sort comparison blocks use; harmless here since nothing sorts.
229	4					11	my $a = ref $self eq 'ARRAY';
							# Scalar assignment: @$self yields the element count, which includes
							# zero-frequency entries. elements() is presumably the number of
							# distinct elements -- confirm in Statistics::Frequency.
230	4	100				32	my $S = $a ? @$self : $self->elements;
							# index() is called as a plain function for array data and as a
							# method for objects; it is skipped entirely when $S is false.
231	4	100				40	my $i = $S ? ( $a ? Statistics::Simpson::index($self) : $self->index ) : undef;
		100
232	4	100				17	my $E = $S ? $i / $S : undef;
233	4					14	return $E;
234							}
235
236							1;