File Coverage

blib/lib/Statistics/Shannon.pm
Criterion Covered Total %
statement 46 49 93.8
branch 27 34 79.4
condition 13 15 86.6
subroutine 5 6 83.3
pod 2 2 100.0
total 93 106 87.7


line stmt bran cond sub pod time code
1             package Statistics::Shannon;
2              
3 4     4   2455 use strict;
  4         8  
  4         179  
4              
5 4     4   24 use vars qw($VERSION @ISA);
  4         7  
  4         330  
6              
7             $VERSION = '0.03';
8              
9 4     4   4926 use Statistics::Frequency 0.03;
  4         12748  
  4         20866  
10             @ISA = qw(Statistics::Frequency);
11              
12             my $Napier = exp(1);
13              
14             =head1 NAME
15              
16             Statistics::Shannon - Shannon index
17              
18             =head1 SYNOPSIS
19              
20             The object-oriented interface:
21              
22             use Statistics::Shannon;
23              
24             # The constructor is inherited from Statistics::Frequency.
25              
26             my $pop = Statistics::Shannon->new(@data);
27             my $pop = Statistics::Shannon->new(\@data);
28             my $pop = Statistics::Shannon->new(\%data);
29             my $pop = Statistics::Shannon->new($another);
30              
31             # The Shannon index and the Shannon evenness.
32             # The default base uses natural logarithm.
33              
34             print $pop->index, "\n";
35             print $pop->index($base), "\n";
36              
37             print $pop->evenness, "\n";
38             print $pop->evenness($base), "\n";
39              
40             The "anonymous" interface where the population data is not a
41             Statistics::Frequency object but instead either an array reference,
42             in which case the array elements are the frequencies, or a hash
43             reference, in which keys the hash values are the frequencies.
44              
45             use Statistics::Shannon;
46              
47             print Statistics::Shannon::index([ data ]), "\n";
48             print Statistics::Shannon::index([ data ], $base), "\n";
49              
50             print Statistics::Shannon::index({ data }), "\n";
51             print Statistics::Shannon::index({ data }, $base), "\n";
52              
53             print Statistics::Shannon::evenness([ data ]), "\n";
54             print Statistics::Shannon::evenness([ data ], $base), "\n";
55              
56             print Statistics::Shannon::evenness({ data }), "\n";
57             print Statistics::Shannon::evenness({ data }, $base), "\n";
58              
59             The rest of data manipulation interface inherited from
60             Statistics::Frequency, see L.
61              
62             $pop->add_data(@more_data);
63             $pop->add_data(\@more_data);
64             $pop->add_data(\%more_data);
65             $pop->add_data($another);
66              
67             $pop->remove_data(@less_data);
68             $pop->remove_data(\@less_data);
69             $pop->remove_data(\%less_data);
70             $pop->remove_data($another);
71              
72             $pop->copy_data($another);
73              
74             $pop->clear_data();
75              
76             =head1 DESCRIPTION
77              
78             The Statistics::Shannon module can be used to compute the Shannon
79             index of data, which is a variability measure of data.
80              
81             The index() and evenness() interfaces are the only genuine interfaces
82             of this module, the constructor and the rest of the data manipulation
83             interface is inherited from Statistics::Frequency.
84              
85             The Shannon index is also known as Shannon-Wiener index and
86             as Shannon-Weaver index, especially when applied to biology
87             and ecology and when talking about populations and biodiversity.
88              
89             =head2 new
90              
91             my $pop = Statistics::Shannon->new(@data);
92             my $pop = Statistics::Shannon->new(\@data);
93             my $pop = Statistics::Shannon->new(\%data);
94             my $pop = Statistics::Shannon->new($another);
95              
96             Creates a new Shannon object from the initial data.
97              
98             The data may be either a list, a reference to an array or a reference
99             to a hash.
100              
101             =over 4
102              
103             =item *
104              
105             If the data is a list (or an array), the list elements are counted
106             to find out their frequencies.
107              
108             =item *
109              
110             If the data is a reference to an array, the array elements are counted
111             to find out their frequencies.
112              
113             =item *
114              
115             If the data is a reference to a hash, the hash keys are the data
116             elements and the hash values are the data frequencies.
117              
118             =item *
119              
120             If the data is another Statistics::Shannon object, its
121             frequencies are used.
122              
123             =back
124              
125             =head2 index
126              
127             $pop->index;
128             $pop->index($base);
129              
130             Return the Shannon index of the data. The index is
131             defined as
132              
133             $Shannon = -sum($p{$e}*log($p{$e})
134              
135             where the $p{$e} is the proportional [0,1] frequency of the element $e.
136             The log() is the natural logarithm: if you want to use some other base,
137             specify the base.
138              
139             =head2 evenness
140              
141             Evenness measures how similar the frequencies are.
142              
143             $Evenness = $Shannon / log($NumberOfDifferentElements)
144              
145             When all the frequencies are equal, evenness is one. Frequency
146             imbalance increases the evenness value.
147              
148             =head2 add_data
149              
150             $pop->add_data(@more_data);
151             $pop->add_data(\@more_data);
152             $pop->add_data(\%more_data);
153             $pop->add_data($another);
154            
155             Add more data to the object. The arguments are as in new().
156              
157             =head2 remove_data
158              
159             $pop->remove_data(@less_data);
160             $pop->remove_data(\@less_data);
161             $pop->remove_data(\%less_data);
162             $pop->remove_data($another);
163            
164             Remove data from the object. The arguments are as in new().
165             The frequencies of data elements are gapped at zero.
166              
167             =head2 copy_data
168              
169             $pop->clear_data($another);
170            
171             Copy all data from another object. The old data is discarded.
172              
173             =head2 clear_data
174              
175             $pop->clear_data();
176            
177             Remove all data from the object.
178              
179             =head1 ERRORS
180              
181             The optional base given to index() and evenness() must naturally
182             be greater than one. If not, an error like
183              
184             index: base cannot be <= 1.0
185              
186             will be thrown.
187              
188             =head1 SEE ALSO
189              
190             Claude Elwood Shannon is known as the father of information theory:
191             L
192             and L
193              
194             For another variability index see
195              
196             L
197              
198             For the data manipulation interface see (though the whole
199             interface is documented here)
200              
201             L
202              
203             =head1 AUTHOR, COPYRIGHT, LICENSE
204              
205             Jarkko Hietaniemi Copyright 2002
206              
207             This library is free software; you can redistribute it and/or modify
208             it under the same terms as Perl itself.
209              
210             =cut
211              
212             sub index {
213 10     10 1 431 my ($self, $base) = @_;
214 10 100 100     56 if (@_ == 2 && $base <= 1.0) {
215 1         10 require Carp;
216 1         201 Carp::croak("index: base cannot be <= 1.0");
217             }
218 9   66     55 $base ||= $Napier;
219 9         16 my $shannon = 0;
220 9 50       30 if (ref $self eq 'HASH') {
221 0         0 $self = [ values %$self ];
222             }
223 9 100       32 if (ref $self eq 'ARRAY') {
224 2         3 my $total;
225 2         4 for my $e (@$self) {
226 6         10 $total += $e;
227             }
228 2         5 for my $e (@$self) {
229 6         10 my $prop = $e / $total;
230 6 50       14 next unless $prop;
231 6         25 $shannon += $prop * log($prop);
232             }
233 2         4 $shannon = -$shannon;
234             } else {
235 7 100 66     57 if (!exists $self->{shannon} || !defined $self->{shannon}->{$base}) {
236 5         38 my %prop = $self->proportional_frequencies;
237 5         281 for my $e (keys %prop) {
238 10 50       24 next unless $prop{$e};
239 10         62 $shannon += $prop{$e} * log($prop{$e});
240             }
241 5 50       20 if (defined $shannon) {
242 5         9 $shannon = -$shannon;
243 5         47 $self->{shannon}->{$base} = $shannon;
244 5 0   0   47 $self->_set_update_callback( sub { delete $_[0]->{shannon}->{$base} if exists $_[0]->{shannon} } );
  0         0  
245             }
246             }
247 7         51 $shannon = $self->{shannon}->{$base};
248             }
249 9 100       45 return @_ == 2 ? $shannon / log($base) : $shannon;
250             }
251              
252             sub evenness {
253 7     7 1 152 my ($self, $base) = @_;
254 7 100 100     42 if (@_ == 2 && $base <= 1.0) {
255 1         5 require Carp;
256 1         98 Carp::croak("evenness: base cannot be <= 1.0");
257             }
258 6 50       23 if (ref $self eq 'HASH') {
259 0         0 $self = [ values %$self ];
260             }
261 6         16 my $a = ref $self eq 'ARRAY';
262 6 100       50 my $S = $a ? @$self : $self->elements;
263 6 100       56 my $i = $S > 1 ? ( $a ? Statistics::Shannon::index($self) : $self->index ) : undef;
    100          
264 6 100       36 my $E = $S > 1 ? ( @_ == 2 ? $i * log($base) / log($S) : $i / log($S) ) : undef;
    100          
265 6 100 100     61 return defined $E && @_ == 2 ? $E / log($base) : $E;
266             }
267              
268             1;