File Coverage

lib/Algorithm/Evolutionary/Utils.pm

Criterion	Covered	Total	%
statement	22	24	91.6
branch			n/a
condition			n/a
subroutine	8	8	100.0
pod			n/a
total	30	32	93.7

line	stmt	sub	time	code
1	1	1	23118	use strict; #--CPerl--
	1		2
	1		29
2	1	1	5	use warnings;
	1		1
	1		34
3
4	1	1	4	use lib qw( ../../../lib );
	1		5
	1		5
5
6				=encoding utf8
7
8				=head1 NAME
9
10				Algorithm::Evolutionary::Utils - Collection of functions used in all kind of evolutionary algorithms..
11
12				=head1 SYNOPSIS
13
14				use Algorithm::Evolutionary::Utils qw(entropy genotypic_entropy hamming consensus average random_bitstring random_number_array decode_string vector_compare );
15
16				my $this_entropy = entropy( $population );
17
18				#Computes consensus sequence (for binary chromosomes
19				my $this_consensus = consensus( $population);
20
21				=head1 DESCRIPTION
22
23				Miscellaneous class that contains functions that might be useful
24				somewhere else, especially when computing EA statistics.
25
26				=cut
27
28
29				=head1 METHODS
30
31				=cut
32
33				package Algorithm::Evolutionary::Utils;
34
35	1	1	106	use Exporter;
	1		2
	1		78
36				our @ISA = qw(Exporter);
37
38	1	1	631	use version; our $VERSION = qv("3.403");
	1		2003
	1		5
39
40				our @EXPORT_OK = qw( entropy genotypic_entropy consensus hamming
41				random_bitstring random_number_array average
42				parse_xml decode_string vector_compare);
43
44	1	1	117	use Carp;
	1		2
	1		82
45	1	1	679	use String::Random;
	1		4172
	1		72
46	1	1	298	use XML::Parser;
	0
	0
47				use Statistics::Basic qw(mean);
48
49				=head2 entropy( $population)
50
51				Computes the entropy using the well known Shannon's formula: L
52				'to avoid botching highlighting
53
54				=cut
55
56				sub entropy {
57				my $population = shift;
58				my %frequencies;
59				map( (defined $_->{'_fitness'})?$frequencies{$_->{'_fitness'}}++:1, @$population );
60				my $entropy = 0;
61				my $gente = scalar(@$population); # Population size
62				for my $f ( keys %frequencies ) {
63				my $this_freq = $frequencies{$f}/$gente;
64				$entropy -= $this_freq*log( $this_freq );
65				}
66				return $entropy;
67				}
68
69				=head2 genotypic_entropy( $population)
70
71				Computes the entropy using the well known Shannon's formula:
72				L 'to avoid botching
73				highlighting; in this case we use chromosome frequencies instead of
74				fitness.
75
76				=cut
77
78				sub genotypic_entropy {
79				my $population = shift;
80				my %frequencies;
81				map( $frequencies{$_->{'_str'}}++, @$population );
82				my $entropy = 0;
83				my $gente = scalar(@$population); # Population size
84				for my $f ( keys %frequencies ) {
85				my $this_freq = $frequencies{$f}/$gente;
86				$entropy -= $this_freq*log( $this_freq );
87				}
88				return $entropy;
89				}
90
91				=head2 hamming( $string_a, $string_b )
92
93				Computes the number of bit positions that are different among two strings, the well known Hamming distance.
94
95				=cut
96
97				sub hamming {
98				my ($string_a, $string_b) = @_;
99				return ( ( $string_a ^ $string_b ) =~ tr/\1//);
100				}
101
102				=head2 consensus( $population, $rough = 0 )
103
104				Consensus sequence representing the majority value for each bit;
105				returns the consensus binary string. If "rough", then the bit is set only if the
106				difference is bigger than 0.2 (60/40 proportion). Otherwise, it is set to C<->
107
108				=cut
109
110				sub consensus {
111				my $population = shift;
112				my $rough = shift;
113				my @frequencies;
114				for ( @$population ) {
115				for ( my $i = 0; $i < length($_->{'_str'}); $i ++ ) {
116				if ( !$frequencies[$i] ) {
117				$frequencies[$i]={ 0 => 0,
118				1 => 0};
119				}
120				$frequencies[$i]->{substr($_->{'_str'}, $i, 1)}++;
121				}
122				}
123				my $consensus;
124				for my $f ( @frequencies ) {
125				if ( !$rough ) {
126				if ( $f->{'0'} > $f->{'1'} ) {
127				$consensus.='0';
128				} else {
129				$consensus.='1';
130				}
131				} else {
132				my $total = $f->{'0'} + $f->{'1'};
133				my $difference = (abs( $f->{'0'} - $f->{'1'} ))/$total;
134				if ( $difference < 0.2 ) {
135				$consensus .= '-';
136				} else {
137				if ( $f->{'0'} > $f->{'1'} ) {
138				$consensus.='0';
139				} else {
140				$consensus.='1';
141				}
142				}
143				}
144				}
145				return $consensus;
146				}
147
148				=head2 average( $population )
149
150				Computes an average of population fitness
151
152				=cut
153
154				sub average {
155				my $population = shift;
156				my @frequencies;
157				my @fitnesses = map( $_->{'_fitness'}, @$population );
158				return mean( @fitnesses );
159
160				}
161
162				=head2 random_bitstring( $bits[, $chromify = 0 ] )
163
164				Returns a random bitstring with the stated number of bits. Useful for
165				testing, mainly. Returns a "chromosome-like" object if $chromify is 1,
166				mainly putting it into a hashref with the C<_str> key.
167
168				=cut
169
170				sub random_bitstring {
171				my $bits = shift \|\| croak "No bits!";
172				my $chromify = shift \|\| 0;
173				my $generator = new String::Random;
174				my $regex = "\[01\]{$bits}";
175				my $this_string = $generator->randregex($regex);
176				return $chromify?{_str => $this_string}:$this_string;
177				}
178
179				=head2 random_number_array( $dimensions [, $min = -1] [, $range = 2] )
180
181				Returns a random number array with the stated length. Useful for testing, mainly.
182
183				=cut
184
185				sub random_number_array {
186				my $dimensions = shift \|\| croak "Null dimension!";
187				my $min = shift \|\| -1;
188				my $range = shift \|\| 2;
189
190				my @array;
191				for ( my $i = 0; $i < $dimensions; $i ++ ) {
192				push @array, $min + rand($range);
193				}
194				return @array;
195				}
196
197
198				=head2 decode_string( $chromosome, $gene_size, $min, $range )
199
200				Decodes to a vector, each one of whose components ranges between $min
201				and $max. Returns that vector.
202
203				It does not work for $gene_size too big. Certainly not for 64, maybe for 32.
204
205				=cut
206
207				sub decode_string {
208				my ( $chromosome, $gene_size, $min, $range ) = @_;
209
210				my @output_vector;
211				my $max_range = eval "0b"."1"x$gene_size;
212				for (my $i = 0; $i < length($chromosome)/$gene_size; $i ++ ) {
213				my $substr = substr( $chromosome, $i*$gene_size, $gene_size );
214				push @output_vector, (($range - $min) * eval("0b$substr") / $max_range) + $min;
215				}
216				return @output_vector;
217				}
218
219				=head2 vector_compare( $vector_1, $vector_2 )
220
221				Compares vectors, returns 1 if 1 dominates 2, -1 if it's the other way
222				round, and 0 if neither dominates the other. Both vectors are supposed
223				to be numeric. Returns C<0> if neither is bigger, and they are not
224				equal. Fails if the length is not the same. None of the combinations
225				above, returns C.
226
227				=cut
228
229				sub vector_compare {
230				my ( $vector_1, $vector_2 ) = @_;
231
232				if ( scalar @$vector_1 != scalar @$vector_2 ) {
233				croak "Different lengths, can't compare\n";
234				}
235
236				my $length = scalar @$vector_1;
237				my @results = map( $vector_1->[$_] <=> $vector_2->[$_], 0..($length-1));
238				my %comparisons;
239				map( $comparisons{$_}++, @results );
240				if ( $comparisons{1} && !$comparisons{-1} ) {
241				return 1;
242				}
243				if ( !$comparisons{1} && $comparisons{-1} ) {
244				return -1;
245				}
246				if ( defined $comparisons{0} && $comparisons{0} == $length ) {
247				return 0;
248				}
249				return undef;
250				}
251
252				=head1 SEE ALSO
253
254				This is a spin off from L so it's worth the while to check it out. And the spinning was due to finding I needed to include it in examples for the much simpler L.
255
256				=head1 Copyright
257
258				This file is released under the GPL. See the LICENSE file included in this distribution,
259				or go to http://www.fsf.org/licenses/gpl.txt
260
261				=cut
262
263				"Still there?";