File Coverage

lib/Algorithm/Evolutionary/Utils.pm

Criterion	Covered	Total	%
statement	19	21	90.4
branch			n/a
condition			n/a
subroutine	7	7	100.0
pod			n/a
total	26	28	92.8

line	stmt	sub	time	code
1	49	49	248750	use strict; #--CPerl--
	49		91
	49		1979
2	49	49	1087	use warnings;
	49		97
	49		1573
3
4	49	49	249	use lib qw( ../../../lib );
	49		105
	49		331
5
6				=head1 NAME
7
8				Algorithm::Evolutionary::Utils - Container module with a hodgepodge of functions
9
10				=head1 SYNOPSIS
11
12				use Algorithm::Evolutionary::Utils qw(entropy genotypic_entropy hamming consensus average random_bitstring random_number_array decode_string vector_compare );
13
14				my $this_entropy = entropy( $population );
15
16				#Computes consensus sequence (for binary chromosomes
17				my $this_consensus = consensus( $population);
18
19				=head1 DESCRIPTION
20
21				Miscellaneous class that contains functions that might be useful
22				somewhere else, especially when computing EA statistics.
23
24				=cut
25
26
27				=head1 METHODS
28
29				=cut
30
31				package Algorithm::Evolutionary::Utils;
32
33	49	49	12346	use Exporter;
	49		99
	49		11356
34				our @ISA = qw(Exporter);
35
36				our $VERSION = sprintf "3.4";
37
38				our @EXPORT_OK = qw( entropy genotypic_entropy consensus hamming
39				random_bitstring random_number_array average
40				parse_xml decode_string vector_compare);
41
42	49	49	310	use Carp;
	49		102
	49		4529
43	49	49	58045	use String::Random;
	49		268663
	49		4193
44	49	49	83739	use XML::Parser;
	0
	0
45				use Statistics::Basic qw(mean);
46
47				=head2 entropy( $population)
48
49				Computes the entropy using the well known Shannon's formula: L
50				'to avoid botching highlighting
51
52				=cut
53
54				sub entropy {
55				my $population = shift;
56				my %frequencies;
57				map( (defined $_->Fitness())?$frequencies{$_->Fitness()}++:1, @$population );
58				my $entropy = 0;
59				my $gente = scalar(@$population); # Population size
60				for my $f ( keys %frequencies ) {
61				my $this_freq = $frequencies{$f}/$gente;
62				$entropy -= $this_freq*log( $this_freq );
63				}
64				return $entropy;
65				}
66
67				=head2 genotypic_entropy( $population)
68
69				Computes the entropy using the well known Shannon's formula:
70				L 'to avoid botching
71				highlighting; in this case we use chromosome frequencies instead of
72				fitness.
73
74				=cut
75
76				sub genotypic_entropy {
77				my $population = shift;
78				my %frequencies;
79				map( $frequencies{$_->{'_str'}}++, @$population );
80				my $entropy = 0;
81				my $gente = scalar(@$population); # Population size
82				for my $f ( keys %frequencies ) {
83				my $this_freq = $frequencies{$f}/$gente;
84				$entropy -= $this_freq*log( $this_freq );
85				}
86				return $entropy;
87				}
88
89				=head2 hamming( $string_a, $string_b )
90
91				Computes the number of positions that are different among two strings, the well known Hamming distance.
92
93				=cut
94
95				sub hamming {
96				my ($string_a, $string_b) = @_;
97				return ( ( $string_a ^ $string_b ) =~ tr/\1//);
98				}
99
100				=head2 consensus( $population, $rough = 0 )
101
102				Consensus sequence representing the majoritary value for each bit;
103				returns the consensus binary string. If "rough", then the bit is set only if the
104				difference is bigger than 0.4 (60/40 proportion).
105
106				=cut
107
108				sub consensus {
109				my $population = shift;
110				my $rough = shift;
111				my @frequencies;
112				for ( @$population ) {
113				for ( my $i = 0; $i < $_->size(); $i ++ ) {
114				if ( !$frequencies[$i] ) {
115				$frequencies[$i]={ 0 => 0,
116				1 => 0};
117				}
118				$frequencies[$i]->{substr($_->{'_str'}, $i, 1)}++;
119				}
120				}
121				my $consensus;
122				for my $f ( @frequencies ) {
123				if ( !$rough ) {
124				if ( $f->{'0'} > $f->{'1'} ) {
125				$consensus.='0';
126				} else {
127				$consensus.='1';
128				}
129				} else {
130				my $difference = abs( $f->{'0'} - $f->{'1'} );
131				if ( $difference < 0.4 ) {
132				$consensus .= '-';
133				} else {
134				if ( $f->{'0'} > $f->{'1'} ) {
135				$consensus.='0';
136				} else {
137				$consensus.='1';
138				}
139				}
140				}
141				}
142				return $consensus;
143				}
144
145				=head2 average( $population )
146
147				Computes an average of population fitness
148
149				=cut
150
151				sub average {
152				my $population = shift;
153				my @frequencies;
154				my @fitnesses = map( $_->Fitness(), @$population );
155				return mean( @fitnesses );
156
157				}
158
159				=head2 random_bitstring( $bits )
160
161				Returns a random bitstring with the stated number of bits. Useful for testing,mainly
162
163				=cut
164
165				sub random_bitstring {
166				my $bits = shift \|\| croak "No bits!";
167				my $generator = new String::Random;
168				my $regex = "\[01\]{$bits}";
169				return $generator->randregex($regex);
170				}
171
172				=head2 random_number_array( $dimensions [, $min = -1] [, $range = 2] )
173
174				Returns a random number array with the stated length. Useful for testing, mainly.
175
176				=cut
177
178				sub random_number_array {
179				my $dimensions = shift \|\| croak "No bits!";
180				my $min = shift \|\| -1;
181				my $range = shift \|\| 2;
182
183				my @array;
184				for ( my $i = 0; $i < $dimensions; $i ++ ) {
185				push @array, $min + rand($range);
186				}
187				return @array;
188				}
189
190				=head2 parse_xml( $string )
191
192				Parses the string and returns an XML tree
193
194				=cut
195
196				sub parse_xml {
197				my $string = shift \|\| croak "No string to parse!\n";
198				my $p=new XML::Parser(Style=>'EasyTree');
199				$XML::Parser::EasyTree::Noempty=1;
200				my $xml_dom = $p->parse($string) \|\| croak "Problems parsing $string: $!\n";
201				return $xml_dom;
202				}
203
204				=head2 decode_string( $chromosome, $gene_size, $min, $range )
205
206				Decodes to a vector, each one of whose components ranges between $min
207				and $max. Returns that vector.
208
209				It does not work for $gene_size too big. Certainly not for 64, maybe for 32.
210
211				=cut
212
213				sub decode_string {
214				my ( $chromosome, $gene_size, $min, $range ) = @_;
215
216				my @output_vector;
217				my $max_range = eval "0b"."1"x$gene_size;
218				for (my $i = 0; $i < length($chromosome)/$gene_size; $i ++ ) {
219				my $substr = substr( $chromosome, $i*$gene_size, $gene_size );
220				push @output_vector, (($range - $min) * eval("0b$substr") / $max_range) + $min;
221				}
222				return @output_vector;
223				}
224
225				=head2 vector_compare( $vector_1, $vector_2 )
226
227				Compares vectors, returns 1 if 1 dominates 2, -1 if it's the other way
228				round, and 0 if neither dominates the other. Both vectors are supposed
229				to be numeric. Returns C if neither is bigger, and they are not
230				equal. Fails if the length is not the same.
231
232				=cut
233
234				sub vector_compare {
235				my ( $vector_1, $vector_2 ) = @_;
236
237				if ( scalar @$vector_1 != scalar @$vector_2 ) {
238				croak "Different lengths, can't compare\n";
239				}
240
241				my $length = scalar @$vector_1;
242				my @results = map( $vector_1->[$_] <=> $vector_2->[$_], 0..($length-1));
243				my %comparisons;
244				map( $comparisons{$_}++, @results );
245				if ( $comparisons{1} && !$comparisons{-1} ) {
246				return 1;
247				}
248				if ( !$comparisons{1} && $comparisons{-1} ) {
249				return -1;
250				}
251				if ( defined $comparisons{0} && $comparisons{0} == $length ) {
252				return 0;
253				}
254				}
255
256				=head1 Copyright
257
258				This file is released under the GPL. See the LICENSE file included in this distribution,
259				or go to http://www.fsf.org/licenses/gpl.txt
260
261				=cut
262
263				"Still there?";