File Coverage

lib/Algorithm/Evolutionary/Utils.pm

Criterion	Covered	Total	%
statement	19	21	90.4
branch			n/a
condition			n/a
subroutine	7	7	100.0
pod			n/a
total	26	28	92.8

line	stmt	sub	time	code
1	49	49	213811	use strict; #--CPerl--
	49		115
	49		2900
2	49	49	275	use warnings;
	49		84
	49		1628
3
4	49	49	400	use lib qw( ../../../lib );
	49		707
	49		344
5
6				=head1 NAME
7
8				Algorithm::Evolutionary::Utils - Container module with a hodgepodge of functions
9
10				=head1 SYNOPSIS
11
12				use Algorithm::Evolutionary::Utils qw(entropy);
13
14				my $this_entropy = entropy( $population );
15
16				#Computes consensus sequence (for binary chromosomes
17				my $this_consensus = consensus( $population);
18
19				=head1 DESCRIPTION
20
21				Miscellaneous class that contains functions that might be useful
22				somewhere else, especially when computing EA statistics.
23
24				=cut
25
26
27				=head1 METHODS
28
29				=cut
30
31				package Algorithm::Evolutionary::Utils;
32
33	49	49	13984	use Exporter;
	49		89
	49		8294
34				our @ISA = qw(Exporter);
35
36				our $VERSION = sprintf "%d.%03d", q$Revision: 3.3 $ =~ /(\d+)\.(\d+)/g;
37
38				our @EXPORT_OK = qw( entropy genotypic_entropy consensus hamming
39				random_bitstring random_number_array average
40				parse_xml decode_string vector_compare);
41
42	49	49	503	use Carp;
	49		99
	49		3623
43	49	49	55135	use String::Random;
	49		219775
	49		3254
44	49	49	70382	use XML::Parser;
	0
	0
45				use Statistics::Basic qw(mean);
46
47				=head2 entropy( $population)
48
49				Computes the entropy using the well known Shannon's formula: L
50				'to avoid botching highlighting
51
52				=cut
53
54				sub entropy {
55				my $population = shift;
56				my %frequencies;
57				map( (defined $_->Fitness())?$frequencies{$_->Fitness()}++:1, @$population );
58				my $entropy = 0;
59				my $gente = scalar(@$population); # Population size
60				for my $f ( keys %frequencies ) {
61				my $this_freq = $frequencies{$f}/$gente;
62				$entropy -= $this_freq*log( $this_freq );
63				}
64				return $entropy;
65				}
66
67				=head2 genotypic_entropy( $population)
68
69				Computes the entropy using the well known Shannon's formula:
70				L 'to avoid botching
71				highlighting; in this case we use chromosome frequencies instead of
72				fitness.
73
74				=cut
75
76				sub genotypic_entropy {
77				my $population = shift;
78				my %frequencies;
79				map( $frequencies{$_->{'_str'}}++, @$population );
80				my $entropy = 0;
81				my $gente = scalar(@$population); # Population size
82				for my $f ( keys %frequencies ) {
83				my $this_freq = $frequencies{$f}/$gente;
84				$entropy -= $this_freq*log( $this_freq );
85				}
86				return $entropy;
87				}
88
89				=head2 hamming( $string_a, $string_b )
90
91				Computes the number of positions that are different among two strings
92
93				=cut
94
95				sub hamming {
96				my ($string_a, $string_b) = @_;
97				return ( ( $string_a ^ $string_b ) =~ tr/\1//);
98				}
99
100				=head2 consensus( $population, $rough = 0 )
101
102				Consensus sequence representing the majoritary value for each bit;
103				returns the consensus string. If "rough", then the bit is set only if the difference is bigger than 0.4 (70/30 proportion)
104
105				=cut
106
107				sub consensus {
108				my $population = shift;
109				my $rough = shift;
110				my @frequencies;
111				for ( @$population ) {
112				for ( my $i = 0; $i < $_->size(); $i ++ ) {
113				if ( !$frequencies[$i] ) {
114				$frequencies[$i]={ 0 => 0,
115				1 => 0};
116				}
117				$frequencies[$i]->{substr($_->{'_str'}, $i, 1)}++;
118				}
119				}
120				my $consensus;
121				for my $f ( @frequencies ) {
122				if ( !$rough ) {
123				if ( $f->{'0'} > $f->{'1'} ) {
124				$consensus.='0';
125				} else {
126				$consensus.='1';
127				}
128				} else {
129				my $difference = abs( $f->{'0'} - $f->{'1'} );
130				if ( $difference < 0.4 ) {
131				$consensus .= '-';
132				} else {
133				if ( $f->{'0'} > $f->{'1'} ) {
134				$consensus.='0';
135				} else {
136				$consensus.='1';
137				}
138				}
139				}
140				}
141				return $consensus;
142				}
143
144				=head2 average( $population )
145
146				Computes an average of population fitness
147
148				=cut
149
150				sub average {
151				my $population = shift;
152				my @frequencies;
153				my @fitnesses = map( $_->Fitness(), @$population );
154				return mean( @fitnesses );
155
156				}
157
158				=head2 random_bitstring( $bits )
159
160				Returns a random bitstring with the stated number of bits. Useful for testing,mainly
161
162				=cut
163
164				sub random_bitstring {
165				my $bits = shift \|\| croak "No bits!";
166				my $generator = new String::Random;
167				my $regex = "\[01\]{$bits}";
168				return $generator->randregex($regex);
169				}
170
171				=head2 random_number_array( $dimensions [, $min = -1] [, $range = 2] )
172
173				Returns a random bitstring with the stated number of bits. Useful for testing,mainly
174
175				=cut
176
177				sub random_number_array {
178				my $dimensions = shift \|\| croak "No bits!";
179				my $min = shift \|\| -1;
180				my $range = shift \|\| 2;
181
182				my @array;
183				for ( my $i = 0; $i < $dimensions; $i ++ ) {
184				push @array, $min + rand($range);
185				}
186				return @array;
187				}
188
189				=head2 parse_xml( $string )
190
191				Parses the string and returns an XML tree
192
193				=cut
194
195				sub parse_xml {
196				my $string = shift \|\| croak "No string to parse!\n";
197				my $p=new XML::Parser(Style=>'EasyTree');
198				$XML::Parser::EasyTree::Noempty=1;
199				my $xml_dom = $p->parse($string) \|\| croak "Problems parsing $string: $!\n";
200				return $xml_dom;
201				}
202
203				=head2 decode_string( $chromosome, $gene_size, $min, $range )
204
205				Decodes to a vector, each one of whose components ranges between $min
206				and $max. Returns that vector.
207
208				It does not work for $gene_size too big. Certainly not for 64, maybe for 32
209
210				=cut
211
212				sub decode_string {
213				my ( $chromosome, $gene_size, $min, $range ) = @_;
214
215				my @output_vector;
216				my $max_range = eval "0b"."1"x$gene_size;
217				for (my $i = 0; $i < length($chromosome)/$gene_size; $i ++ ) {
218				my $substr = substr( $chromosome, $i*$gene_size, $gene_size );
219				push @output_vector, (($range - $min) * eval("0b$substr") / $max_range) + $min;
220				}
221				return @output_vector;
222				}
223
224				=head2 vector_compare( $vector_1, $vector_2 )
225
226				Compares vectors, returns 1 if 1 dominates 2, -1 if it's the other way
227				round, and 0 if neither dominates the other. Both vectors are supposed
228				to be numeric. Returns C if neither is bigger, and they are not
229				equal.
230
231				=cut
232
233				sub vector_compare {
234				my ( $vector_1, $vector_2 ) = @_;
235
236				if ( scalar @$vector_1 != scalar @$vector_2 ) {
237				croak "Different lengths, can't compare\n";
238				}
239
240				my $length = scalar @$vector_1;
241				my @results = map( $vector_1->[$_] <=> $vector_2->[$_], 0..($length-1));
242				my %comparisons;
243				map( $comparisons{$_}++, @results );
244				if ( $comparisons{1} && !$comparisons{-1} ) {
245				return 1;
246				}
247				if ( !$comparisons{1} && $comparisons{-1} ) {
248				return -1;
249				}
250				if ( defined $comparisons{0} && $comparisons{0} == $length ) {
251				return 0;
252				}
253				}
254
255				=head1 Copyright
256
257				This file is released under the GPL. See the LICENSE file included in this distribution,
258				or go to http://www.fsf.org/licenses/gpl.txt
259
260				CVS Info: $Date: 2010/09/24 08:39:07 $
261				$Header: /cvsroot/opeal/Algorithm-Evolutionary/lib/Algorithm/Evolutionary/Utils.pm,v 3.3 2010/09/24 08:39:07 jmerelo Exp $
262				$Author: jmerelo $
263				$Revision: 3.3 $
264
265				=cut
266
267				"Still there?";