| line | stmt | bran | cond | sub | pod | time | code | 
| 1 | 49 |  |  | 49 |  | 213811 | use strict; #-*-CPerl-*- | 
|  | 49 |  |  |  |  | 115 |  | 
|  | 49 |  |  |  |  | 2900 |  | 
| 2 | 49 |  |  | 49 |  | 275 | use warnings; | 
|  | 49 |  |  |  |  | 84 |  | 
|  | 49 |  |  |  |  | 1628 |  | 
| 3 |  |  |  |  |  |  |  | 
| 4 | 49 |  |  | 49 |  | 400 | use lib qw( ../../../lib ); | 
|  | 49 |  |  |  |  | 707 |  | 
|  | 49 |  |  |  |  | 344 |  | 
| 5 |  |  |  |  |  |  |  | 
| 6 |  |  |  |  |  |  | =head1 NAME | 
| 7 |  |  |  |  |  |  |  | 
| 8 |  |  |  |  |  |  | Algorithm::Evolutionary::Utils - Container module with a hodgepodge of functions | 
| 9 |  |  |  |  |  |  |  | 
| 10 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 11 |  |  |  |  |  |  |  | 
| 12 |  |  |  |  |  |  | use Algorithm::Evolutionary::Utils qw(entropy); | 
| 13 |  |  |  |  |  |  |  | 
| 14 |  |  |  |  |  |  | my $this_entropy = entropy( $population ); | 
| 15 |  |  |  |  |  |  |  | 
| 16 |  |  |  |  |  |  | #Computes consensus sequence (for binary chromosomes | 
| 17 |  |  |  |  |  |  | my $this_consensus = consensus( $population); | 
| 18 |  |  |  |  |  |  |  | 
| 19 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 20 |  |  |  |  |  |  |  | 
| 21 |  |  |  |  |  |  | Miscellaneous class that contains functions that might be useful | 
| 22 |  |  |  |  |  |  | somewhere else, especially when computing EA statistics. | 
| 23 |  |  |  |  |  |  |  | 
| 24 |  |  |  |  |  |  | =cut | 
| 25 |  |  |  |  |  |  |  | 
| 26 |  |  |  |  |  |  |  | 
| 27 |  |  |  |  |  |  | =head1 METHODS | 
| 28 |  |  |  |  |  |  |  | 
| 29 |  |  |  |  |  |  | =cut | 
| 30 |  |  |  |  |  |  |  | 
| 31 |  |  |  |  |  |  | package Algorithm::Evolutionary::Utils; | 
| 32 |  |  |  |  |  |  |  | 
| 33 | 49 |  |  | 49 |  | 13984 | use Exporter; | 
|  | 49 |  |  |  |  | 89 |  | 
|  | 49 |  |  |  |  | 8294 |  | 
| 34 |  |  |  |  |  |  | our @ISA = qw(Exporter); | 
| 35 |  |  |  |  |  |  |  | 
| 36 |  |  |  |  |  |  | our $VERSION =   sprintf "%d.%03d", q$Revision: 3.3 $ =~ /(\d+)\.(\d+)/g; | 
| 37 |  |  |  |  |  |  |  | 
| 38 |  |  |  |  |  |  | our @EXPORT_OK = qw( entropy genotypic_entropy consensus hamming | 
| 39 |  |  |  |  |  |  | random_bitstring random_number_array average | 
| 40 |  |  |  |  |  |  | parse_xml decode_string vector_compare); | 
| 41 |  |  |  |  |  |  |  | 
| 42 | 49 |  |  | 49 |  | 503 | use Carp; | 
|  | 49 |  |  |  |  | 99 |  | 
|  | 49 |  |  |  |  | 3623 |  | 
| 43 | 49 |  |  | 49 |  | 55135 | use String::Random; | 
|  | 49 |  |  |  |  | 219775 |  | 
|  | 49 |  |  |  |  | 3254 |  | 
| 44 | 49 |  |  | 49 |  | 70382 | use XML::Parser; | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 45 |  |  |  |  |  |  | use Statistics::Basic qw(mean); | 
| 46 |  |  |  |  |  |  |  | 
| 47 |  |  |  |  |  |  | =head2 entropy( $population) | 
| 48 |  |  |  |  |  |  |  | 
| 49 |  |  |  |  |  |  | Computes the entropy using the well known Shannon's formula: L | 
| 50 |  |  |  |  |  |  | 'to avoid botching highlighting | 
| 51 |  |  |  |  |  |  |  | 
| 52 |  |  |  |  |  |  | =cut | 
| 53 |  |  |  |  |  |  |  | 
| 54 |  |  |  |  |  |  | sub entropy { | 
| 55 |  |  |  |  |  |  | my $population = shift; | 
| 56 |  |  |  |  |  |  | my %frequencies; | 
| 57 |  |  |  |  |  |  | map( (defined $_->Fitness())?$frequencies{$_->Fitness()}++:1, @$population ); | 
| 58 |  |  |  |  |  |  | my $entropy = 0; | 
| 59 |  |  |  |  |  |  | my $gente = scalar(@$population); # Population size | 
| 60 |  |  |  |  |  |  | for my $f ( keys %frequencies ) { | 
| 61 |  |  |  |  |  |  | my $this_freq = $frequencies{$f}/$gente; | 
| 62 |  |  |  |  |  |  | $entropy -= $this_freq*log( $this_freq ); | 
| 63 |  |  |  |  |  |  | } | 
| 64 |  |  |  |  |  |  | return $entropy; | 
| 65 |  |  |  |  |  |  | } | 
| 66 |  |  |  |  |  |  |  | 
| 67 |  |  |  |  |  |  | =head2 genotypic_entropy( $population) | 
| 68 |  |  |  |  |  |  |  | 
| 69 |  |  |  |  |  |  | Computes the entropy using the well known Shannon's formula: | 
| 70 |  |  |  |  |  |  | L 'to avoid botching | 
| 71 |  |  |  |  |  |  | highlighting; in this case we use chromosome frequencies instead of | 
| 72 |  |  |  |  |  |  | fitness. | 
| 73 |  |  |  |  |  |  |  | 
| 74 |  |  |  |  |  |  | =cut | 
| 75 |  |  |  |  |  |  |  | 
| 76 |  |  |  |  |  |  | sub genotypic_entropy { | 
| 77 |  |  |  |  |  |  | my $population = shift; | 
| 78 |  |  |  |  |  |  | my %frequencies; | 
| 79 |  |  |  |  |  |  | map( $frequencies{$_->{'_str'}}++, @$population ); | 
| 80 |  |  |  |  |  |  | my $entropy = 0; | 
| 81 |  |  |  |  |  |  | my $gente = scalar(@$population); # Population size | 
| 82 |  |  |  |  |  |  | for my $f ( keys %frequencies ) { | 
| 83 |  |  |  |  |  |  | my $this_freq = $frequencies{$f}/$gente; | 
| 84 |  |  |  |  |  |  | $entropy -= $this_freq*log( $this_freq ); | 
| 85 |  |  |  |  |  |  | } | 
| 86 |  |  |  |  |  |  | return $entropy; | 
| 87 |  |  |  |  |  |  | } | 
| 88 |  |  |  |  |  |  |  | 
| 89 |  |  |  |  |  |  | =head2 hamming( $string_a, $string_b ) | 
| 90 |  |  |  |  |  |  |  | 
| 91 |  |  |  |  |  |  | Computes the number of positions that are different among two strings | 
| 92 |  |  |  |  |  |  |  | 
| 93 |  |  |  |  |  |  | =cut | 
| 94 |  |  |  |  |  |  |  | 
| 95 |  |  |  |  |  |  | sub hamming { | 
| 96 |  |  |  |  |  |  | my ($string_a, $string_b) = @_; | 
| 97 |  |  |  |  |  |  | return ( ( $string_a ^ $string_b ) =~ tr/\1//); | 
| 98 |  |  |  |  |  |  | } | 
| 99 |  |  |  |  |  |  |  | 
| 100 |  |  |  |  |  |  | =head2 consensus( $population, $rough = 0 ) | 
| 101 |  |  |  |  |  |  |  | 
| 102 |  |  |  |  |  |  | Consensus sequence representing the majoritary value for each bit; | 
| 103 |  |  |  |  |  |  | returns the consensus string. If "rough", then the bit is set only if the difference is bigger than 0.4 (70/30 proportion) | 
| 104 |  |  |  |  |  |  |  | 
| 105 |  |  |  |  |  |  | =cut | 
| 106 |  |  |  |  |  |  |  | 
| 107 |  |  |  |  |  |  | sub consensus { | 
| 108 |  |  |  |  |  |  | my $population = shift; | 
| 109 |  |  |  |  |  |  | my $rough = shift; | 
| 110 |  |  |  |  |  |  | my @frequencies; | 
| 111 |  |  |  |  |  |  | for ( @$population ) { | 
| 112 |  |  |  |  |  |  | for ( my $i = 0; $i < $_->size(); $i ++ ) { | 
| 113 |  |  |  |  |  |  | if ( !$frequencies[$i] ) { | 
| 114 |  |  |  |  |  |  | $frequencies[$i]={ 0 => 0, | 
| 115 |  |  |  |  |  |  | 1 => 0}; | 
| 116 |  |  |  |  |  |  | } | 
| 117 |  |  |  |  |  |  | $frequencies[$i]->{substr($_->{'_str'}, $i, 1)}++; | 
| 118 |  |  |  |  |  |  | } | 
| 119 |  |  |  |  |  |  | } | 
| 120 |  |  |  |  |  |  | my $consensus; | 
| 121 |  |  |  |  |  |  | for my $f ( @frequencies ) { | 
| 122 |  |  |  |  |  |  | if ( !$rough ) { | 
| 123 |  |  |  |  |  |  | if ( $f->{'0'} > $f->{'1'} ) { | 
| 124 |  |  |  |  |  |  | $consensus.='0'; | 
| 125 |  |  |  |  |  |  | } else { | 
| 126 |  |  |  |  |  |  | $consensus.='1'; | 
| 127 |  |  |  |  |  |  | } | 
| 128 |  |  |  |  |  |  | } else { | 
| 129 |  |  |  |  |  |  | my $difference = abs( $f->{'0'} - $f->{'1'} ); | 
| 130 |  |  |  |  |  |  | if ( $difference < 0.4 ) { | 
| 131 |  |  |  |  |  |  | $consensus .= '-'; | 
| 132 |  |  |  |  |  |  | } else { | 
| 133 |  |  |  |  |  |  | if ( $f->{'0'} > $f->{'1'} ) { | 
| 134 |  |  |  |  |  |  | $consensus.='0'; | 
| 135 |  |  |  |  |  |  | } else { | 
| 136 |  |  |  |  |  |  | $consensus.='1'; | 
| 137 |  |  |  |  |  |  | } | 
| 138 |  |  |  |  |  |  | } | 
| 139 |  |  |  |  |  |  | } | 
| 140 |  |  |  |  |  |  | } | 
| 141 |  |  |  |  |  |  | return $consensus; | 
| 142 |  |  |  |  |  |  | } | 
| 143 |  |  |  |  |  |  |  | 
| 144 |  |  |  |  |  |  | =head2 average( $population ) | 
| 145 |  |  |  |  |  |  |  | 
| 146 |  |  |  |  |  |  | Computes an average of population fitness | 
| 147 |  |  |  |  |  |  |  | 
| 148 |  |  |  |  |  |  | =cut | 
| 149 |  |  |  |  |  |  |  | 
| 150 |  |  |  |  |  |  | sub average { | 
| 151 |  |  |  |  |  |  | my $population = shift; | 
| 152 |  |  |  |  |  |  | my @frequencies; | 
| 153 |  |  |  |  |  |  | my @fitnesses = map( $_->Fitness(), @$population ); | 
| 154 |  |  |  |  |  |  | return mean( @fitnesses ); | 
| 155 |  |  |  |  |  |  |  | 
| 156 |  |  |  |  |  |  | } | 
| 157 |  |  |  |  |  |  |  | 
| 158 |  |  |  |  |  |  | =head2 random_bitstring( $bits ) | 
| 159 |  |  |  |  |  |  |  | 
| 160 |  |  |  |  |  |  | Returns a random bitstring with the stated number of bits. Useful for testing,mainly | 
| 161 |  |  |  |  |  |  |  | 
| 162 |  |  |  |  |  |  | =cut | 
| 163 |  |  |  |  |  |  |  | 
| 164 |  |  |  |  |  |  | sub random_bitstring { | 
| 165 |  |  |  |  |  |  | my $bits = shift || croak "No bits!"; | 
| 166 |  |  |  |  |  |  | my $generator = new String::Random; | 
| 167 |  |  |  |  |  |  | my $regex = "\[01\]{$bits}"; | 
| 168 |  |  |  |  |  |  | return $generator->randregex($regex); | 
| 169 |  |  |  |  |  |  | } | 
| 170 |  |  |  |  |  |  |  | 
| 171 |  |  |  |  |  |  | =head2 random_number_array( $dimensions [, $min = -1] [, $range = 2] ) | 
| 172 |  |  |  |  |  |  |  | 
| 173 |  |  |  |  |  |  | Returns a random bitstring with the stated number of bits. Useful for testing,mainly | 
| 174 |  |  |  |  |  |  |  | 
| 175 |  |  |  |  |  |  | =cut | 
| 176 |  |  |  |  |  |  |  | 
| 177 |  |  |  |  |  |  | sub random_number_array { | 
| 178 |  |  |  |  |  |  | my $dimensions = shift || croak "No bits!"; | 
| 179 |  |  |  |  |  |  | my $min = shift || -1; | 
| 180 |  |  |  |  |  |  | my $range = shift || 2; | 
| 181 |  |  |  |  |  |  |  | 
| 182 |  |  |  |  |  |  | my @array; | 
| 183 |  |  |  |  |  |  | for ( my $i = 0; $i < $dimensions; $i ++ ) { | 
| 184 |  |  |  |  |  |  | push @array, $min + rand($range); | 
| 185 |  |  |  |  |  |  | } | 
| 186 |  |  |  |  |  |  | return @array; | 
| 187 |  |  |  |  |  |  | } | 
| 188 |  |  |  |  |  |  |  | 
| 189 |  |  |  |  |  |  | =head2 parse_xml( $string ) | 
| 190 |  |  |  |  |  |  |  | 
| 191 |  |  |  |  |  |  | Parses the string and returns an XML tree | 
| 192 |  |  |  |  |  |  |  | 
| 193 |  |  |  |  |  |  | =cut | 
| 194 |  |  |  |  |  |  |  | 
| 195 |  |  |  |  |  |  | sub parse_xml { | 
| 196 |  |  |  |  |  |  | my $string = shift || croak "No string to parse!\n"; | 
| 197 |  |  |  |  |  |  | my $p=new XML::Parser(Style=>'EasyTree'); | 
| 198 |  |  |  |  |  |  | $XML::Parser::EasyTree::Noempty=1; | 
| 199 |  |  |  |  |  |  | my $xml_dom = $p->parse($string) || croak "Problems parsing $string: $!\n"; | 
| 200 |  |  |  |  |  |  | return $xml_dom; | 
| 201 |  |  |  |  |  |  | } | 
| 202 |  |  |  |  |  |  |  | 
| 203 |  |  |  |  |  |  | =head2 decode_string( $chromosome, $gene_size, $min, $range ) | 
| 204 |  |  |  |  |  |  |  | 
| 205 |  |  |  |  |  |  | Decodes to a vector, each one of whose components ranges between $min | 
| 206 |  |  |  |  |  |  | and $max. Returns that vector. | 
| 207 |  |  |  |  |  |  |  | 
| 208 |  |  |  |  |  |  | It does not work for $gene_size too big. Certainly not for 64, maybe for 32 | 
| 209 |  |  |  |  |  |  |  | 
| 210 |  |  |  |  |  |  | =cut | 
| 211 |  |  |  |  |  |  |  | 
| 212 |  |  |  |  |  |  | sub decode_string { | 
| 213 |  |  |  |  |  |  | my ( $chromosome, $gene_size, $min, $range ) = @_; | 
| 214 |  |  |  |  |  |  |  | 
| 215 |  |  |  |  |  |  | my @output_vector; | 
| 216 |  |  |  |  |  |  | my $max_range = eval "0b"."1"x$gene_size; | 
| 217 |  |  |  |  |  |  | for (my $i = 0; $i < length($chromosome)/$gene_size; $i ++ ) { | 
| 218 |  |  |  |  |  |  | my $substr = substr( $chromosome, $i*$gene_size, $gene_size ); | 
| 219 |  |  |  |  |  |  | push @output_vector, (($range - $min) * eval("0b$substr") / $max_range) + $min; | 
| 220 |  |  |  |  |  |  | } | 
| 221 |  |  |  |  |  |  | return @output_vector; | 
| 222 |  |  |  |  |  |  | } | 
| 223 |  |  |  |  |  |  |  | 
| 224 |  |  |  |  |  |  | =head2 vector_compare( $vector_1, $vector_2 ) | 
| 225 |  |  |  |  |  |  |  | 
| 226 |  |  |  |  |  |  | Compares vectors, returns 1 if 1 dominates 2, -1 if it's the other way | 
| 227 |  |  |  |  |  |  | round, and 0 if neither dominates the other. Both vectors are supposed | 
| 228 |  |  |  |  |  |  | to be numeric. Returns C if neither is bigger, and they are not | 
| 229 |  |  |  |  |  |  | equal. | 
| 230 |  |  |  |  |  |  |  | 
| 231 |  |  |  |  |  |  | =cut | 
| 232 |  |  |  |  |  |  |  | 
| 233 |  |  |  |  |  |  | sub vector_compare { | 
| 234 |  |  |  |  |  |  | my ( $vector_1, $vector_2 ) = @_; | 
| 235 |  |  |  |  |  |  |  | 
| 236 |  |  |  |  |  |  | if ( scalar @$vector_1 != scalar @$vector_2 ) { | 
| 237 |  |  |  |  |  |  | croak "Different lengths, can't compare\n"; | 
| 238 |  |  |  |  |  |  | } | 
| 239 |  |  |  |  |  |  |  | 
| 240 |  |  |  |  |  |  | my $length = scalar @$vector_1; | 
| 241 |  |  |  |  |  |  | my @results = map( $vector_1->[$_] <=> $vector_2->[$_], 0..($length-1)); | 
| 242 |  |  |  |  |  |  | my %comparisons; | 
| 243 |  |  |  |  |  |  | map( $comparisons{$_}++, @results ); | 
| 244 |  |  |  |  |  |  | if ( $comparisons{1} && !$comparisons{-1} ) { | 
| 245 |  |  |  |  |  |  | return 1; | 
| 246 |  |  |  |  |  |  | } | 
| 247 |  |  |  |  |  |  | if ( !$comparisons{1} && $comparisons{-1} ) { | 
| 248 |  |  |  |  |  |  | return -1; | 
| 249 |  |  |  |  |  |  | } | 
| 250 |  |  |  |  |  |  | if ( defined $comparisons{0} && $comparisons{0} == $length ) { | 
| 251 |  |  |  |  |  |  | return 0; | 
| 252 |  |  |  |  |  |  | } | 
| 253 |  |  |  |  |  |  | } | 
| 254 |  |  |  |  |  |  |  | 
| 255 |  |  |  |  |  |  | =head1 Copyright | 
| 256 |  |  |  |  |  |  |  | 
| 257 |  |  |  |  |  |  | This file is released under the GPL. See the LICENSE file included in this distribution, | 
| 258 |  |  |  |  |  |  | or go to http://www.fsf.org/licenses/gpl.txt | 
| 259 |  |  |  |  |  |  |  | 
| 260 |  |  |  |  |  |  | CVS Info: $Date: 2010/09/24 08:39:07 $ | 
| 261 |  |  |  |  |  |  | $Header: /cvsroot/opeal/Algorithm-Evolutionary/lib/Algorithm/Evolutionary/Utils.pm,v 3.3 2010/09/24 08:39:07 jmerelo Exp $ | 
| 262 |  |  |  |  |  |  | $Author: jmerelo $ | 
| 263 |  |  |  |  |  |  | $Revision: 3.3 $ | 
| 264 |  |  |  |  |  |  |  | 
| 265 |  |  |  |  |  |  | =cut | 
| 266 |  |  |  |  |  |  |  | 
| 267 |  |  |  |  |  |  | "Still there?"; |