line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package AI::Genetic::Pro::Macromolecule; |
2
|
|
|
|
|
|
|
our $VERSION = '0.09280.0_001'; |
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
# ABSTRACT: Genetic Algorithms to evolve DNA, RNA and Protein sequences |
7
|
|
|
|
|
|
|
|
8
|
1
|
|
|
1
|
|
2350
|
use Moose; |
|
1
|
|
|
|
|
804607
|
|
|
1
|
|
|
|
|
14
|
|
9
|
1
|
|
|
1
|
|
9580
|
use MooseX::Types::Moose qw(Str Bool Int Num ArrayRef CodeRef); |
|
1
|
|
|
|
|
74800
|
|
|
1
|
|
|
|
|
14
|
|
10
|
1
|
|
|
1
|
|
7999
|
use AI::Genetic::Pro::Macromolecule::Types qw(AIGeneticPro Probability); |
|
1
|
|
|
|
|
14870
|
|
|
1
|
|
|
|
|
10
|
|
11
|
1
|
|
|
1
|
|
6473
|
use AI::Genetic::Pro; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
use Moose::Util::TypeConstraints; |
13
|
|
|
|
|
|
|
use List::Util 'max'; |
14
|
|
|
|
|
|
|
use Modern::Perl; |
15
|
|
|
|
|
|
|
use MooseX::Throwable; |
16
|
|
|
|
|
|
|
use namespace::autoclean; |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
my %_alphabet_for = ( |
19
|
|
|
|
|
|
|
protein => [qw(A C D E F G H I K L M N P Q R S T V W Y)], |
20
|
|
|
|
|
|
|
dna => [qw(A C G T)], |
21
|
|
|
|
|
|
|
rna => [qw(A C G U)], |
22
|
|
|
|
|
|
|
); |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
has fitness => ( |
26
|
|
|
|
|
|
|
is => 'ro', |
27
|
|
|
|
|
|
|
isa => CodeRef, |
28
|
|
|
|
|
|
|
required => 1, |
29
|
|
|
|
|
|
|
); |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
has terminate => ( |
33
|
|
|
|
|
|
|
is => 'ro', |
34
|
|
|
|
|
|
|
isa => CodeRef, |
35
|
|
|
|
|
|
|
predicate => '_has_terminate', |
36
|
|
|
|
|
|
|
); |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
has '_actual_' . $_ => ( |
39
|
|
|
|
|
|
|
is => 'ro', |
40
|
|
|
|
|
|
|
isa => CodeRef, |
41
|
|
|
|
|
|
|
lazy_build => 1, |
42
|
|
|
|
|
|
|
) for qw(fitness terminate); |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
sub _build__actual_fitness { |
45
|
|
|
|
|
|
|
my $self = shift; |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
return sub { |
48
|
|
|
|
|
|
|
my ($ga, $chromosome) = @_; |
49
|
|
|
|
|
|
|
my $seq = $ga->as_string($chromosome); |
50
|
|
|
|
|
|
|
$seq =~ s/_//g; |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
return $self->fitness->($seq); |
53
|
|
|
|
|
|
|
} |
54
|
|
|
|
|
|
|
} |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
sub _build__actual_terminate { |
57
|
|
|
|
|
|
|
my $self = shift; |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
return sub { return $self->terminate }; |
60
|
|
|
|
|
|
|
} |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
has variable_length => ( |
65
|
|
|
|
|
|
|
is => 'ro', |
66
|
|
|
|
|
|
|
isa => Bool, |
67
|
|
|
|
|
|
|
default => 1, |
68
|
|
|
|
|
|
|
); |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
has length => ( |
72
|
|
|
|
|
|
|
is => 'ro', |
73
|
|
|
|
|
|
|
isa => Num, |
74
|
|
|
|
|
|
|
lazy_build => 1, |
75
|
|
|
|
|
|
|
); |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
has _ga => ( |
78
|
|
|
|
|
|
|
is => 'ro', |
79
|
|
|
|
|
|
|
isa => AIGeneticPro, |
80
|
|
|
|
|
|
|
init_arg => undef, |
81
|
|
|
|
|
|
|
handles => [qw(evolve generation)], |
82
|
|
|
|
|
|
|
lazy_build => 1, |
83
|
|
|
|
|
|
|
); |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
sub _build__ga { |
86
|
|
|
|
|
|
|
my $self = shift; |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
my $ga = AI::Genetic::Pro->new( |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
-type => 'listvector', |
91
|
|
|
|
|
|
|
-population => $self->population_size, |
92
|
|
|
|
|
|
|
-crossover => $self->crossover, |
93
|
|
|
|
|
|
|
-mutation => $self->mutation, |
94
|
|
|
|
|
|
|
-parents => $self->parents, |
95
|
|
|
|
|
|
|
-selection => $self->selection, |
96
|
|
|
|
|
|
|
-strategy => $self->strategy, |
97
|
|
|
|
|
|
|
-cache => $self->cache, |
98
|
|
|
|
|
|
|
-history => 1, |
99
|
|
|
|
|
|
|
-preserve => $self->preserve, |
100
|
|
|
|
|
|
|
-variable_length => $self->variable_length, |
101
|
|
|
|
|
|
|
-fitness => $self->_actual_fitness, |
102
|
|
|
|
|
|
|
); |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
# Consistency check for variable_length and input lengths |
105
|
|
|
|
|
|
|
if ( |
106
|
|
|
|
|
|
|
$self->_has_initial_population and |
107
|
|
|
|
|
|
|
!$self->variable_length and |
108
|
|
|
|
|
|
|
$self->_seq_lengths_are_different |
109
|
|
|
|
|
|
|
) { die "Initial population lengths cannot be different when variable_length is set to 0.\n"; } |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
if ( $self->_initial_population_size > $self->population_size ) { |
112
|
|
|
|
|
|
|
warn "initial_population has more sequences than population_size allows\n" |
113
|
|
|
|
|
|
|
} |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
if ($self->_has_terminate) { $ga->terminate($self->_actual_terminate) }; |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
$ga->init([ |
118
|
|
|
|
|
|
|
map { $_alphabet_for{ lc $self->type } } |
119
|
|
|
|
|
|
|
(1 .. $self->length) |
120
|
|
|
|
|
|
|
]); |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
$ga->inject([ map { [ split '', $_ ] } @{$self->initial_population} ]) |
123
|
|
|
|
|
|
|
if $self->_has_initial_population; |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
return $ga; |
126
|
|
|
|
|
|
|
} |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
sub _initial_population_size { |
129
|
|
|
|
|
|
|
my $self = shift; |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
if ($self->_has_initial_population) { |
132
|
|
|
|
|
|
|
return scalar @{$self->initial_population}; |
133
|
|
|
|
|
|
|
} |
134
|
|
|
|
|
|
|
else { |
135
|
|
|
|
|
|
|
return 0; |
136
|
|
|
|
|
|
|
} |
137
|
|
|
|
|
|
|
} |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
sub _seq_lengths_are_different { |
140
|
|
|
|
|
|
|
# returns true if lengths of the inserted sequences are equal |
141
|
|
|
|
|
|
|
my $self = shift; |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
my $initial_length = length($self->initial_population->[0]); |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
return grep { length $_ != $initial_length } @{$self->initial_population}; |
146
|
|
|
|
|
|
|
} |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
sub fittest { |
150
|
|
|
|
|
|
|
my ($self, $n) = @_; |
151
|
|
|
|
|
|
|
$n //= 1; |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
my @fittest; |
154
|
|
|
|
|
|
|
my @chromosomes = $self->_ga->getFittest($n, 1); |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
foreach my $chrom (@chromosomes) { |
157
|
|
|
|
|
|
|
my $seq = $self->_ga->as_string($chrom); |
158
|
|
|
|
|
|
|
$seq =~ s/_//g; |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
push @fittest, { |
161
|
|
|
|
|
|
|
seq => $seq, |
162
|
|
|
|
|
|
|
score => $self->_ga->as_value ($chrom), |
163
|
|
|
|
|
|
|
}; |
164
|
|
|
|
|
|
|
} |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
return ( $n == 1 ) ? $fittest[0] : @fittest; |
167
|
|
|
|
|
|
|
} |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
sub history { |
171
|
|
|
|
|
|
|
my $self = shift; |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
my $history = $self->_ga->getHistory; |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
return { |
176
|
|
|
|
|
|
|
max => $history->[0], |
177
|
|
|
|
|
|
|
mean => $history->[1], |
178
|
|
|
|
|
|
|
min => $history->[2], |
179
|
|
|
|
|
|
|
}; |
180
|
|
|
|
|
|
|
} |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
sub current_stats { |
184
|
|
|
|
|
|
|
my $self = shift; |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
my ($max, $mean, $min) = $self->_ga->getAvgFitness; |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
return { max => $max, mean => $mean, min => $min }; |
189
|
|
|
|
|
|
|
} |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
sub current_population { |
193
|
|
|
|
|
|
|
my $self = shift; |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
my @population; |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
my $chromosomes = $self->_ga->people; |
198
|
|
|
|
|
|
|
foreach my $chrom (@$chromosomes) { |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
my $seq = $self->_ga->as_string( $chrom ); |
201
|
|
|
|
|
|
|
$seq =~ s/_//g; |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
my $score = $self->_ga->as_value($chrom); |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
push @population, { seq => $seq, score => $score }; |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
} |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
return @population; |
210
|
|
|
|
|
|
|
} |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
sub _build_length { |
214
|
|
|
|
|
|
|
my $self = shift; |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
unless ( $self->_has_initial_population ) { |
217
|
|
|
|
|
|
|
die "Either length or initial_population should be defined\n"; |
218
|
|
|
|
|
|
|
} |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
my $max_length = max( map { length } @{$self->initial_population} ); |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
return $max_length; |
223
|
|
|
|
|
|
|
} |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
has type => ( |
227
|
|
|
|
|
|
|
is => 'ro', |
228
|
|
|
|
|
|
|
isa => enum([qw(protein Protein dna DNA rna RNA)]), |
229
|
|
|
|
|
|
|
required => 1, |
230
|
|
|
|
|
|
|
); |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
has initial_population => ( |
234
|
|
|
|
|
|
|
is => 'ro', |
235
|
|
|
|
|
|
|
isa => ArrayRef[Str], |
236
|
|
|
|
|
|
|
predicate => '_has_initial_population', |
237
|
|
|
|
|
|
|
); |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
has cache => ( |
241
|
|
|
|
|
|
|
is => 'ro', |
242
|
|
|
|
|
|
|
isa => Bool, |
243
|
|
|
|
|
|
|
default => 1, |
244
|
|
|
|
|
|
|
); |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
has mutation => ( |
248
|
|
|
|
|
|
|
is => 'ro', |
249
|
|
|
|
|
|
|
isa => Probability, |
250
|
|
|
|
|
|
|
default => 0.05, |
251
|
|
|
|
|
|
|
); |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
has crossover => ( |
255
|
|
|
|
|
|
|
is => 'ro', |
256
|
|
|
|
|
|
|
isa => Probability, |
257
|
|
|
|
|
|
|
default => 0.95, |
258
|
|
|
|
|
|
|
); |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
has population_size => ( |
262
|
|
|
|
|
|
|
is => 'ro', |
263
|
|
|
|
|
|
|
isa => Int, |
264
|
|
|
|
|
|
|
default => 300, |
265
|
|
|
|
|
|
|
); |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
has parents => ( |
269
|
|
|
|
|
|
|
is => 'ro', |
270
|
|
|
|
|
|
|
isa => Int, |
271
|
|
|
|
|
|
|
default => 2, |
272
|
|
|
|
|
|
|
); |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
has selection => ( |
276
|
|
|
|
|
|
|
is => 'ro', |
277
|
|
|
|
|
|
|
isa => ArrayRef, |
278
|
|
|
|
|
|
|
default => sub { ['Roulette'] }, |
279
|
|
|
|
|
|
|
); |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
has strategy => ( |
283
|
|
|
|
|
|
|
is => 'ro', |
284
|
|
|
|
|
|
|
isa => ArrayRef, |
285
|
|
|
|
|
|
|
default => sub { [ 'Points', 2 ] }, |
286
|
|
|
|
|
|
|
); |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
has preserve => ( |
290
|
|
|
|
|
|
|
is => 'ro', |
291
|
|
|
|
|
|
|
isa => Int, |
292
|
|
|
|
|
|
|
default => '5', |
293
|
|
|
|
|
|
|
); |
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
__PACKAGE__->meta->make_immutable; |
296
|
|
|
|
|
|
|
1; |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
|
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
=pod |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
=head1 NAME |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
AI::Genetic::Pro::Macromolecule - Genetic Algorithms to evolve DNA, RNA and Protein sequences |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
=head1 VERSION |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
version 0.09280.0_001 |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
=head1 SYNOPSIS |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
use AI::Genetic::Pro::Macromolecule; |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
my @proteins = ($seq1, $seq2, $seq3, ... ); |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
my $m = AI::Genetic::Pro::Macromolecule->new( |
318
|
|
|
|
|
|
|
type => 'protein', |
319
|
|
|
|
|
|
|
fitness => \&hydrophobicity, |
320
|
|
|
|
|
|
|
initial_population => \@proteins, |
321
|
|
|
|
|
|
|
); |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
sub hydrophobicity { |
324
|
|
|
|
|
|
|
my $seq = shift; |
325
|
|
|
|
|
|
|
my $score = f($seq) |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
return $score; |
328
|
|
|
|
|
|
|
} |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
$m->evolve(10) # evolve for 10 generations; |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
my $most_hydrophobic = $m->fittest->{seq}; # get the best sequence |
333
|
|
|
|
|
|
|
my $highest_score = $m->fittest->{score}; # get top score |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
# Want the score stats throughout generations? |
336
|
|
|
|
|
|
|
my $history = $m->history; |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
my $mean_history = $history->{mean}; # [ mean1, mean2, mean3, ... ] |
339
|
|
|
|
|
|
|
my $min_history = $history->{min}; # [ min1, min2, min3, ... ] |
340
|
|
|
|
|
|
|
my $max_history = $history->{max}; # [ max1, max2, max3, ... ] |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
=head1 DESCRIPTION |
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
AI::Genetic::Pro::Macromolecule is a wrapper over L<AI::Genetic::Pro>, |
345
|
|
|
|
|
|
|
aimed at easily evolving protein, DNA or RNA sequences using arbitrary |
346
|
|
|
|
|
|
|
fitness functions. |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
Its purpose it to allow optimization of macromolecule sequences using |
349
|
|
|
|
|
|
|
Genetic Algorithms, with as little set up time and burdain as possible. |
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
Standing atop L<AI::Genetic::Pro>, it is reasonably fast and memory |
352
|
|
|
|
|
|
|
efficient. It is also highly customizable, although I've chosen what I |
353
|
|
|
|
|
|
|
think are sensible defaults for every parameter, so that you don't have |
354
|
|
|
|
|
|
|
to worry about them if you don't know what they mean. |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
=head1 ATTRIBUTES |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
=head2 fitness |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
Accepts a C<CodeRef> that should assign a numeric score to each string |
363
|
|
|
|
|
|
|
sequence that it's passed to it as an argument. Required. |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
sub fitness { |
366
|
|
|
|
|
|
|
my $seq = shift; |
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
# Do something with $seq and return a score |
369
|
|
|
|
|
|
|
my $score = f($seq); |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
return $score; |
372
|
|
|
|
|
|
|
} |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
my $m = AI::Genetic::Pro::Macromolecule->new( |
375
|
|
|
|
|
|
|
fitness => \&fitness, |
376
|
|
|
|
|
|
|
... |
377
|
|
|
|
|
|
|
); |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
=head2 terminate |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
Accepts a C<CodeRef>. It will be applied once at the end of each |
384
|
|
|
|
|
|
|
generation. If returns true, evolution will stop, disregarding the |
385
|
|
|
|
|
|
|
generation steps passed to the C<evolve> method. |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
The C<CodeRef> should accept an C<AI::Genetic::Pro::Macromolecule> object |
388
|
|
|
|
|
|
|
as argument, and should return either true or false. |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
sub reached_max { |
391
|
|
|
|
|
|
|
my $m = shift; # an AI::G::P::Macromolecule object |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
my $highest_score = $m->fittest->{score}; |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
if ( $highest_score > 9000 ) { |
396
|
|
|
|
|
|
|
warn "It's over 9000!"; |
397
|
|
|
|
|
|
|
return 1; |
398
|
|
|
|
|
|
|
} |
399
|
|
|
|
|
|
|
} |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
my $m = AI::Genetic::Pro::Macromolecule->new( |
402
|
|
|
|
|
|
|
terminate => \&reached_max, |
403
|
|
|
|
|
|
|
... |
404
|
|
|
|
|
|
|
); |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
In the above example, evolution will stop the moment the top score in |
407
|
|
|
|
|
|
|
any generation exceeds the value 9000. |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
=head2 variable_length |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
Decide whether the sequences can have different lengths. Accepts a C<Bool> |
414
|
|
|
|
|
|
|
value. Defaults to 1. |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
|
417
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
=head2 length |
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
Manually set the allowed maximum length of the sequences, accepts C<Int>. |
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
This attribute is required unless an initial population is provided. In |
423
|
|
|
|
|
|
|
that case, C<length> will be set as equal to the length of the longest |
424
|
|
|
|
|
|
|
sequence provided if it's not explicity specified. |
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
=head2 type |
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
Macromolecule type: protein, dna, or rna. Required. |
431
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
=head2 initial_population |
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
Sequences to add to the initial pool before evolving. Accepts an |
437
|
|
|
|
|
|
|
C<ArrayRef[Str]>. |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
my $m = AI::Genetic::Pro::Macromolecule->new( |
440
|
|
|
|
|
|
|
initial_population => ['ACGT', 'CAAC', 'GTTT'], |
441
|
|
|
|
|
|
|
... |
442
|
|
|
|
|
|
|
); |
443
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
=head2 cache |
447
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
Accepts a C<Bool> value. When true, score results for each sequence will |
449
|
|
|
|
|
|
|
be stored, to avoid costly and unnecesary recomputations. Set to 1 by |
450
|
|
|
|
|
|
|
default. |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
=head2 mutation |
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
Mutation rate, a C<Num> between 0 and 1. Default is 0.05. |
457
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
=head2 crossover |
461
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
Crossover rate, a C<Num> between 0 and 1. Default is 0.95. |
463
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
=head2 population_size |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
Number of sequences per generation. Default is 300. |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
=head2 parents |
473
|
|
|
|
|
|
|
|
474
|
|
|
|
|
|
|
Number of parents sequences in recombinations. Default is 2. |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
=head2 selection |
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
Defines how sequences are selected to crossover. It expects an C<ArrayRef>: |
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
selection => [ $type, @params ] |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
See docs in L<AI::Genetic::Pro> for details on available selection |
485
|
|
|
|
|
|
|
strategies, parameters, and their meanings. Default is Roulette, in |
486
|
|
|
|
|
|
|
which at first the best individuals/chromosomes are selected. From this |
487
|
|
|
|
|
|
|
collection parents are selected with probability poportionaly to its |
488
|
|
|
|
|
|
|
fitness. |
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
=head2 strategy |
493
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
Defines strategy of crossover operation. It expects an C<ArrayRef>: |
495
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
strategy => [ $strategy, @params ] |
497
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
See docs in L<AI::Genetic::Pro> for details on available crossover |
499
|
|
|
|
|
|
|
strategies, parameters, and their meanings. Default is [ Points, 2 ], in |
500
|
|
|
|
|
|
|
which parents are crossed at 2 points and the best child is moved to the |
501
|
|
|
|
|
|
|
next generation. |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
|
505
|
|
|
|
|
|
|
=head2 preserve |
506
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
Whether to inject the best sequences for next generation, and if so, how |
508
|
|
|
|
|
|
|
many. Defaults to 5. |
509
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
=head1 METHODS |
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
=head2 evolve |
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
$m->evolve($n); |
517
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
Evolve the sequence population for the specified number of generations. |
519
|
|
|
|
|
|
|
Accepts an optional single C<Int> argument. If $n is 0 or undef, it will |
520
|
|
|
|
|
|
|
evolve undefinitely or C<terminate> returns true. |
521
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
=head2 generation |
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
Returns the current generation number. |
525
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
=head2 fittest |
529
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
Returns an C<Array[HashRef]> with the desired number of top scoring |
531
|
|
|
|
|
|
|
sequences. The hash reference has two keys, 'seq' which points to the |
532
|
|
|
|
|
|
|
sequence string, and 'score' which points to the sequence's score. |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
my @top_2 = $m->fittest(2); |
535
|
|
|
|
|
|
|
# ( |
536
|
|
|
|
|
|
|
# { seq => 'VIKP', score => 10 }, |
537
|
|
|
|
|
|
|
# { seq => 'VLKP', score => 9 }, |
538
|
|
|
|
|
|
|
# ) |
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
When called with no arguments, it returns a C<HashRef> with the top |
541
|
|
|
|
|
|
|
scoring sequence. |
542
|
|
|
|
|
|
|
|
543
|
|
|
|
|
|
|
my $fittest = $m->fittest; |
544
|
|
|
|
|
|
|
# { seq => 'VIKP', score => 10 } |
545
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
=head2 history |
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
Returns a C<HashRef> with the minimum, maximum and mean score for |
551
|
|
|
|
|
|
|
each generation. |
552
|
|
|
|
|
|
|
|
553
|
|
|
|
|
|
|
my $history = $m->history; |
554
|
|
|
|
|
|
|
# { |
555
|
|
|
|
|
|
|
# min => [ 0, 0, 0, 1, 2, ... ], |
556
|
|
|
|
|
|
|
# max => [ 1, 2, 2, 3, 4, ... ], |
557
|
|
|
|
|
|
|
# mean => [ 0.2, 0.3, 0.5, 1.5, 3, ... ], |
558
|
|
|
|
|
|
|
# } |
559
|
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
To access the mean score for the C<$n>-th generation, for instance: |
561
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
$m->history->{mean}->[$n - 1]; |
563
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
=head2 current_stats |
567
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
Returns a C<HashRef> with the minimum, maximum and mean score fore |
569
|
|
|
|
|
|
|
the current generation. |
570
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
$m->current_stats; |
572
|
|
|
|
|
|
|
# { min => 2, max => 10, mean => 3.5 } |
573
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
|
576
|
|
|
|
|
|
|
=head2 current_population |
577
|
|
|
|
|
|
|
|
578
|
|
|
|
|
|
|
Returns an C<Array[HashRef]> with all the sequences of the current |
579
|
|
|
|
|
|
|
generation and their scores, in no particular order. |
580
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
my @seqs = $m->current_population; |
582
|
|
|
|
|
|
|
# ( |
583
|
|
|
|
|
|
|
# { seq => 'VIKP', score => 10 }, |
584
|
|
|
|
|
|
|
# { seq => 'VLKP', score => 9 }, |
585
|
|
|
|
|
|
|
# ... |
586
|
|
|
|
|
|
|
# ) |
587
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
|
590
|
|
|
|
|
|
|
=head1 AUTHOR |
591
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
Bruno Vecchi <vecchi.b gmail.com> |
593
|
|
|
|
|
|
|
|
594
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
595
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
This software is copyright (c) 2009 by Bruno Vecchi. |
597
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
599
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
600
|
|
|
|
|
|
|
|
601
|
|
|
|
|
|
|
=cut |
602
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
__END__ |
606
|
|
|
|
|
|
|
|
607
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
|