File Coverage

blib/lib/AI/Gene/Sequence.pm
Criterion Covered Total %
statement 223 247 90.2
branch 83 98 84.6
condition 52 65 80.0
subroutine 17 21 80.9
pod 15 15 100.0
total 390 446 87.4


line stmt bran cond sub pod time code
1             package AI::Gene::Sequence;
2             require 5.6.0;
3 1     1   858 use strict;
  1         3  
  1         30  
4 1     1   5 use warnings;
  1         2  
  1         28  
5              
6             BEGIN {
7 1     1   5 use Exporter ();
  1         4  
  1         101  
8 1     1   2 our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);
9 1         3 $VERSION = 0.22;
10 1         15 @ISA = qw(Exporter);
11 1         2 @EXPORT = ();
12 1         2 %EXPORT_TAGS = ();
13 1         2886 @EXPORT_OK = qw();
14             }
15             our @EXPORT_OK;
16              
17             my ($probs,$mut_keys) = _normalise( { map {$_ => 1}
18             qw(insert remove overwrite
19             duplicate minor major
20             switch shuffle reverse) } );
21              
22             ##
23             # calls mutation method at random
24             # 0: number of mutations to perform
25             # 1: ref to hash of probs to use (otherwise uses default mutations and probs)
26              
27             sub mutate {
28 60     60 1 2439 my $self = shift;
29 60   100     187 my $num_mutates = +$_[0] || 1;
30 60         68 my $rt = 0;
31 60         65 my ($hr_probs, $muts);
32 60 100       141 if (ref $_[1] eq 'HASH') { # use non standard mutations or probs
33 30         55 ($hr_probs, $muts) = _normalise($_[1]);
34             }
35             else { # use standard mutations and probs
36 30         34 $hr_probs = $probs;
37 30         44 $muts = $mut_keys;
38             }
39              
40 60         107 MUT_CYCLE: for (1..$num_mutates) {
41 60         118 my $rand = rand;
42 60         65 foreach my $mutation (@{$muts}) {
  60         102  
43 268 100       593 next unless $rand < $hr_probs->{$mutation};
44 60         96 my $mut = 'mutate_' . $mutation;
45 60         157 $rt += $self->$mut(1);
46 60         164 next MUT_CYCLE;
47             }
48             }
49 60         481 return $rt;
50             }
51              
52             ##
53             # creates a normalised and cumulative prob distribution for the
54             # keys of the referenced hash
55              
56             sub _normalise {
57 31     31   33 my $hr = $_[0];
58 31         37 my $h2 = {};
59 31         35 my $muts = [keys %{$hr}];
  31         118  
60 31         51 my $sum = 0;
61 31         27 foreach (values %{$hr}) {
  31         75  
62 249         305 $sum += $_;
63             }
64 31 50       62 if ($sum <= 0) {
65 0         0 die "Cannot randomly mutate with bad probability distribution";
66             }
67             else {
68 31         27 my $cum;
69 31         36 @{$h2}{ @{$muts} } = map {$cum +=$_; $cum / $sum} @{$hr}{ @{$muts} };
  31         137  
  31         35  
  249         213  
  249         461  
  31         64  
  31         35  
70 31         105 return ($h2, $muts);
71             }
72             }
73              
74             ##
75             # inserts one element into the sequence
76             # 0: number to perform ( or 1)
77             # 1: position to mutate (undef for random)
78              
79             sub mutate_insert {
80 42     42 1 1355 my $self = shift;
81 42   100     191 my $num = +$_[0] || 1;
82 42         45 my $rt = 0;
83 42         109 for (1..$num) {
84 42         65 my $length = length $self->[0];
85 42 100       104 my $pos = defined($_[1]) ? $_[1] : int rand $length;
86 42 100       84 next if $pos > $length; # further than 1 place after gene
87 41         97 my @token = $self->generate_token;
88 41         321 my $new = $self->[0];
89 41         62 substr($new, $pos, 0) = $token[0];
90 41 50       77 next unless $self->valid_gene($new, $pos);
91 41         74 $self->[0] = $new;
92 41         79 splice @{$self->[1]}, $pos, 0, $token[1];
  41         122  
93 41         128 $rt++;
94             }
95 42         251 return $rt;
96             }
97              
98             ##
99             # removes element(s) from sequence
100             # 0: number of times to perform
101             # 1: position to affect (undef for rand)
102             # 2: length to affect, undef => 1, 0 => random length
103              
104             sub mutate_remove {
105 40     40 1 2274 my $self = shift;
106 40   50     81 my $num = +$_[0] || 1;
107 40         42 my $rt = 0;
108 40         65 for (1..$num) {
109 40         54 my $length = length $self->[0];
110 40 100 66     157 my $len = !defined($_[2]) ? 1 : ($_[2] || int rand $length);
111 40 50       253 next if ($length - $len) <= 0;
112 40 100       73 my $pos = defined($_[1]) ? $_[1] : int rand $length;
113 40 100       106 next if $pos >= $length; # outside of gene
114 39         54 my $new = $self->[0];
115 39         49 substr($new, $pos, $len) = '';
116 39 50       70 next unless $self->valid_gene($new, $pos);
117 39         64 $self->[0] = $new;
118 39         64 splice @{$self->[1]}, $pos, $len;
  39         80  
119 39         91 $rt++;
120             }
121 40         181 return $rt;
122             }
123              
124             ##
125             # copies an element or run of elements into a random place in the gene
126             # 0: number to perform (or 1)
127             # 1: posn to copy from (undef for rand)
128             # 2: posn to splice in (undef for rand)
129             # 3: length (undef for 1, 0 for random)
130              
131             sub mutate_duplicate {
132 40     40 1 1934 my $self = shift;
133 40   50     3181 my $num = +$_[0] || 1;
134 40         47 my $rt = 0;
135 40         74 for (1..$num) {
136 40         65 my $length = length $self->[0];
137 40 100 100     228 my $len = !defined($_[3]) ? 1 : ($_[3] || int rand $length);
138 40 100       77 my $pos1 = defined($_[1]) ? $_[1] : int rand $length;
139 40 100       74 my $pos2 = defined($_[2]) ? $_[2] : int rand $length;
140 40         129 my $new = $self->[0];
141 40 100       112 next if ($pos1 + $len) > $length;
142 29 100       1379 next if $pos2 > $length;
143 28         45 my $seq = substr($new, $pos1, $len);
144 28         70 substr($new, $pos2,0) = $seq;
145 28 50       64 next unless $self->valid_gene($new);
146 28         56 $self->[0] = $new;
147 28         33 splice @{$self->[1]}, $pos2, 0, @{$self->[1]}[$pos1..($pos1+$len-1)];
  28         71  
  28         92  
148 28         84 $rt++;
149             }
150 40         251 return $rt;
151             }
152              
153             ##
154             # Duplicates a sequence and writes it on top of some other position
155             # 0: num to perform (or 1)
156             # 1: pos to get from (undef for rand)
157             # 2: pos to start replacement (undef for rand)
158             # 3: length to operate on (undef => 1, 0 => rand)
159              
160             sub mutate_overwrite {
161 49     49 1 1474 my $self = shift;
162 49   50     122 my $num = +$_[0] || 1;
163 49         56 my $rt = 0;
164            
165 49         84 for (1..$num) {
166 49         76 my $new = $self->[0];
167 49         73 my $length = length $self->[0];
168 49 100 100     191 my $len = !defined($_[3]) ? 1 : ($_[3] || int rand $length);
169 49 100       102 my $pos1 = defined($_[1]) ? $_[1] : int rand $length;
170 49 100       107 my $pos2 = defined($_[2]) ? $_[2] : int rand $length;
171 49 100 100     217 next if ( ($pos1 + $len) >= $length
172             or $pos2 > $length);
173 31         60 substr($new, $pos2, $len) = substr($new, $pos1, $len);
174 31 50       65 next unless $self->valid_gene($new);
175 31         65 $self->[0] = $new;
176 31         81 splice (@{$self->[1]}, $pos2, $len,
  31         93  
177 31         34 @{$self->[1]}[$pos1..($pos1+$len-1)] );
178 31         89 $rt++;
179             }
180              
181 49         265 return $rt;
182             }
183              
184             ##
185             # Takes a run of tokens and reverses their order, is a noop with 1 item
186             # 0: number to perform
187             # 1: posn to start from (undef for rand)
188             # 2: length (undef=>1, 0=>rand)
189              
190             sub mutate_reverse {
191 35     35 1 1364 my $self = shift;
192 35   50     87 my $num = +$_[0] || 1;
193 35         39 my $rt = 0;
194            
195 35         60 for (1..$num) {
196 35         50 my $length = length $self->[0];
197 35         83 my $new = $self->[0];
198 35 100       86 my $pos = defined($_[1]) ? $_[1] : int rand $length;
199 35 100 100     143 my $len = !defined($_[2]) ? 1 : ($_[2] || int rand $length);
200              
201 35 100 100     167 next if ($pos >= $length
202             or $pos + $len > $length);
203              
204 23         87 my $chunk = reverse split('', substr($new, $pos, $len));
205 23         82 substr($new, $pos, $len) = join('', $chunk);
206 23 50       51 next unless $self->valid_gene($new);
207 23         75 $self->[0] = $new;
208 23         50 splice (@{$self->[1]}, $pos, $len,
  23         77  
209 23         29 reverse( @{$self->[1]}[$pos..($pos+$len-1)] ));
210 23         106 $rt++;
211             }
212 35         187 return $rt;
213             }
214              
215             ##
216             # Changes token into one of same type (ie. passes type to generate..)
217             # 0: number to perform
218             # 1: position to affect (undef for rand)
219              
220             sub mutate_minor {
221 41     41 1 1372 my $self = shift;
222 41   100     133 my $num = +$_[0] || 1;
223 41         43 my $rt = 0;
224 41         71 for (1..$num) {
225 41 100       145 my $pos = defined $_[1] ? $_[1] : int rand length $self->[0];
226 41 100       97 next if $pos >= length($self->[0]); # pos lies outside of gene
227 40         62 my $type = substr($self->[0], $pos, 1);
228 40         108 my @token = $self->generate_token($type, $self->[1][$pos]);
229             # still need to check for niceness, just in case
230 40 50       362 if ($token[0] eq $type) {
231 40         68 $self->[1][$pos] = $token[1];
232             }
233             else {
234 0         0 my $new = $self->[0];
235 0         0 substr($new, $pos, 1) = $token[0];
236 0 0       0 next unless $self->valid_gene($new, $pos);
237 0         0 $self->[0] = $new;
238 0         0 $self->[1][$pos] = $token[1];
239             }
240 40         104 $rt++;
241             }
242 41         192 return $rt;
243             }
244              
245             ##
246             # Changes one token into some other token
247             # 0: number to perform
248             # 1: position to affect (undef for random)
249              
250             sub mutate_major {
251 41     41 1 2079 my $self = shift;
252 41   100     134 my $num = +$_[0] || 1;
253 41         44 my $rt = 0;
254 41         72 for (1..$num) {
255 41 100       104 my $pos = defined $_[1] ? $_[1] : int rand length $self->[0];
256 41 100       91 next if $pos >= length($self->[0]); # outside of gene
257 40         87 my @token = $self->generate_token();
258 40         305 my $new = $self->[0];
259 40         56 substr($new, $pos, 1) = $token[0];
260 40 50       86 next unless $self->valid_gene($new, $pos);
261 40         58 $self->[0] = $new;
262 40         53 $self->[1][$pos] = $token[1];
263 40         109 $rt++;
264             }
265 41         174 return $rt;
266             }
267              
268             ##
269             # swaps over two sequences within the gene
270             # any sort of oddness can occur if regions overlap
271             # 0: number to perform
272             # 1: start of first sequence (undef for rand)
273             # 2: start of second sequence (undef for rand)
274             # 3: length of first sequence (undef for 1, 0 for rand)
275             # 4: length of second sequence (undef for 1, 0 for rand)
276              
277             sub mutate_switch {
278 41     41 1 2385 my $self = shift;
279 41   50     310 my $num = $_[0] || 1;
280 41         59 my $rt = 0;
281 41         99 for (1..$num) {
282 41         72 my $length = length $self->[0];
283 41 100       125 my $pos1 = defined $_[1] ? $_[1] : int rand $length;
284 41 100       90 my $pos2 = defined $_[2] ? $_[2] : int rand $length;
285 41 100 66     510 my $len1 = !defined($_[3]) ? 1 : ($_[3] || int rand $length);
286 41 100 100     429 my $len2 = !defined($_[4]) ? 1 : ($_[4] || int rand $length);
287              
288 41         76 my $new = $self->[0];
289 41 100       117 next if $pos1 == $pos2;
290 38 100       89 if ($pos1 > $pos2) { # ensure $pos1 comes first
291 14         30 ($pos1, $pos2) = ($pos2, $pos1);
292 14         31 ($len1, $len2) = ($len2, $len1);
293             }
294 38 100 100     196 if ( ($pos1 + $len1) > $pos2 # ensure no overlaps
      66        
295             or ($pos2 + $len2) > $length
296             or $pos1 >= $length ) {
297 27         90 next;
298             }
299 11         35 my $chunk1 = substr($new, $pos1, $len1, substr($new, $pos2, $len2,''));
300 11         22 substr($new,$pos2 -$len1 + $len2,0) = $chunk1;
301 11 50       28 next unless $self->valid_gene($new);
302 11         28 $self->[0]= $new;
303 11         25 my @chunk1 = splice(@{$self->[1]}, $pos1, $len1,
  11         53  
304 11         16 splice(@{$self->[1]}, $pos2, $len2) );
305 11         21 splice @{$self->[1]}, $pos2 + $len2 - $len1,0, @chunk1;
  11         37  
306 11         41 $rt++;
307             }
308 41         305 return $rt;
309             }
310              
311             ##
312             # takes a sequence, removes it, then inserts it at another position
313             # odd things might occur if posn to replace to lies within area taken from
314             # 0: number to perform
315             # 1: posn to get from (undef for rand)
316             # 2: posn to put (undef for rand)
317             # 3: length of sequence (undef for 1, 0 for rand)
318              
319             sub mutate_shuffle {
320 46     46 1 2157 my $self = shift;
321 46   50     115 my $num = +$_[0] || 1;
322 46         51 my $rt = 0;
323            
324 46         95 for (1..$num) {
325 46         68 my $length = length $self->[0];
326 46 100       321 my $pos1 = defined($_[1]) ? $_[1] : int rand $length;
327 46 100       95 my $pos2 = defined($_[2]) ? $_[2] : int rand $length;
328 46 100 100     304 my $len = !defined($_[3]) ? 1 : ($_[3] || int rand $length);
329              
330 46         73 my $new = $self->[0];
331 46 100 100     321 if ($pos1 +$len > $length # outside gene
      100        
      66        
332             or $pos2 >= $length # outside gene
333             or ($pos2 < ($pos1 + $len) and $pos2 > $pos1)) { # overlap
334 17         57 next;
335             }
336 29 100       56 if ($pos1 < $pos2) {
337 17         77 substr($new, $pos2-$len,0) = substr($new, $pos1, $len, '');
338             }
339             else {
340 12         30 substr($new, $pos2, 0) = substr($new, $pos1, $len, '');
341             }
342 29 50       153 next unless $self->valid_gene($new);
343 29         64 $self->[0] = $new;
344 29 100       52 if ($pos1 < $pos2) {
345 17         32 splice (@{$self->[1]}, $pos2-$len, 0,
  17         49  
346 17         21 splice(@{$self->[1]}, $pos1, $len) );
347             }
348             else {
349 12         26 splice(@{$self->[1]}, $pos2, 0,
  12         36  
350 12         12 splice(@{$self->[1]}, $pos1, $len) );
351             }
352 29         96 $rt++;
353             }
354 46         266 return $rt;
355             }
356              
357             # These are intended to be overriden, simple versions are
358             # provided for the sake of testing.
359              
360             # Generates things to make up genes
361             # can be called with a token type to produce, or with none.
362             # if called with a token type, it will also be passed the original
363             # token as the second argument.
364             # should return a two element list of the token type followed by the token itself.
365              
366             sub generate_token {
367 0     0 1 0 my $self = shift;
368 0         0 my $token_type = $_[0];
369 0         0 my $letter = ('a'..'z')[rand 25];
370 0 0       0 unless ($token_type) {
371 0         0 return ($letter) x2;
372             }
373 0         0 return ($token_type) x2;
374             }
375              
376             # takes sting of token types to be checked for validity.
377             # If a mutation affects only one place, then the position of the
378             # mutation can be passed as a second argument.
379 242     242 1 1107 sub valid_gene {1}
380              
381             ## You might also want to have methods like the following,
382             # they will not be called by the 'sequence' methods.
383              
384             # Default constructor
385             sub new {
386 0     0 1 0 my $gene = ['',[]];
387 0   0     0 return bless $gene, ref $_[0] || $_[0];
388             }
389              
390             # remember that clone method may require deep copying depending on
391             # your specific needs
392              
393             sub clone {
394 312     312 1 21715 my $self = shift;
395 312         1870 my $new = [$self->[0]];
396 312         422 $new->[1] = [@{$self->[1]}];
  312         1801  
397 312         2265 return bless $new, ref $self;
398             }
399              
400             # You need some way to use the gene you've made and mutated, but
401             # this will let you have a look, if it starts being odd.
402              
403             sub render_gene {
404 0     0 1   my $self = shift;
405 0           my $return = "$self\n";
406 0           $return .= $self->[0] . "\n";
407 0           $return .= (join ',', @{$self->[1]}). "\n";
  0            
408 0           return $return;
409             }
410              
411             # used for testing
412              
413             sub _test_dump {
414 0     0     my $self = shift;
415 0           my @rt = ($self->[0], join('',@{$self->[1]}));
  0            
416 0           return @rt;
417             }
418             1;
419              
420             __END__;