File Coverage

blib/lib/Bio/GeneDesign/Codons.pm

Criterion	Covered	Total	%
statement	212	324	65.4
branch	39	74	52.7
condition	11	17	64.7
subroutine	20	26	76.9
pod			n/a
total	282	441	63.9

line	stmt	bran	cond	sub	time	code
1						#
2						# GeneDesign module for codon analysis and manipulation
3						#
4
5						=head1 NAME
6
7						GeneDesign::Codons
8
9						=head1 VERSION
10
11						Version 5.52
12
13						=head1 DESCRIPTION
14
15						GeneDesign functions for codon analysis and manipulation
16
17						=head1 AUTHOR
18
19						Sarah Richardson <SMRichardson@lbl.gov>.
20
21						=cut
22
23						package Bio::GeneDesign::Codons;
24						require Exporter;
25
26	11			11	65	use Bio::GeneDesign::Basic qw(:GD);
	11				19
	11				2452
27	11			11	11555	use Math::Combinatorics qw(combine);
	11				147664
	11				1005
28	11			11	128	use List::Util qw(max first);
	11				29
	11				1139
29	11			11	76	use File::Basename;
	11				25
	11				766
30	11			11	69	use Carp;
	11				26
	11				701
31
32	11			11	70	use strict;
	11				21
	11				376
33	11			11	61	use warnings;
	11				22
	11				576
34
35						our $VERSION = 5.52;
36
37	11			11	71	use base qw(Exporter);
	11				25
	11				64973
38						our @EXPORT_OK = qw(
39						_translate
40						_reverse_codon_table
41						_parse_organisms
42						_parse_codon_file
43						_subtract
44						_codon_count
45						_generate_RSCU_table
46						_generate_codon_report
47						_generate_codon_file
48						_amb_translation
49						_degcodon_to_aas
50						_find_in_frame
51						_minimize_local_alignment_dp
52						_define_codons
53						_pattern_aligner
54						_pattern_adder
55						_codon_change_type
56						$VERSION
57						);
58						our %EXPORT_TAGS = ( GD => \@EXPORT_OK );
59
60						my $CODLINE = qr/^ \{ ([ATCG]{3}) \} \s* = \s* (.+) $/x;
61						my %ambtransswits = map {$_ => 1} qw(1 2 3 -1 -2 -3 s t);
62
63						=head2 parse_organisms
64
65						=cut
66
67						sub _parse_organisms
68						{
69	0			0	0	my ($path) = @_;
70	0				0	my ($orgs, $cods) = ({}, {});
71	0	0			0	opendir (my $CODDIR, $path) \|\| croak "can't opendir $path";
72	0				0	foreach my $table (readdir($CODDIR))
73						{
74	0				0	my $name = basename($table);
75	0				0	$name =~ s{\.[^.]+$}{}x;
76	0	0			0	if ($table =~ /\.rscu\Z/x)
		0
77						{
78	0				0	$orgs->{$name} = $path . $table;
79						}
80						elsif ($table =~ /\.ct\Z/x)
81						{
82	0				0	$cods->{$name} = $path . $table;
83						}
84						}
85	0				0	closedir($CODDIR);
86	0				0	return ($orgs, $cods);
87						}
88
89						=head2 parse_codon_file
90
91						=cut
92
93						sub _parse_codon_file
94						{
95	14			14	31	my ($path) = @_;
96	14	50			514	open (my $CFILE, "<", $path) \|\| croak "Can't open $path : $!";
97	14				26	my $ref = do { local $/ = <$CFILE> };
	14				377
98	14				216	close $CFILE;
99	14				660	my @lines = split(/\n/x, $ref);
100	14				128	my $cods = {};
101	14				35	foreach my $line (grep {$_ !~ /^\#/x} @lines)
	959				1642
102						{
103	896	50			4255	if ($line =~ $CODLINE)
104						{
105	896				2878	$cods->{$1} = $2;
106						}
107						else
108						{
109	0				0	croak "Badly formatted definition in codon file $path: $line";
110						}
111						}
112	14				62	my @codons = _define_codons();
113	14				63	foreach my $codon (@codons)
114						{
115	896	50			1793	croak "$path table is missing definition for codon $codon"
116						unless (exists $cods->{$codon});
117						}
118	14				287	return $cods;
119						}
120
121						=head2 reverse_codon_table()
122
123						Takes a codon table hashref and reverses it such that each key is a one letter
124						amino acid residue and each value is an array reference containing all of the
125						codons that can code for that residue.
126
127						=cut
128
129						sub _reverse_codon_table
130						{
131	7			7	21	my ($codontable) = @_;
132	7				24	my $revcodon_t = {};
133	7				111	foreach my $codon (keys %$codontable)
134						{
135	448				600	my $aa = $codontable->{$codon};
136	448	100			1182	$revcodon_t->{$aa} = [] if ( ! exists $revcodon_t->{$aa} );
137	448				458	push @{$revcodon_t->{$aa}}, $codon;
	448				1259
138						}
139	7				60	return $revcodon_t;
140						}
141
142						=head2 _translate()
143
144						takes a nucleotide sequence, a frame, and a codon table and returns that frame
145						translated into amino acids.
146
147						=cut
148
149						sub _translate
150						{
151	15			15	34	my ($nucseq, $frame, $codon_t) = @_;
152	15	50			101	croak ("translate requires an unambiguous nucleotide sequence\n")
153						if ($nucseq =~ $ambnt);
154	15	100			41	$nucseq = _complement($nucseq, 1) if ($frame < 0);
155	15				24	my $peptide = q{};
156	15				53	for (my $offset = abs($frame)-1; $offset < length($nucseq); $offset += 3)
157						{
158	1227				1530	my $codon = substr($nucseq, $offset, 3);
159	1227	100			4361	$peptide .= $codon_t->{$codon} if (exists $codon_t->{$codon});
160						}
161	15				53	return $peptide;
162						}
163
164						=head2 _subtract
165
166						=cut
167
168						sub _subtract
169						{
170	3			3	10	my ($oldseq, $pattern, $regarr, $codon_t, $rscu_t, $revcodon_t) = @_;
171	3				6	my $seq = $oldseq;
172	3				17	my $temphash = _positions($seq, $regarr);
173	3				14	foreach my $gpos (keys %$temphash)
174						{
175	3				8	my $framestart = ($gpos) % 3;
176	3				15	my $startpos = int( length($temphash->{$gpos}) / 3 + 2 ) * 3;
177	3				12	my $string = substr($seq, $gpos - $framestart, $startpos);
178	3				6	my $newrepseg = $string;
179
180	3				5	my %newseqs;
181	3				7	my $len = length($string)/3;
182	3				13	my $curval = _rscu_sum($rscu_t, $string);
183						#compute the changes possible
184	3				10	for my $it (1..$len)
185						{
186	7				43	my @map = map { join q{}, sort @$_ } combine($it,(1..$len));
	15				2047
187	7				24	foreach my $guide (@map)
188						{
189	15				28	my $rscuers = {};
190	15				57	my @coords = sort split(q{}, $guide);
191	15				51	$rscuers->{0} = [$string];
192	15				26	my $passthrough = 0;
193	15				28	foreach my $coord (@coords)
194						{
195	25				47	my $srcarr = $rscuers->{$passthrough};
196	25				46	foreach my $str (@$srcarr)
197						{
198	37				80	my $coda = substr($str, $coord*3-3, 3);
199	37				48	foreach my $codb (grep {$coda ne $_}
	177				366
	37				106
200						@{$revcodon_t->{$codon_t->{$coda}}})
201						{
202	140				198	my $newstr = $str;
203						#substr($newstr, $coord*3-3, 3) = $codb;
204	140				209	substr($newstr, $coord*3-3, 3, $codb);
205	140				159	push @{$rscuers->{$passthrough+1}}, $newstr;
	140				484
206						}
207						}
208	25				54	$passthrough++;
209						}
210	15				22	foreach my $newseq (@{$rscuers->{$passthrough}})
	15				102
211						{
212	118				222	my $a = _rscu_sum($rscu_t, $newseq);
213	118				343	my $b = _compare_sequences($string, $newseq);
214	118				1024	$newseqs{$newseq} = [sprintf("%.2f",abs($a - $curval)), $b->{D}];
215						}
216						}
217						}
218						#try all the changes
219	3	50			35	foreach my $newseq (sort {$newseqs{$a}->[0] <=> $newseqs{$b}->[0]
	559				1248
220						\|\| $newseqs{$a}->[1] <=> $newseqs{$b}->[1]}
221						keys %newseqs)
222						{
223	2	50	33		53	if ($newseq !~ $pattern && _complement($newseq, 1) !~ $pattern)
224						{
225	2				5	$newrepseg = $newseq;
226	2				6	last;
227						}
228						}
229
230						#substr($seq, $gpos - $framestart, length($newrepseg)) = $newrepseg;
231	3				64	substr($seq, $gpos - $framestart, length($newrepseg), $newrepseg);
232						}
233	3				21	return $seq;
234						}
235
236						=head2 _rscu_sum()
237
238						=cut
239
240						sub _rscu_sum
241						{
242	121			121	202	my ($rscu_t, $ntseq) = @_;
243	121				147	my $offset = 0;
244	121				148	my $rscusum = 0;
245	121				161	my $length = length($ntseq);
246	121				265	while ($offset < $length)
247						{
248	361				598	my $cod = substr($ntseq, $offset, 3);
249	361				577	$rscusum += $rscu_t->{$cod};
250	361				877	$offset += 3;
251						}
252	121				267	return $rscusum;
253						}
254
255						=head2 _codon_count()
256
257						takes a reference to an array of sequences and returns a hash with codons as
258						keys and the number of times the codon occurs as a value.
259
260						=cut
261
262						sub _codon_count
263						{
264	3			3	7	my ($seqs, $codon_t, $hashref) = @_;
265	3				37	my %blank = map {$_ => 0} keys %$codon_t;
	192				326
266	3		100		38	my $codoncount = $hashref \|\| \%blank;
267	3				8	foreach my $seq (@$seqs)
268						{
269	3				387	my @arr = ($seq =~ m/ [ATCG]{3} /xg);
270	3				23	foreach my $codon (@arr)
271						{
272	600				821	$codoncount->{$codon}++;
273						}
274						}
275	3				17	return $codoncount;
276						}
277
278						=head2 generate_RSCU_table()
279
280						takes a hash reference with keys as codons and values as number of times
281						those codons occur (it helps to use codon_count) and returns a hash with each
282						codon and its RSCU value
283
284						=cut
285
286						sub _generate_RSCU_table
287						{
288	1			1	3	my ($codon_count, $codon_t, $revcodon_t) = @_;
289	1				3	my $RSCU_hash = {};
290	1				14	foreach my $codon (sort grep {$_ ne "XXX"} keys %$codon_count)
	64				99
291						{
292	64				80	my $x_j = 0;
293	64				88	my $x = $codon_count->{$codon};
294	64				104	my $family = $revcodon_t->{$codon_t->{$codon}};
295	64				79	my $family_size = scalar(@$family);
296	64				84	foreach (grep {exists $codon_count->{$_}} @$family)
	244				436
297						{
298	244				380	$x_j += $codon_count->{$_};
299						}
300	64	50			145	my $rscu = $x_j > 0 ? $x / ($x_j / $family_size) : 0.00;
301	64				290	$RSCU_hash->{$codon} = sprintf("%.2f", $rscu ) ;
302						}
303	1				11	return $RSCU_hash;
304						}
305
306						=head2 _generate_codon_report
307
308						=cut
309
310						sub _generate_codon_report
311						{
312	0			0	0	my ($codon_count, $codon_t, $rscu_t) = @_;
313	0				0	my $string = "Codon counts and RSCU values:\n";
314	0				0	my @codvalsort = sort {$b <=> $a} values %{$codon_count};
	0				0
	0				0
315	0				0	my $maxcodnum = $codvalsort[0];
316	0				0	foreach my $a ( qw(T C A G))
317						{
318	0				0	$string .= "\n";
319	0				0	foreach my $c ( qw(T C A G) )
320						{
321	0				0	foreach my $b ( qw(T C A G) )
322						{
323	0				0	my $codon = $a . $b . $c;
324	0				0	my $count = $codon_count->{$codon};
325	0				0	my $spacer = q{ } x (length($maxcodnum) - length($count));
326	0				0	$string .= "$codon (" . $codon_t->{$codon} . ") $spacer$count ";
327	0				0	$string .= $rscu_t->{$codon} . q{ } x 5;
328						}
329	0				0	$string .= "\n";
330						}
331	0				0	$string .= "\n";
332						}
333	0				0	return $string;
334						}
335
336						=head2 _generate_codon_file
337
338						=cut
339
340						sub _generate_codon_file
341						{
342	0			0	0	my ($table, $rev_cod_t, $comments) = @_;
343	0				0	my $string = q{};
344	0				0	my @cs = @$comments;
345	0				0	$string .= "# " . $_ . "\n" foreach (@cs);
346	0				0	foreach my $aa (sort keys %$rev_cod_t)
347						{
348	0				0	my @codons = @{$rev_cod_t->{$aa}};
	0				0
349	0				0	$string .= "#$aa\n";
350	0				0	foreach my $codon (sort @codons)
351						{
352	0				0	$string .= "{$codon} = $table->{$codon}\n";
353						}
354						}
355	0				0	return $string;
356						}
357
358						=head2 _define_codons
359
360						Generates an array reference that contains every possible nucleotide codon
361
362						=cut
363
364						sub _define_codons
365						{
366	14			14	24	my @codons;
367	14				36	foreach my $a (qw(A T C G))
368						{
369	56				84	foreach my $b (qw(A T C G))
370						{
371	224				284	foreach my $c (qw(A T C G))
372						{
373	896				1710	push @codons, $a . $b . $c;
374						}
375						}
376						}
377	14				331	return @codons;
378						}
379
380						=head2 _amb_translation()
381
382						takes a nucleotide that may be degenerate and a codon table and returns a list
383						of all amino acid sequences that nucleotide sequence could be translated into.
384
385						in: nucleotide sequence (string),
386						codon table (hash reference),
387						optional switch to force only a single frame of translation
388						optional hashref of previous answers to speed processing
389						out: amino acid sequence list (vector)
390
391						=cut
392
393						sub _amb_translation
394						{
395	37			37	96	my ($seq, $codon_t, $swit, $memo) = @_;
396	37	50			173	croak ("Bad frame argument\n") unless exists $ambtransswits{$swit};
397	37	100	100		192	if ($swit eq "s" \|\| $swit eq "t")
398						{
399	8				32	my @frames = qw(1 2 3);
400	8	100			30	push @frames, qw(-1 -2 -3) if ($swit eq "s");
401	8				13	my (%RES);
402	8				19	foreach my $s (@frames)
403						{
404	27				76	my @pep_set = _amb_translation($seq, $codon_t, $s, $memo);
405	27				574	$RES{$_}++ foreach (@pep_set);
406						}
407	8				263	return keys %RES;
408						}
409						else
410						{
411	29				32	my (%RES, @SEED, @NEW);
412	29	100			112	$seq = _complement($seq, 1) if ($swit < 0);
413	29	50			144	$seq = 'N' x (abs($swit) - 1) . $seq if (abs($swit) < 4);
414	29				69	my $seqlen = length($seq);
415	29				40	my $gothrough = 0;
416	29				86	for (my $offset = 0; $offset < $seqlen; $offset += 3)
417						{
418	67				169	my $tempcodon = substr($seq, $offset, 3);
419	67				265	$tempcodon .= "N" while (length($tempcodon) % 3);
420	67	50			136	if (!$swit)
421						{
422	0				0	$tempcodon .= 'N' while (length($tempcodon) < 3);
423						}
424	67	100			173	if ($gothrough == 0)
425						{
426	29				74	@SEED = _degcodon_to_aas($tempcodon, $codon_t, $memo) ;
427						}
428						else
429						{
430	38				79	@NEW = _degcodon_to_aas($tempcodon, $codon_t, $memo);
431	38				146	@SEED = _add_arr(\@SEED, \@NEW);
432						}
433	67				401	$gothrough++;
434						}
435	29				981	$RES{$_}++ foreach(@SEED);
436	29				495	return keys %RES;
437						}
438						}
439
440						=head2 _degcodon_to_aas()
441
442						takes a codon that may be degenerate and a codon table and returns a list of
443						all amino acids that codon could represent. If a hashref is provided with
444						previous answers, it will run MUCH faster (memoization).
445
446						in: codon (string),
447						codon table (hash reference)
448						out: amino acid list (vector)
449
450						=cut
451
452						sub _degcodon_to_aas
453						{
454	88			88	190	my ($codon, $codon_t, $memo) = @_;
455	88	50	33		469	return if ( ! $codon \|\| length($codon) != 3);
456	88				176	my (@answer, %temphash) = ((), ());
457	88	100			265	if (exists $memo->{$codon})
		50
458						{
459	25				44	return @{$memo->{$codon}};
	25				135
460						}
461						elsif ($codon eq "NNN")
462						{
463	0				0	%temphash = map { $_ => 1} values %$codon_t;
	0				0
464	0				0	@answer = keys %temphash;
465						}
466						else
467						{
468	63				196	my $reg = _regres($codon, 1);
469	63				632	%temphash = map {$codon_t->{$_} => 1} grep { $_ =~ $reg } keys %$codon_t;
	360				1195
	4032				10354
470	63				500	@answer = keys %temphash;
471						}
472	63				193	$memo->{$codon} = \@answer;
473	63				290	return @answer;
474						}
475
476						=head2 _find_in_frame
477
478						=cut
479
480						sub _find_in_frame
481						{
482	0			0	0	my ($ntseq, $pattern, $codon_t) = @_;
483	0				0	my $regex = _regres($pattern, 2);
484	0				0	my $aaseq = _translate($ntseq, 1, $codon_t);
485	0				0	my $hshref = _positions($aaseq, [$regex]);
486	0				0	my $pattntlen = length($pattern) * 3;
487	0				0	my $answer = {};
488	0				0	foreach my $ao (keys %$hshref)
489						{
490	0				0	my $nuco = 3 * $ao;
491	0				0	$answer->{$nuco} = substr($ntseq, $nuco, $pattntlen);
492						}
493	0				0	return $answer;
494						}
495
496						=head2 _minimize_local_alignment_dp()
497
498						Repeatsmasher, by Dongwon Lee. A function that minimizes local alignment
499						scores.
500
501						in: gene sequence (string)
502						codon table (hashref)
503						RSCU table (hashref)
504						out: new gene sequence (string)
505						#NO UNIT TEST
506
507						=cut
508
509						sub _minimize_local_alignment_dp
510						{
511	0			0	0	my ($oldseq, $codon_t, $rev_codon_t, $rscu_t) = @_;
512	0				0	my $match = 5;
513	0				0	my $transi = -3;
514	0				0	my $transv = -4;
515	0				0	my $score_threshold = $match*6; #count the scores only consecutive 6 nts
516	0				0	my @s = ( [$match, $transv, $transi, $transv],
517						[$transv, $match, $transv, $transi],
518						[$transi, $transv, $match, $transv],
519						[$transv, $transi, $transv, $match] );
520	0				0	my %nt2idx = ("A"=>0, "C"=>1, "G"=>2, "T"=>3);
521
522						#initial values
523	0				0	my @optM = (0);
524	0				0	my $optseq = q{};
525
526						#assumming that the sequence is in frame
527	0				0	my ($offset, $cod, $aa) = (0, q{}, q{});
528	0				0	my $oldlen = length($oldseq) - 3;
529	0				0	while ( $offset <= $oldlen )
530						{
531	0				0	$cod = substr($oldseq, $offset, 3);
532	0				0	$aa = _translate($cod, 1, $codon_t);
533	0				0	my @posarr = sort { $rscu_t->{$b} <=> $rscu_t->{$a} }
	0				0
534	0				0	@{$rev_codon_t->{$aa}};
535
536	0				0	my @minM = ();
537	0				0	my $min_seq = q{};
538						#assign an impossible large score
539	0				0	my $min_score = $match(length($oldseq)*2);
540
541	0	0			0	if ($aa ne '*')
542						{
543	0				0	foreach my $newcod (@posarr)
544						{
545	0				0	my @prevM = @optM;
546	0				0	my $prevseq = $optseq;
547
548	0				0	foreach my $nt (split(//, $newcod))
549						{
550	0				0	my $currseq = $prevseq . $nt;
551	0				0	my $currlen = length($currseq);
552	0				0	my @currM = ();
553	0				0	my $pos = 0;
554	0				0	push @currM, 0;
555	0				0	while($pos < $currlen)
556						{
557	0				0	my $nt2 = substr($currseq, $pos, 1);
558	0				0	my $nidx1 = $nt2idx{$nt};
559	0				0	my $nidx2 = $nt2idx{$nt2};
560	0				0	push @currM, max(0, $prevM[$pos]+$s[$nidx1][$nidx2]);
561	0				0	$pos++;
562						}
563	0				0	@prevM = @currM;
564	0				0	$prevseq = $currseq;
565						}
566	0				0	my $scoresum = 0;
567	0				0	foreach my $i (@prevM)
568						{
569	0	0			0	if ($i >= $score_threshold)
570						{
571	0				0	$scoresum += $i;
572						}
573						}
574	0	0			0	if ($min_score > $scoresum)
575						{
576	0				0	$min_score = $scoresum;
577	0				0	$min_seq = $prevseq;
578	0				0	@minM = @prevM;
579						}
580						}
581						}
582						else
583						{
584	0				0	$optseq = $optseq . $cod;
585	0				0	last;
586						}
587	0				0	@optM = @minM;
588	0				0	$optseq = $min_seq;
589	0				0	$offset+=3;
590						}
591	0				0	return $optseq;
592						}
593
594						=head2 _pattern_aligner
595
596						takes a nucleotide sequence, a pattern, a peptide sequence, and a codon table
597						and inserts Ns before the pattern until they align properly. This is so a
598						pattern can be inserted out of frame.
599
600						in: nucleotide sequence (string),
601						nucleotide pattern (string),
602						amino acid sequence (string),
603						codon table (hash reference)
604						out: nucleotide pattern (string)
605
606						=cut
607
608						sub _pattern_aligner
609						{
610	3			3	7	my ($critseg, $pattern, $peptide, $codon_t, $memo) = @_;
611	3				8	my $diff = length($critseg) - length($pattern);
612	3				9	my ($newpatt, $nstring, $rounds, $offset, $check, $pcheck) = (q{}, "N" x $diff, 0, 0, q{}, q{});
613						# print "seeking $pattern for $peptide from $critseg...\n";
614	3		66		23	while ($check ne $peptide && $rounds <= $diff*2 + 1)
615						{
616	6	50			23	$newpatt = $rounds <= $diff
617						? substr($nstring, 0, $rounds) . $pattern
618						: substr($nstring, 0, ($rounds-3)) . _complement($pattern, 1);
619	6				37	$newpatt .= "N" while (length($newpatt) != length($critseg));
620						# print "\t$newpatt\n";
621	6				12	my ($noff, $poff) = (0, 0);
622	6				8	$check = q{};
623	6				19	while ($poff < length($peptide))
624						{
625	18				55	my @possibles = _degcodon_to_aas( substr($newpatt, $noff, 3), $codon_t, $memo );
626						# print "\t\t@possibles\n";
627	18				44	$check .= $_ foreach( grep { substr($peptide, $poff, 1) eq $_ } @possibles);
	52				133
628	18				27	$noff += 3;
629	18				60	$poff ++;
630						}
631	6				23	$pcheck = _translate(substr($critseg, $offset, length($peptide) * 3), 1, $codon_t);
632						# print "\t\t$check, $pcheck, $offset\n";
633	6				14	$rounds++;
634	6	100			37	$offset += 3 if ($rounds % 3 == 0);
635						# $check = q{} if ( $pcheck !~ $check);
636						}
637	3	50			9	$newpatt = "0" if ($check ne $peptide);
638						# print "\t\tpataln $check, $pcheck, $rounds, $newpatt\n" if ($check ne $peptide);
639	3				13	return ($newpatt, $rounds - 1);
640						}
641
642						=head2 _pattern_adder()
643
644						takes a nucleotide sequence, a nucleotide "pattern" to be interpolated, and
645						the codon table, and returns an edited nucleotide sequence that contains the
646						pattern (if possible).
647
648						in: nucleotide sequence (string),
649						nucleotide pattern (string),
650						codon table (hash reference)
651						out: nucleotide sequence (string) OR null
652
653						=cut
654
655						sub _pattern_adder
656						{
657	1			1	4	my ($oldpatt, $newpatt, $codon_t, $revcodon_t, $memo) = @_;
658						#assume that critseg and pattern come in as complete codons
659						# (i.e., have been run through pattern_aligner)
660	1				3	my $copy = q{};
661	1				6	for (my $offset = 0; $offset < length($oldpatt); $offset += 3)
662						{
663	3				6	my $curcod = substr($oldpatt, $offset, 3);
664	3				6	my $curtar = substr($newpatt, $offset, 3);
665	3				12	my $ctregx = _regres($curtar);
666	3				12	foreach my $g (_degcodon_to_aas($curcod, $codon_t, $memo))
667						{
668	3	100			22	if ($curcod =~ $ctregx)
669						{
670	1				8	$copy .= $curcod;
671						}
672						else
673						{
674	2				5	my @arr = @{$revcodon_t->{$g}};
	2				10
675	2				5	foreach my $potcod (@arr)
676						{
677	8				12	my $flag = 0;
678	8	100	66		51	$flag++ if ($potcod =~ $ctregx \|\| $curtar =~ _regres($potcod));
679	8	100			30	if ($flag != 0)
680						{
681	2				5	$copy .= $potcod;
682	2				17	last;
683						}
684						}
685						}
686						# print "\t\tpatadd\t($curcod, $curtar)\t$copy<br>\n";
687						}
688						}
689						# print "\t\tpatadd $copy from $oldpatt\n";
690	1	50			7	return length($copy) == length($oldpatt) ? $copy : 0;
691						}
692
693						=head2 _codon_change_type
694
695						=cut
696
697						sub _codon_change_type
698						{
699	0			0		my ($oldcod, $newcod, $codon_t) = @_;
700	0					my $oldaa = $codon_t->{$oldcod};
701	0					my $newaa = $codon_t->{$newcod};
702	0	0				my $type = $oldaa eq $newaa
		0
		0
		0
703						? $oldaa eq q{*}
704						? 'stop_retained_variant'
705						: 'synonymous_codon'
706						: $oldaa eq q{*}
707						? 'stop_lost'
708						: $newaa eq q{*}
709						? 'stop_gained'
710						: 'non_synonymous_codon';
711	0					return $type;
712						}
713
714						1;
715
716						__END__
717
718						=head1 COPYRIGHT AND LICENSE
719
720						Copyright (c) 2013, GeneDesign developers
721						All rights reserved.
722
723						Redistribution and use in source and binary forms, with or without modification,
724						are permitted provided that the following conditions are met:
725
726						* Redistributions of source code must retain the above copyright notice, this
727						list of conditions and the following disclaimer.
728
729						* Redistributions in binary form must reproduce the above copyright notice, this
730						list of conditions and the following disclaimer in the documentation and/or
731						other materials provided with the distribution.
732
733						* The names of Johns Hopkins, the Joint Genome Institute, the Lawrence Berkeley
734						National Laboratory, the Department of Energy, and the GeneDesign developers may
735						not be used to endorse or promote products derived from this software without
736						specific prior written permission.
737
738						THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
739						ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
740						WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
741						DISCLAIMED. IN NO EVENT SHALL THE DEVELOPERS BE LIABLE FOR ANY DIRECT, INDIRECT,
742						INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
743						LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
744						PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
745						LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
746						OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
747						ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
748
749						=cut