File Coverage

blib/lib/Bio/GeneDesign/Codons.pm

Criterion	Covered	Total	%
statement	230	343	67.0
branch	38	74	51.3
condition	10	14	71.4
subroutine	21	27	77.7
pod			n/a
total	299	458	65.2

line	stmt	bran	cond	sub	time	code
1						#
2						# GeneDesign module for codon analysis and manipulation
3						#
4
5						=head1 NAME
6
7						GeneDesign::Codons
8
9						=head1 VERSION
10
11						Version 5.56
12
13						=head1 DESCRIPTION
14
15						GeneDesign functions for codon analysis and manipulation
16
17						=head1 AUTHOR
18
19						Sarah Richardson .
20
21						=cut
22
23						package Bio::GeneDesign::Codons;
24						require Exporter;
25
26	11			11	71	use Bio::GeneDesign::Basic qw(:GD);
	11				17
	11				1955
27	11			11	5418	use Math::Combinatorics qw(combine);
	11				99446
	11				736
28	11			11	86	use List::Util qw(max first);
	11				25
	11				622
29	11			11	66	use File::Basename;
	11				19
	11				563
30	11			11	5039	use autodie qw(open close);
	11				129947
	11				55
31	11			11	4442	use Carp;
	11				24
	11				580
32
33	11			11	60	use strict;
	11				20
	11				173
34	11			11	48	use warnings;
	11				17
	11				406
35
36						our $VERSION = 5.56;
37
38	11			11	58	use base qw(Exporter);
	11				21
	11				43351
39						our @EXPORT_OK = qw(
40						_translate
41						_reverse_codon_table
42						_parse_organisms
43						_parse_codon_file
44						_subtract
45						_codon_count
46						_generate_RSCU_table
47						_generate_codon_report
48						_generate_codon_file
49						_amb_translation
50						_degcodon_to_aas
51						_find_in_frame
52						_minimize_local_alignment_dp
53						_define_codons
54						_pattern_aligner
55						_pattern_adder
56						_codon_change_type
57						$VERSION
58						);
59						our %EXPORT_TAGS = ( GD => \@EXPORT_OK );
60
61						my $CODLINE = qr/^ \{ ([ATCG]{3}) \} \s* = \s* (.+) $/x;
62						my %ambtransswits = map {$_ => 1} qw(1 2 3 -1 -2 -3 s t);
63
64						=head2 parse_organisms
65
66						=cut
67
68						sub _parse_organisms
69						{
70	0			0	0	my ($path) = @_;
71	0				0	my ($orgs, $cods) = ({}, {});
72	0	0			0	opendir (my $CODDIR, $path) \|\| croak "can't opendir $path";
73	0				0	foreach my $table (readdir($CODDIR))
74						{
75	0				0	my $name = basename($table);
76	0				0	$name =~ s{\.[^.]+$}{}x;
77	0	0			0	if ($table =~ m{\.rscu\Z}x)
		0
78						{
79	0				0	$orgs->{$name} = $path . $table;
80						}
81						elsif ($table =~ /\.ct\Z/x)
82						{
83	0				0	$cods->{$name} = $path . $table;
84						}
85						}
86	0				0	closedir($CODDIR);
87	0				0	return ($orgs, $cods);
88						}
89
90						=head2 parse_codon_file
91
92						=cut
93
94						sub _parse_codon_file
95						{
96	14			14	37	my ($path) = @_;
97	14				65	open (my $CFILE, '<', $path);
98	14				16831	my $ref = do { local $/ = <$CFILE> };
	14				481
99	14				86	close $CFILE;
100	14				6333	my @lines = split(/\n/x, $ref);
101	14				40	my $cods = {};
102	14				40	foreach my $line (grep {$_ !~ /^\#/x} @lines)
	938				1379
103						{
104	896	50			3061	if ($line =~ $CODLINE)
105						{
106	896				2148	$cods->{$1} = $2;
107						}
108						else
109						{
110	0				0	croak "Badly formatted definition in codon file $path: $line";
111						}
112						}
113	14				51	my @codons = _define_codons();
114	14				51	foreach my $codon (@codons)
115						{
116						croak "$path table is missing definition for codon $codon"
117	896	50			1293	unless (exists $cods->{$codon});
118						}
119	14				478	return $cods;
120						}
121
122						=head2 reverse_codon_table()
123
124						Takes a codon table hashref and reverses it such that each key is a one letter
125						amino acid residue and each value is an array reference containing all of the
126						codons that can code for that residue.
127
128						=cut
129
130						sub _reverse_codon_table
131						{
132	7			7	19	my ($codontable) = @_;
133	7				18	my $revcodon_t = {};
134	7				133	foreach my $codon (sort {$a cmp $b} keys %$codontable)
	2149				2230
135						{
136	448				532	my $aa = $codontable->{$codon};
137	448	100			749	$revcodon_t->{$aa} = [] if ( ! exists $revcodon_t->{$aa} );
138	448				473	push @{$revcodon_t->{$aa}}, $codon;
	448				689
139						}
140	7				38	return $revcodon_t;
141						}
142
143						=head2 _translate()
144
145						takes a nucleotide sequence, a frame, and a codon table and returns that frame
146						translated into amino acids.
147
148						=cut
149
150						sub _translate
151						{
152	15			15	34	my ($nucseq, $frame, $codon_t) = @_;
153	15	100			40	$nucseq = _complement($nucseq, 1) if ($frame < 0);
154	15				26	my $peptide = q{};
155	15				63	my $limit = length $nucseq;
156	15				25	my $offset = abs($frame) - 1;
157	15				64	$limit-- while (($limit - $offset) % 3 != 0);
158	15				32	while ($offset < $limit)
159						{
160	1223				1394	my $codon = substr $nucseq, $offset, 3;
161	1223	50			1479	if (exists $codon_t->{$codon})
162						{
163	1223				1416	$peptide .= $codon_t->{$codon};
164						}
165						else
166						{
167	0				0	carp("GDWarning: $codon is untranslatable\n");
168						}
169	1223				1629	$offset += 3;
170						}
171	15				36	return $peptide;
172						}
173
174						=head2 _subtract
175
176						=cut
177
178						sub _subtract
179						{
180	4			4	12	my ($oldseq, $pattern, $regarr, $codon_t, $rscu_t, $revcodon_t) = @_;
181	4				7	my $seq = $oldseq;
182	4				11	my $temphash = _positions($seq, $regarr);
183	4				8	foreach my $gpos (sort keys %{$temphash})
	4				16
184						{
185	4				9	my $framestart = $gpos % 3;
186	4				16	my $startpos = int ( (length $temphash->{$gpos}) / 3 + 2 ) * 3;
187	4				19	my $string = substr $seq, $gpos - $framestart, $startpos;
188	4				7	my $newrepseg = $string;
189
190	4				4	my %newseqs;
191	4				8	my $len = (length $string) / 3;
192	4				10	my $curval = _rscu_sum($rscu_t, $string);
193						#compute the changes possible
194	4				10	for my $it (1..$len)
195						{
196	10				40	my @map = map { join q{}, sort @{$_} } combine($it, (1..$len));
	22				2868
	22				69
197	10				27	foreach my $guide (@map)
198						{
199	22				32	my $rscuers = {};
200	22				64	my @coords = sort split(q{}, $guide);
201	22				53	$rscuers->{0} = [$string];
202	22				28	my $passthrough = 0;
203	22				30	foreach my $coord (@coords)
204						{
205	37				53	my $srcarr = $rscuers->{$passthrough};
206	37				40	foreach my $str (@{$srcarr})
	37				47
207						{
208	57				97	my $coda = substr $str, ($coord * 3) - 3, 3;
209	57				62	foreach my $codb (grep {$coda ne $_}
	281				394
210	57				104	@{$revcodon_t->{$codon_t->{$coda}}})
211						{
212	224				274	my $newstr = $str;
213	224				279	substr $newstr, ($coord * 3) - 3, 3, $codb;
214	224				237	push @{$rscuers->{$passthrough+1}}, $newstr;
	224				412
215						}
216						}
217	37				53	$passthrough++;
218						}
219	22				25	foreach my $newseq (@{$rscuers->{$passthrough}})
	22				31
220						{
221	189				298	my $a = _rscu_sum($rscu_t, $newseq);
222	189				322	my $b = _compare_sequences($string, $newseq);
223	189				1317	$newseqs{$newseq} = [sprintf("%.2f",abs($a - $curval)), $b->{D}];
224						}
225						}
226						}
227						#try all the changes
228	4				37	foreach my $newseq (sort {$newseqs{$a}->[0] <=> $newseqs{$b}->[0]
229	911	50			1343	\|\| $newseqs{$a}->[1] <=> $newseqs{$b}->[1]}
230						keys %newseqs)
231						{
232	3				11	my $qeswen = _complement($newseq, 1);
233	3				5	my $matchflag = 0;
234	3				5	foreach my $regex (@{$regarr})
	3				7
235						{
236	6	50			25	$matchflag++ if ($newseq =~ $regex);
237	6	50			22	$matchflag++ if ($qeswen =~ $regex);
238						}
239	3	50			7	if ($matchflag == 0)
240						{
241	3				7	$newrepseg = $newseq;
242	3				6	last;
243						}
244						}
245	4				49	substr $seq, $gpos - $framestart, (length $newrepseg), $newrepseg;
246						}
247	4				18	return $seq;
248						}
249
250						=head2 _rscu_sum()
251
252						=cut
253
254						sub _rscu_sum
255						{
256	193			193	292	my ($rscu_t, $ntseq) = @_;
257	193				214	my $offset = 0;
258	193				209	my $rscusum = 0;
259	193				204	my $length = length $ntseq;
260	193				210	my $rem = $length % 3;
261	193				319	while ($offset < ($length - $rem))
262						{
263	577				687	my $cod = substr $ntseq, $offset, 3;
264	577				738	$rscusum += $rscu_t->{$cod};
265	577				812	$offset += 3;
266						}
267	193				278	return $rscusum;
268						}
269
270						=head2 _codon_count()
271
272						takes a reference to an array of sequences and returns a hash with codons as
273						keys and the number of times the codon occurs as a value.
274
275						=cut
276
277						sub _codon_count
278						{
279	3			3	8	my ($seqs, $codon_t, $hashref) = @_;
280	3				80	my %blank = map {$_ => 0} sort keys %$codon_t;
	192				304
281	3		100		26	my $codoncount = $hashref \|\| \%blank;
282	3				8	foreach my $seq (@$seqs)
283						{
284	3				285	my @arr = ($seq =~ m/ [ATCG]{3} /xg);
285	3				12	foreach my $codon (@arr)
286						{
287	600				874	$codoncount->{$codon}++;
288						}
289						}
290	3				16	return $codoncount;
291						}
292
293						=head2 generate_RSCU_table()
294
295						takes a hash reference with keys as codons and values as number of times
296						those codons occur (it helps to use codon_count) and returns a hash with each
297						codon and its RSCU value
298
299						=cut
300
301						sub _generate_RSCU_table
302						{
303	1			1	4	my ($codon_count, $codon_t, $revcodon_t) = @_;
304	1				3	my $RSCU_hash = {};
305	1				14	foreach my $codon (sort grep {$_ ne "XXX"} keys %$codon_count)
	64				115
306						{
307	64				96	my $x_j = 0;
308	64				78	my $x = $codon_count->{$codon};
309	64				112	my $family = $revcodon_t->{$codon_t->{$codon}};
310	64				82	my $family_size = scalar(@$family);
311	64				91	foreach (grep {exists $codon_count->{$_}} @$family)
	244				431
312						{
313	244				340	$x_j += $codon_count->{$_};
314						}
315	64	50			123	my $rscu = $x_j > 0 ? $x / ($x_j / $family_size) : 0.00;
316	64				270	$RSCU_hash->{$codon} = sprintf("%.2f", $rscu ) ;
317						}
318	1				7	return $RSCU_hash;
319						}
320
321						=head2 _generate_codon_report
322
323						=cut
324
325						sub _generate_codon_report
326						{
327	0			0	0	my ($codon_count, $codon_t, $rscu_t) = @_;
328	0				0	my $string = "Codon counts and RSCU values:\n";
329	0				0	my @codvalsort = sort {$b <=> $a} values %{$codon_count};
	0				0
	0				0
330	0				0	my $maxcodnum = $codvalsort[0];
331	0				0	foreach my $a ( qw(T C A G))
332						{
333	0				0	$string .= "\n";
334	0				0	foreach my $c ( qw(T C A G) )
335						{
336	0				0	foreach my $b ( qw(T C A G) )
337						{
338	0				0	my $codon = $a . $b . $c;
339	0				0	my $count = $codon_count->{$codon};
340	0				0	my $spacer = q{ } x (length($maxcodnum) - length($count));
341	0				0	$string .= "$codon (" . $codon_t->{$codon} . ") $spacer$count ";
342	0				0	$string .= $rscu_t->{$codon} . q{ } x 5;
343						}
344	0				0	$string .= "\n";
345						}
346	0				0	$string .= "\n";
347						}
348	0				0	return $string;
349						}
350
351						=head2 _generate_codon_file
352
353						=cut
354
355						sub _generate_codon_file
356						{
357	0			0	0	my ($table, $rev_cod_t, $comments) = @_;
358	0				0	my $string = q{};
359	0				0	my @cs = @$comments;
360	0				0	$string .= "# " . $_ . "\n" foreach (@cs);
361	0				0	foreach my $aa (sort keys %$rev_cod_t)
362						{
363	0				0	my @codons = @{$rev_cod_t->{$aa}};
	0				0
364	0				0	$string .= "#$aa\n";
365	0				0	foreach my $codon (sort @codons)
366						{
367	0				0	$string .= "{$codon} = $table->{$codon}\n";
368						}
369						}
370	0				0	return $string;
371						}
372
373						=head2 _define_codons
374
375						Generates an array reference that contains every possible nucleotide codon
376
377						=cut
378
379						sub _define_codons
380						{
381	14			14	24	my @codons;
382	14				29	foreach my $a (qw(A T C G))
383						{
384	56				84	foreach my $b (qw(A T C G))
385						{
386	224				265	foreach my $c (qw(A T C G))
387						{
388	896				1457	push @codons, $a . $b . $c;
389						}
390						}
391						}
392	14				238	return @codons;
393						}
394
395						=head2 _amb_translation()
396
397						takes a nucleotide that may be degenerate and a codon table and returns a list
398						of all amino acid sequences that nucleotide sequence could be translated into.
399
400						in: nucleotide sequence (string),
401						codon table (hash reference),
402						optional switch to force only a single frame of translation
403						optional hashref of previous answers to speed processing
404						out: amino acid sequence list (vector)
405
406						=cut
407
408						sub _amb_translation
409						{
410	37			37	81	my ($seq, $codon_t, $swit, $memo) = @_;
411	37	50			87	croak ("Bad frame argument\n") unless exists $ambtransswits{$swit};
412	37	100	100		123	if ($swit eq "s" \|\| $swit eq "t")
413						{
414	8				29	my @frames = qw(1 2 3);
415	8	100			18	push @frames, qw(-1 -2 -3) if ($swit eq "s");
416	8				12	my (%RES);
417	8				14	foreach my $s (@frames)
418						{
419	27				64	my @pep_set = _amb_translation($seq, $codon_t, $s, $memo);
420	27				347	$RES{$_}++ foreach (@pep_set);
421						}
422	8				157	return keys %RES;
423						}
424						else
425						{
426	29				44	my (%RES, @SEED, @NEW);
427	29	100			67	$seq = _complement($seq, 1) if ($swit < 0);
428	29	50			108	$seq = 'N' x (abs($swit) - 1) . $seq if (abs($swit) < 4);
429	29				65	my $seqlen = length($seq);
430	29				39	my $gothrough = 0;
431	29				63	for (my $offset = 0; $offset < $seqlen; $offset += 3)
432						{
433	67				144	my $tempcodon = substr($seq, $offset, 3);
434	67				187	$tempcodon .= "N" while (length($tempcodon) % 3);
435	67	50			115	if (!$swit)
436						{
437	0				0	$tempcodon .= 'N' while (length($tempcodon) < 3);
438						}
439	67	100			121	if ($gothrough == 0)
440						{
441	29				52	@SEED = _degcodon_to_aas($tempcodon, $codon_t, $memo) ;
442						}
443						else
444						{
445	38				65	@NEW = _degcodon_to_aas($tempcodon, $codon_t, $memo);
446	38				104	@SEED = _add_arr(\@SEED, \@NEW);
447						}
448	67				219	$gothrough++;
449						}
450	29				611	$RES{$_}++ foreach(@SEED);
451	29				467	return keys %RES;
452						}
453						}
454
455						=head2 _degcodon_to_aas()
456
457						takes a codon that may be degenerate and a codon table and returns a list of
458						all amino acids that codon could represent. If a hashref is provided with
459						previous answers, it will run MUCH faster (memoization).
460
461						in: codon (string),
462						codon table (hash reference)
463						out: amino acid list (vector)
464
465						=cut
466
467						sub _degcodon_to_aas
468						{
469	88			88	171	my ($codon, $codon_t, $memo) = @_;
470	88	50	33		309	return if ( ! $codon \|\| length($codon) != 3);
471	88				143	my (@answer, %temphash) = ((), ());
472	88	100			219	if (exists $memo->{$codon})
		50
473						{
474	25				34	return @{$memo->{$codon}};
	25				93
475						}
476						elsif ($codon eq "NNN")
477						{
478	0				0	%temphash = map { $_ => 1} values %$codon_t;
	0				0
479	0				0	@answer = keys %temphash;
480						}
481						else
482						{
483	63				152	my $reg = _regres($codon, 1);
484	63				524	%temphash = map {$codon_t->{$_} => 1} grep { $_ =~ $reg } keys %$codon_t;
	360				713
	4032				9270
485	63				336	@answer = keys %temphash;
486						}
487	63				215	$memo->{$codon} = \@answer;
488	63				209	return @answer;
489						}
490
491						=head2 _find_in_frame
492
493						=cut
494
495						sub _find_in_frame
496						{
497	0			0	0	my ($ntseq, $pattern, $codon_t) = @_;
498	0				0	my $regex = _regres($pattern, 2);
499	0				0	my $aaseq = _translate($ntseq, 1, $codon_t);
500	0				0	my $hshref = _positions($aaseq, [$regex]);
501	0				0	my $pattntlen = length($pattern) * 3;
502	0				0	my $answer = {};
503	0				0	foreach my $ao (keys %$hshref)
504						{
505	0				0	my $nuco = 3 * $ao;
506	0				0	$answer->{$nuco} = substr($ntseq, $nuco, $pattntlen);
507						}
508	0				0	return $answer;
509						}
510
511						=head2 _minimize_local_alignment_dp()
512
513						Repeatsmasher, by Dongwon Lee. A function that minimizes local alignment
514						scores.
515
516						in: gene sequence (string)
517						codon table (hashref)
518						RSCU table (hashref)
519						out: new gene sequence (string)
520						#NO UNIT TEST
521
522						=cut
523
524						sub _minimize_local_alignment_dp
525						{
526	0			0	0	my ($oldseq, $codon_t, $rev_codon_t, $rscu_t) = @_;
527	0				0	my $match = 5;
528	0				0	my $transi = -3;
529	0				0	my $transv = -4;
530	0				0	my $score_threshold = $match*6; #count the scores only consecutive 6 nts
531	0				0	my @s = ( [$match, $transv, $transi, $transv],
532						[$transv, $match, $transv, $transi],
533						[$transi, $transv, $match, $transv],
534						[$transv, $transi, $transv, $match] );
535	0				0	my %nt2idx = ("A"=>0, "C"=>1, "G"=>2, "T"=>3);
536
537						#initial values
538	0				0	my @optM = (0);
539	0				0	my $optseq = q{};
540
541						#assumming that the sequence is in frame
542	0				0	my ($offset, $cod, $aa) = (0, q{}, q{});
543	0				0	my $oldlen = length($oldseq) - 3;
544	0				0	while ( $offset <= $oldlen )
545						{
546	0				0	$cod = substr($oldseq, $offset, 3);
547	0				0	$aa = _translate($cod, 1, $codon_t);
548	0				0	my @posarr = sort { $rscu_t->{$b} <=> $rscu_t->{$a} }
549	0				0	@{$rev_codon_t->{$aa}};
	0				0
550
551	0				0	my @minM = ();
552	0				0	my $min_seq = q{};
553						#assign an impossible large score
554	0				0	my $min_score = $match(length($oldseq)*2);
555
556	0	0			0	if ($aa ne '*')
557						{
558	0				0	foreach my $newcod (@posarr)
559						{
560	0				0	my @prevM = @optM;
561	0				0	my $prevseq = $optseq;
562
563	0				0	foreach my $nt (split(//, $newcod))
564						{
565	0				0	my $currseq = $prevseq . $nt;
566	0				0	my $currlen = length($currseq);
567	0				0	my @currM = ();
568	0				0	my $pos = 0;
569	0				0	push @currM, 0;
570	0				0	while($pos < $currlen)
571						{
572	0				0	my $nt2 = substr($currseq, $pos, 1);
573	0				0	my $nidx1 = $nt2idx{$nt};
574	0				0	my $nidx2 = $nt2idx{$nt2};
575	0				0	push @currM, max(0, $prevM[$pos]+$s[$nidx1][$nidx2]);
576	0				0	$pos++;
577						}
578	0				0	@prevM = @currM;
579	0				0	$prevseq = $currseq;
580						}
581	0				0	my $scoresum = 0;
582	0				0	foreach my $i (@prevM)
583						{
584	0	0			0	if ($i >= $score_threshold)
585						{
586	0				0	$scoresum += $i;
587						}
588						}
589	0	0			0	if ($min_score > $scoresum)
590						{
591	0				0	$min_score = $scoresum;
592	0				0	$min_seq = $prevseq;
593	0				0	@minM = @prevM;
594						}
595						}
596						}
597						else
598						{
599	0				0	$optseq = $optseq . $cod;
600	0				0	last;
601						}
602	0				0	@optM = @minM;
603	0				0	$optseq = $min_seq;
604	0				0	$offset+=3;
605						}
606	0				0	return $optseq;
607						}
608
609						=head2 _pattern_aligner
610
611						takes a nucleotide sequence, a pattern, a peptide sequence, and a codon table
612						and inserts Ns before the pattern until they align properly. This is so a
613						pattern can be inserted out of frame.
614
615						in: nucleotide sequence (string),
616						nucleotide pattern (string),
617						amino acid sequence (string),
618						codon table (hash reference)
619						out: nucleotide pattern (string)
620
621						=cut
622
623						sub _pattern_aligner
624						{
625	3			3	10	my ($critseg, $pattern, $peptide, $codon_t, $memo) = @_;
626	3				6	my $diff = length($critseg) - length($pattern);
627	3				11	my ($newpatt, $nstring, $rounds, $offset, $check, $pcheck) = (q{}, "N" x $diff, 0, 0, q{}, q{});
628						# print "seeking $pattern for $peptide from $critseg...\n";
629	3		66		15	while ($check ne $peptide && $rounds <= $diff*2 + 1)
630						{
631	6	50			18	$newpatt = $rounds <= $diff
632						? substr($nstring, 0, $rounds) . $pattern
633						: substr($nstring, 0, ($rounds-3)) . _complement($pattern, 1);
634	6				21	$newpatt .= "N" while (length($newpatt) != length($critseg));
635						# print "\t$newpatt\n";
636	6				11	my ($noff, $poff) = (0, 0);
637	6				11	$check = q{};
638	6				13	while ($poff < length($peptide))
639						{
640	18				47	my @possibles = _degcodon_to_aas( substr($newpatt, $noff, 3), $codon_t, $memo );
641						# print "\t\t@possibles\n";
642	18				39	$check .= $_ foreach( grep { substr($peptide, $poff, 1) eq $_ } @possibles);
	52				122
643	18				29	$noff += 3;
644	18				47	$poff ++;
645						}
646	6				20	$pcheck = _translate(substr($critseg, $offset, length($peptide) * 3), 1, $codon_t);
647						# print "\t\t$check, $pcheck, $offset\n";
648	6				14	$rounds++;
649	6	100			28	$offset += 3 if ($rounds % 3 == 0);
650						# $check = q{} if ( $pcheck !~ $check);
651						}
652	3	50			7	$newpatt = "0" if ($check ne $peptide);
653						# print "\t\tpataln $check, $pcheck, $rounds, $newpatt\n" if ($check ne $peptide);
654	3				13	return ($newpatt, $rounds - 1);
655						}
656
657						=head2 _pattern_adder()
658
659						takes a nucleotide sequence, a nucleotide "pattern" to be interpolated, and
660						the codon table, and returns an edited nucleotide sequence that contains the
661						pattern (if possible).
662
663						in: nucleotide sequence (string),
664						nucleotide pattern (string),
665						codon table (hash reference)
666						out: nucleotide sequence (string) OR null
667
668						=cut
669
670						sub _pattern_adder
671						{
672	1			1	4	my ($oldpatt, $newpatt, $codon_t, $revcodon_t, $memo) = @_;
673						#assume that critseg and pattern come in as complete codons
674						# (i.e., have been run through pattern_aligner)
675	1				3	my $copy = q{};
676	1				6	for (my $offset = 0; $offset < length($oldpatt); $offset += 3)
677						{
678	3				10	my $curcod = substr($oldpatt, $offset, 3);
679	3				4	my $curtar = substr($newpatt, $offset, 3);
680	3				9	my $ctregx = _regres($curtar);
681	3				10	foreach my $g (_degcodon_to_aas($curcod, $codon_t, $memo))
682						{
683	3	100			20	if ($curcod =~ $ctregx)
684						{
685	1				6	$copy .= $curcod;
686						}
687						else
688						{
689	2				3	my @arr = @{$revcodon_t->{$g}};
	2				8
690	2				6	foreach my $potcod (@arr)
691						{
692	6				11	my $flag = 0;
693	6	100	66		28	$flag++ if ($potcod =~ $ctregx \|\| $curtar =~ _regres($potcod));
694	6	100			16	if ($flag != 0)
695						{
696	2				5	$copy .= $potcod;
697	2				8	last;
698						}
699						}
700						}
701						# print "\t\tpatadd\t($curcod, $curtar)\t$copy \n";
702						}
703						}
704						# print "\t\tpatadd $copy from $oldpatt\n";
705	1	50			7	return length($copy) == length($oldpatt) ? $copy : 0;
706						}
707
708						=head2 _codon_change_type
709
710						=cut
711
712						sub _codon_change_type
713						{
714	0			0		my ($oldcod, $newcod, $codon_t) = @_;
715	0					my $oldaa = $codon_t->{$oldcod};
716	0					my $newaa = $codon_t->{$newcod};
717	0	0				my $type = $oldaa eq $newaa
		0
		0
		0
718						? $oldaa eq q{*}
719						? 'stop_retained_variant'
720						: 'synonymous_codon'
721						: $oldaa eq q{*}
722						? 'stop_lost'
723						: $newaa eq q{*}
724						? 'stop_gained'
725						: 'non_synonymous_codon';
726	0					return $type;
727						}
728
729						1;
730
731						__END__