File Coverage

blib/lib/Bio/GeneDesign.pm

Criterion	Covered	Total	%
statement	381	684	55.7
branch	117	368	31.7
condition	34	150	22.6
subroutine	50	76	65.7
pod	59	59	100.0
total	641	1337	47.9

line	stmt	bran	cond	sub	pod	time	code
1							#
2							# GeneDesign engine
3							#
4
5							=head1 NAME
6
7							Bio::GeneDesign
8
9							=head1 VERSION
10
11							Version 5.52
12
13							=head1 DESCRIPTION
14
15							=head1 AUTHOR
16
17							Sarah Richardson <smrichardson@lbl.gov>
18
19							=cut
20
21							package Bio::GeneDesign;
22	11			11		316051	use base qw(Bio::Root::Root);
	11					27
	11					10399
23
24	11			11		657490	use Bio::GeneDesign::ConfigData;
	11					33
	11					433
25	11			11		6420	use Bio::GeneDesign::Basic qw(:GD);
	11					35
	11					3295
26	11			11		7111	use Bio::GeneDesign::CodonJuggle qw(:GD);
	11					36
	11					2551
27	11			11		67	use Bio::GeneDesign::Codons qw(:GD);
	11					21
	11					3034
28	11			11		9023	use Bio::GeneDesign::Oligo qw(:GD);
	11					28
	11					1636
29	11			11		69	use Bio::GeneDesign::Random qw(:GD);
	11					24
	11					1654
30	11			11		10365	use Bio::GeneDesign::RestrictionEnzymes qw(:GD);
	11					39
	11					2169
31	11			11		7662	use Bio::GeneDesign::ReverseTranslate qw(:GD);
	11					33
	11					1588
32	11			11		6377	use Bio::GeneDesign::PrefixTree;
	11					31
	11					320
33	11			11		77	use File::Basename;
	11					20
	11					799
34	11			11		12602	use Bio::SeqIO;
	11					506882
	11					911
35	11			11		133	use Carp;
	11					33
	11					868
36
37	11			11		70	use strict;
	11					25
	11					403
38	11			11		68	use warnings;
	11					21
	11					114502
39
40							my $VERSION = 5.52;
41
42							=head1 CONSTRUCTORS
43
44							=head2 new
45
46							Returns an initialized Bio::GeneDesign object.
47
48							This function reads the ConfigData written at installation, imports the
49							relevant sublibraries, and sets the relevant paths.
50
51							my $GD = Bio::GeneDesign->new();
52
53							=cut
54
55							sub new
56							{
57	9			9	1	131	my ($class, @args) = @_;
58	9					141	my $self = $class->SUPER::new(@args);
59	9					129	bless $self, $class;
60
61	9					132	$self->{script_path} = Bio::GeneDesign::ConfigData->config('script_path');
62	9					54	$self->{conf} = Bio::GeneDesign::ConfigData->config('conf_path');
63	9	50				71	$self->{conf} .= q{/} unless substr($self->{conf}, -1, 1) eq q{/};
64
65	9					50	$self->{tmp_path} = Bio::GeneDesign::ConfigData->config('tmp_path');
66	9	50				79	$self->{tmp_path} .= q{/} unless substr($self->{tmp_path}, -1, 1) eq q{/};
67
68	9					53	$self->{graph} = Bio::GeneDesign::ConfigData->config('graphing_support');
69	9	50				49	if ($self->{graph})
70							{
71	0					0	require Bio::GeneDesign::Graph;
72	0					0	import Bio::GeneDesign::Graph qw(:GD);
73							}
74
75	9					50	$self->{EMBOSS} = Bio::GeneDesign::ConfigData->config('EMBOSS_support');
76	9	50				43	if ($self->{EMBOSS})
77							{
78	0					0	require Bio::GeneDesign::Palindrome;
79	0					0	import Bio::GeneDesign::Palindrome qw(:GD);
80							}
81
82	9					44	$self->{BLAST} = Bio::GeneDesign::ConfigData->config('BLAST_support');
83	9	50				51	if ($self->BLAST)
84							{
85							#$ENV{BLASTPLUSDIR} = Bio::GeneDesign::ConfigData->config('blast_path');
86	0					0	require Bio::GeneDesign::Blast;
87	0					0	import Bio::GeneDesign::Blast qw(:GD);
88							}
89
90	9					46	$self->{vmatch} = Bio::GeneDesign::ConfigData->config('vmatch_support');
91	9	50				120	if ($self->{vmatch})
92							{
93	0					0	require Bio::GeneDesign::Vmatch;
94	0					0	import Bio::GeneDesign::Vmatch qw(:GD);
95							}
96
97	9					49	$self->{codon_path} = $self->{conf} . 'codon_tables/';
98	9					27	$self->{organism} = undef;
99	9					24	$self->{codontable} = undef;
100	9					26	$self->{enzyme_set} = undef;
101	9					26	$self->{version} = $VERSION;
102	9					24	$self->{amb_trans_memo} = {};
103
104	9					42	return $self;
105							}
106
107							=head1 ACCESSORS
108
109							=cut
110
111							=head2 EMBOSS
112
113							returns a value if EMBOSS_support was vetted and approved during installation.
114
115							=cut
116
117							sub EMBOSS
118							{
119	0			0	1	0	my ($self) = @_;
120	0					0	return $self->{'EMBOSS'};
121							}
122
123							=head2 BLAST
124
125							returns a value if BLAST_support was vetted and approved during installation.
126
127							=cut
128
129							sub BLAST
130							{
131	9			9	1	27	my ($self) = @_;
132	9					45	return $self->{'BLAST'};
133							}
134
135							=head2 graph
136
137							returns a value if graphing_support was vetted and approved during installation.
138
139							=cut
140
141							sub graph
142							{
143	0			0	1	0	my ($self) = @_;
144	0					0	return $self->{'graph'};
145							}
146
147							=head2 vmatch
148
149							returns a value if vmatch_support was vetted and approved during installation.
150
151							=cut
152
153							sub vmatch
154							{
155	0			0	1	0	my ($self) = @_;
156	0					0	return $self->{'vmatch'};
157							}
158
159							=head2 enzyme_set
160
161							Returns a hash reference where the keys are enzyme names and the values are
162							L<RestrictionEnzyme\|Bio::GeneDesign::RestrictionEnzyme> objects, if the enzyme
163							set has been defined.
164
165							To set this value, use L<set_restriction_enzymes\|/set_restriction_enzymes>.
166
167							=cut
168
169							sub enzyme_set
170							{
171	4			4	1	1237	my ($self) = @_;
172	4					52	return $self->{'enzyme_set'};
173							}
174
175							=head2 enzyme_set_name
176
177							Returns the name of the enzyme set in use, if there is one.
178
179							To set this value, use L<set_restriction_enzymes\|/set_restriction_enzymes>.
180
181							=cut
182
183							sub enzyme_set_name
184							{
185	0			0	1	0	my ($self) = @_;
186	0					0	return $self->{'enzyme_set_name'};
187							}
188
189							=head2 all_enzymes
190
191							Returns a hash reference where the keys are enzyme names and the values are
192							L<RestrictionEnzyme\|Bio::GeneDesign::RestrictionEnzyme> objects
193
194							To set this value, use L<set_restriction_enzymes\|/set_restriction_enzymes>.
195
196							=cut
197
198							sub all_enzymes
199							{
200	0			0	1	0	my ($self) = @_;
201	0					0	return $self->{'all_enzymes'};
202							}
203
204							=head2 organism
205
206							Returns the name of the organism in use, if there is one.
207
208							To set this value, use L<set_organism\|/set_organism>.
209
210							=cut
211
212							sub organism
213							{
214	0			0	1	0	my ($self) = @_;
215	0					0	return $self->{'organism'};
216							}
217
218							=head2 codontable
219
220							Returns the codon table in use, if there is one.
221
222							The codon table is a hash reference where the keys are upper case nucleotides
223							and the values are upper case single letter amino acids.
224
225							my $codon_t = $GD->codontable();
226							$codon_t->{"ATG"} eq "M" \|\| die;
227
228							To set this value, use L<set_codontable\|/set_codontable>.
229
230							=cut
231
232							sub codontable
233							{
234	1			1	1	718	my ($self) = @_;
235	1					11	return $self->{'codontable'};
236							}
237
238							=head2 reversecodontable
239
240							Returns the reverse codon table in use, if there is one.
241
242							The reverse codon table is a hash reference where the keys are upper case single
243							letter amino acids and the values are upper case nucleotides.
244
245							my $revcodon_t = $GD->reversecodontable();
246							$revcodon_t->{"M"} eq "ATG" \|\| die;
247
248							This value is set automatically when L<set_codontable\|/set_codontable> is run.
249
250							=cut
251
252							sub reversecodontable
253							{
254	1			1	1	1722	my ($self) = @_;
255	1					5	return $self->{'reversecodontable'};
256							}
257
258							=head2 rscutable
259
260							Returns the RSCU table in use, if there is one.
261
262							The RSCU codon table is a hash reference where the keys are upper case
263							nucleotides and the values are floats.
264
265							my $rscu_t = $GD->rscutable();
266							$rscu_t->{"ATG"} eq 1.00 \|\| die;
267
268							To set this value, use L<set_rscu_table\|/set_rscutable>.
269
270							=cut
271
272							sub rscutable
273							{
274	1			1	1	1767	my ($self) = @_;
275	1					6	return $self->{'rscutable'};
276							}
277
278
279							=head1 FUNCTIONS
280
281							=cut
282
283							=head2 melt
284
285							my $Tm = $GD->melt(-sequence => $myseq);
286
287							The -sequence argument is required.
288
289							Returns the melting temperature of a DNA sequence.
290
291							You can set the salt and DNA concentrations with the -salt and -concentration
292							arguments; they are 50mm (.05) and 100 pm (.0000001) respectively.
293
294							You can pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
295							object to be analyzed with the -sequence flag.
296
297							There are four different formulae to choose from. If you wish to use the nearest
298							neighbor method, use the -nearest_neighbor flag. Otherwise the appropriate
299							formula will be determined by the length of your -sequence argument.
300
301							For sequences under 14 base pairs:
302							Tm = (4 * #GC) + (2 * #AT).
303
304							For sequences between 14 and 50 base pairs:
305							Tm = 100.5 + (41 * #GC / length) - (820 / length) + 16.6 * log10(salt)
306
307							For sequences over 50 base pairs:
308							Tm = 81.5 + (41 * #GC / length) - (500 / length) + 16.6 * log10(salt) - .62;
309
310							=cut
311
312							sub melt
313							{
314	4			4	1	4230	my ($self, @args) = @_;
315
316	4					20	my ($seq, $salt, $conc, $nnflag)
317							= $self->_rearrange([qw(
318							sequence
319							salt
320							concentration
321							nearest_neighbor)], @args);
322
323	4	50				86	$self->throw("No sequence provided for the melt function")
324							unless ($seq);
325
326	4	100				10	$nnflag = $nnflag ? 1 : 0;
327	4		50			17	$salt = $salt \|\| .05;
328	4		50			13	$conc = $conc \|\| .0000001;
329	4					8	my $str = $self->_stripdown($seq, q{}, 1);
330
331	4	100				12	if ($nnflag)
332							{
333	1					5	return _ntherm($str, $salt, $conc);
334							}
335	3					10	return _melt($str, $salt, $conc);
336							}
337
338							=head2 complement
339
340							$my_seq = "AATTCG";
341
342							my $complemented_seq = $GD->complement($my_seq);
343							$complemented_seq eq "TTAAGC" \|\| die;
344
345							my $reverse_complemented_seq = $GD->complement($my_seq, 1);
346							$reverse_complemented_seq eq "CGAATT" \|\| die;
347
348							#clean
349							my $complemented_seq = $GD->complement(-sequence => $my_seq);
350							$complemented_seq eq "TTAAGC" \|\| die;
351
352							my $reverse_complemented_seq = $GD->complement(-sequence => $my_seq,
353							-reverse => 1);
354							$reverse_complemented_seq eq "CGAATT" \|\| die;
355
356
357							The -sequence argument is required.
358
359							Complements or reverse complements a DNA sequence.
360
361							You can pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
362							object to be processed.
363
364							If you also pass along a true statement, the sequence will be reversed and
365							complemented.
366
367							=cut
368
369							sub complement
370							{
371	4			4	1	3474	my ($self, @args) = @_;
372
373	4					22	my ($seq, $swit)
374							= $self->_rearrange([qw(
375							sequence
376							reverse)], @args);
377
378	4	50				38	$self->throw("No sequence provided for the complement function")
379							unless $seq;
380
381	4		100			14	$swit = $swit \|\| 0;
382
383	4					10	my $str = $self->_stripdown($seq, q{}, 1);
384
385	4					15	return _complement($str, $swit);
386							}
387
388							=head2 count
389
390							$my_seq = "AATTCG";
391							my $count = $GD->count($my_seq);
392							$count->{C} == 1 \|\| die;
393							$count->{G} == 1 \|\| die;
394							$count->{A} == 2 \|\| die;
395							$count->{GCp} == 33.3 \|\| die;
396							$count->{ATp} == 66.7 \|\| die;
397
398							#clean
399							my $count = $GD->count(-sequence => $my_seq);
400
401							You must pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
402							object.
403
404							the count function counts the bases in a DNA sequence and returns a hash
405							reference where each base (including the ambiguous bases) are keys and the
406							values are the number of times they appear in the sequence. There are also the
407							special values GCp and ATp for GC and AT percentage.
408
409							=cut
410
411							sub count
412							{
413	2			2	1	2778	my ($self, @args) = @_;
414
415	2					12	my ($seq) = $self->_rearrange([qw(sequence)], @args);
416
417	2	50				23	$self->throw("No sequence provided for the count function")
418							unless ($seq);
419
420
421	2					6	my $str = $self->_stripdown($seq, q{}, 1);
422
423	2					10	return _count($str);
424							}
425
426							=head2 regex_nt
427
428							my $my_seq = "ABC";
429							my $regex = $GD->regex_nt(-sequence => $my_seq);
430							# $regex is qr/A[CGT]C/;
431
432							my $regarr = $GD->regex_nt(-sequence => $my_seq --reverse_complement => 1);
433							# $regarr is [qr/A[CGT]C/, qr/G[ACG]T/]
434
435
436							You must pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
437							object to be processed with the -sequence flag.
438
439							regex_nt creates a compiled regular expression or a set of them that can be used
440							to query large nucleotide sequences for possibly ambiguous subsequences.
441
442
443							If you want to get regular expressions for both the forward and reverse senses
444							of the DNA, use the -reverse_complement flag and expect a reference to an array
445							of compiled regexes.
446
447							=cut
448
449							sub regex_nt
450							{
451	6			6	1	12372	my ($self, @args) = @_;
452
453	6					38	my ($seq, $arrswit)
454							= $self->_rearrange([qw(
455							sequence
456							reverse_complement)], @args
457							);
458
459	6	50				106	$self->throw("no sequence provided the regex_nt function")
460							unless ($seq);
461
462	6					20	my $str = $self->_stripdown($seq, q{}, 1);
463
464	6	100				18	if ($arrswit)
465							{
466	2					9	return _regarr($str);
467							}
468							else
469							{
470	4					21	return _regres($str, 1);
471							}
472							}
473
474							=head2 regex_aa
475
476							my $my_pep = "AEQ*";
477							my $regex = $GD->regex_aa(-sequence => $my_pep);
478							$regex == qr/AEQ[\*]/ \|\| die;
479
480							Creates a compiled regular expression or a set of them that can be used to query
481							large amino acid sequences for smaller subsequences.
482
483							You can pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
484							object to be processed with the -sequence flag.
485
486							=cut
487
488							sub regex_aa
489							{
490	1			1	1	714	my ($self, $seq) = @_;
491
492	1	50				6	$self->throw("no sequence provided for the regex_aa function")
493							unless ($seq);
494
495	1					4	my $str = $self->_stripdown($seq, q{}, 1);
496
497	1					6	return _regres($str, 2);
498							}
499
500							=head2 sequence_is_ambiguous
501
502							my $my_seq = "ABC";
503							my $flag = $GD->sequence_is_ambiguous($my_seq);
504							$flag == 1 \|\| die;
505
506							$my_seq = "ATC";
507							$flag = $GD->sequence_is_ambiguous($my_seq);
508							$flag == 0 \|\| die;
509
510							Checks to see if a DNA sequence contains ambiguous bases (RYMKWSBDHVN) and
511							returns true if it does.
512
513							You can pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
514							object to be processed.
515
516							=cut
517
518							sub sequence_is_ambiguous
519							{
520	5			5	1	1599	my ($self, $seq) = @_;
521
522	5	50				16	$self->throw("no sequence provided for the sequence_is_ambiguous function")
523							unless ($seq);
524
525	5					15	my $str = $self->_stripdown($seq, q{}, 1);
526
527	5					22	return _is_ambiguous($str);
528							}
529
530							=head2 ambiguous_translation
531
532							my $my_seq = "ABC";
533							my @peps = $GD->ambiguous_translation(-sequence => $my_seq, -frame => 1);
534							# @peps is qw(I T C)
535
536							You must pass a string variable, a Bio::Seq object, or a Bio::SeqFeatureI object
537							to be processed.
538
539							Translates a nucleotide sequence that may have ambiguous bases and returns an
540							array of possible peptides.
541
542							The frame argument may be 1, 2, 3, -1, -2, or -3.
543							It may also be t (three, 1, 2, 3), or s (six, 1, 2, 3, -1, -2, -3).
544							It defaults to 1.
545
546							=cut
547
548							sub ambiguous_translation
549							{
550	4			4	1	10088	my ($self, @args) = @_;
551
552	4					35	my ($seq, $frame)
553							= $self->_rearrange([qw(sequence frame)], @args);
554
555	4	50				120	$self->throw("no sequence provided for the ambiguous_translation function")
556							unless ($seq);
557
558	4		50			11	$frame = $frame \|\| 1;
559
560	4					9	my %ambtransswits = map {$_ => 1} qw(1 2 3 -1 -2 -3 s t);
	32					75
561
562	4	50				18	$self->throw("Bad frame argument to ambiguous_translation")
563							unless (exists $ambtransswits{$frame});
564
565	4					13	my $str = $self->_stripdown($seq, q{}, 1);
566
567	4					22	return _amb_translation($str,
568							$self->{codontable},
569							$frame,
570							$self->{amb_trans_memo});
571							}
572
573							=head2 ambiguous_transcription
574
575							my $my_seq = "ABC";
576							my $seqs = $GD->ambiguous_transcription($my_seq);
577							# $seqs is [qw(ACC AGC ATC)]
578
579							Deambiguates a nucleotide sequence that may have ambiguous bases and returns a
580							reference to a sorted array of possible unambiguous sequences.
581
582							You can pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI
583							object to be processed.
584
585							=cut
586
587							sub ambiguous_transcription
588							{
589	1			1	1	1923	my ($self, $seq) = @_;
590
591	1	50				4	$self->throw("no sequence provided for the ambiguous_transcription function")
592							unless ($seq);
593
594	1					4	my $str = $self->_stripdown($seq, q{}, 1);
595
596	1					7	return _amb_transcription($str);
597							}
598
599							=head2 positions
600
601							my $seq = "TGCTGACTGCAGTCAGTACACTACGTACGTGCATGAC";
602							my $seek = "CWC";
603
604							my $positions = $GD->positions(-sequence => $seq,
605							-query => $seek);
606							# $positions is {18 => "CAC"}
607
608							$positions = $GD->positions(-sequence => $seq,
609							-query => $seek,
610							-reverse_complement => 1);
611							# $positions is {18 => "CAC", 28 => "GTG"}
612
613							Finds and returns all the positions and sequences of a potentially ambiguous
614							subsequence in a larger sequence. The reverse_complement flag is off by default.
615
616							You can pass either string variables, Bio::Seq objects, or Bio::SeqFeatureI
617							objects as the sequence and query arguments; additionally you may pass a
618							L<RestrictionEnzyme\|Bio::GeneDesign::RestrictionEnzyme> object as the query
619							argument.
620
621							=cut
622
623							sub positions
624							{
625	2			2	1	2749	my ($self, @args) = @_;
626
627	2					12	my ($seq, $seek, $revcom)
628							= $self->_rearrange([qw(
629							sequence
630							query
631							reverse_complement)], @args);
632
633	2	50				55	$self->throw("no sequence provided for the positions function")
634							unless ($seq);
635
636	2	50				11	$self->throw("no query provided for the positions function")
637							unless ($seek);
638
639
640	2					7	my $base = $self->_stripdown($seq, q{}, 1);
641
642	2					4	my $regarr = [];
643	2					3	my $query = $seek;
644	2					6	my $qref = ref($seek);
645	2	50				6	if ($qref)
646							{
647	0	0	0			0	$self->throw("object $qref is not a Bio::Seq or Bio::SeqFeature, or " .
			0
648							"Bio::GeneDesign::RestrictionEnzyme object")
649							unless ($seek->isa("Bio::Seq")
650							\|\| $seek->isa("Bio::SeqFeatureI")
651							\|\| $seek->isa("Bio::GeneDesign::RestrictionEnzyme"));
652	0	0				0	if ($seek->isa("Bio::GeneDesign::RestrictionEnzyme"))
653							{
654	0					0	$regarr = $seek->regex;
655							}
656							else
657							{
658	0	0				0	$query = ref($seek->seq) ? $seek->seq->seq : $seek->seq;
659	0	0				0	$regarr = $revcom ? _regarr($query, 1) : [_regres($query, 1)];
660							}
661							}
662							else
663							{
664	2	50				18	$regarr = $revcom ? _regarr($query, 1) : [_regres($query, 1)];
665							}
666
667	2					11	return _positions($base, $regarr);
668							}
669
670							=head2 set_codontable
671
672							# load a codon table from the GeneDesign configuration directory
673							$GD->set_codontable(-organism_name => "yeast");
674
675							# load a codon table from an arbitrary path and catch it in a variable
676							my $codon_t = $GD->set_codontable(-organism_name => "custom",
677							-table_path => "/path/to/table.ct");
678
679							The -organism_name argument is required.
680
681							This function loads, sets, and returns a codon definition table. After it is run
682							the accessor L<codontable\|/codontable> will return the hash reference that
683							represents the codon table.
684
685							If no path is provided, the configuration directory /codon_tables is checked for
686							tables that match the provided organism name. If there is no table in that
687							directory, a warning will appear and the standard codon table will be
688							used.
689
690							Any codon table that is missing a definition for a codon will cause a warning to
691							be issued. The table format for codon tables is
692
693							# Standard genetic code
694							{TTT} = F
695							{TTC} = F
696							{TTA} = L
697							...
698
699							See L<NCBI's table\|http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi#SG1>
700
701							=cut
702
703							sub set_codontable
704							{
705	7			7	1	26	my ($self, @args) = @_;
706	7					45	my ($orgname, $table_path)
707							= $self->_rearrange([qw(
708							organism_name
709							table_path)], @args);
710
711	7	50				181	$self->throw("No organsim name provided") unless ($orgname);
712	7					35	$self->{organism} = $orgname;
713
714	7	50	33			196	$self->throw("$table_path does not exist")
715							if ($table_path && ! -e $table_path);
716
717	7	50				31	if (! $table_path)
718							{
719	0					0	$table_path = $self->{codon_path} . $orgname . ".ct";
720	0	0				0	if (! -e $table_path)
721							{
722	0					0	warn "No Codon table for $orgname found. Using Standard values\n";
723	0					0	$table_path = $self->{codon_path} . "Standard.ct";
724							}
725							}
726
727	7					92	$self->{codontable} = _parse_codon_file($table_path);
728	7					46	$self->{reversecodontable} = _reverse_codon_table($self->{codontable});
729	7					29	return $self->{codontable};
730							}
731
732							=head2 set_rscutable
733
734							# load a RSCU table from the GeneDesign configuration directory
735							$GD->set_rscutable(-organism_name => "yeast");
736
737							# load an RSCU table from an arbitrary path and catch it in a variable
738							my $rscu_t = $GD->set_rscutable(-organism_name => "custom",
739							-table_path => "/path/to/table.rscu");
740
741							The -organism_name argument is required.
742
743							This function loads, sets, and returns an RSCU table. After it is run
744							the accessor L<rscutable\|/rscutable> will return the hash reference that
745							represents the RSCU table.
746
747							If no path is provided, the configuration directory /codon_tables is checked for
748							tables that match the provided organism name. If there is no table in that
749							directory, a warning will appear and the flat RSCU table will be used.
750
751							Any RSCU table that is missing a definition for a codon will cause a warning to
752							be issued. The table format for RSCU tables is
753
754							# Saccharomyces cerevisiae (Highly expressed genes)
755							# Nucleic Acids Res 16, 8207-8211 (1988)
756							{TTT} = 0.19
757							{TTC} = 1.81
758							{TTA} = 0.49
759							...
760
761							See L<Sharp et al. 1986\|http://www.ncbi.nlm.nih.gov/pubmed/3526280>.
762
763							=cut
764
765							sub set_rscutable
766							{
767	7			7	1	27	my ($self, @args) = @_;
768	7					44	my ($orgname, $rscu_path)
769							= $self->_rearrange([qw(
770							organism_name
771							rscu_path)], @args);
772
773	7	50				213	$self->throw("No organsim name provided") unless ($orgname);
774	7					22	$self->{organism} = $orgname;
775
776	7	50	33			181	$self->throw("$rscu_path does not exist")
777							if ($rscu_path && ! -e $rscu_path);
778
779	7	50				47	if (! $rscu_path)
780							{
781	0					0	$rscu_path = $self->{codon_path} . $orgname . ".rscu";
782	0	0				0	if (! -e $rscu_path)
783							{
784	0					0	warn "No RSCU table for $orgname found. Using Flat values\n";
785	0					0	$rscu_path = $self->{codon_path} . "Flat.rscu";
786							}
787							}
788
789	7					34	$self->{rscutable} = _parse_codon_file($rscu_path);
790	7					29	return $self->{rscutable};
791							}
792
793							=head2 set_organism
794
795							# load both codon tables and RSCU tables simultaneously
796							$GD->set_organism(-organism_name => "yeast");
797
798							# with arguments
799							$GD->set_organism(-organism_name => "custom",
800							-table_path => "/path/to/table.ct",
801							-rscu_path => "/path/to/table.rscu");
802
803
804							The -organism_name argument is required.
805
806							This function is just a shortcut; it runs L<set_codontable/set_codontable> and
807							L<set_rscutable/set_rscutable>. See those functions for details.
808
809							=cut
810
811							sub set_organism
812							{
813	7			7	1	92	my ($self, @args) = @_;
814
815	7					101	my ($orgname, $table_path, $rscu_path)
816							= $self->_rearrange([qw(
817							organism_name
818							table_path
819							rscu_path)], @args);
820
821	7	50				311	$self->throw("No organsim name provided") unless ($orgname);
822	7					20	$self->{organism} = $orgname;
823
824	7					50	$self->set_codontable(-organism_name => $orgname, -table_path => $table_path);
825	7					54	$self->set_rscutable(-organism_name => $orgname, -rscu_path=> $rscu_path);
826
827	7					32	return;
828							}
829
830							=head2 codon_count
831
832							# count the codons in a list of sequences
833							my $tally = $GD->codon_count(-input => \@sequences);
834
835							# add a gene to an existing codon count
836							$tally = $GD->codon_count(-input => $sequence,
837							-count => $tally);
838
839							# add a list of Bio::Seq objects to an existing codon count
840							$tally = $GD->codon_count(-input => \@seqobjects,
841							-count => $tally);
842
843							The -input argument is required and will take a string variable, a Bio::Seq
844							object, a Bio::SeqFeatureI object, or a reference to an array full of any
845							combination of those things.
846
847							The codon_count function takes a set of sequences and counts how often each
848							codon appears in them. It returns a hash reference where the keys are upper case
849							nucleotide codons and the values are integers. If you pass a hash reference
850							containing codon counts with the -count argument, new counts will be added to
851							the old values.
852
853							This function will warn you if non nucleotide codons are found.
854
855							TODO: what about ambiguous codons?
856
857							=cut
858
859							sub codon_count
860							{
861	3			3	1	3912	my ($self, @args) = @_;
862
863	3					19	my ($input, $count) = $self->_rearrange([qw(input count)], @args);
864
865	3	50				51	$self->throw("no codon table has been defined")
866							unless $self->{codontable};
867
868	3	50				11	$self->throw("no sequences were provided to count")
869							unless $input;
870
871	3					11	my $list = $self->_stripdown($input, 'ARRAY', 0);
872
873	3					18	my $cod_count = _codon_count($list,
874							$self->{codontable},
875							$count);
876
877	3	50				14	warn("There are bad codons in codon count\n") if (exists $cod_count->{XXX});
878
879	3					16	return $cod_count;
880							}
881
882							=head2 generate_RSCU_table
883
884							my $rscu_t = $GD->generate_RSCU_table(-sequences => \@list_of_sequences);
885
886							The -sequences argument is required and will take a string variable, a Bio::Seq
887							object, a Bio::SeqFeatureI object, or a reference to an array full of any
888							combination of those things.
889
890							The generate_RSCU_table function takes a set of sequences, counts how often each
891							codon appears, and returns an RSCU table as a hash reference where the keys are
892							upper case nucleotide codons and the values are floats.
893
894							See L<Sharp et al. 1986\|http://www.ncbi.nlm.nih.gov/pubmed/3526280>.
895
896							=cut
897
898							sub generate_RSCU_table
899							{
900	1			1	1	1742	my ($self, @args) = @_;
901
902	1					7	my ($seqobjs) = $self->_rearrange([qw(sequences)], @args);
903
904	1	50				11	$self->throw("no codon table has been defined")
905							unless $self->{codontable};
906
907	1	50				6	$self->throw("Sequence set must be provided")
908							unless ($seqobjs);
909
910	1					5	return _generate_RSCU_table(
911							$self->codon_count(-input => $seqobjs),
912							$self->{codontable},
913							$self->{reversecodontable}
914							);
915							}
916
917							=head2 generate_codon_report
918
919							my $report = $GD->generate_codon_report(-sequences => \@list_of_sequences);
920
921							The report will have the format
922
923							TTT (F) 12800 0.74
924							TTC (F) 21837 1.26
925							TTA (L) 4859 0.31
926							TTG (L) 18806 1.22
927
928							where the first column in each group is the codon, the second column is the one
929							letter amino acid abbreviation in parentheses, the third column is the number of
930							times that codon has been seen, and the fourth column is the RSCU value for that
931							codon.
932
933							This report comes in a 4x4 layout, as would a standard genetic code table in a
934							textbook.
935
936							NO TEST
937
938							=cut
939
940							sub generate_codon_report
941							{
942	0			0	1	0	my ($self, @args) = @_;
943
944	0					0	my ($seqobjs) = $self->_rearrange([qw(sequences)], @args);
945
946	0	0				0	$self->throw("no codon table has been defined")
947							unless $self->{codontable};
948
949	0	0				0	$self->throw("Sequence set must be provided")
950							unless ($seqobjs);
951
952	0					0	my $count_t = $self->codon_count(-input => $seqobjs);
953
954	0					0	my $rscu_t = _generate_RSCU_table(
955							$count_t,
956							$self->{codontable},
957							$self->{reversecodontable}
958							);
959
960	0					0	my $report = _generate_codon_report(
961							$count_t,
962							$self->{codontable},
963							$rscu_t
964							);
965
966	0					0	return $report;
967							}
968
969							=head2 generate_RSCU_file
970
971							my $contents = $GD->generate_RSCU_file(
972							-sequences => \@seqs,
973							-comments => ["Got these codons from mice"]
974							);
975							open (my $OUT, '>', '/path/to/cods') \|\| die "can't write to /path/to/cods";
976							print $OUT $contents;
977							close $OUT;
978
979							This function generates a string that can be written to file to serve as a
980							GeneDesign RSCU table. Provide a set of sequences and an optional array
981							reference of comments to prepend to the file.
982
983							The file will have the format
984							# Comment 1
985							# ...
986							# Comment n
987							{TTT} = 0.19
988							{TTC} = 1.81
989							...
990
991							NO TEST
992
993							=cut
994
995							sub generate_RSCU_file
996							{
997	0			0	1	0	my ($self, @args) = @_;
998
999	0					0	my ($seqobjs, $comments) = $self->_rearrange([qw(sequences comments)], @args);
1000
1001	0	0				0	$self->throw('No codon table has been defined')
1002							unless $self->{codontable};
1003
1004	0	0				0	$self->throw('Sequence set must be provided')
1005							unless ($seqobjs);
1006
1007	0	0				0	$self->throw('Comment argument must be array reference')
1008							unless ref($comments) eq 'ARRAY';
1009
1010	0					0	my $count_t = $self->codon_count(-input => $seqobjs);
1011
1012	0					0	my $rscu_t = _generate_RSCU_table(
1013							$count_t,
1014							$self->{codontable},
1015							$self->{reversecodontable}
1016							);
1017
1018	0					0	my $report = _generate_codon_file(
1019							$rscu_t,
1020							$self->{reversecodontable},
1021							$comments
1022							);
1023
1024	0					0	return $report;
1025							}
1026
1027							=head2 list_enzyme_sets
1028
1029							my @available_enzlists = $GD->list_enzyme_sets();
1030							# @available_enzlists == ('standard_and_IIB', 'blunts', 'IIB', 'nonpal', ...)
1031
1032							Returns an array containing the names of every restriction enzyme recognition
1033							list GeneDesign knows about.
1034
1035							=cut
1036
1037							sub list_enzyme_sets
1038							{
1039	0			0	1	0	my ($self) = @_;
1040	0					0	my $epath = $self->{conf} . 'enzymes/';
1041	0					0	my @sets = ();
1042	0	0				0	opendir (my $ENZDIR, $epath) \|\| $self->throw("can't opendir $epath");
1043	0					0	foreach my $list (readdir($ENZDIR))
1044							{
1045	0					0	my $name = basename($list);
1046	0					0	$name =~ s{\.[^.]+$}{}x;
1047	0	0	0			0	next if ($name eq q{.} \|\| $name eq q{..});
1048	0					0	push @sets, $name;
1049							}
1050	0					0	closedir($ENZDIR);
1051	0					0	return @sets;
1052							}
1053
1054							=head2 set_restriction_enzymes
1055
1056							$GD->set_restriction_enzymes(-enzyme_set => 'blunts');
1057
1058							or
1059
1060							$GD->set_restriction_enzymes(-list_path => '/path/to/enzyme_file');
1061
1062							or even
1063
1064							$GD->set_restriction_enzymes(
1065							-list_path => '/path/to/enzyme_file',
1066							-enzyme_set => 'custom_enzymes'
1067							);
1068
1069							All will return a hash structure full of restriction enzymes.
1070
1071							Tell GeneDesign which set of restriction enzymes to use. If you provide only a
1072							set name with the -enzyme_set flag, GeneDesign will check its config path for a
1073							matching file. Otherwise you must provide a path to a file (and optionally a
1074							name for the set).
1075
1076							=cut
1077
1078							sub set_restriction_enzymes
1079							{
1080	3			3	1	675	my ($self, @args) = @_;
1081
1082	3					18	my ($set_name, $set_path)
1083							= $self->_rearrange([qw(enzyme_set list_path)], @args);
1084
1085	3					70	my $def = 'all_enzymes';
1086	3					13	my $defpath = $self->{conf} . 'enzymes/' . $def;
1087	3					22	$self->{all_enzymes} = _define_sites($defpath);
1088
1089
1090	3	50	33			347	if (! $set_name && ! $set_path)
		50	33
		50
1091							{
1092	0					0	$self->{enzyme_set} = $self->{all_enzymes};
1093	0					0	$self->{enzyme_set_name} = $def;
1094							}
1095							elsif ($set_name && ! $set_path)
1096							{
1097	0					0	$set_path = $self->{conf} . "enzymes/$set_name";
1098	0	0				0	$self->throw("No enzyme set found that matches $set_name\n")
1099							unless (-e $set_path);
1100	0					0	my $list = _parse_enzyme_list($set_path);
1101	0					0	my $set = {};
1102	0					0	foreach my $id (@$list)
1103							{
1104	0	0				0	if (! exists $self->{all_enzymes}->{$id})
1105							{
1106	0					0	warn "$id was not recognized as an enzyme... skipping\n";
1107	0					0	next;
1108							}
1109	0					0	$set->{$id} = $self->{all_enzymes}->{$id};
1110							}
1111	0					0	$self->{enzyme_set} = $set;
1112	0					0	$self->{enzyme_set_name} = $set_name;
1113							}
1114							elsif ($set_path)
1115							{
1116	3	50				139	$self->throw("No enzyme list found at $set_path\n")
1117							unless (-e $set_path);
1118	3					23	my $list = _parse_enzyme_list($set_path);
1119	3					10	my $set = {};
1120	3					13	foreach my $id (@$list)
1121							{
1122	87	50				309	if (! exists $self->{all_enzymes}->{$id})
1123							{
1124	0					0	warn "$id was not recognized as an enzyme... skipping\n";
1125	0					0	next;
1126							}
1127	87					295	$set->{$id} = $self->{all_enzymes}->{$id};
1128							}
1129	3					15	$self->{enzyme_set} = $set;
1130	3		33			250	$self->{enzyme_set_name} = $set_name \|\| basename($set_path);
1131							}
1132	3					24	return $self->{enzyme_set};
1133							}
1134
1135							=head2 remove_from_enzyme_set
1136
1137							Removes a subset of enzymes from an enzyme list. This only happens in memory, no
1138							files will be altered. The argument is an array reference of enzyme names.
1139
1140							$GD->set_restriction_enzymes(-enzyme_set => 'blunts');
1141							$GD->remove_from_enzyme_set(-enzymes => ['SmaI', 'MlyI']);
1142
1143							NO TEST
1144
1145							=cut
1146
1147							sub remove_from_enzyme_set
1148							{
1149	0			0	1	0	my ($self, @args) = @_;
1150
1151	0					0	my ($enzes) = $self->_rearrange([qw(enzymes)], @args);
1152
1153	0	0				0	return unless ($enzes);
1154
1155	0	0				0	$self->throw("no enzyme set has been defined")
1156							unless $self->{enzyme_set};
1157
1158	0					0	my @toremove = ();
1159	0					0	my $ref = ref ($enzes);
1160	0	0				0	if ($ref eq "ARRAY")
1161							{
1162	0					0	push @toremove, @$enzes;
1163							}
1164							else
1165							{
1166	0					0	push @toremove, $enzes;
1167							}
1168	0					0	foreach my $enz (@toremove)
1169							{
1170	0					0	my $eid = $enz;
1171	0					0	my $eref = ref $enz;
1172	0	0				0	if ($eref)
1173							{
1174	0	0				0	$self->throw("object in enzymes is not a " .
1175							"Bio::GeneDesign::RestrictionEnzyme object")
1176							unless ($enz->isa("Bio::GeneDesign::RestrictionEnzyme"));
1177	0					0	$eid = $enz->id;
1178							}
1179
1180	0	0				0	if (exists $self->{enzyme_set}->{$eid})
1181							{
1182	0					0	delete $self->{enzyme_set}->{$eid};
1183							}
1184							}
1185	0					0	return;
1186							}
1187
1188							=head2 add_to_enzyme_set
1189
1190							Adds a subset of enzymes to an enzyme list. This only happens in memory, no
1191							files will be altered. The argument is an array reference of RestrictionEnzyme
1192							objects.
1193
1194							#Grab all known enzymes
1195							my $allenz = $GD->set_restriction_enzymes(-enzyme_set => 'standard_and_IIB');
1196
1197							#Pull out a few
1198							my @keepers = ($allenz->{'BmrI'}, $allenz->{'HphI'});
1199
1200							#Give GeneDesign the enzyme set you want
1201							$GD->set_restriction_enzymes(-enzyme_set => 'blunts');
1202
1203							#Add the few enzymes it didn't have before
1204							$GD->add_to_enzyme_set(-enzymes => \@keepers);
1205
1206							NO TEST
1207
1208							=cut
1209
1210							sub add_to_enzyme_set
1211							{
1212	0			0	1	0	my ($self, @args) = @_;
1213
1214	0					0	my ($enzes) = $self->_rearrange([qw(enzymes)], @args);
1215
1216	0	0				0	return unless ($enzes);
1217
1218	0	0				0	$self->throw("no enzyme set has been defined")
1219							unless $self->{enzyme_set};
1220
1221	0					0	my @toadds = ();
1222	0					0	my $ref = ref ($enzes);
1223	0	0				0	if ($ref eq "ARRAY")
1224							{
1225	0					0	push @toadds, @$enzes;
1226							}
1227							else
1228							{
1229	0					0	push @toadds, $enzes;
1230							}
1231	0					0	foreach my $enz (@toadds)
1232							{
1233	0	0				0	$self->throw("object in enzymes is not a " .
1234							"Bio::GeneDesign::RestrictionEnzyme object")
1235							unless ($enz->isa("Bio::GeneDesign::RestrictionEnzyme"));
1236
1237	0	0				0	next if (exists $self->{enzyme_set}->{$enz->id});
1238	0					0	$self->{enzyme_set}->{$enz->id} = $enz;
1239							}
1240	0					0	return;
1241							}
1242
1243							=head2 restriction_status
1244
1245							=cut
1246
1247							sub restriction_status
1248							{
1249	1			1	1	68837	my ($self, @args) = @_;
1250
1251	1					13	my ($seq) = $self->_rearrange([qw(sequence)], @args);
1252
1253	1	50				18	$self->throw("no enzyme set has been defined")
1254							unless $self->{enzyme_set};
1255
1256	1	50				6	$self->throw("No arguments provided for set_restriction_enzymes")
1257							unless ($seq);
1258
1259	1					7	my $str = $self->_stripdown($seq, q{}, 0);
1260	1					5	my @reslist = values %{$self->{enzyme_set}};
	1					9
1261	1					7	return _define_site_status($str, \@reslist);
1262							}
1263
1264							=head2 build_prefix_tree
1265
1266							Take an array reference of nucleotide sequences (they can be strings, Bio::Seq
1267							objects, or Bio::GeneDesign::RestrictionEnzyme objects) and create a suffix
1268							tree. If you add the peptide flag, the sequences will be ambiguously translated
1269							before they are added to the suffix tree. Otherwise they will be ambiguously
1270							transcribed. It will add the reverse complement of any non peptide sequence as
1271							long as the reverse complement is different.
1272
1273							my $tree = $GD->build_prefix_tree(-input => ['GGATCC']);
1274
1275							my $ptree = $GD->build_prefix_tree(
1276							-input => ['GGCCNNNNNGGCC'],
1277							-peptide => 1
1278							);
1279
1280							=cut
1281
1282							sub build_prefix_tree
1283							{
1284	2			2	1	7013	my ($self, @args) = @_;
1285
1286	2					19	my ($list, $pep) = $self->_rearrange([qw(input peptide)], @args);
1287
1288	2	50				63	$self->throw("no input provided")
1289							unless ($list);
1290
1291	2					16	my $tree = Bio::GeneDesign::PrefixTree->new();
1292
1293	2					6	foreach my $seq (@$list)
1294							{
1295	10					23	my $base = $seq;
1296	10					15	my $id = $seq;
1297	10					25	my $ref = ref($seq);
1298	10	50				24	if ($ref)
1299							{
1300	10	50	33			183	$self->throw("object in input is not a Bio::Seq, Bio::SeqFeature, or " .
			33
1301							"Bio::GeneDesign::RestrictionEnzyme object")
1302							unless ($seq->isa("Bio::Seq")
1303							\|\| $seq->isa("Bio::SeqFeatureI")
1304							\|\| $seq->isa("Bio::GeneDesign::RestrictionEnzyme")
1305							);
1306	10	50				42	$base = ref($seq->seq) ? $seq->seq->seq : $seq->seq;
1307	10					45	$id = $seq->id;
1308							}
1309
1310	10	100				25	if ($pep)
1311							{
1312	4	50				17	$self->throw('No codon table has been defined')
1313							unless $self->{codontable};
1314
1315	4					22	my @fpeptides = _amb_translation($base, $self->{codontable},
1316							't', $self->{amb_trans_memo});
1317	4					28	$tree->add_prefix($_, $id, $base) foreach (@fpeptides);
1318
1319	4					24	my $esab = _complement($base, 1);
1320	4					13	my $lagcheck = $esab;
1321	4					26	while (substr($lagcheck, -1) eq "N")
1322							{
1323	0					0	$lagcheck = substr($lagcheck, 0, length($lagcheck) - 1);
1324							}
1325	4	100	66			38	if ($esab ne $base && $lagcheck eq $esab)
1326							{
1327	2					14	my @rpeptides = _amb_translation($esab, $self->{codontable},
1328							't', $self->{amb_trans_memo});
1329	2					30	$tree->add_prefix($_, $id, $esab) foreach (@rpeptides);
1330							}
1331							}
1332							else
1333							{
1334	6					24	my $fnucs = _amb_transcription($base);
1335	6					33	$tree->add_prefix($_, $id, undef) foreach (@$fnucs);
1336
1337	6					22	my $esab = _complement($base, 1);
1338	6	100				31	if ($esab ne $base)
1339							{
1340	3					10	my $rnucs = _amb_transcription($esab);
1341	3					18	$tree->add_prefix($_, $id, undef) foreach (@$rnucs);
1342							}
1343							}
1344							}
1345	2					12	return $tree;
1346							}
1347
1348							=head2 search_prefix_tree
1349
1350							Takes a suffix tree and a sequence and searches for results, which are returned
1351							as in the Bio::GeneDesign::PrefixTree documentation.
1352
1353							my $hits = $GD->search_prefix_tree(-tree => $ptree, -sequence => $mygeneseq);
1354
1355							# @$hits = (['BamHI', 4, 'GGATCC', 'i hope this didn't pop up'],
1356							# ['OhnoI', 21, 'GGCCC', 'I hope these pop up'],
1357							# ['WoopsII', 21, 'GGCCC', 'I hope these pop up']
1358							#);
1359
1360							=cut
1361
1362							sub search_prefix_tree
1363							{
1364	2			2	1	186216	my ($self, @args) = @_;
1365
1366	2					21	my ($tree, $seq) = $self->_rearrange([qw(tree sequence)], @args);
1367
1368	2	50				93	$self->throw("no query sequence provided")
1369							unless ($seq);
1370
1371	2	50				9	$self->throw("no suffix tree provided")
1372							unless ($tree);
1373
1374	2	50				17	$self->throw("tree is not a Bio::GeneDesign::PrefixTree")
1375							unless ($tree->isa("Bio::GeneDesign::PrefixTree"));
1376
1377	2					13	my $str = $self->_stripdown($seq, q{}, 0);
1378
1379	2					16	my @hits = $tree->find_prefixes($str);
1380
1381	2					19	return \@hits;
1382							}
1383
1384							=head2 pattern_aligner
1385
1386							=cut
1387
1388							sub pattern_aligner
1389							{
1390	3			3	1	5578	my ($self, @args) = @_;
1391
1392	3					21	my ($seq, $pattern, $peptide, $re)
1393							= $self->_rearrange([qw(sequence pattern peptide offset)], @args);
1394
1395	3	50				100	$self->throw("no codon table has been defined")
1396							unless $self->{codontable};
1397
1398	3	50				9	$self->throw("no nucleotide sequence provided")
1399							unless $seq;
1400
1401	3		100			12	$re = $re \|\| 0;
1402
1403	3					12	my $str = $self->_stripdown($seq, q{}, 1);
1404
1405	3		33			20	$peptide = $peptide \|\| $self->translate(-sequence => $str);
1406
1407	3					16	my ($newpattern, $offset) = _pattern_aligner($str,
1408							$pattern,
1409							$peptide,
1410							$self->{codontable},
1411							$self->{amb_trans_memo}
1412							);
1413	3	100				19	return $re ? ($newpattern, $offset) : $newpattern;
1414							}
1415
1416							=head2 pattern_adder
1417
1418							=cut
1419
1420							sub pattern_adder
1421							{
1422	1			1	1	1917	my ($self, @args) = @_;
1423
1424	1					8	my ($seq, $pattern) = $self->_rearrange([qw(sequence pattern)], @args);
1425
1426	1	50				32	$self->throw("no codon table has been defined")
1427							unless $self->{codontable};
1428
1429	1	50				6	$self->throw("no nucleotide sequence provided")
1430							unless $seq;
1431
1432	1					5	my $str = $self->_stripdown($seq, q{}, 1);
1433	1					4	my $pat = $self->_stripdown($pattern, q{}, 1);
1434
1435	1					9	my $newsequence = _pattern_adder($str,
1436							$pat,
1437							$self->{codontable},
1438							$self->{reversecodontable},
1439							$self->{amb_trans_memo}
1440							);
1441	1					6	return $newsequence;
1442							}
1443
1444							=head2 codon_change_type
1445
1446							=cut
1447
1448							sub codon_change_type
1449							{
1450	0			0	1	0	my ($self, @args) = @_;
1451
1452	0					0	my ($codold, $codnew) = $self->_rearrange([qw(from to)], @args);
1453
1454	0	0				0	$self->throw("no codon table has been defined")
1455							unless $self->{codontable};
1456
1457	0	0				0	$self->throw("no from sequence provided")
1458							unless $codold;
1459
1460	0	0				0	$self->throw("no to sequence provided")
1461							unless $codnew;
1462
1463	0					0	my $changetype = _codon_change_type($codold, $codnew, $self->{codontable});
1464	0					0	return $changetype;
1465							}
1466
1467							=head2 translate
1468
1469							=cut
1470
1471							sub translate
1472							{
1473	9			9	1	5374	my ($self, @args) = @_;
1474
1475	9					53	my ($seq, $frame) = $self->_rearrange([qw(sequence frame)], @args);
1476
1477	9	50				242	$self->throw("no codon table has been defined")
1478							unless $self->{codontable};
1479
1480	9	50				24	$self->throw("no nucleotide sequence provided")
1481							unless $seq;
1482
1483	9					31	my $str = $self->_stripdown($seq, q{}, 1);
1484
1485	9	100				23	$frame = $frame ? $frame : 1;
1486
1487	9	50	33			52	$self->throw("frame must be -3, -2, -1, 1, 2, or 3")
1488							if (abs($frame) > 4 \|\| abs($frame) < 0);
1489
1490	9					43	my $peptide = _translate($str, $frame, $self->{codontable});
1491	9	100				28	if (ref $seq)
1492							{
1493	6					30	my $newobj = $seq->clone();
1494	6	50				308	my $desc = $newobj->desc ? $newobj->desc . q{ } : q{};
1495	6					86	$desc = "translated with " . $self->{organism} . " codon values";
1496	6					18	$newobj->seq($peptide);
1497	6					428	$newobj->alphabet("protein");
1498	6					129	$newobj->desc($desc);
1499	6					78	return $newobj;
1500							}
1501							else
1502							{
1503	3					43	return $peptide;
1504							}
1505							}
1506
1507							=head2 reverse_translate
1508
1509							=cut
1510
1511							sub reverse_translate
1512							{
1513	20001			20001	1	1778649	my ($self, @args) = @_;
1514
1515	20001					85178	my ($pep, $algorithm)
1516							= $self->_rearrange([qw(
1517							peptide
1518							algorithm)], @args);
1519
1520	20001	50				525176	$self->throw("no codon table has been defined")
1521							unless $self->{codontable};
1522
1523	20001	50				49745	$self->throw("no RSCU table has been defined")
1524							unless $self->{rscutable};
1525
1526	20001	50				42707	$self->throw("no peptide sequence provided")
1527							unless $pep;
1528
1529	20001					47898	my $str = $self->_stripdown($pep, q{}, 1);
1530
1531	20001					61284	my ($newstr, @baddies) = _sanitize($str, 'pep');
1532	20001					33322	$str = $newstr;
1533	20001	50				46631	if (scalar @baddies)
1534							{
1535	0					0	print "\nGD_WARNING: removed bad characters (", join q{, }, @baddies;
1536	0					0	print ") from input sequence\n";
1537							}
1538	20001		50			43918	$algorithm = $algorithm \|\| "balanced";
1539	20001					29745	$algorithm = lc $algorithm;
1540	20001					31328	$algorithm =~ s{\;}{}xg;
1541	20001					35669	my $name = "_reversetranslate" . "_" . $algorithm;
1542	20001					46919	my $subref = \&$name;
1543	20001		33			78231	my $seq = &$subref($self->{reversecodontable}, $self->{rscutable}, $str)
1544							\|\| $self->throw("Can't reverse translate with $algorithm? $!");
1545
1546	20001	50				51611	if (ref $pep)
1547							{
1548	20001					68642	my $newobj = $pep->clone();
1549	20001	50				708335	my $desc = $newobj->desc ? $newobj->desc . q{ } : q{};
1550	20001					289412	$desc .= "$algorithm reverse translated with " . $self->{organism};
1551	20001					29493	$desc .= " RSCU values";
1552	20001					58405	$newobj->seq($seq);
1553	20001					1319029	$newobj->desc($desc);
1554	20001					266311	return $newobj;
1555							}
1556							else
1557							{
1558	0					0	return $seq;
1559							}
1560							}
1561
1562							=head2 codon_juggle
1563
1564							=cut
1565
1566							sub codon_juggle
1567							{
1568	20003			20003	1	932845	my ($self, @args) = @_;
1569
1570	20003					85621	my ($seq, $algorithm)
1571							= $self->_rearrange([qw(
1572							sequence
1573							algorithm)], @args);
1574
1575	20003	50				505492	$self->throw("no codon table has been defined")
1576							unless $self->{codontable};
1577
1578	20003	50				44796	$self->throw("no RSCU table has been defined")
1579							unless $self->{rscutable};
1580
1581	20003	50				36430	$self->throw("no nucleotide sequence provided")
1582							unless $seq;
1583
1584
1585	20003	50				33675	$self->throw("no algorithm provided")
1586							unless $algorithm;
1587
1588	20003					43961	my $str = $self->_stripdown($seq, q{}, 0);
1589
1590							##REPLACE THIS WITH CODE THAT GRACEFULLY JUGGLES JUST CDSES OR GENES
1591	20003	50				51569	$self->throw("sequence does not appear to be the right length to be a gene")
1592							unless length($str) % 3 == 0;
1593
1594	20003					29712	$algorithm = lc $algorithm;
1595	20003					34055	$algorithm =~ s/\W//xg;
1596	20003					31153	my $name = "_codonJuggle_" . $algorithm;
1597	20003					43082	my $subref = \&$name;
1598	20003		33			77284	my $newseq = &$subref($self->{codontable},
1599							$self->{reversecodontable},
1600							$self->{rscutable},
1601							$str
1602							) \|\| $self->throw("Can't run $algorithm? $!");
1603	20003	50				49303	if (ref $seq)
1604							{
1605	20003					74644	my $newobj = $seq->clone();
1606	20003	50				698613	my $desc = $newobj->desc ? $newobj->desc . q{ } : q{};
1607	20003					283854	$desc .= "$algorithm codon juggled with " . $self->{organism};
1608	20003					30913	$desc .= " RSCU values";
1609	20003					56485	$newobj->seq($newseq);
1610	20003					1369061	$newobj->desc($desc);
1611	20003					262637	return $newobj;
1612							}
1613							else
1614							{
1615	0					0	return $newseq;
1616							}
1617							}
1618
1619							=head2 subtract_sequence
1620
1621							=cut
1622
1623							sub subtract_sequence
1624							{
1625	3			3	1	2660	my ($self, @args) = @_;
1626
1627	3					19	my ($seq, $rem) = $self->_rearrange([qw(sequence remove)], @args);
1628
1629	3	50				80	$self->throw("no codon table has been defined")
1630							unless $self->{codontable};
1631
1632	3	50				13	$self->throw("no RSCU table has been defined")
1633							unless $self->{rscutable};
1634
1635	3	50				10	$self->throw("no nucleotide sequence provided")
1636							unless $seq;
1637
1638	3	50				12	$self->throw("no sequence to be removed was defined")
1639							unless ($rem);
1640
1641	3					14	my $str = $self->_stripdown($seq, q{}, 0);
1642
1643	3					6	my $regarr;
1644	3					6	my $less = $rem;
1645	3	50				11	if (ref($rem))
1646							{
1647	3	100	66			37	if ($rem->isa("Bio::Seq") \|\| $rem->isa("Bio::SeqFeatureI"))
		50
1648							{
1649	2	50				7	$less = ref($rem->seq) ? $rem->seq->seq : $rem->seq;
1650	2					95	$regarr = _regarr($less, 1);
1651							}
1652							elsif ($rem->isa("Bio::GeneDesign::RestrictionEnzyme"))
1653							{
1654	1					11	$less = $rem->seq;
1655	1					8	$regarr = $rem->regex;
1656							}
1657							else
1658							{
1659	0					0	$self->throw("removal argument is not a Bio::Seq, Bio::SeqFeature, or "
1660							. "Bio::GeneDesign::RestrictionEnzyme");
1661							}
1662							}
1663							else
1664							{
1665	0					0	$regarr = _regarr($less, 1);
1666							}
1667
1668	3					30	my $newseq = _subtract( uc $str,
1669							uc $less,
1670							$regarr,
1671							$self->{codontable},
1672							$self->{rscutable},
1673							$self->{reversecodontable}
1674							);
1675
1676	3	50				15	if (ref $seq)
1677							{
1678	3					29	my $newobj = $seq->clone();
1679	3	50				171	my $desc = $newobj->desc ? $newobj->desc . q{ } : q{};
1680	3					70	$desc .= $rem->id . " subtracted with " . $self->{organism};
1681	3					43	$desc .= " RSCU values";
1682	3					14	$newobj->seq($newseq);
1683	3					278	$newobj->desc($desc);
1684	3					57	return $newobj;
1685							}
1686							else
1687							{
1688	0					0	return $newseq;
1689							}
1690							}
1691
1692							=head2 repeat_smash
1693
1694							=cut
1695
1696							sub repeat_smash
1697							{
1698	0			0	1	0	my ($self, @args) = @_;
1699
1700	0					0	my ($seq) = $self->_rearrange([qw(sequence)], @args);
1701
1702	0	0				0	$self->throw("no codon table has been defined")
1703							unless $self->{codontable};
1704
1705	0	0				0	$self->throw("no RSCU table has been defined")
1706							unless $self->{rscutable};
1707
1708	0	0				0	$self->throw("no nucleotide sequence provided")
1709							unless $seq;
1710
1711	0					0	my $str = $self->_stripdown($seq, q{}, 0);
1712
1713	0					0	return _minimize_local_alignment_dp(
1714							$str,
1715							$self->{codontable},
1716							$self->{reversecodontable},
1717							$self->{rscutable}
1718							);
1719							}
1720
1721							=head2 make_amplification_primers
1722
1723							NO TEST
1724
1725							=cut
1726
1727							sub make_amplification_primers
1728							{
1729	0			0	1	0	my ($self, @args) = @_;
1730
1731	0					0	my ($seq, $temp) = $self->_rearrange([qw(sequence temperature)], @args);
1732
1733	0	0				0	$self->throw("no sequence provided") unless ($seq);
1734	0		0			0	$temp = $temp \|\| 60;
1735
1736	0					0	my $str = $self->_stripdown($seq, q{}, 0);
1737
1738	0					0	return _make_amplification_primers($str, $temp);
1739							}
1740
1741							=head2 contains_homopolymer
1742
1743							Returns 1 if the sequence contains a homopolymer of the provided length (default
1744							is 5bp) and 0 else
1745
1746							=cut
1747
1748							sub contains_homopolymer
1749							{
1750	0			0	1	0	my ($self, @args) = @_;
1751
1752	0					0	my ($seq, $length) = $self->_rearrange([qw(sequence length)], @args);
1753
1754	0	0				0	$self->throw("no sequence provided") unless ($seq);
1755	0		0			0	$length = $length \|\| 5;
1756
1757	0					0	my $str = $self->_stripdown($seq, q{}, 0);
1758
1759	0					0	return _check_for_homopolymer($str, $length);
1760							}
1761
1762							=head2 filter_homopolymers
1763
1764							=cut
1765
1766							sub filter_homopolymers
1767							{
1768	0			0	1	0	my ($self, @args) = @_;
1769
1770	0					0	my ($seqs, $length) = $self->_rearrange([qw(sequences length)], @args);
1771
1772	0	0				0	$self->throw("no argument provided to filter_homopolymers")
1773							unless $seqs;
1774	0		0			0	$length = $length \|\| 5;
1775
1776	0					0	my $arrref = $self->_stripdown($seqs, 'ARRAY', 1);
1777
1778	0					0	my $seqarr = _filter_homopolymer( $arrref, $length);
1779	0					0	return $seqarr;
1780							}
1781
1782							=head2 search_vmatch
1783
1784							=cut
1785
1786							sub search_vmatch
1787							{
1788	0			0	1	0	my ($self, @args) = @_;
1789
1790	0	0				0	$self->throw("vmatch searching is not available")
1791							unless $self->vmatch;
1792
1793	0					0	my ($parent, $seqobj, $mismatches, $revcom)
1794							= $self->_rearrange([qw(
1795							parent
1796							sequence
1797							mismatches
1798							revcom)], @args);
1799
1800	0	0				0	$self->throw("no nucleotide sequence provided")
1801							unless $seqobj;
1802
1803	0	0				0	$self->throw("sequence argument is not a Bio::Seq object")
1804							unless $seqobj->isa("Bio::Seq");
1805
1806	0	0				0	$self->throw("$seqobj is not a nucleotide sequence")
1807							unless $seqobj->alphabet eq "dna";
1808
1809	0	0				0	$self->throw("no parent sequence provided")
1810							unless $parent;
1811
1812	0	0				0	$self->throw("parent argument is not a Bio::Seq object")
1813							unless $parent->isa("Bio::Seq");
1814
1815	0	0				0	$self->throw("$parent is not a nucleotide sequence")
1816							unless $parent->alphabet eq "dna";
1817
1818	0		0			0	my $writedir = $self->{tmp_path} \|\| "./";
1819	0		0			0	$mismatches = $mismatches \|\| 1;
1820	0		0			0	$revcom = $revcom \|\| 1;
1821
1822	0					0	my $hits = _search_vmatch( $parent,
1823							$seqobj,
1824							$mismatches,
1825							$revcom,
1826							$writedir);
1827	0					0	return $hits;
1828							}
1829
1830							=head2 filter_blast
1831
1832							=cut
1833
1834							sub filter_blast
1835							{
1836	0			0	1	0	my ($self, @args) = @_;
1837
1838	0	0				0	$self->throw("BLAST+ filtering is not available")
1839							unless $self->BLAST;
1840
1841	0					0	my ($parent, $seqobjs, $percent, $revcom)
1842							= $self->_rearrange([qw(
1843							parent
1844							sequences
1845							percent_identity
1846							revcom)], @args);
1847
1848	0	0				0	$self->throw("no nucleotide sequences provided")
1849							unless $seqobjs;
1850
1851	0	0				0	$self->throw("sequences argument is not an array reference")
1852							unless ref($seqobjs) eq "ARRAY";
1853
1854	0					0	foreach my $seqobj (@$seqobjs)
1855							{
1856	0	0				0	$self->throw(ref($seqobj) . " is not a Bio::Seq object")
1857							unless $seqobj->isa("Bio::Seq");
1858
1859	0	0				0	$self->throw("$seqobj is not a nucleotide sequence")
1860							unless $seqobj->alphabet eq "dna";
1861							}
1862
1863	0	0				0	$self->throw("no parent sequence provided")
1864							unless $parent;
1865
1866	0	0				0	$self->throw(ref($parent) . " is not a Bio::Seq object")
1867							unless $parent->isa("Bio::Seq");
1868
1869	0	0				0	$self->throw("$parent is not a nucleotide sequence")
1870							unless $parent->alphabet eq "dna";
1871
1872	0		0			0	my $writedir = $self->{tmp_path} \|\| "./";
1873	0		0			0	$percent = $percent \|\| 75;
1874	0		0			0	$revcom = $revcom \|\| 1;
1875
1876	0					0	my $seqarr = _filter_blast( $parent,
1877							$seqobjs,
1878							$percent,
1879							$writedir);
1880	0					0	return $seqarr;
1881							}
1882
1883							=head2 carve_building_blocks
1884
1885							NO TEST
1886
1887							=cut
1888
1889							sub carve_building_blocks
1890							{
1891	0			0	1	0	my ($self, @args) = @_;
1892	0					0	my ($chobj, $algorithm, $tarbblen, $maxbblen,
1893							$minbblen, $tarbblap, $stitch, $excludes,
1894							$fpexcludes, $tpexcludes, $verbose)
1895							= $self->_rearrange([qw(
1896							sequence
1897							algorithm
1898							target_length
1899							max_length
1900							min_length
1901							overlap_length
1902							stitch_temp
1903							excludes
1904							fpexcludes
1905							tpexcludes
1906							verbose)], @args);
1907
1908	0	0				0	$self->throw("no nucleotide sequence provided")
1909							unless $chobj;
1910
1911	0	0				0	$self->throw("object " . ref($chobj) . " is not a Bio::Seq")
1912							unless $chobj->isa("Bio::Seq");
1913
1914	0		0			0	$tarbblen = $tarbblen \|\| 1000;
1915	0		0			0	$maxbblen = $maxbblen \|\| 1023;
1916	0		0			0	$minbblen = $minbblen \|\| 200;
1917
1918	0	0	0			0	$self->throw("Target building block size $tarbblen is outside of allowable " .
1919							"range min $minbblen to max $maxbblen")
1920							if ($tarbblen < $minbblen \|\| $tarbblen > $maxbblen);
1921
1922	0	0	0			0	$self->throw("Minimum and maximum building block sizes don't make sense")
			0
1923							if ($maxbblen < $minbblen \|\| $maxbblen < 0 \|\| $minbblen < 0 );
1924
1925	0		0			0	$tarbblap = $tarbblap \|\| 40;
1926
1927	0		0			0	$algorithm = $algorithm \|\| "overlap";
1928	0					0	$algorithm =~ s/\;//xg;
1929	0					0	my $module = "Bio::GeneDesign::BuildingBlocks::$algorithm";
1930	0					0	(my $require_name = $module . ".pm") =~ s{::}{/}xg;
1931							my $req = eval
1932	0					0	{
1933	0					0	require $require_name;
1934							};
1935	0	0				0	if (! $req)
1936							{
1937	0					0	$self->throw("BuildingBlocks module for $algorithm not found\n$@");
1938							}
1939	0					0	my $name = $module . "::_carve_building_blocks";
1940	0					0	my $subref = \&$name;
1941	0					0	my $BB_list = [];
1942							my $run = eval
1943	0					0	{
1944	0					0	$BB_list = &$subref( $self, $chobj, $tarbblen, $maxbblen,
1945							$minbblen, $tarbblap, $stitch,
1946							$excludes, $fpexcludes, $tpexcludes,
1947							$verbose
1948							);
1949							};
1950	0					0	my $e;
1951	0	0				0	if ($e = Bio::GeneDesign::Exception::UnBBable->caught())
1952							{
1953	0					0	print $chobj->id . " cannot be carved into building blocks: "
1954							. $e->error . "\n";
1955							}
1956	0					0	return $BB_list;
1957							}
1958
1959							=head2 chop_oligos
1960
1961							NO TEST
1962
1963							=cut
1964
1965							sub chop_oligos
1966							{
1967	0			0	1	0	my ($self, @args) = @_;
1968
1969	0					0	my ($bbobj, $algorithm, $olilenmin, $olilenmax, $olilen, $laptemp, $laplenmin,
1970							$tmtol, $poolsize, $poolnummax, $verbose)
1971							= $self->_rearrange([qw(
1972							building_block
1973							algorithm
1974							oligo_len_min
1975							oligo_len_max
1976							oligo_len
1977							overlap_tm
1978							overlap_len_min
1979							tm_tolerance
1980							pool_size
1981							pool_num_max
1982							verbose)], @args);
1983
1984	0	0				0	$self->throw("no building block provided")
1985							unless $bbobj;
1986
1987	0	0				0	$self->throw("object " . ref($bbobj) . " is not a Bio::SeqFeatureI")
1988							unless $bbobj->isa("Bio::SeqFeatureI");
1989
1990	0		0			0	$olilen = $olilen \|\| 180;
1991	0		0			0	$olilenmin = $olilenmin \|\| 45;
1992	0		0			0	$olilenmax = $olilenmax \|\| 200;
1993	0		0			0	$laptemp = $laptemp \|\| 70;
1994	0		0			0	$tmtol = $tmtol \|\| .5;
1995	0		0			0	$verbose = $verbose \|\| undef;
1996
1997	0	0	0			0	$self->throw("maximum oligo length is less than the target oligo length")
			0
1998							if ($olilenmax && $olilen && $olilenmax < $olilen);
1999
2000	0		0			0	$algorithm = $algorithm \|\| "JGI";
2001	0					0	$algorithm =~ s/\;//xg;
2002	0					0	my $module = "Bio::GeneDesign::Oligos::$algorithm";
2003	0					0	(my $require_name = $module . ".pm") =~ s{::}{/}xg;
2004							my $req = eval
2005	0					0	{
2006	0					0	require $require_name;
2007							};
2008	0	0				0	if (! $req)
2009							{
2010	0					0	$self->throw("Oligos module for algorithm $algorithm not found:\n$@\n\n");
2011							}
2012	0					0	my $name = $module . "::_chop_oligos";
2013	0					0	my $subref = \&$name;
2014	0					0	my $OL_list = [];
2015							my $run = eval
2016	0					0	{
2017	0					0	$OL_list = &$subref( $self, $bbobj, $olilenmax, $olilenmin, $olilen,
2018							$laptemp, $laplenmin, $tmtol, $poolsize,
2019							$poolnummax, $verbose
2020							);
2021							};
2022	0					0	my $e;
2023	0	0				0	if ($e = Bio::GeneDesign::Exception::UnOLable->caught())
2024							{
2025	0					0	print "Cannot chop into oligos: " . $e->error . "\n";
2026							}
2027	0					0	return $OL_list;
2028							}
2029
2030							=head2 make_graph
2031
2032							=cut
2033
2034							sub make_graph
2035							{
2036	0			0	1	0	my ($self, @args) = @_;
2037
2038	0	0				0	$self->throw("Graphing is not available")
2039							unless $self->{graph};
2040
2041	0					0	my ($seqobjs, $window)
2042							= $self->_rearrange([qw(sequences window)], @args);
2043
2044	0	0				0	$self->throw("no codon table has been defined")
2045							unless $self->{codontable};
2046
2047	0	0				0	$self->throw("no RSCU table has been defined")
2048							unless $self->{rscutable};
2049
2050	0	0				0	$self->throw("no nucleotide sequences provided")
2051							unless $seqobjs;
2052
2053	0	0				0	$self->throw("sequences argument is not an array reference")
2054							unless ref($seqobjs) eq "ARRAY";
2055
2056	0					0	foreach my $seqobj (@$seqobjs)
2057							{
2058	0	0				0	$self->throw(ref($seqobj) . " is not a Bio::Seq object")
2059							unless $seqobj->isa("Bio::Seq");
2060
2061	0	0				0	$self->throw("$seqobj is not a nucleotide sequence")
2062							unless $seqobj->alphabet eq "dna";
2063							}
2064
2065	0					0	my ($graph, $format) = _make_graph( $seqobjs,
2066							$window,
2067							$self->{organism},
2068							$self->{codontable},
2069							$self->{rscutable},
2070							$self->{reversecodontable});
2071	0					0	return ($graph, $format);
2072							}
2073
2074							=head2 make_dotplot
2075
2076							=cut
2077
2078							sub make_dotplot
2079							{
2080	0			0	1	0	my ($self, @args) = @_;
2081
2082	0	0				0	$self->throw("Graphing is not available")
2083							unless $self->{graph};
2084
2085	0					0	my ($seq1, $seq2, $window, $stringency)
2086							= $self->_rearrange([qw(first second window stringency)], @args);
2087
2088	0		0			0	$window = $window \|\| 10;
2089	0		0			0	$stringency = $stringency \|\| 10;
2090
2091	0	0	0			0	$self->throw("no nucleotide sequences provided")
2092							unless ($seq1 && $seq2);
2093
2094	0					0	foreach my $seqobj ($seq1, $seq2)
2095							{
2096	0	0				0	$self->throw(ref($seqobj) . " is not a Bio::Seq object")
2097							unless $seqobj->isa("Bio::Seq");
2098
2099	0	0				0	$self->throw("$seqobj is not a nucleotide sequence")
2100							unless $seqobj->alphabet eq "dna";
2101							}
2102
2103	0					0	my $graph = _dotplot(
2104							$seq1->seq,
2105							$seq2->seq,
2106							$window,
2107							$stringency
2108							);
2109	0					0	return $graph;
2110							}
2111
2112							=head2 import_seqs
2113
2114							NO TEST
2115
2116							=cut
2117
2118							sub import_seqs
2119							{
2120	1			1	1	406	my ($self, $path) = @_;
2121	1	50				22	$self->throw("$path does not exist.")
2122							if (! -e $path);
2123	1		33			11	my $iterator = Bio::SeqIO->new(-file => $path)
2124							\|\| $self->throw("Can't parse $path!");
2125
2126	1					7655	my ($filename, $dirs, $suffix) = fileparse($path, qr/\.[^.]*/x);
2127	1	50				11	$suffix = substr($suffix, 1) if (substr($suffix, 0, 1) eq ".");
2128	1	50				7	$suffix = 'fasta' if ($suffix eq 'fa');
2129
2130	1					6	return ($iterator, $filename, $suffix);
2131							}
2132
2133							=head2 export_seqs
2134
2135							NO TEST
2136
2137							=cut
2138
2139							sub export_seqs
2140							{
2141	0			0	1	0	my ($self, @args) = @_;
2142
2143	0					0	my ($filename, $outpath, $outformat, $seqarr)
2144							= $self->_rearrange([qw(
2145							filename
2146							path
2147							format
2148							sequences)], @args);
2149
2150	0	0				0	$outpath = $outpath ? $outpath : ".";
2151	0	0				0	$outpath .= "/" if (substr($outpath, -1, 1) !~ /[ \/ ]/x);
2152	0	0				0	$self->throw("$outpath does not exist") if (! -e $outpath);
2153
2154	0	0				0	$outformat = $outformat ? $outformat : "genbank";
2155	0					0	my $module = "Bio::SeqIO::$outformat";
2156	0					0	(my $require_name = $module . ".pm") =~ s{::}{/}xg;
2157							my $flag = eval
2158	0					0	{
2159	0					0	require $require_name;
2160							};
2161	0	0	0			0	$self->throw("$outformat is not a format recognized by BioPerl")
2162							unless ($outformat && $flag);
2163
2164							#Long attributes that come in from a genbank file will have corruption
2165							#remove spaces and reattribute to fix bbs in genbank file ):
2166	0	0				0	if ($outformat eq 'genbank')
2167							{
2168	0					0	foreach my $seq (@{$seqarr})
	0					0
2169							{
2170	0					0	foreach my $feat ($seq->get_SeqFeatures)
2171							{
2172	0					0	foreach my $tag ($feat->get_all_tags())
2173							{
2174	0					0	my $value = join(q{}, $feat->get_tag_values($tag));
2175	0					0	$value =~ s/\s//xg;
2176	0					0	$feat->remove_tag($tag);
2177	0					0	$feat->add_tag_value($tag, $value);
2178							}
2179							}
2180							}
2181							}
2182	0					0	my $path = $outpath . $filename;
2183	0	0				0	open (my $OUTFH, '>', $path ) \|\| $self->throw("Can't write to $path? $!");
2184	0					0	my $FOUT = Bio::SeqIO->new(-fh => $OUTFH, -format => $outformat);
2185	0					0	$FOUT->write_seq($_) foreach (@$seqarr);
2186	0					0	close $OUTFH;
2187	0					0	return;
2188							}
2189
2190							=head2 random_dna
2191
2192							=cut
2193
2194							sub random_dna
2195							{
2196	30000			30000	1	252220	my ($self, @args) = @_;
2197
2198	30000					124475	my ($rlen, $rgc, $rstop)
2199							= $self->_rearrange([qw(
2200							length
2201							gc_percentage
2202							no_stops)], @args);
2203
2204	30000	50	33			735445	$self->throw("no codon table has been defined")
2205							if ($rstop && ! $self->{codontable});
2206
2207	30000		100			73576	$rgc = $rgc \|\| 50;
2208	30000	50	33			159628	$self->throw("gc_percentage must be between 0 and 100")
			33
2209							if ($rgc && ($rgc < 0 \|\| $rgc > 100));
2210
2211	30000	50	33			145810	if (! $rlen \|\| $rlen < 1)
		50
2212							{
2213	0					0	return q{};
2214							}
2215							elsif ($rlen == 1)
2216							{
2217	30000	50				103832	return $rgc ? _randombase_weighted($rgc) : _randombase;
2218							}
2219	0					0	return _randomDNA($rlen, $rgc, $rstop, $self->{codontable});
2220							}
2221
2222							=head2 replace_ambiguous_bases
2223
2224							=cut
2225
2226							sub replace_ambiguous_bases
2227							{
2228	3			3	1	18	my ($self, $seq) = @_;
2229
2230	3	50				8	$self->throw("no sequence provided ")
2231							unless ($seq);
2232
2233	3					8	my $str = $self->_stripdown($seq, q{}, 1);
2234
2235	3					13	my $newstr = _replace_ambiguous_bases($str);
2236
2237	3	50				9	if (ref $seq)
2238							{
2239	0					0	my $newobj = $seq->clone();
2240	0	0				0	my $desc = $newobj->desc ? $newobj->desc . q{ } : q{};
2241	0					0	$desc .= "deambiguated";
2242	0					0	$newobj->seq($newstr);
2243	0					0	$newobj->desc($desc);
2244	0					0	return $newobj;
2245							}
2246							else
2247							{
2248	3					10	return $newstr;
2249							}
2250							}
2251
2252							=head1 PLEASANTRIES
2253
2254							=head2 pad
2255
2256							my $name = 5;
2257							my $nice = $GD->pad($name, 3);
2258							$nice == "005" \|\| die;
2259
2260							$name = "oligo";
2261							$nice = $GD->pad($name, 7, "_");
2262							$nice == "__oligo" \|\| die;
2263
2264							Pads an integer with leading zeroes (by default) or any provided set of
2265							characters. This is useful both to make reports pretty and to standardize the
2266							length of designations.
2267
2268							=cut
2269
2270							sub pad
2271							{
2272	0			0	1	0	my ($self, $num, $thickness, $chars) = @_;
2273	0					0	my $t = $num;
2274	0		0			0	$chars = $chars \|\| "0";
2275	0					0	$t = $chars . $t while (length($t) < $thickness);
2276	0					0	return $t;
2277							}
2278
2279							=head2 attitude
2280
2281							my $adverb = $GD->attitude();
2282
2283							Ask GeneDesign how it handled your request.
2284
2285							=cut
2286
2287							sub attitude
2288							{
2289	0			0	1	0	my @adverbs = qw(Elegantly Energetically Enthusiastically Excitedly Daintily
2290							Deliberately Diligently Dreamily Courageously Cooly Cleverly Cheerfully
2291							Carefully Calmly Briskly Blindly Bashfully Absentmindedly Awkwardly
2292							Faithfully Ferociously Fervently Fiercely Fondly Gently Gleefully Gratefully
2293							Gracefully Happily Helpfully Heroically Honestly Joyfully Jubilantly
2294							Jovially Keenly Kindly Knowingly Kookily Loftily Lovingly Loyally
2295							Majestically Mechanically Merrily Mostly Neatly Nicely Obediently Officially
2296							Optimistically Patiently Perfectly Playfully Positively Powerfully
2297							Punctually Properly Promptly Quaintly Quickly Quirkily Rapidly Readily
2298							Reassuringly Righteously Sedately Seriously Sharply Shyly Silently Smoothly
2299							Solemnly Speedily Strictly Successfully Suddenly Sweetly Swiftly Tenderly
2300							Thankfully Throroughly Thoughtfully Triumphantly Ultimately Unabashedly
2301							Utterly Upliftingly Urgently Usefully Valiantly Victoriously Vivaciously
2302							Warmly Wholly Wisely Wonderfully Yawningly Zealously Zestfully
2303							);
2304	0					0	my $index = _random_index(scalar(@adverbs));
2305	0					0	return $adverbs[$index];
2306							}
2307
2308							=head2 endslash
2309
2310							=cut
2311
2312							sub endslash
2313							{
2314	0			0	1	0	my ($self, $path) = @_;
2315	0	0				0	if (substr($path, -1, 1) ne q{/})
2316							{
2317	0					0	$path .= q{/};
2318							}
2319	0					0	return $path;
2320							}
2321
2322							=head2 _stripdown
2323
2324							=cut
2325
2326							sub _stripdown
2327							{
2328	40059			40059		62111	my ($self, $seqarg, $type, $enz_allowed) = @_;
2329
2330	40059		100			121728	$enz_allowed = $enz_allowed \|\| 0;
2331	40059	100				114096	my @seqs = ref($seqarg) eq "ARRAY" ? @$seqarg : ($seqarg);
2332	40059					45371	my @list;
2333	40059					64486	foreach my $seq (@seqs)
2334							{
2335	40059					48241	my $str = $seq;
2336	40059					52050	my $ref = ref($seq);
2337	40059	100				77438	if ($ref)
2338							{
2339	40027					82807	my $bit = $self->_checkref($seq, $enz_allowed);
2340	40027	50				92721	$self->throw("object $ref is not a compatible object $bit") if ($bit < 1);
2341	40027	50				109572	$str = ref($seq->seq) ? $seq->seq->seq : $seq->seq;
2342							}
2343	40059					1073206	push @list, uc $str;
2344							}
2345	40059	100				102547	return \@list if ($type eq "ARRAY");
2346	40056					116332	return $list[0];
2347							}
2348
2349							=head2 _checkref
2350
2351							=cut
2352
2353							sub _checkref
2354							{
2355	40027			40027		56173	my ($self, $pobj, $enz_allowed) = @_;
2356	40027					50510	my $ref = ref $pobj;
2357	40027	50				76121	return -1 if (! $ref);
2358	40027		100			116398	$enz_allowed = $enz_allowed \|\| 0;
2359	40027					58857	my ($bioseq, $bioseqfeat) = (0, 0);
2360	40027					112163	$bioseq = $pobj->isa("Bio::Seq");
2361	40027					166356	$bioseqfeat = $pobj->isa("Bio::SeqFeatureI");
2362	40027	100				83598	if ($enz_allowed)
2363							{
2364	20016					102778	$enz_allowed = $pobj->isa("Bio::GeneDesign::RestrictionEnzyme");
2365							}
2366	40027					97527	return $bioseq + $bioseqfeat + $enz_allowed;
2367							}
2368
2369							1;
2370
2371							__END__
2372
2373							=head1 COPYRIGHT AND LICENSE
2374
2375							Copyright (c) 2013, GeneDesign developers
2376							All rights reserved.
2377
2378							Redistribution and use in source and binary forms, with or without modification,
2379							are permitted provided that the following conditions are met:
2380
2381							* Redistributions of source code must retain the above copyright notice, this
2382							list of conditions and the following disclaimer.
2383
2384							* Redistributions in binary form must reproduce the above copyright notice, this
2385							list of conditions and the following disclaimer in the documentation and/or
2386							other materials provided with the distribution.
2387
2388							* The names of Johns Hopkins, the Joint Genome Institute, the Lawrence Berkeley
2389							National Laboratory, the Department of Energy, and the GeneDesign developers may
2390							not be used to endorse or promote products derived from this software without
2391							specific prior written permission.
2392
2393							THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
2394							ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
2395							WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
2396							DISCLAIMED. IN NO EVENT SHALL THE DEVELOPERS BE LIABLE FOR ANY DIRECT, INDIRECT,
2397							INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
2398							LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
2399							PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
2400							LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
2401							OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
2402							ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2403
2404							=cut