| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | # | 
| 2 |  |  |  |  |  |  | # GeneDesign engine | 
| 3 |  |  |  |  |  |  | # | 
| 4 |  |  |  |  |  |  |  | 
| 5 |  |  |  |  |  |  | =head1 NAME | 
| 6 |  |  |  |  |  |  |  | 
| 7 |  |  |  |  |  |  | Bio::GeneDesign | 
| 8 |  |  |  |  |  |  |  | 
| 9 |  |  |  |  |  |  | =head1 VERSION | 
| 10 |  |  |  |  |  |  |  | 
| 11 |  |  |  |  |  |  | Version 5.52 | 
| 12 |  |  |  |  |  |  |  | 
| 13 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 14 |  |  |  |  |  |  |  | 
| 15 |  |  |  |  |  |  | =head1 AUTHOR | 
| 16 |  |  |  |  |  |  |  | 
| 17 |  |  |  |  |  |  | Sarah Richardson <smrichardson@lbl.gov> | 
| 18 |  |  |  |  |  |  |  | 
| 19 |  |  |  |  |  |  | =cut | 
| 20 |  |  |  |  |  |  |  | 
| 21 |  |  |  |  |  |  | package Bio::GeneDesign; | 
| 22 | 11 |  |  | 11 |  | 316051 | use base qw(Bio::Root::Root); | 
|  | 11 |  |  |  |  | 27 |  | 
|  | 11 |  |  |  |  | 10399 |  | 
| 23 |  |  |  |  |  |  |  | 
| 24 | 11 |  |  | 11 |  | 657490 | use Bio::GeneDesign::ConfigData; | 
|  | 11 |  |  |  |  | 33 |  | 
|  | 11 |  |  |  |  | 433 |  | 
| 25 | 11 |  |  | 11 |  | 6420 | use Bio::GeneDesign::Basic qw(:GD); | 
|  | 11 |  |  |  |  | 35 |  | 
|  | 11 |  |  |  |  | 3295 |  | 
| 26 | 11 |  |  | 11 |  | 7111 | use Bio::GeneDesign::CodonJuggle qw(:GD); | 
|  | 11 |  |  |  |  | 36 |  | 
|  | 11 |  |  |  |  | 2551 |  | 
| 27 | 11 |  |  | 11 |  | 67 | use Bio::GeneDesign::Codons qw(:GD); | 
|  | 11 |  |  |  |  | 21 |  | 
|  | 11 |  |  |  |  | 3034 |  | 
| 28 | 11 |  |  | 11 |  | 9023 | use Bio::GeneDesign::Oligo qw(:GD); | 
|  | 11 |  |  |  |  | 28 |  | 
|  | 11 |  |  |  |  | 1636 |  | 
| 29 | 11 |  |  | 11 |  | 69 | use Bio::GeneDesign::Random qw(:GD); | 
|  | 11 |  |  |  |  | 24 |  | 
|  | 11 |  |  |  |  | 1654 |  | 
| 30 | 11 |  |  | 11 |  | 10365 | use Bio::GeneDesign::RestrictionEnzymes qw(:GD); | 
|  | 11 |  |  |  |  | 39 |  | 
|  | 11 |  |  |  |  | 2169 |  | 
| 31 | 11 |  |  | 11 |  | 7662 | use Bio::GeneDesign::ReverseTranslate qw(:GD); | 
|  | 11 |  |  |  |  | 33 |  | 
|  | 11 |  |  |  |  | 1588 |  | 
| 32 | 11 |  |  | 11 |  | 6377 | use Bio::GeneDesign::PrefixTree; | 
|  | 11 |  |  |  |  | 31 |  | 
|  | 11 |  |  |  |  | 320 |  | 
| 33 | 11 |  |  | 11 |  | 77 | use File::Basename; | 
|  | 11 |  |  |  |  | 20 |  | 
|  | 11 |  |  |  |  | 799 |  | 
| 34 | 11 |  |  | 11 |  | 12602 | use Bio::SeqIO; | 
|  | 11 |  |  |  |  | 506882 |  | 
|  | 11 |  |  |  |  | 911 |  | 
| 35 | 11 |  |  | 11 |  | 133 | use Carp; | 
|  | 11 |  |  |  |  | 33 |  | 
|  | 11 |  |  |  |  | 868 |  | 
| 36 |  |  |  |  |  |  |  | 
| 37 | 11 |  |  | 11 |  | 70 | use strict; | 
|  | 11 |  |  |  |  | 25 |  | 
|  | 11 |  |  |  |  | 403 |  | 
| 38 | 11 |  |  | 11 |  | 68 | use warnings; | 
|  | 11 |  |  |  |  | 21 |  | 
|  | 11 |  |  |  |  | 114502 |  | 
| 39 |  |  |  |  |  |  |  | 
| 40 |  |  |  |  |  |  | my $VERSION = 5.52; | 
| 41 |  |  |  |  |  |  |  | 
| 42 |  |  |  |  |  |  | =head1 CONSTRUCTORS | 
| 43 |  |  |  |  |  |  |  | 
| 44 |  |  |  |  |  |  | =head2 new | 
| 45 |  |  |  |  |  |  |  | 
| 46 |  |  |  |  |  |  | Returns an initialized Bio::GeneDesign object. | 
| 47 |  |  |  |  |  |  |  | 
| 48 |  |  |  |  |  |  | This function reads the ConfigData written at installation, imports the | 
| 49 |  |  |  |  |  |  | relevant sublibraries, and sets the relevant paths. | 
| 50 |  |  |  |  |  |  |  | 
| 51 |  |  |  |  |  |  | my $GD = Bio::GeneDesign->new(); | 
| 52 |  |  |  |  |  |  |  | 
| 53 |  |  |  |  |  |  | =cut | 
| 54 |  |  |  |  |  |  |  | 
| 55 |  |  |  |  |  |  | sub new | 
| 56 |  |  |  |  |  |  | { | 
| 57 | 9 |  |  | 9 | 1 | 131 | my ($class, @args) = @_; | 
| 58 | 9 |  |  |  |  | 141 | my $self = $class->SUPER::new(@args); | 
| 59 | 9 |  |  |  |  | 129 | bless $self, $class; | 
| 60 |  |  |  |  |  |  |  | 
| 61 | 9 |  |  |  |  | 132 | $self->{script_path} = Bio::GeneDesign::ConfigData->config('script_path'); | 
| 62 | 9 |  |  |  |  | 54 | $self->{conf} = Bio::GeneDesign::ConfigData->config('conf_path'); | 
| 63 | 9 | 50 |  |  |  | 71 | $self->{conf} .= q{/} unless substr($self->{conf}, -1, 1) eq q{/}; | 
| 64 |  |  |  |  |  |  |  | 
| 65 | 9 |  |  |  |  | 50 | $self->{tmp_path} = Bio::GeneDesign::ConfigData->config('tmp_path'); | 
| 66 | 9 | 50 |  |  |  | 79 | $self->{tmp_path} .= q{/} unless substr($self->{tmp_path}, -1, 1) eq q{/}; | 
| 67 |  |  |  |  |  |  |  | 
| 68 | 9 |  |  |  |  | 53 | $self->{graph} = Bio::GeneDesign::ConfigData->config('graphing_support'); | 
| 69 | 9 | 50 |  |  |  | 49 | if ($self->{graph}) | 
| 70 |  |  |  |  |  |  | { | 
| 71 | 0 |  |  |  |  | 0 | require Bio::GeneDesign::Graph; | 
| 72 | 0 |  |  |  |  | 0 | import Bio::GeneDesign::Graph qw(:GD); | 
| 73 |  |  |  |  |  |  | } | 
| 74 |  |  |  |  |  |  |  | 
| 75 | 9 |  |  |  |  | 50 | $self->{EMBOSS} = Bio::GeneDesign::ConfigData->config('EMBOSS_support'); | 
| 76 | 9 | 50 |  |  |  | 43 | if ($self->{EMBOSS}) | 
| 77 |  |  |  |  |  |  | { | 
| 78 | 0 |  |  |  |  | 0 | require Bio::GeneDesign::Palindrome; | 
| 79 | 0 |  |  |  |  | 0 | import Bio::GeneDesign::Palindrome qw(:GD); | 
| 80 |  |  |  |  |  |  | } | 
| 81 |  |  |  |  |  |  |  | 
| 82 | 9 |  |  |  |  | 44 | $self->{BLAST} = Bio::GeneDesign::ConfigData->config('BLAST_support'); | 
| 83 | 9 | 50 |  |  |  | 51 | if ($self->BLAST) | 
| 84 |  |  |  |  |  |  | { | 
| 85 |  |  |  |  |  |  | #$ENV{BLASTPLUSDIR} = Bio::GeneDesign::ConfigData->config('blast_path'); | 
| 86 | 0 |  |  |  |  | 0 | require Bio::GeneDesign::Blast; | 
| 87 | 0 |  |  |  |  | 0 | import Bio::GeneDesign::Blast qw(:GD); | 
| 88 |  |  |  |  |  |  | } | 
| 89 |  |  |  |  |  |  |  | 
| 90 | 9 |  |  |  |  | 46 | $self->{vmatch} = Bio::GeneDesign::ConfigData->config('vmatch_support'); | 
| 91 | 9 | 50 |  |  |  | 120 | if ($self->{vmatch}) | 
| 92 |  |  |  |  |  |  | { | 
| 93 | 0 |  |  |  |  | 0 | require Bio::GeneDesign::Vmatch; | 
| 94 | 0 |  |  |  |  | 0 | import Bio::GeneDesign::Vmatch qw(:GD); | 
| 95 |  |  |  |  |  |  | } | 
| 96 |  |  |  |  |  |  |  | 
| 97 | 9 |  |  |  |  | 49 | $self->{codon_path} = $self->{conf} . 'codon_tables/'; | 
| 98 | 9 |  |  |  |  | 27 | $self->{organism} = undef; | 
| 99 | 9 |  |  |  |  | 24 | $self->{codontable} = undef; | 
| 100 | 9 |  |  |  |  | 26 | $self->{enzyme_set} = undef; | 
| 101 | 9 |  |  |  |  | 26 | $self->{version} = $VERSION; | 
| 102 | 9 |  |  |  |  | 24 | $self->{amb_trans_memo} = {}; | 
| 103 |  |  |  |  |  |  |  | 
| 104 | 9 |  |  |  |  | 42 | return $self; | 
| 105 |  |  |  |  |  |  | } | 
| 106 |  |  |  |  |  |  |  | 
| 107 |  |  |  |  |  |  | =head1 ACCESSORS | 
| 108 |  |  |  |  |  |  |  | 
| 109 |  |  |  |  |  |  | =cut | 
| 110 |  |  |  |  |  |  |  | 
| 111 |  |  |  |  |  |  | =head2 EMBOSS | 
| 112 |  |  |  |  |  |  |  | 
| 113 |  |  |  |  |  |  | returns a value if EMBOSS_support was vetted and approved during installation. | 
| 114 |  |  |  |  |  |  |  | 
| 115 |  |  |  |  |  |  | =cut | 
| 116 |  |  |  |  |  |  |  | 
| 117 |  |  |  |  |  |  | sub EMBOSS | 
| 118 |  |  |  |  |  |  | { | 
| 119 | 0 |  |  | 0 | 1 | 0 | my ($self) = @_; | 
| 120 | 0 |  |  |  |  | 0 | return $self->{'EMBOSS'}; | 
| 121 |  |  |  |  |  |  | } | 
| 122 |  |  |  |  |  |  |  | 
| 123 |  |  |  |  |  |  | =head2 BLAST | 
| 124 |  |  |  |  |  |  |  | 
| 125 |  |  |  |  |  |  | returns a value if BLAST_support was vetted and approved during installation. | 
| 126 |  |  |  |  |  |  |  | 
| 127 |  |  |  |  |  |  | =cut | 
| 128 |  |  |  |  |  |  |  | 
| 129 |  |  |  |  |  |  | sub BLAST | 
| 130 |  |  |  |  |  |  | { | 
| 131 | 9 |  |  | 9 | 1 | 27 | my ($self) = @_; | 
| 132 | 9 |  |  |  |  | 45 | return $self->{'BLAST'}; | 
| 133 |  |  |  |  |  |  | } | 
| 134 |  |  |  |  |  |  |  | 
| 135 |  |  |  |  |  |  | =head2 graph | 
| 136 |  |  |  |  |  |  |  | 
| 137 |  |  |  |  |  |  | returns a value if graphing_support was vetted and approved during installation. | 
| 138 |  |  |  |  |  |  |  | 
| 139 |  |  |  |  |  |  | =cut | 
| 140 |  |  |  |  |  |  |  | 
| 141 |  |  |  |  |  |  | sub graph | 
| 142 |  |  |  |  |  |  | { | 
| 143 | 0 |  |  | 0 | 1 | 0 | my ($self) = @_; | 
| 144 | 0 |  |  |  |  | 0 | return $self->{'graph'}; | 
| 145 |  |  |  |  |  |  | } | 
| 146 |  |  |  |  |  |  |  | 
| 147 |  |  |  |  |  |  | =head2 vmatch | 
| 148 |  |  |  |  |  |  |  | 
| 149 |  |  |  |  |  |  | returns a value if vmatch_support was vetted and approved during installation. | 
| 150 |  |  |  |  |  |  |  | 
| 151 |  |  |  |  |  |  | =cut | 
| 152 |  |  |  |  |  |  |  | 
| 153 |  |  |  |  |  |  | sub vmatch | 
| 154 |  |  |  |  |  |  | { | 
| 155 | 0 |  |  | 0 | 1 | 0 | my ($self) = @_; | 
| 156 | 0 |  |  |  |  | 0 | return $self->{'vmatch'}; | 
| 157 |  |  |  |  |  |  | } | 
| 158 |  |  |  |  |  |  |  | 
| 159 |  |  |  |  |  |  | =head2 enzyme_set | 
| 160 |  |  |  |  |  |  |  | 
| 161 |  |  |  |  |  |  | Returns a hash reference where the keys are enzyme names and the values are | 
| 162 |  |  |  |  |  |  | L<RestrictionEnzyme|Bio::GeneDesign::RestrictionEnzyme> objects, if the enzyme | 
| 163 |  |  |  |  |  |  | set has been defined. | 
| 164 |  |  |  |  |  |  |  | 
| 165 |  |  |  |  |  |  | To set this value, use L<set_restriction_enzymes|/set_restriction_enzymes>. | 
| 166 |  |  |  |  |  |  |  | 
| 167 |  |  |  |  |  |  | =cut | 
| 168 |  |  |  |  |  |  |  | 
| 169 |  |  |  |  |  |  | sub enzyme_set | 
| 170 |  |  |  |  |  |  | { | 
| 171 | 4 |  |  | 4 | 1 | 1237 | my ($self) = @_; | 
| 172 | 4 |  |  |  |  | 52 | return $self->{'enzyme_set'}; | 
| 173 |  |  |  |  |  |  | } | 
| 174 |  |  |  |  |  |  |  | 
| 175 |  |  |  |  |  |  | =head2 enzyme_set_name | 
| 176 |  |  |  |  |  |  |  | 
| 177 |  |  |  |  |  |  | Returns the name of the enzyme set in use, if there is one. | 
| 178 |  |  |  |  |  |  |  | 
| 179 |  |  |  |  |  |  | To set this value, use L<set_restriction_enzymes|/set_restriction_enzymes>. | 
| 180 |  |  |  |  |  |  |  | 
| 181 |  |  |  |  |  |  | =cut | 
| 182 |  |  |  |  |  |  |  | 
| 183 |  |  |  |  |  |  | sub enzyme_set_name | 
| 184 |  |  |  |  |  |  | { | 
| 185 | 0 |  |  | 0 | 1 | 0 | my ($self) = @_; | 
| 186 | 0 |  |  |  |  | 0 | return $self->{'enzyme_set_name'}; | 
| 187 |  |  |  |  |  |  | } | 
| 188 |  |  |  |  |  |  |  | 
| 189 |  |  |  |  |  |  | =head2 all_enzymes | 
| 190 |  |  |  |  |  |  |  | 
| 191 |  |  |  |  |  |  | Returns a hash reference where the keys are enzyme names and the values are | 
| 192 |  |  |  |  |  |  | L<RestrictionEnzyme|Bio::GeneDesign::RestrictionEnzyme> objects | 
| 193 |  |  |  |  |  |  |  | 
| 194 |  |  |  |  |  |  | To set this value, use L<set_restriction_enzymes|/set_restriction_enzymes>. | 
| 195 |  |  |  |  |  |  |  | 
| 196 |  |  |  |  |  |  | =cut | 
| 197 |  |  |  |  |  |  |  | 
| 198 |  |  |  |  |  |  | sub all_enzymes | 
| 199 |  |  |  |  |  |  | { | 
| 200 | 0 |  |  | 0 | 1 | 0 | my ($self) = @_; | 
| 201 | 0 |  |  |  |  | 0 | return $self->{'all_enzymes'}; | 
| 202 |  |  |  |  |  |  | } | 
| 203 |  |  |  |  |  |  |  | 
| 204 |  |  |  |  |  |  | =head2 organism | 
| 205 |  |  |  |  |  |  |  | 
| 206 |  |  |  |  |  |  | Returns the name of the organism in use, if there is one. | 
| 207 |  |  |  |  |  |  |  | 
| 208 |  |  |  |  |  |  | To set this value, use L<set_organism|/set_organism>. | 
| 209 |  |  |  |  |  |  |  | 
| 210 |  |  |  |  |  |  | =cut | 
| 211 |  |  |  |  |  |  |  | 
| 212 |  |  |  |  |  |  | sub organism | 
| 213 |  |  |  |  |  |  | { | 
| 214 | 0 |  |  | 0 | 1 | 0 | my ($self) = @_; | 
| 215 | 0 |  |  |  |  | 0 | return $self->{'organism'}; | 
| 216 |  |  |  |  |  |  | } | 
| 217 |  |  |  |  |  |  |  | 
| 218 |  |  |  |  |  |  | =head2 codontable | 
| 219 |  |  |  |  |  |  |  | 
| 220 |  |  |  |  |  |  | Returns the codon table in use, if there is one. | 
| 221 |  |  |  |  |  |  |  | 
| 222 |  |  |  |  |  |  | The codon table is a hash reference where the keys are upper case nucleotides | 
| 223 |  |  |  |  |  |  | and the values are upper case single letter amino acids. | 
| 224 |  |  |  |  |  |  |  | 
| 225 |  |  |  |  |  |  | my $codon_t = $GD->codontable(); | 
| 226 |  |  |  |  |  |  | $codon_t->{"ATG"} eq "M" || die; | 
| 227 |  |  |  |  |  |  |  | 
| 228 |  |  |  |  |  |  | To set this value, use L<set_codontable|/set_codontable>. | 
| 229 |  |  |  |  |  |  |  | 
| 230 |  |  |  |  |  |  | =cut | 
| 231 |  |  |  |  |  |  |  | 
| 232 |  |  |  |  |  |  | sub codontable | 
| 233 |  |  |  |  |  |  | { | 
| 234 | 1 |  |  | 1 | 1 | 718 | my ($self) = @_; | 
| 235 | 1 |  |  |  |  | 11 | return $self->{'codontable'}; | 
| 236 |  |  |  |  |  |  | } | 
| 237 |  |  |  |  |  |  |  | 
| 238 |  |  |  |  |  |  | =head2 reversecodontable | 
| 239 |  |  |  |  |  |  |  | 
| 240 |  |  |  |  |  |  | Returns the reverse codon table in use, if there is one. | 
| 241 |  |  |  |  |  |  |  | 
| 242 |  |  |  |  |  |  | The reverse codon table is a hash reference where the keys are upper case single | 
| 243 |  |  |  |  |  |  | letter amino acids and the values are upper case nucleotides. | 
| 244 |  |  |  |  |  |  |  | 
| 245 |  |  |  |  |  |  | my $revcodon_t = $GD->reversecodontable(); | 
| 246 |  |  |  |  |  |  | $revcodon_t->{"M"} eq "ATG" || die; | 
| 247 |  |  |  |  |  |  |  | 
| 248 |  |  |  |  |  |  | This value is set automatically when L<set_codontable|/set_codontable> is run. | 
| 249 |  |  |  |  |  |  |  | 
| 250 |  |  |  |  |  |  | =cut | 
| 251 |  |  |  |  |  |  |  | 
| 252 |  |  |  |  |  |  | sub reversecodontable | 
| 253 |  |  |  |  |  |  | { | 
| 254 | 1 |  |  | 1 | 1 | 1722 | my ($self) = @_; | 
| 255 | 1 |  |  |  |  | 5 | return $self->{'reversecodontable'}; | 
| 256 |  |  |  |  |  |  | } | 
| 257 |  |  |  |  |  |  |  | 
| 258 |  |  |  |  |  |  | =head2 rscutable | 
| 259 |  |  |  |  |  |  |  | 
| 260 |  |  |  |  |  |  | Returns the RSCU table in use, if there is one. | 
| 261 |  |  |  |  |  |  |  | 
| 262 |  |  |  |  |  |  | The RSCU codon table is a hash reference where the keys are upper case | 
| 263 |  |  |  |  |  |  | nucleotides and the values are floats. | 
| 264 |  |  |  |  |  |  |  | 
| 265 |  |  |  |  |  |  | my $rscu_t = $GD->rscutable(); | 
| 266 |  |  |  |  |  |  | $rscu_t->{"ATG"} eq 1.00 || die; | 
| 267 |  |  |  |  |  |  |  | 
| 268 |  |  |  |  |  |  | To set this value, use L<set_rscu_table|/set_rscutable>. | 
| 269 |  |  |  |  |  |  |  | 
| 270 |  |  |  |  |  |  | =cut | 
| 271 |  |  |  |  |  |  |  | 
| 272 |  |  |  |  |  |  | sub rscutable | 
| 273 |  |  |  |  |  |  | { | 
| 274 | 1 |  |  | 1 | 1 | 1767 | my ($self) = @_; | 
| 275 | 1 |  |  |  |  | 6 | return $self->{'rscutable'}; | 
| 276 |  |  |  |  |  |  | } | 
| 277 |  |  |  |  |  |  |  | 
| 278 |  |  |  |  |  |  |  | 
| 279 |  |  |  |  |  |  | =head1 FUNCTIONS | 
| 280 |  |  |  |  |  |  |  | 
| 281 |  |  |  |  |  |  | =cut | 
| 282 |  |  |  |  |  |  |  | 
| 283 |  |  |  |  |  |  | =head2 melt | 
| 284 |  |  |  |  |  |  |  | 
| 285 |  |  |  |  |  |  | my $Tm = $GD->melt(-sequence => $myseq); | 
| 286 |  |  |  |  |  |  |  | 
| 287 |  |  |  |  |  |  | The -sequence argument is required. | 
| 288 |  |  |  |  |  |  |  | 
| 289 |  |  |  |  |  |  | Returns the melting temperature of a DNA sequence. | 
| 290 |  |  |  |  |  |  |  | 
| 291 |  |  |  |  |  |  | You can set the salt and DNA concentrations with the -salt and -concentration | 
| 292 |  |  |  |  |  |  | arguments; they are 50mm (.05) and 100 pm (.0000001) respectively. | 
| 293 |  |  |  |  |  |  |  | 
| 294 |  |  |  |  |  |  | You can pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI | 
| 295 |  |  |  |  |  |  | object to be analyzed with the -sequence flag. | 
| 296 |  |  |  |  |  |  |  | 
| 297 |  |  |  |  |  |  | There are four different formulae to choose from. If you wish to use the nearest | 
| 298 |  |  |  |  |  |  | neighbor method, use the -nearest_neighbor flag. Otherwise the appropriate | 
| 299 |  |  |  |  |  |  | formula will be determined by the length of your -sequence argument. | 
| 300 |  |  |  |  |  |  |  | 
| 301 |  |  |  |  |  |  | For sequences under 14 base pairs: | 
| 302 |  |  |  |  |  |  | Tm = (4 * #GC) + (2 * #AT). | 
| 303 |  |  |  |  |  |  |  | 
| 304 |  |  |  |  |  |  | For sequences between 14 and 50 base pairs: | 
| 305 |  |  |  |  |  |  | Tm = 100.5 + (41 * #GC / length) - (820 / length) + 16.6 * log10(salt) | 
| 306 |  |  |  |  |  |  |  | 
| 307 |  |  |  |  |  |  | For sequences over 50 base pairs: | 
| 308 |  |  |  |  |  |  | Tm = 81.5 + (41 * #GC / length) - (500 / length) + 16.6 * log10(salt) - .62; | 
| 309 |  |  |  |  |  |  |  | 
| 310 |  |  |  |  |  |  | =cut | 
| 311 |  |  |  |  |  |  |  | 
| 312 |  |  |  |  |  |  | sub melt | 
| 313 |  |  |  |  |  |  | { | 
| 314 | 4 |  |  | 4 | 1 | 4230 | my ($self, @args) = @_; | 
| 315 |  |  |  |  |  |  |  | 
| 316 | 4 |  |  |  |  | 20 | my ($seq, $salt, $conc, $nnflag) | 
| 317 |  |  |  |  |  |  | = $self->_rearrange([qw( | 
| 318 |  |  |  |  |  |  | sequence | 
| 319 |  |  |  |  |  |  | salt | 
| 320 |  |  |  |  |  |  | concentration | 
| 321 |  |  |  |  |  |  | nearest_neighbor)], @args); | 
| 322 |  |  |  |  |  |  |  | 
| 323 | 4 | 50 |  |  |  | 86 | $self->throw("No sequence provided for the melt function") | 
| 324 |  |  |  |  |  |  | unless ($seq); | 
| 325 |  |  |  |  |  |  |  | 
| 326 | 4 | 100 |  |  |  | 10 | $nnflag = $nnflag ? 1 : 0; | 
| 327 | 4 |  | 50 |  |  | 17 | $salt = $salt || .05; | 
| 328 | 4 |  | 50 |  |  | 13 | $conc = $conc || .0000001; | 
| 329 | 4 |  |  |  |  | 8 | my $str = $self->_stripdown($seq, q{}, 1); | 
| 330 |  |  |  |  |  |  |  | 
| 331 | 4 | 100 |  |  |  | 12 | if ($nnflag) | 
| 332 |  |  |  |  |  |  | { | 
| 333 | 1 |  |  |  |  | 5 | return _ntherm($str, $salt, $conc); | 
| 334 |  |  |  |  |  |  | } | 
| 335 | 3 |  |  |  |  | 10 | return _melt($str, $salt, $conc); | 
| 336 |  |  |  |  |  |  | } | 
| 337 |  |  |  |  |  |  |  | 
| 338 |  |  |  |  |  |  | =head2 complement | 
| 339 |  |  |  |  |  |  |  | 
| 340 |  |  |  |  |  |  | $my_seq = "AATTCG"; | 
| 341 |  |  |  |  |  |  |  | 
| 342 |  |  |  |  |  |  | my $complemented_seq = $GD->complement($my_seq); | 
| 343 |  |  |  |  |  |  | $complemented_seq eq "TTAAGC" || die; | 
| 344 |  |  |  |  |  |  |  | 
| 345 |  |  |  |  |  |  | my $reverse_complemented_seq = $GD->complement($my_seq, 1); | 
| 346 |  |  |  |  |  |  | $reverse_complemented_seq eq "CGAATT" || die; | 
| 347 |  |  |  |  |  |  |  | 
| 348 |  |  |  |  |  |  | #clean | 
| 349 |  |  |  |  |  |  | my $complemented_seq = $GD->complement(-sequence => $my_seq); | 
| 350 |  |  |  |  |  |  | $complemented_seq eq "TTAAGC" || die; | 
| 351 |  |  |  |  |  |  |  | 
| 352 |  |  |  |  |  |  | my $reverse_complemented_seq = $GD->complement(-sequence => $my_seq, | 
| 353 |  |  |  |  |  |  | -reverse => 1); | 
| 354 |  |  |  |  |  |  | $reverse_complemented_seq eq "CGAATT" || die; | 
| 355 |  |  |  |  |  |  |  | 
| 356 |  |  |  |  |  |  |  | 
| 357 |  |  |  |  |  |  | The -sequence argument is required. | 
| 358 |  |  |  |  |  |  |  | 
| 359 |  |  |  |  |  |  | Complements or reverse complements a DNA sequence. | 
| 360 |  |  |  |  |  |  |  | 
| 361 |  |  |  |  |  |  | You can pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI | 
| 362 |  |  |  |  |  |  | object to be processed. | 
| 363 |  |  |  |  |  |  |  | 
| 364 |  |  |  |  |  |  | If you also pass along a true statement, the sequence will be reversed and | 
| 365 |  |  |  |  |  |  | complemented. | 
| 366 |  |  |  |  |  |  |  | 
| 367 |  |  |  |  |  |  | =cut | 
| 368 |  |  |  |  |  |  |  | 
| 369 |  |  |  |  |  |  | sub complement | 
| 370 |  |  |  |  |  |  | { | 
| 371 | 4 |  |  | 4 | 1 | 3474 | my ($self, @args) = @_; | 
| 372 |  |  |  |  |  |  |  | 
| 373 | 4 |  |  |  |  | 22 | my ($seq, $swit) | 
| 374 |  |  |  |  |  |  | = $self->_rearrange([qw( | 
| 375 |  |  |  |  |  |  | sequence | 
| 376 |  |  |  |  |  |  | reverse)], @args); | 
| 377 |  |  |  |  |  |  |  | 
| 378 | 4 | 50 |  |  |  | 38 | $self->throw("No sequence provided for the complement function") | 
| 379 |  |  |  |  |  |  | unless $seq; | 
| 380 |  |  |  |  |  |  |  | 
| 381 | 4 |  | 100 |  |  | 14 | $swit = $swit || 0; | 
| 382 |  |  |  |  |  |  |  | 
| 383 | 4 |  |  |  |  | 10 | my $str = $self->_stripdown($seq, q{}, 1); | 
| 384 |  |  |  |  |  |  |  | 
| 385 | 4 |  |  |  |  | 15 | return _complement($str, $swit); | 
| 386 |  |  |  |  |  |  | } | 
| 387 |  |  |  |  |  |  |  | 
| 388 |  |  |  |  |  |  | =head2 count | 
| 389 |  |  |  |  |  |  |  | 
| 390 |  |  |  |  |  |  | $my_seq = "AATTCG"; | 
| 391 |  |  |  |  |  |  | my $count = $GD->count($my_seq); | 
| 392 |  |  |  |  |  |  | $count->{C} == 1 || die; | 
| 393 |  |  |  |  |  |  | $count->{G} == 1 || die; | 
| 394 |  |  |  |  |  |  | $count->{A} == 2 || die; | 
| 395 |  |  |  |  |  |  | $count->{GCp} == 33.3 || die; | 
| 396 |  |  |  |  |  |  | $count->{ATp} == 66.7 || die; | 
| 397 |  |  |  |  |  |  |  | 
| 398 |  |  |  |  |  |  | #clean | 
| 399 |  |  |  |  |  |  | my $count = $GD->count(-sequence => $my_seq); | 
| 400 |  |  |  |  |  |  |  | 
| 401 |  |  |  |  |  |  | You must pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI | 
| 402 |  |  |  |  |  |  | object. | 
| 403 |  |  |  |  |  |  |  | 
| 404 |  |  |  |  |  |  | the count function counts the bases in a DNA sequence and returns a hash | 
| 405 |  |  |  |  |  |  | reference where each base (including the ambiguous bases) are keys and the | 
| 406 |  |  |  |  |  |  | values are the number of times they appear in the sequence. There are also the | 
| 407 |  |  |  |  |  |  | special values GCp and ATp for GC and AT percentage. | 
| 408 |  |  |  |  |  |  |  | 
| 409 |  |  |  |  |  |  | =cut | 
| 410 |  |  |  |  |  |  |  | 
| 411 |  |  |  |  |  |  | sub count | 
| 412 |  |  |  |  |  |  | { | 
| 413 | 2 |  |  | 2 | 1 | 2778 | my ($self, @args) = @_; | 
| 414 |  |  |  |  |  |  |  | 
| 415 | 2 |  |  |  |  | 12 | my ($seq) = $self->_rearrange([qw(sequence)], @args); | 
| 416 |  |  |  |  |  |  |  | 
| 417 | 2 | 50 |  |  |  | 23 | $self->throw("No sequence provided for the count function") | 
| 418 |  |  |  |  |  |  | unless ($seq); | 
| 419 |  |  |  |  |  |  |  | 
| 420 |  |  |  |  |  |  |  | 
| 421 | 2 |  |  |  |  | 6 | my $str = $self->_stripdown($seq, q{}, 1); | 
| 422 |  |  |  |  |  |  |  | 
| 423 | 2 |  |  |  |  | 10 | return _count($str); | 
| 424 |  |  |  |  |  |  | } | 
| 425 |  |  |  |  |  |  |  | 
| 426 |  |  |  |  |  |  | =head2 regex_nt | 
| 427 |  |  |  |  |  |  |  | 
| 428 |  |  |  |  |  |  | my $my_seq = "ABC"; | 
| 429 |  |  |  |  |  |  | my $regex = $GD->regex_nt(-sequence => $my_seq); | 
| 430 |  |  |  |  |  |  | # $regex is qr/A[CGT]C/; | 
| 431 |  |  |  |  |  |  |  | 
| 432 |  |  |  |  |  |  | my $regarr = $GD->regex_nt(-sequence => $my_seq --reverse_complement => 1); | 
| 433 |  |  |  |  |  |  | # $regarr is [qr/A[CGT]C/, qr/G[ACG]T/] | 
| 434 |  |  |  |  |  |  |  | 
| 435 |  |  |  |  |  |  |  | 
| 436 |  |  |  |  |  |  | You must pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI | 
| 437 |  |  |  |  |  |  | object to be processed with the -sequence flag. | 
| 438 |  |  |  |  |  |  |  | 
| 439 |  |  |  |  |  |  | regex_nt creates a compiled regular expression or a set of them that can be used | 
| 440 |  |  |  |  |  |  | to query large nucleotide sequences for possibly ambiguous subsequences. | 
| 441 |  |  |  |  |  |  |  | 
| 442 |  |  |  |  |  |  |  | 
| 443 |  |  |  |  |  |  | If you want to get regular expressions for both the forward and reverse senses | 
| 444 |  |  |  |  |  |  | of the DNA, use the -reverse_complement flag and expect a reference to an array | 
| 445 |  |  |  |  |  |  | of compiled regexes. | 
| 446 |  |  |  |  |  |  |  | 
| 447 |  |  |  |  |  |  | =cut | 
| 448 |  |  |  |  |  |  |  | 
| 449 |  |  |  |  |  |  | sub regex_nt | 
| 450 |  |  |  |  |  |  | { | 
| 451 | 6 |  |  | 6 | 1 | 12372 | my ($self, @args) = @_; | 
| 452 |  |  |  |  |  |  |  | 
| 453 | 6 |  |  |  |  | 38 | my ($seq, $arrswit) | 
| 454 |  |  |  |  |  |  | = $self->_rearrange([qw( | 
| 455 |  |  |  |  |  |  | sequence | 
| 456 |  |  |  |  |  |  | reverse_complement)], @args | 
| 457 |  |  |  |  |  |  | ); | 
| 458 |  |  |  |  |  |  |  | 
| 459 | 6 | 50 |  |  |  | 106 | $self->throw("no sequence provided the regex_nt function") | 
| 460 |  |  |  |  |  |  | unless ($seq); | 
| 461 |  |  |  |  |  |  |  | 
| 462 | 6 |  |  |  |  | 20 | my $str = $self->_stripdown($seq, q{}, 1); | 
| 463 |  |  |  |  |  |  |  | 
| 464 | 6 | 100 |  |  |  | 18 | if ($arrswit) | 
| 465 |  |  |  |  |  |  | { | 
| 466 | 2 |  |  |  |  | 9 | return _regarr($str); | 
| 467 |  |  |  |  |  |  | } | 
| 468 |  |  |  |  |  |  | else | 
| 469 |  |  |  |  |  |  | { | 
| 470 | 4 |  |  |  |  | 21 | return _regres($str, 1); | 
| 471 |  |  |  |  |  |  | } | 
| 472 |  |  |  |  |  |  | } | 
| 473 |  |  |  |  |  |  |  | 
| 474 |  |  |  |  |  |  | =head2 regex_aa | 
| 475 |  |  |  |  |  |  |  | 
| 476 |  |  |  |  |  |  | my $my_pep = "AEQ*"; | 
| 477 |  |  |  |  |  |  | my $regex = $GD->regex_aa(-sequence => $my_pep); | 
| 478 |  |  |  |  |  |  | $regex == qr/AEQ[\*]/ || die; | 
| 479 |  |  |  |  |  |  |  | 
| 480 |  |  |  |  |  |  | Creates a compiled regular expression or a set of them that can be used to query | 
| 481 |  |  |  |  |  |  | large amino acid sequences for smaller subsequences. | 
| 482 |  |  |  |  |  |  |  | 
| 483 |  |  |  |  |  |  | You can pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI | 
| 484 |  |  |  |  |  |  | object to be processed with the -sequence flag. | 
| 485 |  |  |  |  |  |  |  | 
| 486 |  |  |  |  |  |  | =cut | 
| 487 |  |  |  |  |  |  |  | 
| 488 |  |  |  |  |  |  | sub regex_aa | 
| 489 |  |  |  |  |  |  | { | 
| 490 | 1 |  |  | 1 | 1 | 714 | my ($self, $seq) = @_; | 
| 491 |  |  |  |  |  |  |  | 
| 492 | 1 | 50 |  |  |  | 6 | $self->throw("no sequence provided for the regex_aa function") | 
| 493 |  |  |  |  |  |  | unless ($seq); | 
| 494 |  |  |  |  |  |  |  | 
| 495 | 1 |  |  |  |  | 4 | my $str = $self->_stripdown($seq, q{}, 1); | 
| 496 |  |  |  |  |  |  |  | 
| 497 | 1 |  |  |  |  | 6 | return _regres($str, 2); | 
| 498 |  |  |  |  |  |  | } | 
| 499 |  |  |  |  |  |  |  | 
| 500 |  |  |  |  |  |  | =head2 sequence_is_ambiguous | 
| 501 |  |  |  |  |  |  |  | 
| 502 |  |  |  |  |  |  | my $my_seq = "ABC"; | 
| 503 |  |  |  |  |  |  | my $flag = $GD->sequence_is_ambiguous($my_seq); | 
| 504 |  |  |  |  |  |  | $flag == 1 || die; | 
| 505 |  |  |  |  |  |  |  | 
| 506 |  |  |  |  |  |  | $my_seq = "ATC"; | 
| 507 |  |  |  |  |  |  | $flag = $GD->sequence_is_ambiguous($my_seq); | 
| 508 |  |  |  |  |  |  | $flag == 0 || die; | 
| 509 |  |  |  |  |  |  |  | 
| 510 |  |  |  |  |  |  | Checks to see if a DNA sequence contains ambiguous bases (RYMKWSBDHVN) and | 
| 511 |  |  |  |  |  |  | returns true if it does. | 
| 512 |  |  |  |  |  |  |  | 
| 513 |  |  |  |  |  |  | You can pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI | 
| 514 |  |  |  |  |  |  | object to be processed. | 
| 515 |  |  |  |  |  |  |  | 
| 516 |  |  |  |  |  |  | =cut | 
| 517 |  |  |  |  |  |  |  | 
| 518 |  |  |  |  |  |  | sub sequence_is_ambiguous | 
| 519 |  |  |  |  |  |  | { | 
| 520 | 5 |  |  | 5 | 1 | 1599 | my ($self, $seq) = @_; | 
| 521 |  |  |  |  |  |  |  | 
| 522 | 5 | 50 |  |  |  | 16 | $self->throw("no sequence provided for the sequence_is_ambiguous function") | 
| 523 |  |  |  |  |  |  | unless ($seq); | 
| 524 |  |  |  |  |  |  |  | 
| 525 | 5 |  |  |  |  | 15 | my $str = $self->_stripdown($seq, q{}, 1); | 
| 526 |  |  |  |  |  |  |  | 
| 527 | 5 |  |  |  |  | 22 | return _is_ambiguous($str); | 
| 528 |  |  |  |  |  |  | } | 
| 529 |  |  |  |  |  |  |  | 
| 530 |  |  |  |  |  |  | =head2 ambiguous_translation | 
| 531 |  |  |  |  |  |  |  | 
| 532 |  |  |  |  |  |  | my $my_seq = "ABC"; | 
| 533 |  |  |  |  |  |  | my @peps = $GD->ambiguous_translation(-sequence => $my_seq, -frame => 1); | 
| 534 |  |  |  |  |  |  | # @peps is qw(I T C) | 
| 535 |  |  |  |  |  |  |  | 
| 536 |  |  |  |  |  |  | You must pass a string variable, a Bio::Seq object, or a Bio::SeqFeatureI object | 
| 537 |  |  |  |  |  |  | to be processed. | 
| 538 |  |  |  |  |  |  |  | 
| 539 |  |  |  |  |  |  | Translates a nucleotide sequence that may have ambiguous bases and returns an | 
| 540 |  |  |  |  |  |  | array of possible peptides. | 
| 541 |  |  |  |  |  |  |  | 
| 542 |  |  |  |  |  |  | The frame argument may be 1, 2, 3, -1, -2, or -3. | 
| 543 |  |  |  |  |  |  | It may also be t (three, 1, 2, 3), or s (six, 1, 2, 3, -1, -2, -3). | 
| 544 |  |  |  |  |  |  | It defaults to 1. | 
| 545 |  |  |  |  |  |  |  | 
| 546 |  |  |  |  |  |  | =cut | 
| 547 |  |  |  |  |  |  |  | 
| 548 |  |  |  |  |  |  | sub ambiguous_translation | 
| 549 |  |  |  |  |  |  | { | 
| 550 | 4 |  |  | 4 | 1 | 10088 | my ($self, @args) = @_; | 
| 551 |  |  |  |  |  |  |  | 
| 552 | 4 |  |  |  |  | 35 | my ($seq, $frame) | 
| 553 |  |  |  |  |  |  | = $self->_rearrange([qw(sequence frame)], @args); | 
| 554 |  |  |  |  |  |  |  | 
| 555 | 4 | 50 |  |  |  | 120 | $self->throw("no sequence provided for the ambiguous_translation function") | 
| 556 |  |  |  |  |  |  | unless ($seq); | 
| 557 |  |  |  |  |  |  |  | 
| 558 | 4 |  | 50 |  |  | 11 | $frame = $frame || 1; | 
| 559 |  |  |  |  |  |  |  | 
| 560 | 4 |  |  |  |  | 9 | my %ambtransswits = map {$_ => 1} qw(1 2 3 -1 -2 -3 s t); | 
|  | 32 |  |  |  |  | 75 |  | 
| 561 |  |  |  |  |  |  |  | 
| 562 | 4 | 50 |  |  |  | 18 | $self->throw("Bad frame argument to ambiguous_translation") | 
| 563 |  |  |  |  |  |  | unless (exists $ambtransswits{$frame}); | 
| 564 |  |  |  |  |  |  |  | 
| 565 | 4 |  |  |  |  | 13 | my $str = $self->_stripdown($seq, q{}, 1); | 
| 566 |  |  |  |  |  |  |  | 
| 567 | 4 |  |  |  |  | 22 | return _amb_translation($str, | 
| 568 |  |  |  |  |  |  | $self->{codontable}, | 
| 569 |  |  |  |  |  |  | $frame, | 
| 570 |  |  |  |  |  |  | $self->{amb_trans_memo}); | 
| 571 |  |  |  |  |  |  | } | 
| 572 |  |  |  |  |  |  |  | 
| 573 |  |  |  |  |  |  | =head2 ambiguous_transcription | 
| 574 |  |  |  |  |  |  |  | 
| 575 |  |  |  |  |  |  | my $my_seq = "ABC"; | 
| 576 |  |  |  |  |  |  | my $seqs = $GD->ambiguous_transcription($my_seq); | 
| 577 |  |  |  |  |  |  | # $seqs is [qw(ACC AGC ATC)] | 
| 578 |  |  |  |  |  |  |  | 
| 579 |  |  |  |  |  |  | Deambiguates a nucleotide sequence that may have ambiguous bases and returns a | 
| 580 |  |  |  |  |  |  | reference to a sorted array of possible unambiguous sequences. | 
| 581 |  |  |  |  |  |  |  | 
| 582 |  |  |  |  |  |  | You can pass either a string variable, a Bio::Seq object, or a Bio::SeqFeatureI | 
| 583 |  |  |  |  |  |  | object to be processed. | 
| 584 |  |  |  |  |  |  |  | 
| 585 |  |  |  |  |  |  | =cut | 
| 586 |  |  |  |  |  |  |  | 
| 587 |  |  |  |  |  |  | sub ambiguous_transcription | 
| 588 |  |  |  |  |  |  | { | 
| 589 | 1 |  |  | 1 | 1 | 1923 | my ($self, $seq) = @_; | 
| 590 |  |  |  |  |  |  |  | 
| 591 | 1 | 50 |  |  |  | 4 | $self->throw("no sequence provided for the ambiguous_transcription function") | 
| 592 |  |  |  |  |  |  | unless ($seq); | 
| 593 |  |  |  |  |  |  |  | 
| 594 | 1 |  |  |  |  | 4 | my $str = $self->_stripdown($seq, q{}, 1); | 
| 595 |  |  |  |  |  |  |  | 
| 596 | 1 |  |  |  |  | 7 | return _amb_transcription($str); | 
| 597 |  |  |  |  |  |  | } | 
| 598 |  |  |  |  |  |  |  | 
| 599 |  |  |  |  |  |  | =head2 positions | 
| 600 |  |  |  |  |  |  |  | 
| 601 |  |  |  |  |  |  | my $seq = "TGCTGACTGCAGTCAGTACACTACGTACGTGCATGAC"; | 
| 602 |  |  |  |  |  |  | my $seek = "CWC"; | 
| 603 |  |  |  |  |  |  |  | 
| 604 |  |  |  |  |  |  | my $positions = $GD->positions(-sequence => $seq, | 
| 605 |  |  |  |  |  |  | -query => $seek); | 
| 606 |  |  |  |  |  |  | # $positions is {18 => "CAC"} | 
| 607 |  |  |  |  |  |  |  | 
| 608 |  |  |  |  |  |  | $positions = $GD->positions(-sequence => $seq, | 
| 609 |  |  |  |  |  |  | -query => $seek, | 
| 610 |  |  |  |  |  |  | -reverse_complement => 1); | 
| 611 |  |  |  |  |  |  | # $positions is {18 => "CAC", 28 => "GTG"} | 
| 612 |  |  |  |  |  |  |  | 
| 613 |  |  |  |  |  |  | Finds and returns all the positions and sequences of a potentially ambiguous | 
| 614 |  |  |  |  |  |  | subsequence in a larger sequence. The reverse_complement flag is off by default. | 
| 615 |  |  |  |  |  |  |  | 
| 616 |  |  |  |  |  |  | You can pass either string variables, Bio::Seq objects, or Bio::SeqFeatureI | 
| 617 |  |  |  |  |  |  | objects as the sequence and query arguments; additionally you may pass a | 
| 618 |  |  |  |  |  |  | L<RestrictionEnzyme|Bio::GeneDesign::RestrictionEnzyme> object as the query | 
| 619 |  |  |  |  |  |  | argument. | 
| 620 |  |  |  |  |  |  |  | 
| 621 |  |  |  |  |  |  | =cut | 
| 622 |  |  |  |  |  |  |  | 
| 623 |  |  |  |  |  |  | sub positions | 
| 624 |  |  |  |  |  |  | { | 
| 625 | 2 |  |  | 2 | 1 | 2749 | my ($self, @args) = @_; | 
| 626 |  |  |  |  |  |  |  | 
| 627 | 2 |  |  |  |  | 12 | my ($seq, $seek, $revcom) | 
| 628 |  |  |  |  |  |  | = $self->_rearrange([qw( | 
| 629 |  |  |  |  |  |  | sequence | 
| 630 |  |  |  |  |  |  | query | 
| 631 |  |  |  |  |  |  | reverse_complement)], @args); | 
| 632 |  |  |  |  |  |  |  | 
| 633 | 2 | 50 |  |  |  | 55 | $self->throw("no sequence provided for the positions function") | 
| 634 |  |  |  |  |  |  | unless ($seq); | 
| 635 |  |  |  |  |  |  |  | 
| 636 | 2 | 50 |  |  |  | 11 | $self->throw("no query provided for the positions function") | 
| 637 |  |  |  |  |  |  | unless ($seek); | 
| 638 |  |  |  |  |  |  |  | 
| 639 |  |  |  |  |  |  |  | 
| 640 | 2 |  |  |  |  | 7 | my $base = $self->_stripdown($seq, q{}, 1); | 
| 641 |  |  |  |  |  |  |  | 
| 642 | 2 |  |  |  |  | 4 | my $regarr = []; | 
| 643 | 2 |  |  |  |  | 3 | my $query = $seek; | 
| 644 | 2 |  |  |  |  | 6 | my $qref = ref($seek); | 
| 645 | 2 | 50 |  |  |  | 6 | if ($qref) | 
| 646 |  |  |  |  |  |  | { | 
| 647 | 0 | 0 | 0 |  |  | 0 | $self->throw("object $qref is not a Bio::Seq or Bio::SeqFeature, or " . | 
|  |  |  | 0 |  |  |  |  | 
| 648 |  |  |  |  |  |  | "Bio::GeneDesign::RestrictionEnzyme object") | 
| 649 |  |  |  |  |  |  | unless ($seek->isa("Bio::Seq") | 
| 650 |  |  |  |  |  |  | || $seek->isa("Bio::SeqFeatureI") | 
| 651 |  |  |  |  |  |  | || $seek->isa("Bio::GeneDesign::RestrictionEnzyme")); | 
| 652 | 0 | 0 |  |  |  | 0 | if ($seek->isa("Bio::GeneDesign::RestrictionEnzyme")) | 
| 653 |  |  |  |  |  |  | { | 
| 654 | 0 |  |  |  |  | 0 | $regarr = $seek->regex; | 
| 655 |  |  |  |  |  |  | } | 
| 656 |  |  |  |  |  |  | else | 
| 657 |  |  |  |  |  |  | { | 
| 658 | 0 | 0 |  |  |  | 0 | $query = ref($seek->seq)  ? $seek->seq->seq  : $seek->seq; | 
| 659 | 0 | 0 |  |  |  | 0 | $regarr = $revcom ? _regarr($query, 1) : [_regres($query, 1)]; | 
| 660 |  |  |  |  |  |  | } | 
| 661 |  |  |  |  |  |  | } | 
| 662 |  |  |  |  |  |  | else | 
| 663 |  |  |  |  |  |  | { | 
| 664 | 2 | 50 |  |  |  | 18 | $regarr = $revcom ? _regarr($query, 1) : [_regres($query, 1)]; | 
| 665 |  |  |  |  |  |  | } | 
| 666 |  |  |  |  |  |  |  | 
| 667 | 2 |  |  |  |  | 11 | return _positions($base, $regarr); | 
| 668 |  |  |  |  |  |  | } | 
| 669 |  |  |  |  |  |  |  | 
| 670 |  |  |  |  |  |  | =head2 set_codontable | 
| 671 |  |  |  |  |  |  |  | 
| 672 |  |  |  |  |  |  | # load a codon table from the GeneDesign configuration directory | 
| 673 |  |  |  |  |  |  | $GD->set_codontable(-organism_name => "yeast"); | 
| 674 |  |  |  |  |  |  |  | 
| 675 |  |  |  |  |  |  | # load a codon table from an arbitrary path and catch it in a variable | 
| 676 |  |  |  |  |  |  | my $codon_t = $GD->set_codontable(-organism_name => "custom", | 
| 677 |  |  |  |  |  |  | -table_path => "/path/to/table.ct"); | 
| 678 |  |  |  |  |  |  |  | 
| 679 |  |  |  |  |  |  | The -organism_name argument is required. | 
| 680 |  |  |  |  |  |  |  | 
| 681 |  |  |  |  |  |  | This function loads, sets, and returns a codon definition table. After it is run | 
| 682 |  |  |  |  |  |  | the accessor L<codontable|/codontable> will return the hash reference that | 
| 683 |  |  |  |  |  |  | represents the codon table. | 
| 684 |  |  |  |  |  |  |  | 
| 685 |  |  |  |  |  |  | If no path is provided, the configuration directory /codon_tables is checked for | 
| 686 |  |  |  |  |  |  | tables that match the provided organism name. If there is no table in that | 
| 687 |  |  |  |  |  |  | directory, a warning will appear and the standard codon table will be | 
| 688 |  |  |  |  |  |  | used. | 
| 689 |  |  |  |  |  |  |  | 
| 690 |  |  |  |  |  |  | Any codon table that is missing a definition for a codon will cause a warning to | 
| 691 |  |  |  |  |  |  | be issued. The table format for codon tables is | 
| 692 |  |  |  |  |  |  |  | 
| 693 |  |  |  |  |  |  | # Standard genetic code | 
| 694 |  |  |  |  |  |  | {TTT} = F | 
| 695 |  |  |  |  |  |  | {TTC} = F | 
| 696 |  |  |  |  |  |  | {TTA} = L | 
| 697 |  |  |  |  |  |  | ... | 
| 698 |  |  |  |  |  |  |  | 
| 699 |  |  |  |  |  |  | See L<NCBI's table|http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi#SG1> | 
| 700 |  |  |  |  |  |  |  | 
| 701 |  |  |  |  |  |  | =cut | 
| 702 |  |  |  |  |  |  |  | 
| 703 |  |  |  |  |  |  | sub set_codontable | 
| 704 |  |  |  |  |  |  | { | 
| 705 | 7 |  |  | 7 | 1 | 26 | my ($self, @args) = @_; | 
| 706 | 7 |  |  |  |  | 45 | my ($orgname, $table_path) | 
| 707 |  |  |  |  |  |  | = $self->_rearrange([qw( | 
| 708 |  |  |  |  |  |  | organism_name | 
| 709 |  |  |  |  |  |  | table_path)], @args); | 
| 710 |  |  |  |  |  |  |  | 
| 711 | 7 | 50 |  |  |  | 181 | $self->throw("No organsim name provided") unless ($orgname); | 
| 712 | 7 |  |  |  |  | 35 | $self->{organism} = $orgname; | 
| 713 |  |  |  |  |  |  |  | 
| 714 | 7 | 50 | 33 |  |  | 196 | $self->throw("$table_path does not exist") | 
| 715 |  |  |  |  |  |  | if ($table_path && ! -e $table_path); | 
| 716 |  |  |  |  |  |  |  | 
| 717 | 7 | 50 |  |  |  | 31 | if (! $table_path) | 
| 718 |  |  |  |  |  |  | { | 
| 719 | 0 |  |  |  |  | 0 | $table_path = $self->{codon_path} . $orgname . ".ct"; | 
| 720 | 0 | 0 |  |  |  | 0 | if (! -e $table_path) | 
| 721 |  |  |  |  |  |  | { | 
| 722 | 0 |  |  |  |  | 0 | warn "No Codon table for $orgname found. Using Standard values\n"; | 
| 723 | 0 |  |  |  |  | 0 | $table_path = $self->{codon_path} . "Standard.ct"; | 
| 724 |  |  |  |  |  |  | } | 
| 725 |  |  |  |  |  |  | } | 
| 726 |  |  |  |  |  |  |  | 
| 727 | 7 |  |  |  |  | 92 | $self->{codontable} = _parse_codon_file($table_path); | 
| 728 | 7 |  |  |  |  | 46 | $self->{reversecodontable} = _reverse_codon_table($self->{codontable}); | 
| 729 | 7 |  |  |  |  | 29 | return $self->{codontable}; | 
| 730 |  |  |  |  |  |  | } | 
| 731 |  |  |  |  |  |  |  | 
| 732 |  |  |  |  |  |  | =head2 set_rscutable | 
| 733 |  |  |  |  |  |  |  | 
| 734 |  |  |  |  |  |  | # load a RSCU table from the GeneDesign configuration directory | 
| 735 |  |  |  |  |  |  | $GD->set_rscutable(-organism_name => "yeast"); | 
| 736 |  |  |  |  |  |  |  | 
| 737 |  |  |  |  |  |  | # load an RSCU table from an arbitrary path and catch it in a variable | 
| 738 |  |  |  |  |  |  | my $rscu_t = $GD->set_rscutable(-organism_name => "custom", | 
| 739 |  |  |  |  |  |  | -table_path => "/path/to/table.rscu"); | 
| 740 |  |  |  |  |  |  |  | 
| 741 |  |  |  |  |  |  | The -organism_name argument is required. | 
| 742 |  |  |  |  |  |  |  | 
| 743 |  |  |  |  |  |  | This function loads, sets, and returns an RSCU table. After it is run | 
| 744 |  |  |  |  |  |  | the accessor L<rscutable|/rscutable> will return the hash reference that | 
| 745 |  |  |  |  |  |  | represents the RSCU table. | 
| 746 |  |  |  |  |  |  |  | 
| 747 |  |  |  |  |  |  | If no path is provided, the configuration directory /codon_tables is checked for | 
| 748 |  |  |  |  |  |  | tables that match the provided organism name. If there is no table in that | 
| 749 |  |  |  |  |  |  | directory, a warning will appear and the flat RSCU table will be used. | 
| 750 |  |  |  |  |  |  |  | 
| 751 |  |  |  |  |  |  | Any RSCU table that is missing a definition for a codon will cause a warning to | 
| 752 |  |  |  |  |  |  | be issued. The table format for RSCU tables is | 
| 753 |  |  |  |  |  |  |  | 
| 754 |  |  |  |  |  |  | # Saccharomyces cerevisiae (Highly expressed genes) | 
| 755 |  |  |  |  |  |  | # Nucleic Acids Res 16, 8207-8211 (1988) | 
| 756 |  |  |  |  |  |  | {TTT} = 0.19 | 
| 757 |  |  |  |  |  |  | {TTC} = 1.81 | 
| 758 |  |  |  |  |  |  | {TTA} = 0.49 | 
| 759 |  |  |  |  |  |  | ... | 
| 760 |  |  |  |  |  |  |  | 
| 761 |  |  |  |  |  |  | See L<Sharp et al. 1986|http://www.ncbi.nlm.nih.gov/pubmed/3526280>. | 
| 762 |  |  |  |  |  |  |  | 
| 763 |  |  |  |  |  |  | =cut | 
| 764 |  |  |  |  |  |  |  | 
| 765 |  |  |  |  |  |  | sub set_rscutable | 
| 766 |  |  |  |  |  |  | { | 
| 767 | 7 |  |  | 7 | 1 | 27 | my ($self, @args) = @_; | 
| 768 | 7 |  |  |  |  | 44 | my ($orgname, $rscu_path) | 
| 769 |  |  |  |  |  |  | = $self->_rearrange([qw( | 
| 770 |  |  |  |  |  |  | organism_name | 
| 771 |  |  |  |  |  |  | rscu_path)], @args); | 
| 772 |  |  |  |  |  |  |  | 
| 773 | 7 | 50 |  |  |  | 213 | $self->throw("No organsim name provided") unless ($orgname); | 
| 774 | 7 |  |  |  |  | 22 | $self->{organism} = $orgname; | 
| 775 |  |  |  |  |  |  |  | 
| 776 | 7 | 50 | 33 |  |  | 181 | $self->throw("$rscu_path does not exist") | 
| 777 |  |  |  |  |  |  | if ($rscu_path && ! -e $rscu_path); | 
| 778 |  |  |  |  |  |  |  | 
| 779 | 7 | 50 |  |  |  | 47 | if (! $rscu_path) | 
| 780 |  |  |  |  |  |  | { | 
| 781 | 0 |  |  |  |  | 0 | $rscu_path = $self->{codon_path} . $orgname . ".rscu"; | 
| 782 | 0 | 0 |  |  |  | 0 | if (! -e $rscu_path) | 
| 783 |  |  |  |  |  |  | { | 
| 784 | 0 |  |  |  |  | 0 | warn "No RSCU table for $orgname found. Using Flat values\n"; | 
| 785 | 0 |  |  |  |  | 0 | $rscu_path = $self->{codon_path} . "Flat.rscu"; | 
| 786 |  |  |  |  |  |  | } | 
| 787 |  |  |  |  |  |  | } | 
| 788 |  |  |  |  |  |  |  | 
| 789 | 7 |  |  |  |  | 34 | $self->{rscutable} = _parse_codon_file($rscu_path); | 
| 790 | 7 |  |  |  |  | 29 | return $self->{rscutable}; | 
| 791 |  |  |  |  |  |  | } | 
| 792 |  |  |  |  |  |  |  | 
| 793 |  |  |  |  |  |  | =head2 set_organism | 
| 794 |  |  |  |  |  |  |  | 
| 795 |  |  |  |  |  |  | # load both codon tables and RSCU tables simultaneously | 
| 796 |  |  |  |  |  |  | $GD->set_organism(-organism_name => "yeast"); | 
| 797 |  |  |  |  |  |  |  | 
| 798 |  |  |  |  |  |  | # with arguments | 
| 799 |  |  |  |  |  |  | $GD->set_organism(-organism_name => "custom", | 
| 800 |  |  |  |  |  |  | -table_path => "/path/to/table.ct", | 
| 801 |  |  |  |  |  |  | -rscu_path => "/path/to/table.rscu"); | 
| 802 |  |  |  |  |  |  |  | 
| 803 |  |  |  |  |  |  |  | 
| 804 |  |  |  |  |  |  | The -organism_name argument is required. | 
| 805 |  |  |  |  |  |  |  | 
| 806 |  |  |  |  |  |  | This function is just a shortcut; it runs L<set_codontable/set_codontable> and | 
| 807 |  |  |  |  |  |  | L<set_rscutable/set_rscutable>. See those functions for details. | 
| 808 |  |  |  |  |  |  |  | 
| 809 |  |  |  |  |  |  | =cut | 
| 810 |  |  |  |  |  |  |  | 
| 811 |  |  |  |  |  |  | sub set_organism | 
| 812 |  |  |  |  |  |  | { | 
| 813 | 7 |  |  | 7 | 1 | 92 | my ($self, @args) = @_; | 
| 814 |  |  |  |  |  |  |  | 
| 815 | 7 |  |  |  |  | 101 | my ($orgname, $table_path, $rscu_path) | 
| 816 |  |  |  |  |  |  | = $self->_rearrange([qw( | 
| 817 |  |  |  |  |  |  | organism_name | 
| 818 |  |  |  |  |  |  | table_path | 
| 819 |  |  |  |  |  |  | rscu_path)], @args); | 
| 820 |  |  |  |  |  |  |  | 
| 821 | 7 | 50 |  |  |  | 311 | $self->throw("No organsim name provided") unless ($orgname); | 
| 822 | 7 |  |  |  |  | 20 | $self->{organism} = $orgname; | 
| 823 |  |  |  |  |  |  |  | 
| 824 | 7 |  |  |  |  | 50 | $self->set_codontable(-organism_name => $orgname, -table_path => $table_path); | 
| 825 | 7 |  |  |  |  | 54 | $self->set_rscutable(-organism_name => $orgname, -rscu_path=> $rscu_path); | 
| 826 |  |  |  |  |  |  |  | 
| 827 | 7 |  |  |  |  | 32 | return; | 
| 828 |  |  |  |  |  |  | } | 
| 829 |  |  |  |  |  |  |  | 
| 830 |  |  |  |  |  |  | =head2 codon_count | 
| 831 |  |  |  |  |  |  |  | 
| 832 |  |  |  |  |  |  | # count the codons in a list of sequences | 
| 833 |  |  |  |  |  |  | my $tally = $GD->codon_count(-input => \@sequences); | 
| 834 |  |  |  |  |  |  |  | 
| 835 |  |  |  |  |  |  | # add a gene to an existing codon count | 
| 836 |  |  |  |  |  |  | $tally = $GD->codon_count(-input => $sequence, | 
| 837 |  |  |  |  |  |  | -count => $tally); | 
| 838 |  |  |  |  |  |  |  | 
| 839 |  |  |  |  |  |  | # add a list of Bio::Seq objects to an existing codon count | 
| 840 |  |  |  |  |  |  | $tally = $GD->codon_count(-input => \@seqobjects, | 
| 841 |  |  |  |  |  |  | -count => $tally); | 
| 842 |  |  |  |  |  |  |  | 
| 843 |  |  |  |  |  |  | The -input argument is required and will take a string variable, a Bio::Seq | 
| 844 |  |  |  |  |  |  | object, a Bio::SeqFeatureI object, or a reference to an array full of any | 
| 845 |  |  |  |  |  |  | combination of those things. | 
| 846 |  |  |  |  |  |  |  | 
| 847 |  |  |  |  |  |  | The codon_count function takes a set of sequences and counts how often each | 
| 848 |  |  |  |  |  |  | codon appears in them. It returns a hash reference where the keys are upper case | 
| 849 |  |  |  |  |  |  | nucleotide codons and the values are integers. If you pass a hash reference | 
| 850 |  |  |  |  |  |  | containing codon counts with the -count argument, new counts will be added to | 
| 851 |  |  |  |  |  |  | the old values. | 
| 852 |  |  |  |  |  |  |  | 
| 853 |  |  |  |  |  |  | This function will warn you if non nucleotide codons are found. | 
| 854 |  |  |  |  |  |  |  | 
| 855 |  |  |  |  |  |  | TODO: what about ambiguous codons? | 
| 856 |  |  |  |  |  |  |  | 
| 857 |  |  |  |  |  |  | =cut | 
| 858 |  |  |  |  |  |  |  | 
| 859 |  |  |  |  |  |  | sub codon_count | 
| 860 |  |  |  |  |  |  | { | 
| 861 | 3 |  |  | 3 | 1 | 3912 | my ($self, @args) = @_; | 
| 862 |  |  |  |  |  |  |  | 
| 863 | 3 |  |  |  |  | 19 | my ($input, $count) = $self->_rearrange([qw(input count)], @args); | 
| 864 |  |  |  |  |  |  |  | 
| 865 | 3 | 50 |  |  |  | 51 | $self->throw("no codon table has been defined") | 
| 866 |  |  |  |  |  |  | unless $self->{codontable}; | 
| 867 |  |  |  |  |  |  |  | 
| 868 | 3 | 50 |  |  |  | 11 | $self->throw("no sequences were provided to count") | 
| 869 |  |  |  |  |  |  | unless $input; | 
| 870 |  |  |  |  |  |  |  | 
| 871 | 3 |  |  |  |  | 11 | my $list = $self->_stripdown($input, 'ARRAY', 0); | 
| 872 |  |  |  |  |  |  |  | 
| 873 | 3 |  |  |  |  | 18 | my $cod_count = _codon_count($list, | 
| 874 |  |  |  |  |  |  | $self->{codontable}, | 
| 875 |  |  |  |  |  |  | $count); | 
| 876 |  |  |  |  |  |  |  | 
| 877 | 3 | 50 |  |  |  | 14 | warn("There are bad codons in codon count\n") if (exists $cod_count->{XXX}); | 
| 878 |  |  |  |  |  |  |  | 
| 879 | 3 |  |  |  |  | 16 | return $cod_count; | 
| 880 |  |  |  |  |  |  | } | 
| 881 |  |  |  |  |  |  |  | 
| 882 |  |  |  |  |  |  | =head2 generate_RSCU_table | 
| 883 |  |  |  |  |  |  |  | 
| 884 |  |  |  |  |  |  | my $rscu_t = $GD->generate_RSCU_table(-sequences => \@list_of_sequences); | 
| 885 |  |  |  |  |  |  |  | 
| 886 |  |  |  |  |  |  | The -sequences argument is required and will take a string variable, a Bio::Seq | 
| 887 |  |  |  |  |  |  | object, a Bio::SeqFeatureI object, or a reference to an array full of any | 
| 888 |  |  |  |  |  |  | combination of those things. | 
| 889 |  |  |  |  |  |  |  | 
| 890 |  |  |  |  |  |  | The generate_RSCU_table function takes a set of sequences, counts how often each | 
| 891 |  |  |  |  |  |  | codon appears, and returns an RSCU table as a hash reference where the keys are | 
| 892 |  |  |  |  |  |  | upper case nucleotide codons and the values are floats. | 
| 893 |  |  |  |  |  |  |  | 
| 894 |  |  |  |  |  |  | See L<Sharp et al. 1986|http://www.ncbi.nlm.nih.gov/pubmed/3526280>. | 
| 895 |  |  |  |  |  |  |  | 
| 896 |  |  |  |  |  |  | =cut | 
| 897 |  |  |  |  |  |  |  | 
| 898 |  |  |  |  |  |  | sub generate_RSCU_table | 
| 899 |  |  |  |  |  |  | { | 
| 900 | 1 |  |  | 1 | 1 | 1742 | my ($self, @args) = @_; | 
| 901 |  |  |  |  |  |  |  | 
| 902 | 1 |  |  |  |  | 7 | my ($seqobjs) = $self->_rearrange([qw(sequences)], @args); | 
| 903 |  |  |  |  |  |  |  | 
| 904 | 1 | 50 |  |  |  | 11 | $self->throw("no codon table has been defined") | 
| 905 |  |  |  |  |  |  | unless $self->{codontable}; | 
| 906 |  |  |  |  |  |  |  | 
| 907 | 1 | 50 |  |  |  | 6 | $self->throw("Sequence set must be provided") | 
| 908 |  |  |  |  |  |  | unless ($seqobjs); | 
| 909 |  |  |  |  |  |  |  | 
| 910 | 1 |  |  |  |  | 5 | return _generate_RSCU_table( | 
| 911 |  |  |  |  |  |  | $self->codon_count(-input => $seqobjs), | 
| 912 |  |  |  |  |  |  | $self->{codontable}, | 
| 913 |  |  |  |  |  |  | $self->{reversecodontable} | 
| 914 |  |  |  |  |  |  | ); | 
| 915 |  |  |  |  |  |  | } | 
| 916 |  |  |  |  |  |  |  | 
| 917 |  |  |  |  |  |  | =head2 generate_codon_report | 
| 918 |  |  |  |  |  |  |  | 
| 919 |  |  |  |  |  |  | my $report = $GD->generate_codon_report(-sequences => \@list_of_sequences); | 
| 920 |  |  |  |  |  |  |  | 
| 921 |  |  |  |  |  |  | The report will have the format | 
| 922 |  |  |  |  |  |  |  | 
| 923 |  |  |  |  |  |  | TTT (F) 12800 0.74 | 
| 924 |  |  |  |  |  |  | TTC (F) 21837 1.26 | 
| 925 |  |  |  |  |  |  | TTA (L)  4859 0.31 | 
| 926 |  |  |  |  |  |  | TTG (L) 18806 1.22 | 
| 927 |  |  |  |  |  |  |  | 
| 928 |  |  |  |  |  |  | where the first column in each group is the codon, the second column is the one | 
| 929 |  |  |  |  |  |  | letter amino acid abbreviation in parentheses, the third column is the number of | 
| 930 |  |  |  |  |  |  | times that codon has been seen, and the fourth column is the RSCU value for that | 
| 931 |  |  |  |  |  |  | codon. | 
| 932 |  |  |  |  |  |  |  | 
| 933 |  |  |  |  |  |  | This report comes in a 4x4 layout, as would a standard genetic code table in a | 
| 934 |  |  |  |  |  |  | textbook. | 
| 935 |  |  |  |  |  |  |  | 
| 936 |  |  |  |  |  |  | NO TEST | 
| 937 |  |  |  |  |  |  |  | 
| 938 |  |  |  |  |  |  | =cut | 
| 939 |  |  |  |  |  |  |  | 
| 940 |  |  |  |  |  |  | sub generate_codon_report | 
| 941 |  |  |  |  |  |  | { | 
| 942 | 0 |  |  | 0 | 1 | 0 | my ($self, @args) = @_; | 
| 943 |  |  |  |  |  |  |  | 
| 944 | 0 |  |  |  |  | 0 | my ($seqobjs) = $self->_rearrange([qw(sequences)], @args); | 
| 945 |  |  |  |  |  |  |  | 
| 946 | 0 | 0 |  |  |  | 0 | $self->throw("no codon table has been defined") | 
| 947 |  |  |  |  |  |  | unless $self->{codontable}; | 
| 948 |  |  |  |  |  |  |  | 
| 949 | 0 | 0 |  |  |  | 0 | $self->throw("Sequence set must be provided") | 
| 950 |  |  |  |  |  |  | unless ($seqobjs); | 
| 951 |  |  |  |  |  |  |  | 
| 952 | 0 |  |  |  |  | 0 | my $count_t = $self->codon_count(-input => $seqobjs); | 
| 953 |  |  |  |  |  |  |  | 
| 954 | 0 |  |  |  |  | 0 | my $rscu_t = _generate_RSCU_table( | 
| 955 |  |  |  |  |  |  | $count_t, | 
| 956 |  |  |  |  |  |  | $self->{codontable}, | 
| 957 |  |  |  |  |  |  | $self->{reversecodontable} | 
| 958 |  |  |  |  |  |  | ); | 
| 959 |  |  |  |  |  |  |  | 
| 960 | 0 |  |  |  |  | 0 | my $report = _generate_codon_report( | 
| 961 |  |  |  |  |  |  | $count_t, | 
| 962 |  |  |  |  |  |  | $self->{codontable}, | 
| 963 |  |  |  |  |  |  | $rscu_t | 
| 964 |  |  |  |  |  |  | ); | 
| 965 |  |  |  |  |  |  |  | 
| 966 | 0 |  |  |  |  | 0 | return $report; | 
| 967 |  |  |  |  |  |  | } | 
| 968 |  |  |  |  |  |  |  | 
| 969 |  |  |  |  |  |  | =head2 generate_RSCU_file | 
| 970 |  |  |  |  |  |  |  | 
| 971 |  |  |  |  |  |  | my $contents = $GD->generate_RSCU_file( | 
| 972 |  |  |  |  |  |  | -sequences => \@seqs, | 
| 973 |  |  |  |  |  |  | -comments => ["Got these codons from mice"] | 
| 974 |  |  |  |  |  |  | ); | 
| 975 |  |  |  |  |  |  | open (my $OUT, '>', '/path/to/cods') || die "can't write to /path/to/cods"; | 
| 976 |  |  |  |  |  |  | print $OUT $contents; | 
| 977 |  |  |  |  |  |  | close $OUT; | 
| 978 |  |  |  |  |  |  |  | 
| 979 |  |  |  |  |  |  | This function generates a string that can be written to file to serve as a | 
| 980 |  |  |  |  |  |  | GeneDesign RSCU table. Provide a set of sequences and an optional array | 
| 981 |  |  |  |  |  |  | reference of comments to prepend to the file. | 
| 982 |  |  |  |  |  |  |  | 
| 983 |  |  |  |  |  |  | The file will have the format | 
| 984 |  |  |  |  |  |  | # Comment 1 | 
| 985 |  |  |  |  |  |  | # ... | 
| 986 |  |  |  |  |  |  | # Comment n | 
| 987 |  |  |  |  |  |  | {TTT} = 0.19 | 
| 988 |  |  |  |  |  |  | {TTC} = 1.81 | 
| 989 |  |  |  |  |  |  | ... | 
| 990 |  |  |  |  |  |  |  | 
| 991 |  |  |  |  |  |  | NO TEST | 
| 992 |  |  |  |  |  |  |  | 
| 993 |  |  |  |  |  |  | =cut | 
| 994 |  |  |  |  |  |  |  | 
| 995 |  |  |  |  |  |  | sub generate_RSCU_file | 
| 996 |  |  |  |  |  |  | { | 
| 997 | 0 |  |  | 0 | 1 | 0 | my ($self, @args) = @_; | 
| 998 |  |  |  |  |  |  |  | 
| 999 | 0 |  |  |  |  | 0 | my ($seqobjs, $comments) = $self->_rearrange([qw(sequences comments)], @args); | 
| 1000 |  |  |  |  |  |  |  | 
| 1001 | 0 | 0 |  |  |  | 0 | $self->throw('No codon table has been defined') | 
| 1002 |  |  |  |  |  |  | unless $self->{codontable}; | 
| 1003 |  |  |  |  |  |  |  | 
| 1004 | 0 | 0 |  |  |  | 0 | $self->throw('Sequence set must be provided') | 
| 1005 |  |  |  |  |  |  | unless ($seqobjs); | 
| 1006 |  |  |  |  |  |  |  | 
| 1007 | 0 | 0 |  |  |  | 0 | $self->throw('Comment argument must be array reference') | 
| 1008 |  |  |  |  |  |  | unless ref($comments) eq 'ARRAY'; | 
| 1009 |  |  |  |  |  |  |  | 
| 1010 | 0 |  |  |  |  | 0 | my $count_t = $self->codon_count(-input => $seqobjs); | 
| 1011 |  |  |  |  |  |  |  | 
| 1012 | 0 |  |  |  |  | 0 | my $rscu_t = _generate_RSCU_table( | 
| 1013 |  |  |  |  |  |  | $count_t, | 
| 1014 |  |  |  |  |  |  | $self->{codontable}, | 
| 1015 |  |  |  |  |  |  | $self->{reversecodontable} | 
| 1016 |  |  |  |  |  |  | ); | 
| 1017 |  |  |  |  |  |  |  | 
| 1018 | 0 |  |  |  |  | 0 | my $report = _generate_codon_file( | 
| 1019 |  |  |  |  |  |  | $rscu_t, | 
| 1020 |  |  |  |  |  |  | $self->{reversecodontable}, | 
| 1021 |  |  |  |  |  |  | $comments | 
| 1022 |  |  |  |  |  |  | ); | 
| 1023 |  |  |  |  |  |  |  | 
| 1024 | 0 |  |  |  |  | 0 | return $report; | 
| 1025 |  |  |  |  |  |  | } | 
| 1026 |  |  |  |  |  |  |  | 
| 1027 |  |  |  |  |  |  | =head2 list_enzyme_sets | 
| 1028 |  |  |  |  |  |  |  | 
| 1029 |  |  |  |  |  |  | my @available_enzlists = $GD->list_enzyme_sets(); | 
| 1030 |  |  |  |  |  |  | # @available_enzlists == ('standard_and_IIB', 'blunts', 'IIB', 'nonpal', ...) | 
| 1031 |  |  |  |  |  |  |  | 
| 1032 |  |  |  |  |  |  | Returns an array containing the names of every restriction enzyme recognition | 
| 1033 |  |  |  |  |  |  | list GeneDesign knows about. | 
| 1034 |  |  |  |  |  |  |  | 
| 1035 |  |  |  |  |  |  | =cut | 
| 1036 |  |  |  |  |  |  |  | 
| 1037 |  |  |  |  |  |  | sub list_enzyme_sets | 
| 1038 |  |  |  |  |  |  | { | 
| 1039 | 0 |  |  | 0 | 1 | 0 | my ($self) = @_; | 
| 1040 | 0 |  |  |  |  | 0 | my $epath = $self->{conf} . 'enzymes/'; | 
| 1041 | 0 |  |  |  |  | 0 | my @sets = (); | 
| 1042 | 0 | 0 |  |  |  | 0 | opendir (my $ENZDIR, $epath) || $self->throw("can't opendir $epath"); | 
| 1043 | 0 |  |  |  |  | 0 | foreach my $list (readdir($ENZDIR)) | 
| 1044 |  |  |  |  |  |  | { | 
| 1045 | 0 |  |  |  |  | 0 | my $name = basename($list); | 
| 1046 | 0 |  |  |  |  | 0 | $name =~ s{\.[^.]+$}{}x; | 
| 1047 | 0 | 0 | 0 |  |  | 0 | next if ($name eq q{.} || $name eq q{..}); | 
| 1048 | 0 |  |  |  |  | 0 | push @sets, $name; | 
| 1049 |  |  |  |  |  |  | } | 
| 1050 | 0 |  |  |  |  | 0 | closedir($ENZDIR); | 
| 1051 | 0 |  |  |  |  | 0 | return @sets; | 
| 1052 |  |  |  |  |  |  | } | 
| 1053 |  |  |  |  |  |  |  | 
| 1054 |  |  |  |  |  |  | =head2 set_restriction_enzymes | 
| 1055 |  |  |  |  |  |  |  | 
| 1056 |  |  |  |  |  |  | $GD->set_restriction_enzymes(-enzyme_set => 'blunts'); | 
| 1057 |  |  |  |  |  |  |  | 
| 1058 |  |  |  |  |  |  | or | 
| 1059 |  |  |  |  |  |  |  | 
| 1060 |  |  |  |  |  |  | $GD->set_restriction_enzymes(-list_path => '/path/to/enzyme_file'); | 
| 1061 |  |  |  |  |  |  |  | 
| 1062 |  |  |  |  |  |  | or even | 
| 1063 |  |  |  |  |  |  |  | 
| 1064 |  |  |  |  |  |  | $GD->set_restriction_enzymes( | 
| 1065 |  |  |  |  |  |  | -list_path => '/path/to/enzyme_file', | 
| 1066 |  |  |  |  |  |  | -enzyme_set => 'custom_enzymes' | 
| 1067 |  |  |  |  |  |  | ); | 
| 1068 |  |  |  |  |  |  |  | 
| 1069 |  |  |  |  |  |  | All will return a hash structure full of restriction enzymes. | 
| 1070 |  |  |  |  |  |  |  | 
| 1071 |  |  |  |  |  |  | Tell GeneDesign which set of restriction enzymes to use. If you provide only a | 
| 1072 |  |  |  |  |  |  | set name with the -enzyme_set flag, GeneDesign will check its config path for a | 
| 1073 |  |  |  |  |  |  | matching file. Otherwise you must provide a path to a file (and optionally a | 
| 1074 |  |  |  |  |  |  | name for the set). | 
| 1075 |  |  |  |  |  |  |  | 
| 1076 |  |  |  |  |  |  | =cut | 
| 1077 |  |  |  |  |  |  |  | 
| 1078 |  |  |  |  |  |  | sub set_restriction_enzymes | 
| 1079 |  |  |  |  |  |  | { | 
| 1080 | 3 |  |  | 3 | 1 | 675 | my ($self, @args) = @_; | 
| 1081 |  |  |  |  |  |  |  | 
| 1082 | 3 |  |  |  |  | 18 | my ($set_name, $set_path) | 
| 1083 |  |  |  |  |  |  | = $self->_rearrange([qw(enzyme_set list_path)], @args); | 
| 1084 |  |  |  |  |  |  |  | 
| 1085 | 3 |  |  |  |  | 70 | my $def = 'all_enzymes'; | 
| 1086 | 3 |  |  |  |  | 13 | my $defpath = $self->{conf} . 'enzymes/' . $def; | 
| 1087 | 3 |  |  |  |  | 22 | $self->{all_enzymes} = _define_sites($defpath); | 
| 1088 |  |  |  |  |  |  |  | 
| 1089 |  |  |  |  |  |  |  | 
| 1090 | 3 | 50 | 33 |  |  | 347 | if (! $set_name && ! $set_path) | 
|  |  | 50 | 33 |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 1091 |  |  |  |  |  |  | { | 
| 1092 | 0 |  |  |  |  | 0 | $self->{enzyme_set} = $self->{all_enzymes}; | 
| 1093 | 0 |  |  |  |  | 0 | $self->{enzyme_set_name} = $def; | 
| 1094 |  |  |  |  |  |  | } | 
| 1095 |  |  |  |  |  |  | elsif ($set_name && ! $set_path) | 
| 1096 |  |  |  |  |  |  | { | 
| 1097 | 0 |  |  |  |  | 0 | $set_path = $self->{conf} . "enzymes/$set_name"; | 
| 1098 | 0 | 0 |  |  |  | 0 | $self->throw("No enzyme set found that matches $set_name\n") | 
| 1099 |  |  |  |  |  |  | unless (-e $set_path); | 
| 1100 | 0 |  |  |  |  | 0 | my $list = _parse_enzyme_list($set_path); | 
| 1101 | 0 |  |  |  |  | 0 | my $set = {}; | 
| 1102 | 0 |  |  |  |  | 0 | foreach my $id (@$list) | 
| 1103 |  |  |  |  |  |  | { | 
| 1104 | 0 | 0 |  |  |  | 0 | if (! exists $self->{all_enzymes}->{$id}) | 
| 1105 |  |  |  |  |  |  | { | 
| 1106 | 0 |  |  |  |  | 0 | warn "$id was not recognized as an enzyme... skipping\n"; | 
| 1107 | 0 |  |  |  |  | 0 | next; | 
| 1108 |  |  |  |  |  |  | } | 
| 1109 | 0 |  |  |  |  | 0 | $set->{$id} = $self->{all_enzymes}->{$id}; | 
| 1110 |  |  |  |  |  |  | } | 
| 1111 | 0 |  |  |  |  | 0 | $self->{enzyme_set} = $set; | 
| 1112 | 0 |  |  |  |  | 0 | $self->{enzyme_set_name} = $set_name; | 
| 1113 |  |  |  |  |  |  | } | 
| 1114 |  |  |  |  |  |  | elsif ($set_path) | 
| 1115 |  |  |  |  |  |  | { | 
| 1116 | 3 | 50 |  |  |  | 139 | $self->throw("No enzyme list found at $set_path\n") | 
| 1117 |  |  |  |  |  |  | unless (-e $set_path); | 
| 1118 | 3 |  |  |  |  | 23 | my $list = _parse_enzyme_list($set_path); | 
| 1119 | 3 |  |  |  |  | 10 | my $set = {}; | 
| 1120 | 3 |  |  |  |  | 13 | foreach my $id (@$list) | 
| 1121 |  |  |  |  |  |  | { | 
| 1122 | 87 | 50 |  |  |  | 309 | if (! exists $self->{all_enzymes}->{$id}) | 
| 1123 |  |  |  |  |  |  | { | 
| 1124 | 0 |  |  |  |  | 0 | warn "$id was not recognized as an enzyme... skipping\n"; | 
| 1125 | 0 |  |  |  |  | 0 | next; | 
| 1126 |  |  |  |  |  |  | } | 
| 1127 | 87 |  |  |  |  | 295 | $set->{$id} = $self->{all_enzymes}->{$id}; | 
| 1128 |  |  |  |  |  |  | } | 
| 1129 | 3 |  |  |  |  | 15 | $self->{enzyme_set} = $set; | 
| 1130 | 3 |  | 33 |  |  | 250 | $self->{enzyme_set_name} = $set_name  || basename($set_path); | 
| 1131 |  |  |  |  |  |  | } | 
| 1132 | 3 |  |  |  |  | 24 | return $self->{enzyme_set}; | 
| 1133 |  |  |  |  |  |  | } | 
| 1134 |  |  |  |  |  |  |  | 
| 1135 |  |  |  |  |  |  | =head2 remove_from_enzyme_set | 
| 1136 |  |  |  |  |  |  |  | 
| 1137 |  |  |  |  |  |  | Removes a subset of enzymes from an enzyme list. This only happens in memory, no | 
| 1138 |  |  |  |  |  |  | files will be altered. The argument is an array reference of enzyme names. | 
| 1139 |  |  |  |  |  |  |  | 
| 1140 |  |  |  |  |  |  | $GD->set_restriction_enzymes(-enzyme_set => 'blunts'); | 
| 1141 |  |  |  |  |  |  | $GD->remove_from_enzyme_set(-enzymes => ['SmaI', 'MlyI']); | 
| 1142 |  |  |  |  |  |  |  | 
| 1143 |  |  |  |  |  |  | NO TEST | 
| 1144 |  |  |  |  |  |  |  | 
| 1145 |  |  |  |  |  |  | =cut | 
| 1146 |  |  |  |  |  |  |  | 
| 1147 |  |  |  |  |  |  | sub remove_from_enzyme_set | 
| 1148 |  |  |  |  |  |  | { | 
| 1149 | 0 |  |  | 0 | 1 | 0 | my ($self, @args) = @_; | 
| 1150 |  |  |  |  |  |  |  | 
| 1151 | 0 |  |  |  |  | 0 | my ($enzes) = $self->_rearrange([qw(enzymes)], @args); | 
| 1152 |  |  |  |  |  |  |  | 
| 1153 | 0 | 0 |  |  |  | 0 | return unless ($enzes); | 
| 1154 |  |  |  |  |  |  |  | 
| 1155 | 0 | 0 |  |  |  | 0 | $self->throw("no enzyme set has been defined") | 
| 1156 |  |  |  |  |  |  | unless $self->{enzyme_set}; | 
| 1157 |  |  |  |  |  |  |  | 
| 1158 | 0 |  |  |  |  | 0 | my @toremove = (); | 
| 1159 | 0 |  |  |  |  | 0 | my $ref = ref ($enzes); | 
| 1160 | 0 | 0 |  |  |  | 0 | if ($ref eq "ARRAY") | 
| 1161 |  |  |  |  |  |  | { | 
| 1162 | 0 |  |  |  |  | 0 | push @toremove, @$enzes; | 
| 1163 |  |  |  |  |  |  | } | 
| 1164 |  |  |  |  |  |  | else | 
| 1165 |  |  |  |  |  |  | { | 
| 1166 | 0 |  |  |  |  | 0 | push @toremove, $enzes; | 
| 1167 |  |  |  |  |  |  | } | 
| 1168 | 0 |  |  |  |  | 0 | foreach my $enz (@toremove) | 
| 1169 |  |  |  |  |  |  | { | 
| 1170 | 0 |  |  |  |  | 0 | my $eid = $enz; | 
| 1171 | 0 |  |  |  |  | 0 | my $eref = ref $enz; | 
| 1172 | 0 | 0 |  |  |  | 0 | if ($eref) | 
| 1173 |  |  |  |  |  |  | { | 
| 1174 | 0 | 0 |  |  |  | 0 | $self->throw("object in enzymes is not a " . | 
| 1175 |  |  |  |  |  |  | "Bio::GeneDesign::RestrictionEnzyme object") | 
| 1176 |  |  |  |  |  |  | unless ($enz->isa("Bio::GeneDesign::RestrictionEnzyme")); | 
| 1177 | 0 |  |  |  |  | 0 | $eid = $enz->id; | 
| 1178 |  |  |  |  |  |  | } | 
| 1179 |  |  |  |  |  |  |  | 
| 1180 | 0 | 0 |  |  |  | 0 | if (exists $self->{enzyme_set}->{$eid}) | 
| 1181 |  |  |  |  |  |  | { | 
| 1182 | 0 |  |  |  |  | 0 | delete $self->{enzyme_set}->{$eid}; | 
| 1183 |  |  |  |  |  |  | } | 
| 1184 |  |  |  |  |  |  | } | 
| 1185 | 0 |  |  |  |  | 0 | return; | 
| 1186 |  |  |  |  |  |  | } | 
| 1187 |  |  |  |  |  |  |  | 
| 1188 |  |  |  |  |  |  | =head2 add_to_enzyme_set | 
| 1189 |  |  |  |  |  |  |  | 
| 1190 |  |  |  |  |  |  | Adds a subset of enzymes to an enzyme list. This only happens in memory, no | 
| 1191 |  |  |  |  |  |  | files will be altered. The argument is an array reference of RestrictionEnzyme | 
| 1192 |  |  |  |  |  |  | objects. | 
| 1193 |  |  |  |  |  |  |  | 
| 1194 |  |  |  |  |  |  | #Grab all known enzymes | 
| 1195 |  |  |  |  |  |  | my $allenz = $GD->set_restriction_enzymes(-enzyme_set => 'standard_and_IIB'); | 
| 1196 |  |  |  |  |  |  |  | 
| 1197 |  |  |  |  |  |  | #Pull out a few | 
| 1198 |  |  |  |  |  |  | my @keepers = ($allenz->{'BmrI'}, $allenz->{'HphI'}); | 
| 1199 |  |  |  |  |  |  |  | 
| 1200 |  |  |  |  |  |  | #Give GeneDesign the enzyme set you want | 
| 1201 |  |  |  |  |  |  | $GD->set_restriction_enzymes(-enzyme_set => 'blunts'); | 
| 1202 |  |  |  |  |  |  |  | 
| 1203 |  |  |  |  |  |  | #Add the few enzymes it didn't have before | 
| 1204 |  |  |  |  |  |  | $GD->add_to_enzyme_set(-enzymes => \@keepers); | 
| 1205 |  |  |  |  |  |  |  | 
| 1206 |  |  |  |  |  |  | NO TEST | 
| 1207 |  |  |  |  |  |  |  | 
| 1208 |  |  |  |  |  |  | =cut | 
| 1209 |  |  |  |  |  |  |  | 
| 1210 |  |  |  |  |  |  | sub add_to_enzyme_set | 
| 1211 |  |  |  |  |  |  | { | 
| 1212 | 0 |  |  | 0 | 1 | 0 | my ($self, @args) = @_; | 
| 1213 |  |  |  |  |  |  |  | 
| 1214 | 0 |  |  |  |  | 0 | my ($enzes) = $self->_rearrange([qw(enzymes)], @args); | 
| 1215 |  |  |  |  |  |  |  | 
| 1216 | 0 | 0 |  |  |  | 0 | return unless ($enzes); | 
| 1217 |  |  |  |  |  |  |  | 
| 1218 | 0 | 0 |  |  |  | 0 | $self->throw("no enzyme set has been defined") | 
| 1219 |  |  |  |  |  |  | unless $self->{enzyme_set}; | 
| 1220 |  |  |  |  |  |  |  | 
| 1221 | 0 |  |  |  |  | 0 | my @toadds = (); | 
| 1222 | 0 |  |  |  |  | 0 | my $ref = ref ($enzes); | 
| 1223 | 0 | 0 |  |  |  | 0 | if ($ref eq "ARRAY") | 
| 1224 |  |  |  |  |  |  | { | 
| 1225 | 0 |  |  |  |  | 0 | push @toadds, @$enzes; | 
| 1226 |  |  |  |  |  |  | } | 
| 1227 |  |  |  |  |  |  | else | 
| 1228 |  |  |  |  |  |  | { | 
| 1229 | 0 |  |  |  |  | 0 | push @toadds, $enzes; | 
| 1230 |  |  |  |  |  |  | } | 
| 1231 | 0 |  |  |  |  | 0 | foreach my $enz (@toadds) | 
| 1232 |  |  |  |  |  |  | { | 
| 1233 | 0 | 0 |  |  |  | 0 | $self->throw("object in enzymes is not a " . | 
| 1234 |  |  |  |  |  |  | "Bio::GeneDesign::RestrictionEnzyme object") | 
| 1235 |  |  |  |  |  |  | unless ($enz->isa("Bio::GeneDesign::RestrictionEnzyme")); | 
| 1236 |  |  |  |  |  |  |  | 
| 1237 | 0 | 0 |  |  |  | 0 | next if (exists $self->{enzyme_set}->{$enz->id}); | 
| 1238 | 0 |  |  |  |  | 0 | $self->{enzyme_set}->{$enz->id} = $enz; | 
| 1239 |  |  |  |  |  |  | } | 
| 1240 | 0 |  |  |  |  | 0 | return; | 
| 1241 |  |  |  |  |  |  | } | 
| 1242 |  |  |  |  |  |  |  | 
| 1243 |  |  |  |  |  |  | =head2 restriction_status | 
| 1244 |  |  |  |  |  |  |  | 
| 1245 |  |  |  |  |  |  | =cut | 
| 1246 |  |  |  |  |  |  |  | 
| 1247 |  |  |  |  |  |  | sub restriction_status | 
| 1248 |  |  |  |  |  |  | { | 
| 1249 | 1 |  |  | 1 | 1 | 68837 | my ($self, @args) = @_; | 
| 1250 |  |  |  |  |  |  |  | 
| 1251 | 1 |  |  |  |  | 13 | my ($seq) = $self->_rearrange([qw(sequence)], @args); | 
| 1252 |  |  |  |  |  |  |  | 
| 1253 | 1 | 50 |  |  |  | 18 | $self->throw("no enzyme set has been defined") | 
| 1254 |  |  |  |  |  |  | unless $self->{enzyme_set}; | 
| 1255 |  |  |  |  |  |  |  | 
| 1256 | 1 | 50 |  |  |  | 6 | $self->throw("No arguments provided for set_restriction_enzymes") | 
| 1257 |  |  |  |  |  |  | unless ($seq); | 
| 1258 |  |  |  |  |  |  |  | 
| 1259 | 1 |  |  |  |  | 7 | my $str = $self->_stripdown($seq, q{}, 0); | 
| 1260 | 1 |  |  |  |  | 5 | my @reslist = values %{$self->{enzyme_set}}; | 
|  | 1 |  |  |  |  | 9 |  | 
| 1261 | 1 |  |  |  |  | 7 | return _define_site_status($str, \@reslist); | 
| 1262 |  |  |  |  |  |  | } | 
| 1263 |  |  |  |  |  |  |  | 
| 1264 |  |  |  |  |  |  | =head2 build_prefix_tree | 
| 1265 |  |  |  |  |  |  |  | 
| 1266 |  |  |  |  |  |  | Take an array reference of nucleotide sequences (they can be strings, Bio::Seq | 
| 1267 |  |  |  |  |  |  | objects, or Bio::GeneDesign::RestrictionEnzyme objects) and create a suffix | 
| 1268 |  |  |  |  |  |  | tree. If you add the peptide flag, the sequences will be ambiguously translated | 
| 1269 |  |  |  |  |  |  | before they are added to the suffix tree. Otherwise they will be ambiguously | 
| 1270 |  |  |  |  |  |  | transcribed. It will add the reverse complement of any non peptide sequence as | 
| 1271 |  |  |  |  |  |  | long as the reverse complement is different. | 
| 1272 |  |  |  |  |  |  |  | 
| 1273 |  |  |  |  |  |  | my $tree = $GD->build_prefix_tree(-input => ['GGATCC']); | 
| 1274 |  |  |  |  |  |  |  | 
| 1275 |  |  |  |  |  |  | my $ptree = $GD->build_prefix_tree( | 
| 1276 |  |  |  |  |  |  | -input => ['GGCCNNNNNGGCC'], | 
| 1277 |  |  |  |  |  |  | -peptide => 1 | 
| 1278 |  |  |  |  |  |  | ); | 
| 1279 |  |  |  |  |  |  |  | 
| 1280 |  |  |  |  |  |  | =cut | 
| 1281 |  |  |  |  |  |  |  | 
| 1282 |  |  |  |  |  |  | sub build_prefix_tree | 
| 1283 |  |  |  |  |  |  | { | 
| 1284 | 2 |  |  | 2 | 1 | 7013 | my ($self, @args) = @_; | 
| 1285 |  |  |  |  |  |  |  | 
| 1286 | 2 |  |  |  |  | 19 | my ($list, $pep) = $self->_rearrange([qw(input peptide)], @args); | 
| 1287 |  |  |  |  |  |  |  | 
| 1288 | 2 | 50 |  |  |  | 63 | $self->throw("no input provided") | 
| 1289 |  |  |  |  |  |  | unless ($list); | 
| 1290 |  |  |  |  |  |  |  | 
| 1291 | 2 |  |  |  |  | 16 | my $tree = Bio::GeneDesign::PrefixTree->new(); | 
| 1292 |  |  |  |  |  |  |  | 
| 1293 | 2 |  |  |  |  | 6 | foreach my $seq (@$list) | 
| 1294 |  |  |  |  |  |  | { | 
| 1295 | 10 |  |  |  |  | 23 | my $base = $seq; | 
| 1296 | 10 |  |  |  |  | 15 | my $id = $seq; | 
| 1297 | 10 |  |  |  |  | 25 | my $ref = ref($seq); | 
| 1298 | 10 | 50 |  |  |  | 24 | if ($ref) | 
| 1299 |  |  |  |  |  |  | { | 
| 1300 | 10 | 50 | 33 |  |  | 183 | $self->throw("object in input is not a Bio::Seq, Bio::SeqFeature, or " . | 
|  |  |  | 33 |  |  |  |  | 
| 1301 |  |  |  |  |  |  | "Bio::GeneDesign::RestrictionEnzyme object") | 
| 1302 |  |  |  |  |  |  | unless ($seq->isa("Bio::Seq") | 
| 1303 |  |  |  |  |  |  | || $seq->isa("Bio::SeqFeatureI") | 
| 1304 |  |  |  |  |  |  | || $seq->isa("Bio::GeneDesign::RestrictionEnzyme") | 
| 1305 |  |  |  |  |  |  | ); | 
| 1306 | 10 | 50 |  |  |  | 42 | $base = ref($seq->seq)  ? $seq->seq->seq  : $seq->seq; | 
| 1307 | 10 |  |  |  |  | 45 | $id = $seq->id; | 
| 1308 |  |  |  |  |  |  | } | 
| 1309 |  |  |  |  |  |  |  | 
| 1310 | 10 | 100 |  |  |  | 25 | if ($pep) | 
| 1311 |  |  |  |  |  |  | { | 
| 1312 | 4 | 50 |  |  |  | 17 | $self->throw('No codon table has been defined') | 
| 1313 |  |  |  |  |  |  | unless $self->{codontable}; | 
| 1314 |  |  |  |  |  |  |  | 
| 1315 | 4 |  |  |  |  | 22 | my @fpeptides = _amb_translation($base, $self->{codontable}, | 
| 1316 |  |  |  |  |  |  | 't', $self->{amb_trans_memo}); | 
| 1317 | 4 |  |  |  |  | 28 | $tree->add_prefix($_, $id, $base) foreach (@fpeptides); | 
| 1318 |  |  |  |  |  |  |  | 
| 1319 | 4 |  |  |  |  | 24 | my $esab = _complement($base, 1); | 
| 1320 | 4 |  |  |  |  | 13 | my $lagcheck = $esab; | 
| 1321 | 4 |  |  |  |  | 26 | while (substr($lagcheck, -1) eq "N") | 
| 1322 |  |  |  |  |  |  | { | 
| 1323 | 0 |  |  |  |  | 0 | $lagcheck = substr($lagcheck, 0, length($lagcheck) - 1); | 
| 1324 |  |  |  |  |  |  | } | 
| 1325 | 4 | 100 | 66 |  |  | 38 | if ($esab ne $base && $lagcheck eq $esab) | 
| 1326 |  |  |  |  |  |  | { | 
| 1327 | 2 |  |  |  |  | 14 | my @rpeptides = _amb_translation($esab, $self->{codontable}, | 
| 1328 |  |  |  |  |  |  | 't', $self->{amb_trans_memo}); | 
| 1329 | 2 |  |  |  |  | 30 | $tree->add_prefix($_, $id, $esab) foreach (@rpeptides); | 
| 1330 |  |  |  |  |  |  | } | 
| 1331 |  |  |  |  |  |  | } | 
| 1332 |  |  |  |  |  |  | else | 
| 1333 |  |  |  |  |  |  | { | 
| 1334 | 6 |  |  |  |  | 24 | my $fnucs = _amb_transcription($base); | 
| 1335 | 6 |  |  |  |  | 33 | $tree->add_prefix($_, $id, undef) foreach (@$fnucs); | 
| 1336 |  |  |  |  |  |  |  | 
| 1337 | 6 |  |  |  |  | 22 | my $esab = _complement($base, 1); | 
| 1338 | 6 | 100 |  |  |  | 31 | if ($esab ne $base) | 
| 1339 |  |  |  |  |  |  | { | 
| 1340 | 3 |  |  |  |  | 10 | my $rnucs = _amb_transcription($esab); | 
| 1341 | 3 |  |  |  |  | 18 | $tree->add_prefix($_, $id, undef) foreach (@$rnucs); | 
| 1342 |  |  |  |  |  |  | } | 
| 1343 |  |  |  |  |  |  | } | 
| 1344 |  |  |  |  |  |  | } | 
| 1345 | 2 |  |  |  |  | 12 | return $tree; | 
| 1346 |  |  |  |  |  |  | } | 
| 1347 |  |  |  |  |  |  |  | 
| 1348 |  |  |  |  |  |  | =head2 search_prefix_tree | 
| 1349 |  |  |  |  |  |  |  | 
| 1350 |  |  |  |  |  |  | Takes a suffix tree and a sequence and searches for results, which are returned | 
| 1351 |  |  |  |  |  |  | as in the Bio::GeneDesign::PrefixTree documentation. | 
| 1352 |  |  |  |  |  |  |  | 
| 1353 |  |  |  |  |  |  | my $hits = $GD->search_prefix_tree(-tree => $ptree, -sequence => $mygeneseq); | 
| 1354 |  |  |  |  |  |  |  | 
| 1355 |  |  |  |  |  |  | # @$hits = (['BamHI', 4, 'GGATCC', 'i hope this didn't pop up'], | 
| 1356 |  |  |  |  |  |  | #          ['OhnoI', 21, 'GGCCC', 'I hope these pop up'], | 
| 1357 |  |  |  |  |  |  | #          ['WoopsII', 21, 'GGCCC', 'I hope these pop up'] | 
| 1358 |  |  |  |  |  |  | #); | 
| 1359 |  |  |  |  |  |  |  | 
| 1360 |  |  |  |  |  |  | =cut | 
| 1361 |  |  |  |  |  |  |  | 
| 1362 |  |  |  |  |  |  | sub search_prefix_tree | 
| 1363 |  |  |  |  |  |  | { | 
| 1364 | 2 |  |  | 2 | 1 | 186216 | my ($self, @args) = @_; | 
| 1365 |  |  |  |  |  |  |  | 
| 1366 | 2 |  |  |  |  | 21 | my ($tree, $seq) = $self->_rearrange([qw(tree sequence)], @args); | 
| 1367 |  |  |  |  |  |  |  | 
| 1368 | 2 | 50 |  |  |  | 93 | $self->throw("no query sequence provided") | 
| 1369 |  |  |  |  |  |  | unless ($seq); | 
| 1370 |  |  |  |  |  |  |  | 
| 1371 | 2 | 50 |  |  |  | 9 | $self->throw("no suffix tree provided") | 
| 1372 |  |  |  |  |  |  | unless ($tree); | 
| 1373 |  |  |  |  |  |  |  | 
| 1374 | 2 | 50 |  |  |  | 17 | $self->throw("tree is not a Bio::GeneDesign::PrefixTree") | 
| 1375 |  |  |  |  |  |  | unless ($tree->isa("Bio::GeneDesign::PrefixTree")); | 
| 1376 |  |  |  |  |  |  |  | 
| 1377 | 2 |  |  |  |  | 13 | my $str = $self->_stripdown($seq, q{}, 0); | 
| 1378 |  |  |  |  |  |  |  | 
| 1379 | 2 |  |  |  |  | 16 | my @hits = $tree->find_prefixes($str); | 
| 1380 |  |  |  |  |  |  |  | 
| 1381 | 2 |  |  |  |  | 19 | return \@hits; | 
| 1382 |  |  |  |  |  |  | } | 
| 1383 |  |  |  |  |  |  |  | 
| 1384 |  |  |  |  |  |  | =head2 pattern_aligner | 
| 1385 |  |  |  |  |  |  |  | 
| 1386 |  |  |  |  |  |  | =cut | 
| 1387 |  |  |  |  |  |  |  | 
| 1388 |  |  |  |  |  |  | sub pattern_aligner | 
| 1389 |  |  |  |  |  |  | { | 
| 1390 | 3 |  |  | 3 | 1 | 5578 | my ($self, @args) = @_; | 
| 1391 |  |  |  |  |  |  |  | 
| 1392 | 3 |  |  |  |  | 21 | my ($seq, $pattern, $peptide, $re) | 
| 1393 |  |  |  |  |  |  | = $self->_rearrange([qw(sequence pattern peptide offset)], @args); | 
| 1394 |  |  |  |  |  |  |  | 
| 1395 | 3 | 50 |  |  |  | 100 | $self->throw("no codon table has been defined") | 
| 1396 |  |  |  |  |  |  | unless $self->{codontable}; | 
| 1397 |  |  |  |  |  |  |  | 
| 1398 | 3 | 50 |  |  |  | 9 | $self->throw("no nucleotide sequence provided") | 
| 1399 |  |  |  |  |  |  | unless $seq; | 
| 1400 |  |  |  |  |  |  |  | 
| 1401 | 3 |  | 100 |  |  | 12 | $re = $re || 0; | 
| 1402 |  |  |  |  |  |  |  | 
| 1403 | 3 |  |  |  |  | 12 | my $str = $self->_stripdown($seq, q{}, 1); | 
| 1404 |  |  |  |  |  |  |  | 
| 1405 | 3 |  | 33 |  |  | 20 | $peptide = $peptide || $self->translate(-sequence => $str); | 
| 1406 |  |  |  |  |  |  |  | 
| 1407 | 3 |  |  |  |  | 16 | my ($newpattern, $offset) = _pattern_aligner($str, | 
| 1408 |  |  |  |  |  |  | $pattern, | 
| 1409 |  |  |  |  |  |  | $peptide, | 
| 1410 |  |  |  |  |  |  | $self->{codontable}, | 
| 1411 |  |  |  |  |  |  | $self->{amb_trans_memo} | 
| 1412 |  |  |  |  |  |  | ); | 
| 1413 | 3 | 100 |  |  |  | 19 | return $re  ? ($newpattern, $offset)  : $newpattern; | 
| 1414 |  |  |  |  |  |  | } | 
| 1415 |  |  |  |  |  |  |  | 
| 1416 |  |  |  |  |  |  | =head2 pattern_adder | 
| 1417 |  |  |  |  |  |  |  | 
| 1418 |  |  |  |  |  |  | =cut | 
| 1419 |  |  |  |  |  |  |  | 
| 1420 |  |  |  |  |  |  | sub pattern_adder | 
| 1421 |  |  |  |  |  |  | { | 
| 1422 | 1 |  |  | 1 | 1 | 1917 | my ($self, @args) = @_; | 
| 1423 |  |  |  |  |  |  |  | 
| 1424 | 1 |  |  |  |  | 8 | my ($seq, $pattern) = $self->_rearrange([qw(sequence pattern)], @args); | 
| 1425 |  |  |  |  |  |  |  | 
| 1426 | 1 | 50 |  |  |  | 32 | $self->throw("no codon table has been defined") | 
| 1427 |  |  |  |  |  |  | unless $self->{codontable}; | 
| 1428 |  |  |  |  |  |  |  | 
| 1429 | 1 | 50 |  |  |  | 6 | $self->throw("no nucleotide sequence provided") | 
| 1430 |  |  |  |  |  |  | unless $seq; | 
| 1431 |  |  |  |  |  |  |  | 
| 1432 | 1 |  |  |  |  | 5 | my $str = $self->_stripdown($seq, q{}, 1); | 
| 1433 | 1 |  |  |  |  | 4 | my $pat = $self->_stripdown($pattern, q{}, 1); | 
| 1434 |  |  |  |  |  |  |  | 
| 1435 | 1 |  |  |  |  | 9 | my $newsequence = _pattern_adder($str, | 
| 1436 |  |  |  |  |  |  | $pat, | 
| 1437 |  |  |  |  |  |  | $self->{codontable}, | 
| 1438 |  |  |  |  |  |  | $self->{reversecodontable}, | 
| 1439 |  |  |  |  |  |  | $self->{amb_trans_memo} | 
| 1440 |  |  |  |  |  |  | ); | 
| 1441 | 1 |  |  |  |  | 6 | return $newsequence; | 
| 1442 |  |  |  |  |  |  | } | 
| 1443 |  |  |  |  |  |  |  | 
| 1444 |  |  |  |  |  |  | =head2 codon_change_type | 
| 1445 |  |  |  |  |  |  |  | 
| 1446 |  |  |  |  |  |  | =cut | 
| 1447 |  |  |  |  |  |  |  | 
| 1448 |  |  |  |  |  |  | sub codon_change_type | 
| 1449 |  |  |  |  |  |  | { | 
| 1450 | 0 |  |  | 0 | 1 | 0 | my ($self, @args) = @_; | 
| 1451 |  |  |  |  |  |  |  | 
| 1452 | 0 |  |  |  |  | 0 | my ($codold, $codnew) = $self->_rearrange([qw(from to)], @args); | 
| 1453 |  |  |  |  |  |  |  | 
| 1454 | 0 | 0 |  |  |  | 0 | $self->throw("no codon table has been defined") | 
| 1455 |  |  |  |  |  |  | unless $self->{codontable}; | 
| 1456 |  |  |  |  |  |  |  | 
| 1457 | 0 | 0 |  |  |  | 0 | $self->throw("no from sequence provided") | 
| 1458 |  |  |  |  |  |  | unless $codold; | 
| 1459 |  |  |  |  |  |  |  | 
| 1460 | 0 | 0 |  |  |  | 0 | $self->throw("no to sequence provided") | 
| 1461 |  |  |  |  |  |  | unless $codnew; | 
| 1462 |  |  |  |  |  |  |  | 
| 1463 | 0 |  |  |  |  | 0 | my $changetype = _codon_change_type($codold, $codnew, $self->{codontable}); | 
| 1464 | 0 |  |  |  |  | 0 | return $changetype; | 
| 1465 |  |  |  |  |  |  | } | 
| 1466 |  |  |  |  |  |  |  | 
| 1467 |  |  |  |  |  |  | =head2 translate | 
| 1468 |  |  |  |  |  |  |  | 
| 1469 |  |  |  |  |  |  | =cut | 
| 1470 |  |  |  |  |  |  |  | 
| 1471 |  |  |  |  |  |  | sub translate | 
| 1472 |  |  |  |  |  |  | { | 
| 1473 | 9 |  |  | 9 | 1 | 5374 | my ($self, @args) = @_; | 
| 1474 |  |  |  |  |  |  |  | 
| 1475 | 9 |  |  |  |  | 53 | my ($seq, $frame) = $self->_rearrange([qw(sequence frame)], @args); | 
| 1476 |  |  |  |  |  |  |  | 
| 1477 | 9 | 50 |  |  |  | 242 | $self->throw("no codon table has been defined") | 
| 1478 |  |  |  |  |  |  | unless $self->{codontable}; | 
| 1479 |  |  |  |  |  |  |  | 
| 1480 | 9 | 50 |  |  |  | 24 | $self->throw("no nucleotide sequence provided") | 
| 1481 |  |  |  |  |  |  | unless $seq; | 
| 1482 |  |  |  |  |  |  |  | 
| 1483 | 9 |  |  |  |  | 31 | my $str = $self->_stripdown($seq, q{}, 1); | 
| 1484 |  |  |  |  |  |  |  | 
| 1485 | 9 | 100 |  |  |  | 23 | $frame = $frame ? $frame  : 1; | 
| 1486 |  |  |  |  |  |  |  | 
| 1487 | 9 | 50 | 33 |  |  | 52 | $self->throw("frame must be -3, -2, -1, 1, 2, or 3") | 
| 1488 |  |  |  |  |  |  | if (abs($frame) > 4 || abs($frame) < 0); | 
| 1489 |  |  |  |  |  |  |  | 
| 1490 | 9 |  |  |  |  | 43 | my $peptide = _translate($str, $frame, $self->{codontable}); | 
| 1491 | 9 | 100 |  |  |  | 28 | if (ref $seq) | 
| 1492 |  |  |  |  |  |  | { | 
| 1493 | 6 |  |  |  |  | 30 | my $newobj = $seq->clone(); | 
| 1494 | 6 | 50 |  |  |  | 308 | my $desc = $newobj->desc ?  $newobj->desc . q{ } : q{}; | 
| 1495 | 6 |  |  |  |  | 86 | $desc = "translated with " . $self->{organism} . " codon values"; | 
| 1496 | 6 |  |  |  |  | 18 | $newobj->seq($peptide); | 
| 1497 | 6 |  |  |  |  | 428 | $newobj->alphabet("protein"); | 
| 1498 | 6 |  |  |  |  | 129 | $newobj->desc($desc); | 
| 1499 | 6 |  |  |  |  | 78 | return $newobj; | 
| 1500 |  |  |  |  |  |  | } | 
| 1501 |  |  |  |  |  |  | else | 
| 1502 |  |  |  |  |  |  | { | 
| 1503 | 3 |  |  |  |  | 43 | return $peptide; | 
| 1504 |  |  |  |  |  |  | } | 
| 1505 |  |  |  |  |  |  | } | 
| 1506 |  |  |  |  |  |  |  | 
| 1507 |  |  |  |  |  |  | =head2 reverse_translate | 
| 1508 |  |  |  |  |  |  |  | 
| 1509 |  |  |  |  |  |  | =cut | 
| 1510 |  |  |  |  |  |  |  | 
| 1511 |  |  |  |  |  |  | sub reverse_translate | 
| 1512 |  |  |  |  |  |  | { | 
| 1513 | 20001 |  |  | 20001 | 1 | 1778649 | my ($self, @args) = @_; | 
| 1514 |  |  |  |  |  |  |  | 
| 1515 | 20001 |  |  |  |  | 85178 | my ($pep, $algorithm) | 
| 1516 |  |  |  |  |  |  | = $self->_rearrange([qw( | 
| 1517 |  |  |  |  |  |  | peptide | 
| 1518 |  |  |  |  |  |  | algorithm)], @args); | 
| 1519 |  |  |  |  |  |  |  | 
| 1520 | 20001 | 50 |  |  |  | 525176 | $self->throw("no codon table has been defined") | 
| 1521 |  |  |  |  |  |  | unless $self->{codontable}; | 
| 1522 |  |  |  |  |  |  |  | 
| 1523 | 20001 | 50 |  |  |  | 49745 | $self->throw("no RSCU table has been defined") | 
| 1524 |  |  |  |  |  |  | unless $self->{rscutable}; | 
| 1525 |  |  |  |  |  |  |  | 
| 1526 | 20001 | 50 |  |  |  | 42707 | $self->throw("no peptide sequence provided") | 
| 1527 |  |  |  |  |  |  | unless $pep; | 
| 1528 |  |  |  |  |  |  |  | 
| 1529 | 20001 |  |  |  |  | 47898 | my $str = $self->_stripdown($pep, q{}, 1); | 
| 1530 |  |  |  |  |  |  |  | 
| 1531 | 20001 |  |  |  |  | 61284 | my ($newstr, @baddies) = _sanitize($str, 'pep'); | 
| 1532 | 20001 |  |  |  |  | 33322 | $str = $newstr; | 
| 1533 | 20001 | 50 |  |  |  | 46631 | if (scalar @baddies) | 
| 1534 |  |  |  |  |  |  | { | 
| 1535 | 0 |  |  |  |  | 0 | print "\nGD_WARNING: removed bad characters (", join q{, }, @baddies; | 
| 1536 | 0 |  |  |  |  | 0 | print ") from input sequence\n"; | 
| 1537 |  |  |  |  |  |  | } | 
| 1538 | 20001 |  | 50 |  |  | 43918 | $algorithm = $algorithm || "balanced"; | 
| 1539 | 20001 |  |  |  |  | 29745 | $algorithm = lc $algorithm; | 
| 1540 | 20001 |  |  |  |  | 31328 | $algorithm =~ s{\;}{}xg; | 
| 1541 | 20001 |  |  |  |  | 35669 | my $name = "_reversetranslate" . "_" . $algorithm; | 
| 1542 | 20001 |  |  |  |  | 46919 | my $subref = \&$name; | 
| 1543 | 20001 |  | 33 |  |  | 78231 | my $seq = &$subref($self->{reversecodontable}, $self->{rscutable}, $str) | 
| 1544 |  |  |  |  |  |  | || $self->throw("Can't reverse translate with $algorithm? $!"); | 
| 1545 |  |  |  |  |  |  |  | 
| 1546 | 20001 | 50 |  |  |  | 51611 | if (ref $pep) | 
| 1547 |  |  |  |  |  |  | { | 
| 1548 | 20001 |  |  |  |  | 68642 | my $newobj = $pep->clone(); | 
| 1549 | 20001 | 50 |  |  |  | 708335 | my $desc = $newobj->desc ?  $newobj->desc . q{ } : q{}; | 
| 1550 | 20001 |  |  |  |  | 289412 | $desc .= "$algorithm reverse translated with " . $self->{organism}; | 
| 1551 | 20001 |  |  |  |  | 29493 | $desc .= " RSCU values"; | 
| 1552 | 20001 |  |  |  |  | 58405 | $newobj->seq($seq); | 
| 1553 | 20001 |  |  |  |  | 1319029 | $newobj->desc($desc); | 
| 1554 | 20001 |  |  |  |  | 266311 | return $newobj; | 
| 1555 |  |  |  |  |  |  | } | 
| 1556 |  |  |  |  |  |  | else | 
| 1557 |  |  |  |  |  |  | { | 
| 1558 | 0 |  |  |  |  | 0 | return $seq; | 
| 1559 |  |  |  |  |  |  | } | 
| 1560 |  |  |  |  |  |  | } | 
| 1561 |  |  |  |  |  |  |  | 
| 1562 |  |  |  |  |  |  | =head2 codon_juggle | 
| 1563 |  |  |  |  |  |  |  | 
| 1564 |  |  |  |  |  |  | =cut | 
| 1565 |  |  |  |  |  |  |  | 
| 1566 |  |  |  |  |  |  | sub codon_juggle | 
| 1567 |  |  |  |  |  |  | { | 
| 1568 | 20003 |  |  | 20003 | 1 | 932845 | my ($self, @args) = @_; | 
| 1569 |  |  |  |  |  |  |  | 
| 1570 | 20003 |  |  |  |  | 85621 | my ($seq, $algorithm) | 
| 1571 |  |  |  |  |  |  | = $self->_rearrange([qw( | 
| 1572 |  |  |  |  |  |  | sequence | 
| 1573 |  |  |  |  |  |  | algorithm)], @args); | 
| 1574 |  |  |  |  |  |  |  | 
| 1575 | 20003 | 50 |  |  |  | 505492 | $self->throw("no codon table has been defined") | 
| 1576 |  |  |  |  |  |  | unless $self->{codontable}; | 
| 1577 |  |  |  |  |  |  |  | 
| 1578 | 20003 | 50 |  |  |  | 44796 | $self->throw("no RSCU table has been defined") | 
| 1579 |  |  |  |  |  |  | unless $self->{rscutable}; | 
| 1580 |  |  |  |  |  |  |  | 
| 1581 | 20003 | 50 |  |  |  | 36430 | $self->throw("no nucleotide sequence provided") | 
| 1582 |  |  |  |  |  |  | unless $seq; | 
| 1583 |  |  |  |  |  |  |  | 
| 1584 |  |  |  |  |  |  |  | 
| 1585 | 20003 | 50 |  |  |  | 33675 | $self->throw("no algorithm provided") | 
| 1586 |  |  |  |  |  |  | unless $algorithm; | 
| 1587 |  |  |  |  |  |  |  | 
| 1588 | 20003 |  |  |  |  | 43961 | my $str = $self->_stripdown($seq, q{}, 0); | 
| 1589 |  |  |  |  |  |  |  | 
| 1590 |  |  |  |  |  |  | ##REPLACE THIS WITH CODE THAT GRACEFULLY JUGGLES JUST CDSES OR GENES | 
| 1591 | 20003 | 50 |  |  |  | 51569 | $self->throw("sequence does not appear to be the right length to be a gene") | 
| 1592 |  |  |  |  |  |  | unless length($str) % 3 == 0; | 
| 1593 |  |  |  |  |  |  |  | 
| 1594 | 20003 |  |  |  |  | 29712 | $algorithm = lc $algorithm; | 
| 1595 | 20003 |  |  |  |  | 34055 | $algorithm =~ s/\W//xg; | 
| 1596 | 20003 |  |  |  |  | 31153 | my $name = "_codonJuggle_" . $algorithm; | 
| 1597 | 20003 |  |  |  |  | 43082 | my $subref = \&$name; | 
| 1598 | 20003 |  | 33 |  |  | 77284 | my $newseq = &$subref($self->{codontable}, | 
| 1599 |  |  |  |  |  |  | $self->{reversecodontable}, | 
| 1600 |  |  |  |  |  |  | $self->{rscutable}, | 
| 1601 |  |  |  |  |  |  | $str | 
| 1602 |  |  |  |  |  |  | ) || $self->throw("Can't run $algorithm? $!"); | 
| 1603 | 20003 | 50 |  |  |  | 49303 | if (ref $seq) | 
| 1604 |  |  |  |  |  |  | { | 
| 1605 | 20003 |  |  |  |  | 74644 | my $newobj = $seq->clone(); | 
| 1606 | 20003 | 50 |  |  |  | 698613 | my $desc = $newobj->desc ?  $newobj->desc . q{ } : q{}; | 
| 1607 | 20003 |  |  |  |  | 283854 | $desc .= "$algorithm codon juggled with " . $self->{organism}; | 
| 1608 | 20003 |  |  |  |  | 30913 | $desc .= " RSCU values"; | 
| 1609 | 20003 |  |  |  |  | 56485 | $newobj->seq($newseq); | 
| 1610 | 20003 |  |  |  |  | 1369061 | $newobj->desc($desc); | 
| 1611 | 20003 |  |  |  |  | 262637 | return $newobj; | 
| 1612 |  |  |  |  |  |  | } | 
| 1613 |  |  |  |  |  |  | else | 
| 1614 |  |  |  |  |  |  | { | 
| 1615 | 0 |  |  |  |  | 0 | return $newseq; | 
| 1616 |  |  |  |  |  |  | } | 
| 1617 |  |  |  |  |  |  | } | 
| 1618 |  |  |  |  |  |  |  | 
| 1619 |  |  |  |  |  |  | =head2 subtract_sequence | 
| 1620 |  |  |  |  |  |  |  | 
| 1621 |  |  |  |  |  |  | =cut | 
| 1622 |  |  |  |  |  |  |  | 
| 1623 |  |  |  |  |  |  | sub subtract_sequence | 
| 1624 |  |  |  |  |  |  | { | 
| 1625 | 3 |  |  | 3 | 1 | 2660 | my ($self, @args) = @_; | 
| 1626 |  |  |  |  |  |  |  | 
| 1627 | 3 |  |  |  |  | 19 | my ($seq, $rem) = $self->_rearrange([qw(sequence remove)], @args); | 
| 1628 |  |  |  |  |  |  |  | 
| 1629 | 3 | 50 |  |  |  | 80 | $self->throw("no codon table has been defined") | 
| 1630 |  |  |  |  |  |  | unless $self->{codontable}; | 
| 1631 |  |  |  |  |  |  |  | 
| 1632 | 3 | 50 |  |  |  | 13 | $self->throw("no RSCU table has been defined") | 
| 1633 |  |  |  |  |  |  | unless $self->{rscutable}; | 
| 1634 |  |  |  |  |  |  |  | 
| 1635 | 3 | 50 |  |  |  | 10 | $self->throw("no nucleotide sequence provided") | 
| 1636 |  |  |  |  |  |  | unless $seq; | 
| 1637 |  |  |  |  |  |  |  | 
| 1638 | 3 | 50 |  |  |  | 12 | $self->throw("no sequence to be removed was defined") | 
| 1639 |  |  |  |  |  |  | unless ($rem); | 
| 1640 |  |  |  |  |  |  |  | 
| 1641 | 3 |  |  |  |  | 14 | my $str = $self->_stripdown($seq, q{}, 0); | 
| 1642 |  |  |  |  |  |  |  | 
| 1643 | 3 |  |  |  |  | 6 | my $regarr; | 
| 1644 | 3 |  |  |  |  | 6 | my $less = $rem; | 
| 1645 | 3 | 50 |  |  |  | 11 | if (ref($rem)) | 
| 1646 |  |  |  |  |  |  | { | 
| 1647 | 3 | 100 | 66 |  |  | 37 | if ($rem->isa("Bio::Seq") || $rem->isa("Bio::SeqFeatureI")) | 
|  |  | 50 |  |  |  |  |  | 
| 1648 |  |  |  |  |  |  | { | 
| 1649 | 2 | 50 |  |  |  | 7 | $less = ref($rem->seq) ? $rem->seq->seq  : $rem->seq; | 
| 1650 | 2 |  |  |  |  | 95 | $regarr = _regarr($less, 1); | 
| 1651 |  |  |  |  |  |  | } | 
| 1652 |  |  |  |  |  |  | elsif ($rem->isa("Bio::GeneDesign::RestrictionEnzyme")) | 
| 1653 |  |  |  |  |  |  | { | 
| 1654 | 1 |  |  |  |  | 11 | $less = $rem->seq; | 
| 1655 | 1 |  |  |  |  | 8 | $regarr = $rem->regex; | 
| 1656 |  |  |  |  |  |  | } | 
| 1657 |  |  |  |  |  |  | else | 
| 1658 |  |  |  |  |  |  | { | 
| 1659 | 0 |  |  |  |  | 0 | $self->throw("removal argument is not a Bio::Seq, Bio::SeqFeature, or " | 
| 1660 |  |  |  |  |  |  | . "Bio::GeneDesign::RestrictionEnzyme"); | 
| 1661 |  |  |  |  |  |  | } | 
| 1662 |  |  |  |  |  |  | } | 
| 1663 |  |  |  |  |  |  | else | 
| 1664 |  |  |  |  |  |  | { | 
| 1665 | 0 |  |  |  |  | 0 | $regarr = _regarr($less, 1); | 
| 1666 |  |  |  |  |  |  | } | 
| 1667 |  |  |  |  |  |  |  | 
| 1668 | 3 |  |  |  |  | 30 | my $newseq = _subtract( uc $str, | 
| 1669 |  |  |  |  |  |  | uc $less, | 
| 1670 |  |  |  |  |  |  | $regarr, | 
| 1671 |  |  |  |  |  |  | $self->{codontable}, | 
| 1672 |  |  |  |  |  |  | $self->{rscutable}, | 
| 1673 |  |  |  |  |  |  | $self->{reversecodontable} | 
| 1674 |  |  |  |  |  |  | ); | 
| 1675 |  |  |  |  |  |  |  | 
| 1676 | 3 | 50 |  |  |  | 15 | if (ref $seq) | 
| 1677 |  |  |  |  |  |  | { | 
| 1678 | 3 |  |  |  |  | 29 | my $newobj = $seq->clone(); | 
| 1679 | 3 | 50 |  |  |  | 171 | my $desc = $newobj->desc ?  $newobj->desc . q{ } : q{}; | 
| 1680 | 3 |  |  |  |  | 70 | $desc .= $rem->id . " subtracted with " . $self->{organism}; | 
| 1681 | 3 |  |  |  |  | 43 | $desc .= " RSCU values"; | 
| 1682 | 3 |  |  |  |  | 14 | $newobj->seq($newseq); | 
| 1683 | 3 |  |  |  |  | 278 | $newobj->desc($desc); | 
| 1684 | 3 |  |  |  |  | 57 | return $newobj; | 
| 1685 |  |  |  |  |  |  | } | 
| 1686 |  |  |  |  |  |  | else | 
| 1687 |  |  |  |  |  |  | { | 
| 1688 | 0 |  |  |  |  | 0 | return $newseq; | 
| 1689 |  |  |  |  |  |  | } | 
| 1690 |  |  |  |  |  |  | } | 
| 1691 |  |  |  |  |  |  |  | 
| 1692 |  |  |  |  |  |  | =head2 repeat_smash | 
| 1693 |  |  |  |  |  |  |  | 
| 1694 |  |  |  |  |  |  | =cut | 
| 1695 |  |  |  |  |  |  |  | 
| 1696 |  |  |  |  |  |  | sub repeat_smash | 
| 1697 |  |  |  |  |  |  | { | 
| 1698 | 0 |  |  | 0 | 1 | 0 | my ($self, @args) = @_; | 
| 1699 |  |  |  |  |  |  |  | 
| 1700 | 0 |  |  |  |  | 0 | my ($seq) = $self->_rearrange([qw(sequence)], @args); | 
| 1701 |  |  |  |  |  |  |  | 
| 1702 | 0 | 0 |  |  |  | 0 | $self->throw("no codon table has been defined") | 
| 1703 |  |  |  |  |  |  | unless $self->{codontable}; | 
| 1704 |  |  |  |  |  |  |  | 
| 1705 | 0 | 0 |  |  |  | 0 | $self->throw("no RSCU table has been defined") | 
| 1706 |  |  |  |  |  |  | unless $self->{rscutable}; | 
| 1707 |  |  |  |  |  |  |  | 
| 1708 | 0 | 0 |  |  |  | 0 | $self->throw("no nucleotide sequence provided") | 
| 1709 |  |  |  |  |  |  | unless $seq; | 
| 1710 |  |  |  |  |  |  |  | 
| 1711 | 0 |  |  |  |  | 0 | my $str = $self->_stripdown($seq, q{}, 0); | 
| 1712 |  |  |  |  |  |  |  | 
| 1713 | 0 |  |  |  |  | 0 | return _minimize_local_alignment_dp( | 
| 1714 |  |  |  |  |  |  | $str, | 
| 1715 |  |  |  |  |  |  | $self->{codontable}, | 
| 1716 |  |  |  |  |  |  | $self->{reversecodontable}, | 
| 1717 |  |  |  |  |  |  | $self->{rscutable} | 
| 1718 |  |  |  |  |  |  | ); | 
| 1719 |  |  |  |  |  |  | } | 
| 1720 |  |  |  |  |  |  |  | 
| 1721 |  |  |  |  |  |  | =head2 make_amplification_primers | 
| 1722 |  |  |  |  |  |  |  | 
| 1723 |  |  |  |  |  |  | NO TEST | 
| 1724 |  |  |  |  |  |  |  | 
| 1725 |  |  |  |  |  |  | =cut | 
| 1726 |  |  |  |  |  |  |  | 
| 1727 |  |  |  |  |  |  | sub make_amplification_primers | 
| 1728 |  |  |  |  |  |  | { | 
| 1729 | 0 |  |  | 0 | 1 | 0 | my ($self, @args) = @_; | 
| 1730 |  |  |  |  |  |  |  | 
| 1731 | 0 |  |  |  |  | 0 | my ($seq, $temp) = $self->_rearrange([qw(sequence temperature)], @args); | 
| 1732 |  |  |  |  |  |  |  | 
| 1733 | 0 | 0 |  |  |  | 0 | $self->throw("no sequence provided") unless ($seq); | 
| 1734 | 0 |  | 0 |  |  | 0 | $temp = $temp || 60; | 
| 1735 |  |  |  |  |  |  |  | 
| 1736 | 0 |  |  |  |  | 0 | my $str = $self->_stripdown($seq, q{}, 0); | 
| 1737 |  |  |  |  |  |  |  | 
| 1738 | 0 |  |  |  |  | 0 | return _make_amplification_primers($str, $temp); | 
| 1739 |  |  |  |  |  |  | } | 
| 1740 |  |  |  |  |  |  |  | 
| 1741 |  |  |  |  |  |  | =head2 contains_homopolymer | 
| 1742 |  |  |  |  |  |  |  | 
| 1743 |  |  |  |  |  |  | Returns 1 if the sequence contains a homopolymer of the provided length (default | 
| 1744 |  |  |  |  |  |  | is 5bp) and 0 else | 
| 1745 |  |  |  |  |  |  |  | 
| 1746 |  |  |  |  |  |  | =cut | 
| 1747 |  |  |  |  |  |  |  | 
| 1748 |  |  |  |  |  |  | sub contains_homopolymer | 
| 1749 |  |  |  |  |  |  | { | 
| 1750 | 0 |  |  | 0 | 1 | 0 | my ($self, @args) = @_; | 
| 1751 |  |  |  |  |  |  |  | 
| 1752 | 0 |  |  |  |  | 0 | my ($seq, $length) = $self->_rearrange([qw(sequence length)], @args); | 
| 1753 |  |  |  |  |  |  |  | 
| 1754 | 0 | 0 |  |  |  | 0 | $self->throw("no sequence provided") unless ($seq); | 
| 1755 | 0 |  | 0 |  |  | 0 | $length = $length || 5; | 
| 1756 |  |  |  |  |  |  |  | 
| 1757 | 0 |  |  |  |  | 0 | my $str = $self->_stripdown($seq, q{}, 0); | 
| 1758 |  |  |  |  |  |  |  | 
| 1759 | 0 |  |  |  |  | 0 | return _check_for_homopolymer($str, $length); | 
| 1760 |  |  |  |  |  |  | } | 
| 1761 |  |  |  |  |  |  |  | 
| 1762 |  |  |  |  |  |  | =head2 filter_homopolymers | 
| 1763 |  |  |  |  |  |  |  | 
| 1764 |  |  |  |  |  |  | =cut | 
| 1765 |  |  |  |  |  |  |  | 
| 1766 |  |  |  |  |  |  | sub filter_homopolymers | 
| 1767 |  |  |  |  |  |  | { | 
| 1768 | 0 |  |  | 0 | 1 | 0 | my ($self, @args) = @_; | 
| 1769 |  |  |  |  |  |  |  | 
| 1770 | 0 |  |  |  |  | 0 | my ($seqs, $length) = $self->_rearrange([qw(sequences length)], @args); | 
| 1771 |  |  |  |  |  |  |  | 
| 1772 | 0 | 0 |  |  |  | 0 | $self->throw("no argument provided to filter_homopolymers") | 
| 1773 |  |  |  |  |  |  | unless $seqs; | 
| 1774 | 0 |  | 0 |  |  | 0 | $length = $length || 5; | 
| 1775 |  |  |  |  |  |  |  | 
| 1776 | 0 |  |  |  |  | 0 | my $arrref = $self->_stripdown($seqs, 'ARRAY', 1); | 
| 1777 |  |  |  |  |  |  |  | 
| 1778 | 0 |  |  |  |  | 0 | my $seqarr = _filter_homopolymer( $arrref, $length); | 
| 1779 | 0 |  |  |  |  | 0 | return $seqarr; | 
| 1780 |  |  |  |  |  |  | } | 
| 1781 |  |  |  |  |  |  |  | 
| 1782 |  |  |  |  |  |  | =head2 search_vmatch | 
| 1783 |  |  |  |  |  |  |  | 
| 1784 |  |  |  |  |  |  | =cut | 
| 1785 |  |  |  |  |  |  |  | 
| 1786 |  |  |  |  |  |  | sub search_vmatch | 
| 1787 |  |  |  |  |  |  | { | 
| 1788 | 0 |  |  | 0 | 1 | 0 | my ($self, @args) = @_; | 
| 1789 |  |  |  |  |  |  |  | 
| 1790 | 0 | 0 |  |  |  | 0 | $self->throw("vmatch searching is not available") | 
| 1791 |  |  |  |  |  |  | unless $self->vmatch; | 
| 1792 |  |  |  |  |  |  |  | 
| 1793 | 0 |  |  |  |  | 0 | my ($parent, $seqobj, $mismatches, $revcom) | 
| 1794 |  |  |  |  |  |  | = $self->_rearrange([qw( | 
| 1795 |  |  |  |  |  |  | parent | 
| 1796 |  |  |  |  |  |  | sequence | 
| 1797 |  |  |  |  |  |  | mismatches | 
| 1798 |  |  |  |  |  |  | revcom)], @args); | 
| 1799 |  |  |  |  |  |  |  | 
| 1800 | 0 | 0 |  |  |  | 0 | $self->throw("no nucleotide sequence provided") | 
| 1801 |  |  |  |  |  |  | unless $seqobj; | 
| 1802 |  |  |  |  |  |  |  | 
| 1803 | 0 | 0 |  |  |  | 0 | $self->throw("sequence argument is not a Bio::Seq object") | 
| 1804 |  |  |  |  |  |  | unless $seqobj->isa("Bio::Seq"); | 
| 1805 |  |  |  |  |  |  |  | 
| 1806 | 0 | 0 |  |  |  | 0 | $self->throw("$seqobj is not a nucleotide sequence") | 
| 1807 |  |  |  |  |  |  | unless $seqobj->alphabet eq "dna"; | 
| 1808 |  |  |  |  |  |  |  | 
| 1809 | 0 | 0 |  |  |  | 0 | $self->throw("no parent sequence provided") | 
| 1810 |  |  |  |  |  |  | unless $parent; | 
| 1811 |  |  |  |  |  |  |  | 
| 1812 | 0 | 0 |  |  |  | 0 | $self->throw("parent argument is not a Bio::Seq object") | 
| 1813 |  |  |  |  |  |  | unless $parent->isa("Bio::Seq"); | 
| 1814 |  |  |  |  |  |  |  | 
| 1815 | 0 | 0 |  |  |  | 0 | $self->throw("$parent is not a nucleotide sequence") | 
| 1816 |  |  |  |  |  |  | unless $parent->alphabet eq "dna"; | 
| 1817 |  |  |  |  |  |  |  | 
| 1818 | 0 |  | 0 |  |  | 0 | my $writedir = $self->{tmp_path} || "./"; | 
| 1819 | 0 |  | 0 |  |  | 0 | $mismatches = $mismatches || 1; | 
| 1820 | 0 |  | 0 |  |  | 0 | $revcom = $revcom || 1; | 
| 1821 |  |  |  |  |  |  |  | 
| 1822 | 0 |  |  |  |  | 0 | my $hits = _search_vmatch(  $parent, | 
| 1823 |  |  |  |  |  |  | $seqobj, | 
| 1824 |  |  |  |  |  |  | $mismatches, | 
| 1825 |  |  |  |  |  |  | $revcom, | 
| 1826 |  |  |  |  |  |  | $writedir); | 
| 1827 | 0 |  |  |  |  | 0 | return $hits; | 
| 1828 |  |  |  |  |  |  | } | 
| 1829 |  |  |  |  |  |  |  | 
| 1830 |  |  |  |  |  |  | =head2 filter_blast | 
| 1831 |  |  |  |  |  |  |  | 
| 1832 |  |  |  |  |  |  | =cut | 
| 1833 |  |  |  |  |  |  |  | 
| 1834 |  |  |  |  |  |  | sub filter_blast | 
| 1835 |  |  |  |  |  |  | { | 
| 1836 | 0 |  |  | 0 | 1 | 0 | my ($self, @args) = @_; | 
| 1837 |  |  |  |  |  |  |  | 
| 1838 | 0 | 0 |  |  |  | 0 | $self->throw("BLAST+ filtering is not available") | 
| 1839 |  |  |  |  |  |  | unless $self->BLAST; | 
| 1840 |  |  |  |  |  |  |  | 
| 1841 | 0 |  |  |  |  | 0 | my ($parent, $seqobjs, $percent, $revcom) | 
| 1842 |  |  |  |  |  |  | = $self->_rearrange([qw( | 
| 1843 |  |  |  |  |  |  | parent | 
| 1844 |  |  |  |  |  |  | sequences | 
| 1845 |  |  |  |  |  |  | percent_identity | 
| 1846 |  |  |  |  |  |  | revcom)], @args); | 
| 1847 |  |  |  |  |  |  |  | 
| 1848 | 0 | 0 |  |  |  | 0 | $self->throw("no nucleotide sequences provided") | 
| 1849 |  |  |  |  |  |  | unless $seqobjs; | 
| 1850 |  |  |  |  |  |  |  | 
| 1851 | 0 | 0 |  |  |  | 0 | $self->throw("sequences argument is not an array reference") | 
| 1852 |  |  |  |  |  |  | unless ref($seqobjs) eq "ARRAY"; | 
| 1853 |  |  |  |  |  |  |  | 
| 1854 | 0 |  |  |  |  | 0 | foreach my $seqobj (@$seqobjs) | 
| 1855 |  |  |  |  |  |  | { | 
| 1856 | 0 | 0 |  |  |  | 0 | $self->throw(ref($seqobj) . " is not a Bio::Seq object") | 
| 1857 |  |  |  |  |  |  | unless $seqobj->isa("Bio::Seq"); | 
| 1858 |  |  |  |  |  |  |  | 
| 1859 | 0 | 0 |  |  |  | 0 | $self->throw("$seqobj is not a nucleotide sequence") | 
| 1860 |  |  |  |  |  |  | unless $seqobj->alphabet eq "dna"; | 
| 1861 |  |  |  |  |  |  | } | 
| 1862 |  |  |  |  |  |  |  | 
| 1863 | 0 | 0 |  |  |  | 0 | $self->throw("no parent sequence provided") | 
| 1864 |  |  |  |  |  |  | unless $parent; | 
| 1865 |  |  |  |  |  |  |  | 
| 1866 | 0 | 0 |  |  |  | 0 | $self->throw(ref($parent) . " is not a Bio::Seq object") | 
| 1867 |  |  |  |  |  |  | unless $parent->isa("Bio::Seq"); | 
| 1868 |  |  |  |  |  |  |  | 
| 1869 | 0 | 0 |  |  |  | 0 | $self->throw("$parent is not a nucleotide sequence") | 
| 1870 |  |  |  |  |  |  | unless $parent->alphabet eq "dna"; | 
| 1871 |  |  |  |  |  |  |  | 
| 1872 | 0 |  | 0 |  |  | 0 | my $writedir = $self->{tmp_path} || "./"; | 
| 1873 | 0 |  | 0 |  |  | 0 | $percent = $percent || 75; | 
| 1874 | 0 |  | 0 |  |  | 0 | $revcom = $revcom || 1; | 
| 1875 |  |  |  |  |  |  |  | 
| 1876 | 0 |  |  |  |  | 0 | my $seqarr = _filter_blast( $parent, | 
| 1877 |  |  |  |  |  |  | $seqobjs, | 
| 1878 |  |  |  |  |  |  | $percent, | 
| 1879 |  |  |  |  |  |  | $writedir); | 
| 1880 | 0 |  |  |  |  | 0 | return $seqarr; | 
| 1881 |  |  |  |  |  |  | } | 
| 1882 |  |  |  |  |  |  |  | 
| 1883 |  |  |  |  |  |  | =head2 carve_building_blocks | 
| 1884 |  |  |  |  |  |  |  | 
| 1885 |  |  |  |  |  |  | NO TEST | 
| 1886 |  |  |  |  |  |  |  | 
| 1887 |  |  |  |  |  |  | =cut | 
| 1888 |  |  |  |  |  |  |  | 
| 1889 |  |  |  |  |  |  | sub carve_building_blocks | 
| 1890 |  |  |  |  |  |  | { | 
| 1891 | 0 |  |  | 0 | 1 | 0 | my ($self, @args) = @_; | 
| 1892 | 0 |  |  |  |  | 0 | my ($chobj, $algorithm, $tarbblen, $maxbblen, | 
| 1893 |  |  |  |  |  |  | $minbblen, $tarbblap, $stitch, $excludes, | 
| 1894 |  |  |  |  |  |  | $fpexcludes, $tpexcludes, $verbose) | 
| 1895 |  |  |  |  |  |  | = $self->_rearrange([qw( | 
| 1896 |  |  |  |  |  |  | sequence | 
| 1897 |  |  |  |  |  |  | algorithm | 
| 1898 |  |  |  |  |  |  | target_length | 
| 1899 |  |  |  |  |  |  | max_length | 
| 1900 |  |  |  |  |  |  | min_length | 
| 1901 |  |  |  |  |  |  | overlap_length | 
| 1902 |  |  |  |  |  |  | stitch_temp | 
| 1903 |  |  |  |  |  |  | excludes | 
| 1904 |  |  |  |  |  |  | fpexcludes | 
| 1905 |  |  |  |  |  |  | tpexcludes | 
| 1906 |  |  |  |  |  |  | verbose)], @args); | 
| 1907 |  |  |  |  |  |  |  | 
| 1908 | 0 | 0 |  |  |  | 0 | $self->throw("no nucleotide sequence provided") | 
| 1909 |  |  |  |  |  |  | unless $chobj; | 
| 1910 |  |  |  |  |  |  |  | 
| 1911 | 0 | 0 |  |  |  | 0 | $self->throw("object " . ref($chobj) . " is not a Bio::Seq") | 
| 1912 |  |  |  |  |  |  | unless $chobj->isa("Bio::Seq"); | 
| 1913 |  |  |  |  |  |  |  | 
| 1914 | 0 |  | 0 |  |  | 0 | $tarbblen = $tarbblen || 1000; | 
| 1915 | 0 |  | 0 |  |  | 0 | $maxbblen = $maxbblen || 1023; | 
| 1916 | 0 |  | 0 |  |  | 0 | $minbblen = $minbblen || 200; | 
| 1917 |  |  |  |  |  |  |  | 
| 1918 | 0 | 0 | 0 |  |  | 0 | $self->throw("Target building block size $tarbblen is outside of allowable " . | 
| 1919 |  |  |  |  |  |  | "range min $minbblen to max $maxbblen") | 
| 1920 |  |  |  |  |  |  | if ($tarbblen < $minbblen || $tarbblen > $maxbblen); | 
| 1921 |  |  |  |  |  |  |  | 
| 1922 | 0 | 0 | 0 |  |  | 0 | $self->throw("Minimum and maximum building block sizes don't make sense") | 
|  |  |  | 0 |  |  |  |  | 
| 1923 |  |  |  |  |  |  | if ($maxbblen < $minbblen || $maxbblen < 0 || $minbblen < 0 ); | 
| 1924 |  |  |  |  |  |  |  | 
| 1925 | 0 |  | 0 |  |  | 0 | $tarbblap = $tarbblap   ||  40; | 
| 1926 |  |  |  |  |  |  |  | 
| 1927 | 0 |  | 0 |  |  | 0 | $algorithm = $algorithm || "overlap"; | 
| 1928 | 0 |  |  |  |  | 0 | $algorithm =~ s/\;//xg; | 
| 1929 | 0 |  |  |  |  | 0 | my $module = "Bio::GeneDesign::BuildingBlocks::$algorithm"; | 
| 1930 | 0 |  |  |  |  | 0 | (my $require_name = $module . ".pm") =~ s{::}{/}xg; | 
| 1931 |  |  |  |  |  |  | my $req = eval | 
| 1932 | 0 |  |  |  |  | 0 | { | 
| 1933 | 0 |  |  |  |  | 0 | require $require_name; | 
| 1934 |  |  |  |  |  |  | }; | 
| 1935 | 0 | 0 |  |  |  | 0 | if (! $req) | 
| 1936 |  |  |  |  |  |  | { | 
| 1937 | 0 |  |  |  |  | 0 | $self->throw("BuildingBlocks module for $algorithm not found\n$@"); | 
| 1938 |  |  |  |  |  |  | } | 
| 1939 | 0 |  |  |  |  | 0 | my $name = $module . "::_carve_building_blocks"; | 
| 1940 | 0 |  |  |  |  | 0 | my $subref = \&$name; | 
| 1941 | 0 |  |  |  |  | 0 | my $BB_list = []; | 
| 1942 |  |  |  |  |  |  | my $run = eval | 
| 1943 | 0 |  |  |  |  | 0 | { | 
| 1944 | 0 |  |  |  |  | 0 | $BB_list = &$subref( $self, $chobj, $tarbblen, $maxbblen, | 
| 1945 |  |  |  |  |  |  | $minbblen, $tarbblap, $stitch, | 
| 1946 |  |  |  |  |  |  | $excludes, $fpexcludes, $tpexcludes, | 
| 1947 |  |  |  |  |  |  | $verbose | 
| 1948 |  |  |  |  |  |  | ); | 
| 1949 |  |  |  |  |  |  | }; | 
| 1950 | 0 |  |  |  |  | 0 | my $e; | 
| 1951 | 0 | 0 |  |  |  | 0 | if ($e = Bio::GeneDesign::Exception::UnBBable->caught()) | 
| 1952 |  |  |  |  |  |  | { | 
| 1953 | 0 |  |  |  |  | 0 | print $chobj->id . " cannot be carved into building blocks: " | 
| 1954 |  |  |  |  |  |  | . $e->error . "\n"; | 
| 1955 |  |  |  |  |  |  | } | 
| 1956 | 0 |  |  |  |  | 0 | return $BB_list; | 
| 1957 |  |  |  |  |  |  | } | 
| 1958 |  |  |  |  |  |  |  | 
| 1959 |  |  |  |  |  |  | =head2 chop_oligos | 
| 1960 |  |  |  |  |  |  |  | 
| 1961 |  |  |  |  |  |  | NO TEST | 
| 1962 |  |  |  |  |  |  |  | 
| 1963 |  |  |  |  |  |  | =cut | 
| 1964 |  |  |  |  |  |  |  | 
| 1965 |  |  |  |  |  |  | sub chop_oligos | 
| 1966 |  |  |  |  |  |  | { | 
| 1967 | 0 |  |  | 0 | 1 | 0 | my ($self, @args) = @_; | 
| 1968 |  |  |  |  |  |  |  | 
| 1969 | 0 |  |  |  |  | 0 | my ($bbobj, $algorithm, $olilenmin, $olilenmax, $olilen, $laptemp, $laplenmin, | 
| 1970 |  |  |  |  |  |  | $tmtol, $poolsize, $poolnummax, $verbose) | 
| 1971 |  |  |  |  |  |  | = $self->_rearrange([qw( | 
| 1972 |  |  |  |  |  |  | building_block | 
| 1973 |  |  |  |  |  |  | algorithm | 
| 1974 |  |  |  |  |  |  | oligo_len_min | 
| 1975 |  |  |  |  |  |  | oligo_len_max | 
| 1976 |  |  |  |  |  |  | oligo_len | 
| 1977 |  |  |  |  |  |  | overlap_tm | 
| 1978 |  |  |  |  |  |  | overlap_len_min | 
| 1979 |  |  |  |  |  |  | tm_tolerance | 
| 1980 |  |  |  |  |  |  | pool_size | 
| 1981 |  |  |  |  |  |  | pool_num_max | 
| 1982 |  |  |  |  |  |  | verbose)], @args); | 
| 1983 |  |  |  |  |  |  |  | 
| 1984 | 0 | 0 |  |  |  | 0 | $self->throw("no building block provided") | 
| 1985 |  |  |  |  |  |  | unless $bbobj; | 
| 1986 |  |  |  |  |  |  |  | 
| 1987 | 0 | 0 |  |  |  | 0 | $self->throw("object " . ref($bbobj) . " is not a Bio::SeqFeatureI") | 
| 1988 |  |  |  |  |  |  | unless $bbobj->isa("Bio::SeqFeatureI"); | 
| 1989 |  |  |  |  |  |  |  | 
| 1990 | 0 |  | 0 |  |  | 0 | $olilen     = $olilen     || 180; | 
| 1991 | 0 |  | 0 |  |  | 0 | $olilenmin  = $olilenmin  || 45; | 
| 1992 | 0 |  | 0 |  |  | 0 | $olilenmax  = $olilenmax  || 200; | 
| 1993 | 0 |  | 0 |  |  | 0 | $laptemp    = $laptemp    || 70; | 
| 1994 | 0 |  | 0 |  |  | 0 | $tmtol      = $tmtol      || .5; | 
| 1995 | 0 |  | 0 |  |  | 0 | $verbose    = $verbose    || undef; | 
| 1996 |  |  |  |  |  |  |  | 
| 1997 | 0 | 0 | 0 |  |  | 0 | $self->throw("maximum oligo length is less than the target oligo length") | 
|  |  |  | 0 |  |  |  |  | 
| 1998 |  |  |  |  |  |  | if ($olilenmax && $olilen && $olilenmax < $olilen); | 
| 1999 |  |  |  |  |  |  |  | 
| 2000 | 0 |  | 0 |  |  | 0 | $algorithm = $algorithm || "JGI"; | 
| 2001 | 0 |  |  |  |  | 0 | $algorithm =~ s/\;//xg; | 
| 2002 | 0 |  |  |  |  | 0 | my $module = "Bio::GeneDesign::Oligos::$algorithm"; | 
| 2003 | 0 |  |  |  |  | 0 | (my $require_name = $module . ".pm") =~ s{::}{/}xg; | 
| 2004 |  |  |  |  |  |  | my $req = eval | 
| 2005 | 0 |  |  |  |  | 0 | { | 
| 2006 | 0 |  |  |  |  | 0 | require $require_name; | 
| 2007 |  |  |  |  |  |  | }; | 
| 2008 | 0 | 0 |  |  |  | 0 | if (! $req) | 
| 2009 |  |  |  |  |  |  | { | 
| 2010 | 0 |  |  |  |  | 0 | $self->throw("Oligos module for algorithm $algorithm not found:\n$@\n\n"); | 
| 2011 |  |  |  |  |  |  | } | 
| 2012 | 0 |  |  |  |  | 0 | my $name = $module . "::_chop_oligos"; | 
| 2013 | 0 |  |  |  |  | 0 | my $subref = \&$name; | 
| 2014 | 0 |  |  |  |  | 0 | my $OL_list = []; | 
| 2015 |  |  |  |  |  |  | my $run = eval | 
| 2016 | 0 |  |  |  |  | 0 | { | 
| 2017 | 0 |  |  |  |  | 0 | $OL_list = &$subref( $self, $bbobj, $olilenmax, $olilenmin, $olilen, | 
| 2018 |  |  |  |  |  |  | $laptemp, $laplenmin, $tmtol, $poolsize, | 
| 2019 |  |  |  |  |  |  | $poolnummax, $verbose | 
| 2020 |  |  |  |  |  |  | ); | 
| 2021 |  |  |  |  |  |  | }; | 
| 2022 | 0 |  |  |  |  | 0 | my $e; | 
| 2023 | 0 | 0 |  |  |  | 0 | if ($e = Bio::GeneDesign::Exception::UnOLable->caught()) | 
| 2024 |  |  |  |  |  |  | { | 
| 2025 | 0 |  |  |  |  | 0 | print "Cannot chop into oligos: " . $e->error . "\n"; | 
| 2026 |  |  |  |  |  |  | } | 
| 2027 | 0 |  |  |  |  | 0 | return $OL_list; | 
| 2028 |  |  |  |  |  |  | } | 
| 2029 |  |  |  |  |  |  |  | 
| 2030 |  |  |  |  |  |  | =head2 make_graph | 
| 2031 |  |  |  |  |  |  |  | 
| 2032 |  |  |  |  |  |  | =cut | 
| 2033 |  |  |  |  |  |  |  | 
| 2034 |  |  |  |  |  |  | sub make_graph | 
| 2035 |  |  |  |  |  |  | { | 
| 2036 | 0 |  |  | 0 | 1 | 0 | my ($self, @args) = @_; | 
| 2037 |  |  |  |  |  |  |  | 
| 2038 | 0 | 0 |  |  |  | 0 | $self->throw("Graphing is not available") | 
| 2039 |  |  |  |  |  |  | unless $self->{graph}; | 
| 2040 |  |  |  |  |  |  |  | 
| 2041 | 0 |  |  |  |  | 0 | my ($seqobjs, $window) | 
| 2042 |  |  |  |  |  |  | = $self->_rearrange([qw(sequences window)], @args); | 
| 2043 |  |  |  |  |  |  |  | 
| 2044 | 0 | 0 |  |  |  | 0 | $self->throw("no codon table has been defined") | 
| 2045 |  |  |  |  |  |  | unless $self->{codontable}; | 
| 2046 |  |  |  |  |  |  |  | 
| 2047 | 0 | 0 |  |  |  | 0 | $self->throw("no RSCU table has been defined") | 
| 2048 |  |  |  |  |  |  | unless $self->{rscutable}; | 
| 2049 |  |  |  |  |  |  |  | 
| 2050 | 0 | 0 |  |  |  | 0 | $self->throw("no nucleotide sequences provided") | 
| 2051 |  |  |  |  |  |  | unless $seqobjs; | 
| 2052 |  |  |  |  |  |  |  | 
| 2053 | 0 | 0 |  |  |  | 0 | $self->throw("sequences argument is not an array reference") | 
| 2054 |  |  |  |  |  |  | unless ref($seqobjs) eq "ARRAY"; | 
| 2055 |  |  |  |  |  |  |  | 
| 2056 | 0 |  |  |  |  | 0 | foreach my $seqobj (@$seqobjs) | 
| 2057 |  |  |  |  |  |  | { | 
| 2058 | 0 | 0 |  |  |  | 0 | $self->throw(ref($seqobj) . " is not a Bio::Seq object") | 
| 2059 |  |  |  |  |  |  | unless $seqobj->isa("Bio::Seq"); | 
| 2060 |  |  |  |  |  |  |  | 
| 2061 | 0 | 0 |  |  |  | 0 | $self->throw("$seqobj is not a nucleotide sequence") | 
| 2062 |  |  |  |  |  |  | unless $seqobj->alphabet eq "dna"; | 
| 2063 |  |  |  |  |  |  | } | 
| 2064 |  |  |  |  |  |  |  | 
| 2065 | 0 |  |  |  |  | 0 | my ($graph, $format) = _make_graph( $seqobjs, | 
| 2066 |  |  |  |  |  |  | $window, | 
| 2067 |  |  |  |  |  |  | $self->{organism}, | 
| 2068 |  |  |  |  |  |  | $self->{codontable}, | 
| 2069 |  |  |  |  |  |  | $self->{rscutable}, | 
| 2070 |  |  |  |  |  |  | $self->{reversecodontable}); | 
| 2071 | 0 |  |  |  |  | 0 | return ($graph, $format); | 
| 2072 |  |  |  |  |  |  | } | 
| 2073 |  |  |  |  |  |  |  | 
| 2074 |  |  |  |  |  |  | =head2 make_dotplot | 
| 2075 |  |  |  |  |  |  |  | 
| 2076 |  |  |  |  |  |  | =cut | 
| 2077 |  |  |  |  |  |  |  | 
| 2078 |  |  |  |  |  |  | sub make_dotplot | 
| 2079 |  |  |  |  |  |  | { | 
| 2080 | 0 |  |  | 0 | 1 | 0 | my ($self, @args) = @_; | 
| 2081 |  |  |  |  |  |  |  | 
| 2082 | 0 | 0 |  |  |  | 0 | $self->throw("Graphing is not available") | 
| 2083 |  |  |  |  |  |  | unless $self->{graph}; | 
| 2084 |  |  |  |  |  |  |  | 
| 2085 | 0 |  |  |  |  | 0 | my ($seq1, $seq2, $window, $stringency) | 
| 2086 |  |  |  |  |  |  | = $self->_rearrange([qw(first second window stringency)], @args); | 
| 2087 |  |  |  |  |  |  |  | 
| 2088 | 0 |  | 0 |  |  | 0 | $window = $window || 10; | 
| 2089 | 0 |  | 0 |  |  | 0 | $stringency = $stringency || 10; | 
| 2090 |  |  |  |  |  |  |  | 
| 2091 | 0 | 0 | 0 |  |  | 0 | $self->throw("no nucleotide sequences provided") | 
| 2092 |  |  |  |  |  |  | unless ($seq1 && $seq2); | 
| 2093 |  |  |  |  |  |  |  | 
| 2094 | 0 |  |  |  |  | 0 | foreach my $seqobj ($seq1, $seq2) | 
| 2095 |  |  |  |  |  |  | { | 
| 2096 | 0 | 0 |  |  |  | 0 | $self->throw(ref($seqobj) . " is not a Bio::Seq object") | 
| 2097 |  |  |  |  |  |  | unless $seqobj->isa("Bio::Seq"); | 
| 2098 |  |  |  |  |  |  |  | 
| 2099 | 0 | 0 |  |  |  | 0 | $self->throw("$seqobj is not a nucleotide sequence") | 
| 2100 |  |  |  |  |  |  | unless $seqobj->alphabet eq "dna"; | 
| 2101 |  |  |  |  |  |  | } | 
| 2102 |  |  |  |  |  |  |  | 
| 2103 | 0 |  |  |  |  | 0 | my $graph = _dotplot( | 
| 2104 |  |  |  |  |  |  | $seq1->seq, | 
| 2105 |  |  |  |  |  |  | $seq2->seq, | 
| 2106 |  |  |  |  |  |  | $window, | 
| 2107 |  |  |  |  |  |  | $stringency | 
| 2108 |  |  |  |  |  |  | ); | 
| 2109 | 0 |  |  |  |  | 0 | return $graph; | 
| 2110 |  |  |  |  |  |  | } | 
| 2111 |  |  |  |  |  |  |  | 
| 2112 |  |  |  |  |  |  | =head2 import_seqs | 
| 2113 |  |  |  |  |  |  |  | 
| 2114 |  |  |  |  |  |  | NO TEST | 
| 2115 |  |  |  |  |  |  |  | 
| 2116 |  |  |  |  |  |  | =cut | 
| 2117 |  |  |  |  |  |  |  | 
| 2118 |  |  |  |  |  |  | sub import_seqs | 
| 2119 |  |  |  |  |  |  | { | 
| 2120 | 1 |  |  | 1 | 1 | 406 | my ($self, $path) = @_; | 
| 2121 | 1 | 50 |  |  |  | 22 | $self->throw("$path does not exist.") | 
| 2122 |  |  |  |  |  |  | if (! -e $path); | 
| 2123 | 1 |  | 33 |  |  | 11 | my $iterator = Bio::SeqIO->new(-file => $path) | 
| 2124 |  |  |  |  |  |  | || $self->throw("Can't parse $path!"); | 
| 2125 |  |  |  |  |  |  |  | 
| 2126 | 1 |  |  |  |  | 7655 | my ($filename, $dirs, $suffix) = fileparse($path, qr/\.[^.]*/x); | 
| 2127 | 1 | 50 |  |  |  | 11 | $suffix = substr($suffix, 1) if (substr($suffix, 0, 1) eq "."); | 
| 2128 | 1 | 50 |  |  |  | 7 | $suffix = 'fasta' if ($suffix eq 'fa'); | 
| 2129 |  |  |  |  |  |  |  | 
| 2130 | 1 |  |  |  |  | 6 | return ($iterator, $filename, $suffix); | 
| 2131 |  |  |  |  |  |  | } | 
| 2132 |  |  |  |  |  |  |  | 
| 2133 |  |  |  |  |  |  | =head2 export_seqs | 
| 2134 |  |  |  |  |  |  |  | 
| 2135 |  |  |  |  |  |  | NO TEST | 
| 2136 |  |  |  |  |  |  |  | 
| 2137 |  |  |  |  |  |  | =cut | 
| 2138 |  |  |  |  |  |  |  | 
| 2139 |  |  |  |  |  |  | sub export_seqs | 
| 2140 |  |  |  |  |  |  | { | 
| 2141 | 0 |  |  | 0 | 1 | 0 | my ($self, @args) = @_; | 
| 2142 |  |  |  |  |  |  |  | 
| 2143 | 0 |  |  |  |  | 0 | my ($filename, $outpath, $outformat, $seqarr) | 
| 2144 |  |  |  |  |  |  | = $self->_rearrange([qw( | 
| 2145 |  |  |  |  |  |  | filename | 
| 2146 |  |  |  |  |  |  | path | 
| 2147 |  |  |  |  |  |  | format | 
| 2148 |  |  |  |  |  |  | sequences)], @args); | 
| 2149 |  |  |  |  |  |  |  | 
| 2150 | 0 | 0 |  |  |  | 0 | $outpath = $outpath ? $outpath :  "."; | 
| 2151 | 0 | 0 |  |  |  | 0 | $outpath .= "/" if (substr($outpath, -1, 1) !~ /[ \/ ]/x); | 
| 2152 | 0 | 0 |  |  |  | 0 | $self->throw("$outpath does not exist") if (! -e $outpath); | 
| 2153 |  |  |  |  |  |  |  | 
| 2154 | 0 | 0 |  |  |  | 0 | $outformat = $outformat ? $outformat : "genbank"; | 
| 2155 | 0 |  |  |  |  | 0 | my $module = "Bio::SeqIO::$outformat"; | 
| 2156 | 0 |  |  |  |  | 0 | (my $require_name = $module . ".pm") =~ s{::}{/}xg; | 
| 2157 |  |  |  |  |  |  | my $flag = eval | 
| 2158 | 0 |  |  |  |  | 0 | { | 
| 2159 | 0 |  |  |  |  | 0 | require $require_name; | 
| 2160 |  |  |  |  |  |  | }; | 
| 2161 | 0 | 0 | 0 |  |  | 0 | $self->throw("$outformat is not a format recognized by BioPerl") | 
| 2162 |  |  |  |  |  |  | unless ($outformat && $flag); | 
| 2163 |  |  |  |  |  |  |  | 
| 2164 |  |  |  |  |  |  | #Long attributes that come in from a genbank file will have corruption | 
| 2165 |  |  |  |  |  |  | #remove spaces and reattribute to fix bbs in genbank file ): | 
| 2166 | 0 | 0 |  |  |  | 0 | if ($outformat eq 'genbank') | 
| 2167 |  |  |  |  |  |  | { | 
| 2168 | 0 |  |  |  |  | 0 | foreach my $seq (@{$seqarr}) | 
|  | 0 |  |  |  |  | 0 |  | 
| 2169 |  |  |  |  |  |  | { | 
| 2170 | 0 |  |  |  |  | 0 | foreach my $feat ($seq->get_SeqFeatures) | 
| 2171 |  |  |  |  |  |  | { | 
| 2172 | 0 |  |  |  |  | 0 | foreach my $tag ($feat->get_all_tags()) | 
| 2173 |  |  |  |  |  |  | { | 
| 2174 | 0 |  |  |  |  | 0 | my $value = join(q{}, $feat->get_tag_values($tag)); | 
| 2175 | 0 |  |  |  |  | 0 | $value =~ s/\s//xg; | 
| 2176 | 0 |  |  |  |  | 0 | $feat->remove_tag($tag); | 
| 2177 | 0 |  |  |  |  | 0 | $feat->add_tag_value($tag, $value); | 
| 2178 |  |  |  |  |  |  | } | 
| 2179 |  |  |  |  |  |  | } | 
| 2180 |  |  |  |  |  |  | } | 
| 2181 |  |  |  |  |  |  | } | 
| 2182 | 0 |  |  |  |  | 0 | my $path = $outpath . $filename; | 
| 2183 | 0 | 0 |  |  |  | 0 | open (my $OUTFH, '>', $path ) || $self->throw("Can't write to $path? $!"); | 
| 2184 | 0 |  |  |  |  | 0 | my $FOUT = Bio::SeqIO->new(-fh => $OUTFH, -format => $outformat); | 
| 2185 | 0 |  |  |  |  | 0 | $FOUT->write_seq($_) foreach (@$seqarr); | 
| 2186 | 0 |  |  |  |  | 0 | close $OUTFH; | 
| 2187 | 0 |  |  |  |  | 0 | return; | 
| 2188 |  |  |  |  |  |  | } | 
| 2189 |  |  |  |  |  |  |  | 
| 2190 |  |  |  |  |  |  | =head2 random_dna | 
| 2191 |  |  |  |  |  |  |  | 
| 2192 |  |  |  |  |  |  | =cut | 
| 2193 |  |  |  |  |  |  |  | 
| 2194 |  |  |  |  |  |  | sub random_dna | 
| 2195 |  |  |  |  |  |  | { | 
| 2196 | 30000 |  |  | 30000 | 1 | 252220 | my ($self, @args) = @_; | 
| 2197 |  |  |  |  |  |  |  | 
| 2198 | 30000 |  |  |  |  | 124475 | my ($rlen, $rgc, $rstop) | 
| 2199 |  |  |  |  |  |  | = $self->_rearrange([qw( | 
| 2200 |  |  |  |  |  |  | length | 
| 2201 |  |  |  |  |  |  | gc_percentage | 
| 2202 |  |  |  |  |  |  | no_stops)], @args); | 
| 2203 |  |  |  |  |  |  |  | 
| 2204 | 30000 | 50 | 33 |  |  | 735445 | $self->throw("no codon table has been defined") | 
| 2205 |  |  |  |  |  |  | if ($rstop && ! $self->{codontable}); | 
| 2206 |  |  |  |  |  |  |  | 
| 2207 | 30000 |  | 100 |  |  | 73576 | $rgc = $rgc || 50; | 
| 2208 | 30000 | 50 | 33 |  |  | 159628 | $self->throw("gc_percentage must be between 0 and 100") | 
|  |  |  | 33 |  |  |  |  | 
| 2209 |  |  |  |  |  |  | if ($rgc && ($rgc < 0 || $rgc > 100)); | 
| 2210 |  |  |  |  |  |  |  | 
| 2211 | 30000 | 50 | 33 |  |  | 145810 | if (! $rlen || $rlen < 1) | 
|  |  | 50 |  |  |  |  |  | 
| 2212 |  |  |  |  |  |  | { | 
| 2213 | 0 |  |  |  |  | 0 | return q{}; | 
| 2214 |  |  |  |  |  |  | } | 
| 2215 |  |  |  |  |  |  | elsif ($rlen == 1) | 
| 2216 |  |  |  |  |  |  | { | 
| 2217 | 30000 | 50 |  |  |  | 103832 | return $rgc ? _randombase_weighted($rgc)  : _randombase; | 
| 2218 |  |  |  |  |  |  | } | 
| 2219 | 0 |  |  |  |  | 0 | return _randomDNA($rlen, $rgc, $rstop, $self->{codontable}); | 
| 2220 |  |  |  |  |  |  | } | 
| 2221 |  |  |  |  |  |  |  | 
| 2222 |  |  |  |  |  |  | =head2 replace_ambiguous_bases | 
| 2223 |  |  |  |  |  |  |  | 
| 2224 |  |  |  |  |  |  | =cut | 
| 2225 |  |  |  |  |  |  |  | 
| 2226 |  |  |  |  |  |  | sub replace_ambiguous_bases | 
| 2227 |  |  |  |  |  |  | { | 
| 2228 | 3 |  |  | 3 | 1 | 18 | my ($self, $seq) = @_; | 
| 2229 |  |  |  |  |  |  |  | 
| 2230 | 3 | 50 |  |  |  | 8 | $self->throw("no sequence provided ") | 
| 2231 |  |  |  |  |  |  | unless ($seq); | 
| 2232 |  |  |  |  |  |  |  | 
| 2233 | 3 |  |  |  |  | 8 | my $str = $self->_stripdown($seq, q{}, 1); | 
| 2234 |  |  |  |  |  |  |  | 
| 2235 | 3 |  |  |  |  | 13 | my $newstr = _replace_ambiguous_bases($str); | 
| 2236 |  |  |  |  |  |  |  | 
| 2237 | 3 | 50 |  |  |  | 9 | if (ref $seq) | 
| 2238 |  |  |  |  |  |  | { | 
| 2239 | 0 |  |  |  |  | 0 | my $newobj = $seq->clone(); | 
| 2240 | 0 | 0 |  |  |  | 0 | my $desc = $newobj->desc ?  $newobj->desc . q{ } : q{}; | 
| 2241 | 0 |  |  |  |  | 0 | $desc .= "deambiguated"; | 
| 2242 | 0 |  |  |  |  | 0 | $newobj->seq($newstr); | 
| 2243 | 0 |  |  |  |  | 0 | $newobj->desc($desc); | 
| 2244 | 0 |  |  |  |  | 0 | return $newobj; | 
| 2245 |  |  |  |  |  |  | } | 
| 2246 |  |  |  |  |  |  | else | 
| 2247 |  |  |  |  |  |  | { | 
| 2248 | 3 |  |  |  |  | 10 | return $newstr; | 
| 2249 |  |  |  |  |  |  | } | 
| 2250 |  |  |  |  |  |  | } | 
| 2251 |  |  |  |  |  |  |  | 
| 2252 |  |  |  |  |  |  | =head1 PLEASANTRIES | 
| 2253 |  |  |  |  |  |  |  | 
| 2254 |  |  |  |  |  |  | =head2 pad | 
| 2255 |  |  |  |  |  |  |  | 
| 2256 |  |  |  |  |  |  | my $name = 5; | 
| 2257 |  |  |  |  |  |  | my $nice = $GD->pad($name, 3); | 
| 2258 |  |  |  |  |  |  | $nice == "005" || die; | 
| 2259 |  |  |  |  |  |  |  | 
| 2260 |  |  |  |  |  |  | $name = "oligo"; | 
| 2261 |  |  |  |  |  |  | $nice = $GD->pad($name, 7, "_"); | 
| 2262 |  |  |  |  |  |  | $nice == "__oligo" || die; | 
| 2263 |  |  |  |  |  |  |  | 
| 2264 |  |  |  |  |  |  | Pads an integer with leading zeroes (by default) or any provided set of | 
| 2265 |  |  |  |  |  |  | characters. This is useful both to make reports pretty and to standardize the | 
| 2266 |  |  |  |  |  |  | length of designations. | 
| 2267 |  |  |  |  |  |  |  | 
| 2268 |  |  |  |  |  |  | =cut | 
| 2269 |  |  |  |  |  |  |  | 
| 2270 |  |  |  |  |  |  | sub pad | 
| 2271 |  |  |  |  |  |  | { | 
| 2272 | 0 |  |  | 0 | 1 | 0 | my ($self, $num, $thickness, $chars) = @_; | 
| 2273 | 0 |  |  |  |  | 0 | my $t = $num; | 
| 2274 | 0 |  | 0 |  |  | 0 | $chars = $chars || "0"; | 
| 2275 | 0 |  |  |  |  | 0 | $t = $chars . $t while (length($t) < $thickness); | 
| 2276 | 0 |  |  |  |  | 0 | return $t; | 
| 2277 |  |  |  |  |  |  | } | 
| 2278 |  |  |  |  |  |  |  | 
| 2279 |  |  |  |  |  |  | =head2 attitude | 
| 2280 |  |  |  |  |  |  |  | 
| 2281 |  |  |  |  |  |  | my $adverb = $GD->attitude(); | 
| 2282 |  |  |  |  |  |  |  | 
| 2283 |  |  |  |  |  |  | Ask GeneDesign how it handled your request. | 
| 2284 |  |  |  |  |  |  |  | 
| 2285 |  |  |  |  |  |  | =cut | 
| 2286 |  |  |  |  |  |  |  | 
| 2287 |  |  |  |  |  |  | sub attitude | 
| 2288 |  |  |  |  |  |  | { | 
| 2289 | 0 |  |  | 0 | 1 | 0 | my @adverbs = qw(Elegantly Energetically Enthusiastically Excitedly Daintily | 
| 2290 |  |  |  |  |  |  | Deliberately Diligently Dreamily Courageously Cooly Cleverly Cheerfully | 
| 2291 |  |  |  |  |  |  | Carefully Calmly Briskly Blindly Bashfully Absentmindedly Awkwardly | 
| 2292 |  |  |  |  |  |  | Faithfully Ferociously Fervently Fiercely Fondly Gently Gleefully Gratefully | 
| 2293 |  |  |  |  |  |  | Gracefully Happily Helpfully Heroically Honestly Joyfully Jubilantly | 
| 2294 |  |  |  |  |  |  | Jovially Keenly Kindly Knowingly Kookily Loftily Lovingly Loyally | 
| 2295 |  |  |  |  |  |  | Majestically Mechanically Merrily Mostly Neatly Nicely Obediently Officially | 
| 2296 |  |  |  |  |  |  | Optimistically Patiently Perfectly Playfully Positively Powerfully | 
| 2297 |  |  |  |  |  |  | Punctually Properly Promptly Quaintly Quickly Quirkily Rapidly Readily | 
| 2298 |  |  |  |  |  |  | Reassuringly Righteously Sedately Seriously Sharply Shyly Silently Smoothly | 
| 2299 |  |  |  |  |  |  | Solemnly Speedily Strictly Successfully Suddenly Sweetly Swiftly Tenderly | 
| 2300 |  |  |  |  |  |  | Thankfully Throroughly Thoughtfully Triumphantly Ultimately Unabashedly | 
| 2301 |  |  |  |  |  |  | Utterly Upliftingly Urgently Usefully Valiantly Victoriously Vivaciously | 
| 2302 |  |  |  |  |  |  | Warmly Wholly Wisely Wonderfully Yawningly Zealously Zestfully | 
| 2303 |  |  |  |  |  |  | ); | 
| 2304 | 0 |  |  |  |  | 0 | my $index = _random_index(scalar(@adverbs)); | 
| 2305 | 0 |  |  |  |  | 0 | return $adverbs[$index]; | 
| 2306 |  |  |  |  |  |  | } | 
| 2307 |  |  |  |  |  |  |  | 
| 2308 |  |  |  |  |  |  | =head2 endslash | 
| 2309 |  |  |  |  |  |  |  | 
| 2310 |  |  |  |  |  |  | =cut | 
| 2311 |  |  |  |  |  |  |  | 
| 2312 |  |  |  |  |  |  | sub endslash | 
| 2313 |  |  |  |  |  |  | { | 
| 2314 | 0 |  |  | 0 | 1 | 0 | my ($self, $path) = @_; | 
| 2315 | 0 | 0 |  |  |  | 0 | if (substr($path, -1, 1) ne q{/}) | 
| 2316 |  |  |  |  |  |  | { | 
| 2317 | 0 |  |  |  |  | 0 | $path .= q{/}; | 
| 2318 |  |  |  |  |  |  | } | 
| 2319 | 0 |  |  |  |  | 0 | return $path; | 
| 2320 |  |  |  |  |  |  | } | 
| 2321 |  |  |  |  |  |  |  | 
| 2322 |  |  |  |  |  |  | =head2 _stripdown | 
| 2323 |  |  |  |  |  |  |  | 
| 2324 |  |  |  |  |  |  | =cut | 
| 2325 |  |  |  |  |  |  |  | 
| 2326 |  |  |  |  |  |  | sub _stripdown | 
| 2327 |  |  |  |  |  |  | { | 
| 2328 | 40059 |  |  | 40059 |  | 62111 | my ($self, $seqarg, $type, $enz_allowed) = @_; | 
| 2329 |  |  |  |  |  |  |  | 
| 2330 | 40059 |  | 100 |  |  | 121728 | $enz_allowed = $enz_allowed || 0; | 
| 2331 | 40059 | 100 |  |  |  | 114096 | my @seqs = ref($seqarg) eq "ARRAY" ? @$seqarg  : ($seqarg); | 
| 2332 | 40059 |  |  |  |  | 45371 | my @list; | 
| 2333 | 40059 |  |  |  |  | 64486 | foreach my $seq (@seqs) | 
| 2334 |  |  |  |  |  |  | { | 
| 2335 | 40059 |  |  |  |  | 48241 | my $str = $seq; | 
| 2336 | 40059 |  |  |  |  | 52050 | my $ref = ref($seq); | 
| 2337 | 40059 | 100 |  |  |  | 77438 | if ($ref) | 
| 2338 |  |  |  |  |  |  | { | 
| 2339 | 40027 |  |  |  |  | 82807 | my $bit = $self->_checkref($seq, $enz_allowed); | 
| 2340 | 40027 | 50 |  |  |  | 92721 | $self->throw("object $ref is not a compatible object $bit") if ($bit < 1); | 
| 2341 | 40027 | 50 |  |  |  | 109572 | $str = ref($seq->seq)  ? $seq->seq->seq  : $seq->seq; | 
| 2342 |  |  |  |  |  |  | } | 
| 2343 | 40059 |  |  |  |  | 1073206 | push @list, uc $str; | 
| 2344 |  |  |  |  |  |  | } | 
| 2345 | 40059 | 100 |  |  |  | 102547 | return \@list if ($type eq "ARRAY"); | 
| 2346 | 40056 |  |  |  |  | 116332 | return $list[0]; | 
| 2347 |  |  |  |  |  |  | } | 
| 2348 |  |  |  |  |  |  |  | 
| 2349 |  |  |  |  |  |  | =head2 _checkref | 
| 2350 |  |  |  |  |  |  |  | 
| 2351 |  |  |  |  |  |  | =cut | 
| 2352 |  |  |  |  |  |  |  | 
| 2353 |  |  |  |  |  |  | sub _checkref | 
| 2354 |  |  |  |  |  |  | { | 
| 2355 | 40027 |  |  | 40027 |  | 56173 | my ($self, $pobj, $enz_allowed) = @_; | 
| 2356 | 40027 |  |  |  |  | 50510 | my $ref = ref $pobj; | 
| 2357 | 40027 | 50 |  |  |  | 76121 | return -1 if (! $ref); | 
| 2358 | 40027 |  | 100 |  |  | 116398 | $enz_allowed = $enz_allowed || 0; | 
| 2359 | 40027 |  |  |  |  | 58857 | my ($bioseq, $bioseqfeat) = (0, 0); | 
| 2360 | 40027 |  |  |  |  | 112163 | $bioseq = $pobj->isa("Bio::Seq"); | 
| 2361 | 40027 |  |  |  |  | 166356 | $bioseqfeat = $pobj->isa("Bio::SeqFeatureI"); | 
| 2362 | 40027 | 100 |  |  |  | 83598 | if ($enz_allowed) | 
| 2363 |  |  |  |  |  |  | { | 
| 2364 | 20016 |  |  |  |  | 102778 | $enz_allowed = $pobj->isa("Bio::GeneDesign::RestrictionEnzyme"); | 
| 2365 |  |  |  |  |  |  | } | 
| 2366 | 40027 |  |  |  |  | 97527 | return $bioseq + $bioseqfeat + $enz_allowed; | 
| 2367 |  |  |  |  |  |  | } | 
| 2368 |  |  |  |  |  |  |  | 
| 2369 |  |  |  |  |  |  | 1; | 
| 2370 |  |  |  |  |  |  |  | 
| 2371 |  |  |  |  |  |  | __END__ | 
| 2372 |  |  |  |  |  |  |  | 
| 2373 |  |  |  |  |  |  | =head1 COPYRIGHT AND LICENSE | 
| 2374 |  |  |  |  |  |  |  | 
| 2375 |  |  |  |  |  |  | Copyright (c) 2013, GeneDesign developers | 
| 2376 |  |  |  |  |  |  | All rights reserved. | 
| 2377 |  |  |  |  |  |  |  | 
| 2378 |  |  |  |  |  |  | Redistribution and use in source and binary forms, with or without modification, | 
| 2379 |  |  |  |  |  |  | are permitted provided that the following conditions are met: | 
| 2380 |  |  |  |  |  |  |  | 
| 2381 |  |  |  |  |  |  | * Redistributions of source code must retain the above copyright notice, this | 
| 2382 |  |  |  |  |  |  | list of conditions and the following disclaimer. | 
| 2383 |  |  |  |  |  |  |  | 
| 2384 |  |  |  |  |  |  | * Redistributions in binary form must reproduce the above copyright notice, this | 
| 2385 |  |  |  |  |  |  | list of conditions and the following disclaimer in the documentation and/or | 
| 2386 |  |  |  |  |  |  | other materials provided with the distribution. | 
| 2387 |  |  |  |  |  |  |  | 
| 2388 |  |  |  |  |  |  | * The names of Johns Hopkins, the Joint Genome Institute, the Lawrence Berkeley | 
| 2389 |  |  |  |  |  |  | National Laboratory, the Department of Energy, and the GeneDesign developers may | 
| 2390 |  |  |  |  |  |  | not be used to endorse or promote products derived from this software without | 
| 2391 |  |  |  |  |  |  | specific prior written permission. | 
| 2392 |  |  |  |  |  |  |  | 
| 2393 |  |  |  |  |  |  | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | 
| 2394 |  |  |  |  |  |  | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | 
| 2395 |  |  |  |  |  |  | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | 
| 2396 |  |  |  |  |  |  | DISCLAIMED. IN NO EVENT SHALL THE DEVELOPERS BE LIABLE FOR ANY DIRECT, INDIRECT, | 
| 2397 |  |  |  |  |  |  | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 
| 2398 |  |  |  |  |  |  | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | 
| 2399 |  |  |  |  |  |  | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | 
| 2400 |  |  |  |  |  |  | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE | 
| 2401 |  |  |  |  |  |  | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF | 
| 2402 |  |  |  |  |  |  | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 
| 2403 |  |  |  |  |  |  |  | 
| 2404 |  |  |  |  |  |  | =cut |