| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | # | 
| 2 |  |  |  |  |  |  | # GeneDesign libraries for Random DNA generation | 
| 3 |  |  |  |  |  |  | # | 
| 4 |  |  |  |  |  |  |  | 
| 5 |  |  |  |  |  |  | =head1 NAME | 
| 6 |  |  |  |  |  |  |  | 
| 7 |  |  |  |  |  |  | GeneDesign::Random | 
| 8 |  |  |  |  |  |  |  | 
| 9 |  |  |  |  |  |  | =head1 VERSION | 
| 10 |  |  |  |  |  |  |  | 
| 11 |  |  |  |  |  |  | Version 5.52 | 
| 12 |  |  |  |  |  |  |  | 
| 13 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 14 |  |  |  |  |  |  |  | 
| 15 |  |  |  |  |  |  | Random DNA Generators | 
| 16 |  |  |  |  |  |  |  | 
| 17 |  |  |  |  |  |  | =head1 AUTHOR | 
| 18 |  |  |  |  |  |  |  | 
| 19 |  |  |  |  |  |  | Sarah Richardson <SMRichardson@lbl.gov> | 
| 20 |  |  |  |  |  |  |  | 
| 21 |  |  |  |  |  |  | =cut | 
| 22 |  |  |  |  |  |  |  | 
| 23 |  |  |  |  |  |  | package Bio::GeneDesign::Random; | 
| 24 |  |  |  |  |  |  | require Exporter; | 
| 25 |  |  |  |  |  |  |  | 
| 26 | 11 |  |  | 11 |  | 7174 | use Bio::GeneDesign::Codons qw(_find_in_frame); | 
|  | 11 |  |  |  |  | 39 |  | 
|  | 11 |  |  |  |  | 1133 |  | 
| 27 | 11 |  |  | 11 |  | 78 | use Bio::GeneDesign::Basic qw(@BASES %NTIDES); | 
|  | 11 |  |  |  |  | 27 |  | 
|  | 11 |  |  |  |  | 1617 |  | 
| 28 | 11 |  |  | 11 |  | 67 | use List::Util qw(shuffle); | 
|  | 11 |  |  |  |  | 24 |  | 
|  | 11 |  |  |  |  | 859 |  | 
| 29 |  |  |  |  |  |  |  | 
| 30 | 11 |  |  | 11 |  | 68 | use strict; | 
|  | 11 |  |  |  |  | 22 |  | 
|  | 11 |  |  |  |  | 329 |  | 
| 31 | 11 |  |  | 11 |  | 60 | use warnings; | 
|  | 11 |  |  |  |  | 18 |  | 
|  | 11 |  |  |  |  | 569 |  | 
| 32 |  |  |  |  |  |  |  | 
| 33 |  |  |  |  |  |  | our $VERSION = 5.52; | 
| 34 |  |  |  |  |  |  |  | 
| 35 | 11 |  |  | 11 |  | 63 | use base qw(Exporter); | 
|  | 11 |  |  |  |  | 30 |  | 
|  | 11 |  |  |  |  | 9874 |  | 
| 36 |  |  |  |  |  |  | our @EXPORT_OK = qw( | 
| 37 |  |  |  |  |  |  | _randomDNA | 
| 38 |  |  |  |  |  |  | _randombase | 
| 39 |  |  |  |  |  |  | _randombase_weighted | 
| 40 |  |  |  |  |  |  | _replace_ambiguous_bases | 
| 41 |  |  |  |  |  |  | _weighted_rand | 
| 42 |  |  |  |  |  |  | _random_index | 
| 43 |  |  |  |  |  |  | ); | 
| 44 |  |  |  |  |  |  | our %EXPORT_TAGS =  (GD => \@EXPORT_OK); | 
| 45 |  |  |  |  |  |  |  | 
| 46 |  |  |  |  |  |  | =head1 Functions | 
| 47 |  |  |  |  |  |  |  | 
| 48 |  |  |  |  |  |  | =head2 _randomDNA() | 
| 49 |  |  |  |  |  |  |  | 
| 50 |  |  |  |  |  |  | takes a target length and a GC percentage and generates a random nucleotide | 
| 51 |  |  |  |  |  |  | sequence, with or without stops in the first frame | 
| 52 |  |  |  |  |  |  | in: nucleotide sequence length (scalar), | 
| 53 |  |  |  |  |  |  | GC percentage (0 <= scalar <= 100), | 
| 54 |  |  |  |  |  |  | stop codon prevention(0 stops allowed, else no stops), | 
| 55 |  |  |  |  |  |  | codon table (hash reference) | 
| 56 |  |  |  |  |  |  | out: nucleotide sequence (string) | 
| 57 |  |  |  |  |  |  |  | 
| 58 |  |  |  |  |  |  | =cut | 
| 59 |  |  |  |  |  |  |  | 
| 60 |  |  |  |  |  |  | sub _randomDNA | 
| 61 |  |  |  |  |  |  | { | 
| 62 | 0 |  |  | 0 |  | 0 | my ($len, $GCperc, $stopswit, $codon_t) = @_; | 
| 63 |  |  |  |  |  |  |  | 
| 64 | 0 | 0 |  |  |  | 0 | return q{} if ($len == 0); | 
| 65 | 0 | 0 |  |  |  | 0 | return _randombase_weighted($GCperc) if ($len == 1); | 
| 66 |  |  |  |  |  |  |  | 
| 67 |  |  |  |  |  |  | #GC | 
| 68 | 0 |  |  |  |  | 0 | my $GCtotal = sprintf "%.0f",  $GCperc * $len / 100; | 
| 69 | 0 |  |  |  |  | 0 | my $Gcount  = sprintf "%.0f", rand( $GCtotal ); | 
| 70 | 0 |  |  |  |  | 0 | my $Gstr = 'G' x $Gcount; | 
| 71 | 0 |  |  |  |  | 0 | my $Ccount  = $GCtotal - $Gcount; | 
| 72 | 0 |  |  |  |  | 0 | my $Cstr = 'C' x $Ccount; | 
| 73 |  |  |  |  |  |  |  | 
| 74 |  |  |  |  |  |  | #AT | 
| 75 | 0 |  |  |  |  | 0 | my $ATtotal = $len - $GCtotal; | 
| 76 | 0 |  |  |  |  | 0 | my $Acount  = sprintf "%.0f", rand( $ATtotal ); | 
| 77 | 0 |  |  |  |  | 0 | my $Astr = 'A' x $Acount; | 
| 78 | 0 |  |  |  |  | 0 | my $Tcount  = $ATtotal - $Acount; | 
| 79 | 0 |  |  |  |  | 0 | my $Tstr = 'T' x $Tcount; | 
| 80 |  |  |  |  |  |  |  | 
| 81 | 0 |  |  |  |  | 0 | my @randomarray = shuffle( split( '', $Gstr . $Cstr . $Astr . $Tstr) ); | 
| 82 | 0 |  |  |  |  | 0 | my $DNA = join('', @randomarray); | 
| 83 |  |  |  |  |  |  |  | 
| 84 | 0 | 0 |  |  |  | 0 | if ($stopswit) | 
| 85 |  |  |  |  |  |  | { | 
| 86 | 0 |  |  |  |  | 0 | my $stoplist = _find_in_frame($DNA, "*", $codon_t); | 
| 87 | 0 |  |  |  |  | 0 | foreach my $pos (@$stoplist) | 
| 88 |  |  |  |  |  |  | { | 
| 89 | 0 |  |  |  |  | 0 | my $bit = substr($DNA, (($pos + 1) * 3) - 3, 3); | 
| 90 |  |  |  |  |  |  | #substr($DNA, (($pos + 1) * 3) - 3, 3) = scalar reverse($bit); | 
| 91 | 0 |  |  |  |  | 0 | substr($DNA, (($pos + 1) * 3) - 3, 3, scalar reverse($bit)); | 
| 92 | 0 | 0 |  |  |  | 0 | if (int(rand(1)+.5) == 1) | 
| 93 |  |  |  |  |  |  | { | 
| 94 | 0 |  |  |  |  | 0 | my $bat = substr($DNA, (($pos + 1) * 3) - 2, 2); | 
| 95 |  |  |  |  |  |  | #substr($DNA, (($pos + 1) * 3) - 2, 2) = scalar reverse($bat); | 
| 96 | 0 |  |  |  |  | 0 | substr($DNA, (($pos + 1) * 3) - 2, 2, scalar reverse($bat)); | 
| 97 |  |  |  |  |  |  | } | 
| 98 |  |  |  |  |  |  | } | 
| 99 |  |  |  |  |  |  | } | 
| 100 | 0 |  |  |  |  | 0 | return $DNA; | 
| 101 |  |  |  |  |  |  | } | 
| 102 |  |  |  |  |  |  |  | 
| 103 |  |  |  |  |  |  | =head2 _randombase() | 
| 104 |  |  |  |  |  |  |  | 
| 105 |  |  |  |  |  |  | when you just want one random base | 
| 106 |  |  |  |  |  |  |  | 
| 107 |  |  |  |  |  |  | =cut | 
| 108 |  |  |  |  |  |  |  | 
| 109 |  |  |  |  |  |  | sub _randombase | 
| 110 |  |  |  |  |  |  | { | 
| 111 | 0 |  |  | 0 |  | 0 | my $int = _random_index(4); | 
| 112 | 0 |  |  |  |  | 0 | return $BASES[$int]; | 
| 113 |  |  |  |  |  |  | } | 
| 114 |  |  |  |  |  |  |  | 
| 115 |  |  |  |  |  |  | =head2 _random_weighted_base() | 
| 116 |  |  |  |  |  |  |  | 
| 117 |  |  |  |  |  |  | when you just want one random but weighted base | 
| 118 |  |  |  |  |  |  |  | 
| 119 |  |  |  |  |  |  | =cut | 
| 120 |  |  |  |  |  |  |  | 
| 121 |  |  |  |  |  |  | sub _randombase_weighted | 
| 122 |  |  |  |  |  |  | { | 
| 123 | 30000 |  |  | 30000 |  | 41280 | my ($GCp) = @_; | 
| 124 | 30000 | 50 |  |  |  | 60461 | return _randombase() unless ($GCp); | 
| 125 |  |  |  |  |  |  |  | 
| 126 | 30000 |  |  |  |  | 46004 | my $GCcount = $GCp/200 ; | 
| 127 | 30000 |  |  |  |  | 44047 | my $ATcount = (100-$GCp)/200; | 
| 128 | 30000 |  |  |  |  | 101368 | my $weight = {G => $GCcount, C => $GCcount, T => $ATcount, A => $ATcount}; | 
| 129 | 30000 |  |  |  |  | 55480 | return _weighted_rand($weight); | 
| 130 |  |  |  |  |  |  | } | 
| 131 |  |  |  |  |  |  |  | 
| 132 |  |  |  |  |  |  | =head2 _weighted_rand() | 
| 133 |  |  |  |  |  |  |  | 
| 134 |  |  |  |  |  |  | =cut | 
| 135 |  |  |  |  |  |  |  | 
| 136 |  |  |  |  |  |  | sub _weighted_rand | 
| 137 |  |  |  |  |  |  | { | 
| 138 | 130000 |  |  | 130000 |  | 161443 | my ($dist) = @_; | 
| 139 | 130000 | 50 |  |  |  | 278211 | croak ("no distribution provided") unless ($dist); | 
| 140 | 130000 |  |  |  |  | 135146 | my ($key, $weight); | 
| 141 |  |  |  |  |  |  |  | 
| 142 | 130000 |  |  |  |  | 129871 | while (1) | 
| 143 |  |  |  |  |  |  | { | 
| 144 | 130052 |  |  |  |  | 162784 | my $rand = rand; | 
| 145 | 130052 |  |  |  |  | 443642 | while ( ($key, $weight) = each %$dist ) | 
| 146 |  |  |  |  |  |  | { | 
| 147 | 280365 | 100 |  |  |  | 1341933 | return $key if ($rand -= $weight) < 0; | 
| 148 |  |  |  |  |  |  | } | 
| 149 |  |  |  |  |  |  | } | 
| 150 | 0 |  |  |  |  | 0 | return; | 
| 151 |  |  |  |  |  |  | } | 
| 152 |  |  |  |  |  |  |  | 
| 153 |  |  |  |  |  |  | =head2 _replace_ambiguous_bases | 
| 154 |  |  |  |  |  |  |  | 
| 155 |  |  |  |  |  |  | =cut | 
| 156 |  |  |  |  |  |  |  | 
| 157 |  |  |  |  |  |  | sub _replace_ambiguous_bases | 
| 158 |  |  |  |  |  |  | { | 
| 159 | 3 |  |  | 3 |  | 6 | my ($seq) = @_; | 
| 160 | 3 |  |  |  |  | 6 | my $new = q{}; | 
| 161 | 3 |  |  |  |  | 12 | foreach my $char (split(q{}, $seq)) | 
| 162 |  |  |  |  |  |  | { | 
| 163 | 26 |  |  |  |  | 28 | my @class = @{$NTIDES{$char}}; | 
|  | 26 |  |  |  |  | 65 |  | 
| 164 | 26 |  |  |  |  | 51 | my $index = _random_index(scalar(@class)); | 
| 165 | 26 |  |  |  |  | 124 | $new .= $class[$index]; | 
| 166 |  |  |  |  |  |  | } | 
| 167 | 3 |  |  |  |  | 13 | return $new; | 
| 168 |  |  |  |  |  |  | } | 
| 169 |  |  |  |  |  |  |  | 
| 170 |  |  |  |  |  |  | =head2 _random_index | 
| 171 |  |  |  |  |  |  |  | 
| 172 |  |  |  |  |  |  | =cut | 
| 173 |  |  |  |  |  |  |  | 
| 174 |  |  |  |  |  |  | sub _random_index | 
| 175 |  |  |  |  |  |  | { | 
| 176 | 100026 |  |  | 100026 |  | 122457 | my ($array_size) = @_; | 
| 177 | 100026 |  |  |  |  | 332942 | return (sprintf "%.0f", rand($array_size)) % $array_size; | 
| 178 |  |  |  |  |  |  | } | 
| 179 |  |  |  |  |  |  |  | 
| 180 |  |  |  |  |  |  | 1; | 
| 181 |  |  |  |  |  |  |  | 
| 182 |  |  |  |  |  |  | __END__ | 
| 183 |  |  |  |  |  |  |  | 
| 184 |  |  |  |  |  |  | =head1 COPYRIGHT AND LICENSE | 
| 185 |  |  |  |  |  |  |  | 
| 186 |  |  |  |  |  |  | Copyright (c) 2013, GeneDesign developers | 
| 187 |  |  |  |  |  |  | All rights reserved. | 
| 188 |  |  |  |  |  |  |  | 
| 189 |  |  |  |  |  |  | Redistribution and use in source and binary forms, with or without modification, | 
| 190 |  |  |  |  |  |  | are permitted provided that the following conditions are met: | 
| 191 |  |  |  |  |  |  |  | 
| 192 |  |  |  |  |  |  | * Redistributions of source code must retain the above copyright notice, this | 
| 193 |  |  |  |  |  |  | list of conditions and the following disclaimer. | 
| 194 |  |  |  |  |  |  |  | 
| 195 |  |  |  |  |  |  | * Redistributions in binary form must reproduce the above copyright notice, this | 
| 196 |  |  |  |  |  |  | list of conditions and the following disclaimer in the documentation and/or | 
| 197 |  |  |  |  |  |  | other materials provided with the distribution. | 
| 198 |  |  |  |  |  |  |  | 
| 199 |  |  |  |  |  |  | * The names of Johns Hopkins, the Joint Genome Institute, the Lawrence Berkeley | 
| 200 |  |  |  |  |  |  | National Laboratory, the Department of Energy, and the GeneDesign developers may | 
| 201 |  |  |  |  |  |  | not be used to endorse or promote products derived from this software without | 
| 202 |  |  |  |  |  |  | specific prior written permission. | 
| 203 |  |  |  |  |  |  |  | 
| 204 |  |  |  |  |  |  | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | 
| 205 |  |  |  |  |  |  | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | 
| 206 |  |  |  |  |  |  | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | 
| 207 |  |  |  |  |  |  | DISCLAIMED. IN NO EVENT SHALL THE DEVELOPERS BE LIABLE FOR ANY DIRECT, INDIRECT, | 
| 208 |  |  |  |  |  |  | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | 
| 209 |  |  |  |  |  |  | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | 
| 210 |  |  |  |  |  |  | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | 
| 211 |  |  |  |  |  |  | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE | 
| 212 |  |  |  |  |  |  | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF | 
| 213 |  |  |  |  |  |  | ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 
| 214 |  |  |  |  |  |  |  | 
| 215 |  |  |  |  |  |  | =cut |