File Coverage

blib/lib/Bio/GeneDesign/Random.pm

Criterion	Covered	Total	%
statement	41	72	56.9
branch	4	14	28.5
condition			n/a
subroutine	10	12	83.3
pod			n/a
total	55	98	56.1

line	stmt	bran	sub	time	code
1					#
2					# GeneDesign libraries for Random DNA generation
3					#
4
5					=head1 NAME
6
7					GeneDesign::Random
8
9					=head1 VERSION
10
11					Version 5.56
12
13					=head1 DESCRIPTION
14
15					Random DNA Generators
16
17					=head1 AUTHOR
18
19					Sarah Richardson
20
21					=cut
22
23					package Bio::GeneDesign::Random;
24					require Exporter;
25
26	11		11	4429	use Bio::GeneDesign::Codons qw(_find_in_frame);
	11			35
	11			687
27	11		11	71	use Bio::GeneDesign::Basic qw(@BASES %NTIDES);
	11			23
	11			1200
28	11		11	70	use List::Util qw(shuffle);
	11			21
	11			517
29
30	11		11	66	use strict;
	11			29
	11			185
31	11		11	49	use warnings;
	11			19
	11			459
32
33					our $VERSION = 5.56;
34
35	11		11	69	use base qw(Exporter);
	11			22
	11			7293
36					our @EXPORT_OK = qw(
37					_randomDNA
38					_randombase
39					_randombase_weighted
40					_replace_ambiguous_bases
41					_weighted_rand
42					_random_index
43					);
44					our %EXPORT_TAGS = (GD => \@EXPORT_OK);
45
46					=head1 Functions
47
48					=head2 _randomDNA()
49
50					takes a target length and a GC percentage and generates a random nucleotide
51					sequence, with or without stops in the first frame
52					in: nucleotide sequence length (scalar),
53					GC percentage (0 <= scalar <= 100),
54					stop codon prevention(0 stops allowed, else no stops),
55					codon table (hash reference)
56					out: nucleotide sequence (string)
57
58					=cut
59
60					sub _randomDNA
61					{
62	0		0	0	my ($len, $GCperc, $stopswit, $codon_t) = @_;
63
64	0	0		0	return q{} if ($len == 0);
65	0	0		0	return _randombase_weighted($GCperc) if ($len == 1);
66
67					#GC
68	0			0	my $GCtotal = sprintf "%.0f", $GCperc * $len / 100;
69	0			0	my $Gcount = sprintf "%.0f", rand( $GCtotal );
70	0			0	my $Gstr = 'G' x $Gcount;
71	0			0	my $Ccount = $GCtotal - $Gcount;
72	0			0	my $Cstr = 'C' x $Ccount;
73
74					#AT
75	0			0	my $ATtotal = $len - $GCtotal;
76	0			0	my $Acount = sprintf "%.0f", rand( $ATtotal );
77	0			0	my $Astr = 'A' x $Acount;
78	0			0	my $Tcount = $ATtotal - $Acount;
79	0			0	my $Tstr = 'T' x $Tcount;
80
81	0			0	my @randomarray = shuffle( split( '', $Gstr . $Cstr . $Astr . $Tstr) );
82	0			0	my $DNA = join('', @randomarray);
83
84	0	0		0	if ($stopswit)
85					{
86	0			0	my $stophsh = _find_in_frame($DNA, "*", $codon_t);
87	0			0	while (scalar keys %{$stophsh})
	0			0
88					{
89	0			0	foreach my $pos (keys %{$stophsh})
	0			0
90					{
91	0			0	my $bit = substr $DNA, $pos, 3;
92	0			0	substr $DNA, $pos, 3, scalar reverse $bit;
93	0	0		0	if (int(rand(1)+.5) == 1)
94					{
95	0			0	my $bat = substr $DNA, $pos, 2;
96	0			0	substr $DNA, $pos, 2, scalar reverse$bat;
97					}
98					}
99	0			0	$stophsh = _find_in_frame($DNA, "*", $codon_t);
100					}
101					}
102	0			0	return $DNA;
103					}
104
105					=head2 _randombase()
106
107					when you just want one random base
108
109					=cut
110
111					sub _randombase
112					{
113	0		0	0	my $int = _random_index(4);
114	0			0	return $BASES[$int];
115					}
116
117					=head2 _random_weighted_base()
118
119					when you just want one random but weighted base
120
121					=cut
122
123					sub _randombase_weighted
124					{
125	30000		30000	37276	my ($GCp) = @_;
126	30000	50		41165	return _randombase() unless ($GCp);
127
128	30000			37685	my $GCcount = $GCp/200 ;
129	30000			36092	my $ATcount = (100-$GCp)/200;
130	30000			58560	my $weight = {G => $GCcount, C => $GCcount, T => $ATcount, A => $ATcount};
131	30000			41436	return _weighted_rand($weight);
132					}
133
134					=head2 _weighted_rand()
135
136					=cut
137
138					sub _weighted_rand
139					{
140	130000		130000	185233	my ($dist) = @_;
141	130000	50		199828	croak ("no distribution provided") unless ($dist);
142	130000			157269	my ($key, $weight);
143
144	130000			145936	while (1)
145					{
146	130020			170480	my $rand = rand;
147	130020			303323	while ( ($key, $weight) = each %$dist )
148					{
149	284540	100		765960	return $key if ($rand -= $weight) < 0;
150					}
151					}
152	0			0	return;
153					}
154
155					=head2 _replace_ambiguous_bases
156
157					=cut
158
159					sub _replace_ambiguous_bases
160					{
161	3		3	4	my ($seq) = @_;
162	3			5	my $new = q{};
163	3			9	foreach my $char (split(q{}, $seq))
164					{
165	26			25	my @class = @{$NTIDES{$char}};
	26			52
166	26			36	my $index = _random_index(scalar(@class));
167	26			38	$new .= $class[$index];
168					}
169	3			8	return $new;
170					}
171
172					=head2 _random_index
173
174					=cut
175
176					sub _random_index
177					{
178	100026		100026	131480	my ($array_size) = @_;
179	100026			218521	return (sprintf "%.0f", rand($array_size)) % $array_size;
180					}
181
182					1;
183
184					__END__