line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Data::Random::Nucleotides; |
2
|
5
|
|
|
5
|
|
131532
|
use strict; |
|
5
|
|
|
|
|
12
|
|
|
5
|
|
|
|
|
190
|
|
3
|
5
|
|
|
5
|
|
25
|
use warnings; |
|
5
|
|
|
|
|
9
|
|
|
5
|
|
|
|
|
133
|
|
4
|
5
|
|
|
5
|
|
28
|
use Carp; |
|
5
|
|
|
|
|
14
|
|
|
5
|
|
|
|
|
485
|
|
5
|
5
|
|
|
5
|
|
6418
|
use Data::Dumper; |
|
5
|
|
|
|
|
71814
|
|
|
5
|
|
|
|
|
464
|
|
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
require Exporter; |
8
|
5
|
|
|
|
|
700
|
# Exporter configuration.
#
# Nothing is exported by default (@EXPORT is empty); callers request
# individual functions by name, or all of them through the ':all' tag.
# The package globals are declared with 'our' instead of the obsolete
# 'use vars' pragma.
our @ISA = qw(Exporter);

our %EXPORT_TAGS = (
    'all' => [
        qw(
            rand_nuc
            rand_wrapped_nuc
            rand_fasta
        )
    ]
);

# Every function listed under ':all' may also be imported individually.
our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
our @EXPORT    = qw();
29
|
|
|
|
|
|
|
|
30
|
5
|
|
|
5
|
|
5128
|
use Data::Random qw/:all/; |
|
5
|
|
|
|
|
19336
|
|
|
5
|
|
|
|
|
3004
|
|
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
$Data::Random::Nucleotides::VERSION = '0.1'; |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# ABSTRACT: A Module to generate random nucleotide strings and common formats. |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
=head1 NAME |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
Data::Random::Nucleotides - Generate random nucleotide strings. |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
=head1 VERSION |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
version 0.1 |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
=head1 SYNOPSIS |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
use Data::Random::Nucleotides qw/:all/; |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
# Generate a string of 200 random A/C/G/T characters. |
50
|
|
|
|
|
|
|
$nucs = rand_nuc ( size => 200 ); |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
# Generate a string of random A/C/G/T between 20 and 500 characters. |
53
|
|
|
|
|
|
|
$nucs = rand_nuc ( min => 20, max => 500 ); |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
# Generate a string of 30 random A/C/G/T/N characters. |
56
|
|
|
|
|
|
|
$nucs = rand_nuc ( size => 30, N=>1 ); |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
    # Generate a multi-lined string of 500 random A/C/G/T characters. |
59
|
|
|
|
|
|
|
# The 500 characters will be split into lines of 70 characters each. |
60
|
|
|
|
|
|
|
$nucs = rand_wrapped_nuc ( size => 500 ); |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
# Generate a string containing a single FASTA-like sequence text. |
63
|
|
|
|
|
|
|
$fasta = rand_fasta ( size => 200 ) ; |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
=head1 DESCRIPTION |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
This module is a thin wrapper around L<Data::Random>, providing utility functions |
68
|
|
|
|
|
|
|
to generate nucleotide sequence strings and FASTA-looking strings. |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
nucleotide strings contain only A/C/G/T (and possibly N) characters. |
71
|
|
|
|
|
|
|
FASTA strings are multi-lined nucleotide strings, with the first line containing a sequence id (see L) . |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=head1 METHODS |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=head2 rand_nuc() |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
Returns a string of random nucleotides. |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
See C<rand_chars> in L<Data::Random> for possible parameters (e.g. C<size>, C<min>, C<max>). |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
If C<N> is set, N will be a possible nucleotide. Otherwise, only A/C/G/T will be returned. |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
=cut |
84
|
|
|
|
|
|
|
# rand_nuc( %args ) - build a string of random nucleotide characters.
#
# Named arguments:
#   size     => exact length of the returned string. When defined, 'min'
#               and 'max' are not consulted.
#   min, max => used only when 'size' is not defined: the length is drawn
#               at random from min .. max, both ends included.
#   N        => when defined (any value), 'N' is added to the A/C/G/T
#               alphabet.
#
# Returns the generated string (empty when the resulting size is below 1).
#
# Croaks when 'size' is absent and 'min' or 'max' is missing, or when
# 'max' is smaller than 'min'.
sub rand_nuc
{
    my %args = @_;

    my @set = qw/A C G T/;
    push @set, "N" if defined $args{N};

    my $size = $args{size};
    if ( !defined $size ) {
        my $min = $args{min};
        my $max = $args{max};

        # Test for "defined and non-empty" rather than truth, so that an
        # explicit 0 is accepted as a legitimate bound.
        croak "missing 'min' value (or use 'size')"
            unless defined $min && length $min;
        croak "missing 'max' value (or use 'size')"
            unless defined $max && length $max;
        croak "'max' value ($max) is smaller than 'min' value ($min)"
            if $max < $min;

        # rand(X) returns a value strictly below X, so the "+ 1" is what
        # makes 'max' itself a reachable length.
        $size = $min + int( rand( $max - $min + 1 ) );
    }

    # Each position is drawn independently from the alphabet; the
    # fractional index produced by rand() is truncated by the subscript.
    return join( "", map { $set[ rand @set ] } 1 .. $size );
}
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
=head2 rand_wrapped_nuc() |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
Returns a multi-lined string of random nucleotides. |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
See C<rand_nuc> for all possible parameters. |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
The returned string will be broken into lines of 70 characters each. |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
=cut |
114
|
|
|
|
|
|
|
# rand_wrapped_nuc( %args ) - random nucleotides folded into 70-column lines.
#
# All arguments are handed unchanged to rand_nuc(). A newline is inserted
# after every complete run of 70 non-newline characters; a shorter final
# run is left without a trailing newline.
sub rand_wrapped_nuc
{
    ( my $wrapped = rand_nuc(@_) ) =~ s/([^\n]{70})/$1\n/g;
    return $wrapped;
}
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=head2 rand_fasta() |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
Returns a random FASTA string. |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
First line begins with a C<< > >> prefix, and a random sequence ID (alphanumeric). |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
The rest of the lines are random nucleotide strings, wrapped at 70 characters. |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=cut |
131
|
|
|
|
|
|
|
# rand_fasta( %args ) - build one random FASTA-style record.
#
# The header line is ">" followed by three characters taken from
# Data::Random's 'loweralpha' set, a dash, and a zero-padded six-digit
# random number. The remaining text is whatever rand_wrapped_nuc() returns
# for the same arguments, so %args is forwarded to it unchanged.
#
# Returns the header line, a newline, and the wrapped sequence as a single
# string.
sub rand_fasta
{
    my $prefix = join( "", rand_chars( set => 'loweralpha', size => 3 ) );

    # rand() without an argument stays below 1, so int(rand()) is always 0
    # and every ID used to end in "-000000". Drawing from 0 .. 999_999
    # yields the intended random six-digit suffix.
    my $number = sprintf( "%06d", int( rand(1_000_000) ) );

    my $seq = rand_wrapped_nuc(@_);
    return ">" . $prefix . "-" . $number . "\n" . $seq;
}
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
=head1 AUTHOR |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
Assaf Gordon, C<< >> |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
=head1 TODO |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
=over |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
=item Finer control over nucleotide composition (currently: completely random) |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
=item Generate FASTQ files |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
=item Support lower-case nucleotides |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
=item generate amino-acid codes |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
=back |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
=head1 BUGS |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
Please report any bugs or feature requests to |
161
|
|
|
|
|
|
|
L |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
=head1 SEE ALSO |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
BioPerl provides similar functionality L, but requires installing the L module. |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
=head1 LICENSE AND COPYRIGHT |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
Copyright 2012 Assaf Gordon. |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it |
172
|
|
|
|
|
|
|
under the terms of either: the GNU General Public License as published |
173
|
|
|
|
|
|
|
by the Free Software Foundation; or the Artistic License. |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
See http://dev.perl.org/licenses/ for more information. |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
=cut |