line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# BioPerl module for Bio::Perl |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Please direct questions and support issues to |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Cared for by Ewan Birney |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# Copyright Ewan Birney |
9
|
|
|
|
|
|
|
# |
10
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Bio::Perl - Functional access to BioPerl for people who don't know objects |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
use Bio::Perl; |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
# will guess file format from extension |
23
|
|
|
|
|
|
|
$seq_object = read_sequence($filename); |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
# forces genbank format |
26
|
|
|
|
|
|
|
$seq_object = read_sequence($filename,'genbank'); |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
# reads an array of sequences |
29
|
|
|
|
|
|
|
@seq_object_array = read_all_sequences($filename,'fasta'); |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# sequences are Bio::Seq objects, so the following methods work |
32
|
|
|
|
|
|
|
# for more info see Bio::Seq, or do 'perldoc Bio/Seq.pm' |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
print "Sequence name is ",$seq_object->display_id,"\n"; |
35
|
|
|
|
|
|
|
print "Sequence acc is ",$seq_object->accession_number,"\n"; |
36
|
|
|
|
|
|
|
print "First 5 bases is ",$seq_object->subseq(1,5),"\n"; |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
# get the whole sequence as a single string |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
$sequence_as_a_string = $seq_object->seq(); |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
# writing sequences |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
write_sequence(">$filename",'genbank',$seq_object); |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
write_sequence(">$filename",'genbank',@seq_object_array); |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
# making a new sequence from just a string |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
$seq_object = new_sequence("ATTGGTTTGGGGACCCAATTTGTGTGTTATATGTA", |
51
|
|
|
|
|
|
|
"myname","AL12232"); |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
# getting a sequence from a database (assumes internet connection) |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
$seq_object = get_sequence('swissprot',"ROA1_HUMAN"); |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
$seq_object = get_sequence('embl',"AI129902"); |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
$seq_object = get_sequence('genbank',"AI129902"); |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
# BLAST a sequence (assummes an internet connection) |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
$blast_report = blast_sequence($seq_object); |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
write_blast(">blast.out",$blast_report); |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
=head1 DESCRIPTION |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
Easy first time access to BioPerl via functions. |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
=head1 FEEDBACK |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
=head2 Mailing Lists |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
77
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to one |
78
|
|
|
|
|
|
|
of the Bioperl mailing lists. Your participation is much appreciated. |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
bioperl-l@bioperl.org |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=head2 Support |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
I |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
89
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
90
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
91
|
|
|
|
|
|
|
with code and data examples if at all possible. |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
=head2 Reporting Bugs |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
96
|
|
|
|
|
|
|
the bugs and their resolution. Bug reports can be submitted via the web: |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=head1 AUTHOR - Ewan Birney |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
Email birney@ebi.ac.uk |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
=head1 APPENDIX |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
The rest of the documentation details each of the object methods. |
107
|
|
|
|
|
|
|
Internal methods are usually preceded with a _ |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
=cut |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
#' |
112
|
|
|
|
|
|
|
# Let the code begin... |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
package Bio::Perl; |
116
|
1
|
|
|
1
|
|
453
|
use vars qw(@EXPORT @EXPORT_OK $DBOKAY); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
43
|
|
117
|
1
|
|
|
1
|
|
3
|
use strict; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
14
|
|
118
|
1
|
|
|
1
|
|
2
|
use Carp; |
|
1
|
|
|
|
|
12
|
|
|
1
|
|
|
|
|
39
|
|
119
|
|
|
|
|
|
|
|
120
|
1
|
|
|
1
|
|
311
|
use Bio::SeqIO; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
27
|
|
121
|
1
|
|
|
1
|
|
377
|
use Bio::Seq; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
24
|
|
122
|
1
|
|
|
1
|
|
5
|
use Bio::Root::Version '$VERSION'; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
7
|
|
123
|
|
|
|
|
|
|
BEGIN { |
124
|
1
|
|
|
1
|
|
65
|
eval { |
125
|
1
|
|
|
|
|
248
|
require Bio::DB::EMBL; |
126
|
1
|
|
|
|
|
297
|
require Bio::DB::GenBank; |
127
|
1
|
|
|
|
|
284
|
require Bio::DB::SwissProt; |
128
|
1
|
|
|
|
|
6
|
require Bio::DB::RefSeq; |
129
|
1
|
|
|
|
|
260
|
require Bio::DB::GenPept; |
130
|
|
|
|
|
|
|
}; |
131
|
1
|
50
|
|
|
|
4
|
if( $@ ) { |
132
|
0
|
|
|
|
|
0
|
$DBOKAY = 0; |
133
|
|
|
|
|
|
|
} else { |
134
|
1
|
|
|
|
|
19
|
$DBOKAY = 1; |
135
|
|
|
|
|
|
|
} |
136
|
|
|
|
|
|
|
} |
137
|
|
|
|
|
|
|
|
138
|
1
|
|
|
1
|
|
4
|
use base qw(Exporter); |
|
1
|
|
|
|
|
0
|
|
|
1
|
|
|
|
|
1278
|
|
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
@EXPORT = qw(read_sequence read_all_sequences write_sequence |
141
|
|
|
|
|
|
|
new_sequence get_sequence translate translate_as_string |
142
|
|
|
|
|
|
|
reverse_complement revcom revcom_as_string |
143
|
|
|
|
|
|
|
reverse_complement_as_string blast_sequence write_blast); |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
@EXPORT_OK = @EXPORT; |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
=head2 read_sequence |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
Title : read_sequence |
151
|
|
|
|
|
|
|
Usage : $seq = read_sequence('sequences.fa') |
152
|
|
|
|
|
|
|
$seq = read_sequence($filename,'genbank'); |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
# pipes are fine |
155
|
|
|
|
|
|
|
$seq = read_sequence("my_fetching_program $id |",'fasta'); |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
Function: Reads the top sequence from the file. If no format is given, it will |
158
|
|
|
|
|
|
|
try to guess the format from the filename. If a format is given, it |
159
|
|
|
|
|
|
|
forces that format. The filename can be any valid perl open() string |
160
|
|
|
|
|
|
|
- in particular, you can put in pipes |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
Returns : A Bio::Seq object. A quick synopsis: |
163
|
|
|
|
|
|
|
$seq_object->display_id - name of the sequence |
164
|
|
|
|
|
|
|
$seq_object->seq - sequence as a string |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
Args : Two strings, first the filename - any Perl open() string is ok |
167
|
|
|
|
|
|
|
Second string is the format, which is optional |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
For more information on Seq objects see L. |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
=cut |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
sub read_sequence{ |
174
|
2
|
|
|
2
|
1
|
9
|
my ($filename,$format) = @_; |
175
|
|
|
|
|
|
|
|
176
|
2
|
50
|
|
|
|
6
|
if( !defined $filename ) { |
177
|
0
|
|
|
|
|
0
|
confess "read_sequence($filename) - usage incorrect"; |
178
|
|
|
|
|
|
|
} |
179
|
|
|
|
|
|
|
|
180
|
2
|
|
|
|
|
2
|
my $seqio; |
181
|
|
|
|
|
|
|
|
182
|
2
|
100
|
|
|
|
4
|
if( defined $format ) { |
183
|
1
|
|
|
|
|
4
|
$seqio = Bio::SeqIO->new( '-file' => $filename, '-format' => $format); |
184
|
|
|
|
|
|
|
} else { |
185
|
1
|
|
|
|
|
7
|
$seqio = Bio::SeqIO->new( '-file' => $filename); |
186
|
|
|
|
|
|
|
} |
187
|
|
|
|
|
|
|
|
188
|
2
|
|
|
|
|
5
|
my $seq = $seqio->next_seq(); |
189
|
|
|
|
|
|
|
|
190
|
2
|
|
|
|
|
13
|
return $seq; |
191
|
|
|
|
|
|
|
} |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
=head2 read_all_sequences |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
Title : read_all_sequences |
197
|
|
|
|
|
|
|
Usage : @seq_object_array = read_all_sequences($filename); |
198
|
|
|
|
|
|
|
@seq_object_array = read_all_sequences($filename,'genbank'); |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
Function: Just as the function above, but reads all the sequences in the |
201
|
|
|
|
|
|
|
file and loads them into an array. |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
For very large files, you will run out of memory. When this |
204
|
|
|
|
|
|
|
happens, you've got to use the SeqIO system directly (this is |
205
|
|
|
|
|
|
|
not so hard! Don't worry about it!). |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
Returns : array of Bio::Seq objects |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
Args : two strings, first the filename (any open() string is ok) |
210
|
|
|
|
|
|
|
second the format (which is optional) |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
See L and L for more information |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
=cut |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
sub read_all_sequences{ |
217
|
1
|
|
|
1
|
1
|
5
|
my ($filename,$format) = @_; |
218
|
|
|
|
|
|
|
|
219
|
1
|
50
|
|
|
|
3
|
if( !defined $filename ) { |
220
|
0
|
|
|
|
|
0
|
confess "read_all_sequences($filename) - usage incorrect"; |
221
|
|
|
|
|
|
|
} |
222
|
|
|
|
|
|
|
|
223
|
1
|
|
|
|
|
2
|
my $seqio; |
224
|
|
|
|
|
|
|
|
225
|
1
|
50
|
|
|
|
2
|
if( defined $format ) { |
226
|
1
|
|
|
|
|
6
|
$seqio = Bio::SeqIO->new( '-file' => $filename, '-format' => $format); |
227
|
|
|
|
|
|
|
} else { |
228
|
0
|
|
|
|
|
0
|
$seqio = Bio::SeqIO->new( '-file' => $filename); |
229
|
|
|
|
|
|
|
} |
230
|
|
|
|
|
|
|
|
231
|
1
|
|
|
|
|
2
|
my @seq_array; |
232
|
|
|
|
|
|
|
|
233
|
1
|
|
|
|
|
3
|
while( my $seq = $seqio->next_seq() ) { |
234
|
2
|
|
|
|
|
5
|
push(@seq_array,$seq); |
235
|
|
|
|
|
|
|
} |
236
|
|
|
|
|
|
|
|
237
|
1
|
|
|
|
|
4
|
return @seq_array; |
238
|
|
|
|
|
|
|
} |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
=head2 write_sequence |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
Title : write_sequence |
244
|
|
|
|
|
|
|
Usage : write_sequence(">new_file.gb",'genbank',$seq) |
245
|
|
|
|
|
|
|
write_sequence(">new_file.gb",'genbank',@array_of_sequence_objects) |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
Function: writes sequences in the specified format |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
Returns : true |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
Args : filename as a string, must provide an open() output file |
252
|
|
|
|
|
|
|
format as a string |
253
|
|
|
|
|
|
|
one or more sequence objects |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
=cut |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
sub write_sequence{ |
259
|
1
|
|
|
1
|
1
|
11
|
my ($filename,$format,@sequence_objects) = @_; |
260
|
|
|
|
|
|
|
|
261
|
1
|
50
|
|
|
|
3
|
if( scalar(@sequence_objects) == 0 ) { |
262
|
0
|
|
|
|
|
0
|
confess("write_sequence(filename,format,sequence_object)"); |
263
|
|
|
|
|
|
|
} |
264
|
|
|
|
|
|
|
|
265
|
1
|
|
|
|
|
2
|
my $error = 0; |
266
|
1
|
|
|
|
|
2
|
my $seqname = "sequence1"; |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
# catch users who haven't passed us a filename we can open |
269
|
1
|
0
|
33
|
|
|
5
|
if( $filename !~ /^\>/ && $filename !~ /^|/ ) { |
270
|
0
|
|
|
|
|
0
|
$filename = ">".$filename; |
271
|
|
|
|
|
|
|
} |
272
|
|
|
|
|
|
|
|
273
|
1
|
|
|
|
|
5
|
my $seqio = Bio::SeqIO->new('-file' => $filename, '-format' => $format); |
274
|
|
|
|
|
|
|
|
275
|
1
|
|
|
|
|
4
|
foreach my $seq ( @sequence_objects ) { |
276
|
1
|
|
|
|
|
2
|
my $seq_obj; |
277
|
|
|
|
|
|
|
|
278
|
1
|
50
|
|
|
|
5
|
if( !ref $seq ) { |
279
|
0
|
0
|
|
|
|
0
|
if( length $seq > 50 ) { |
280
|
|
|
|
|
|
|
# odds are this is a sequence as a string, and someone has not figured out |
281
|
|
|
|
|
|
|
# how to make objects. Warn him/her and then make a sequence object |
282
|
|
|
|
|
|
|
# from this |
283
|
0
|
0
|
|
|
|
0
|
if( $error == 0 ) { |
284
|
0
|
|
|
|
|
0
|
carp("WARNING: You have put in a long string into write_sequence.\n". |
285
|
|
|
|
|
|
|
"I suspect this means that this is the actual sequence\n". |
286
|
|
|
|
|
|
|
"In the future try the\n". |
287
|
|
|
|
|
|
|
" new_sequence method of this module to make a new sequence object.\n". |
288
|
|
|
|
|
|
|
"Doing this for you here\n"); |
289
|
0
|
|
|
|
|
0
|
$error = 1; |
290
|
|
|
|
|
|
|
} |
291
|
|
|
|
|
|
|
|
292
|
0
|
|
|
|
|
0
|
$seq_obj = new_sequence($seq,$seqname); |
293
|
0
|
|
|
|
|
0
|
$seqname++; |
294
|
|
|
|
|
|
|
} else { |
295
|
0
|
|
|
|
|
0
|
confess("You have a non object [$seq] passed to write_sequence. It maybe that you". |
296
|
|
|
|
|
|
|
"want to use new_sequence to make this string into a sequence object?"); |
297
|
|
|
|
|
|
|
} |
298
|
|
|
|
|
|
|
} else { |
299
|
1
|
50
|
|
|
|
5
|
if( !$seq->isa("Bio::SeqI") ) { |
300
|
0
|
|
|
|
|
0
|
confess("object [$seq] is not a Bio::Seq object; can't write it out"); |
301
|
|
|
|
|
|
|
} |
302
|
1
|
|
|
|
|
2
|
$seq_obj = $seq; |
303
|
|
|
|
|
|
|
} |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
# finally... we get to write out the sequence! |
306
|
1
|
|
|
|
|
4
|
$seqio->write_seq($seq_obj); |
307
|
|
|
|
|
|
|
} |
308
|
1
|
|
|
|
|
4
|
1; |
309
|
|
|
|
|
|
|
} |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
=head2 new_sequence |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
Title : new_sequence |
314
|
|
|
|
|
|
|
Usage : $seq_obj = new_sequence("GATTACA", "kino-enzyme"); |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
Function: Construct a sequency object from sequence string |
317
|
|
|
|
|
|
|
Returns : A Bio::Seq object |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
Args : sequence string |
320
|
|
|
|
|
|
|
name string (optional, default "no-name-for-sequence") |
321
|
|
|
|
|
|
|
accession - accession number (optional, no default) |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
=cut |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
sub new_sequence{ |
326
|
1
|
|
|
1
|
1
|
2
|
my ($seq,$name,$accession) = @_; |
327
|
|
|
|
|
|
|
|
328
|
1
|
50
|
|
|
|
4
|
if( !defined $seq ) { |
329
|
0
|
|
|
|
|
0
|
confess("new_sequence(sequence_as_string) usage"); |
330
|
|
|
|
|
|
|
} |
331
|
|
|
|
|
|
|
|
332
|
1
|
|
50
|
|
|
3
|
$name ||= "no-name-for-sequence"; |
333
|
|
|
|
|
|
|
|
334
|
1
|
|
|
|
|
5
|
my $seq_object = Bio::Seq->new( -seq => $seq, -id => $name); |
335
|
|
|
|
|
|
|
|
336
|
1
|
50
|
|
|
|
6
|
$accession && $seq_object->accession_number($accession); |
337
|
|
|
|
|
|
|
|
338
|
1
|
|
|
|
|
4
|
return $seq_object; |
339
|
|
|
|
|
|
|
} |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
=head2 blast_sequence |
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
Title : blast_sequence |
344
|
|
|
|
|
|
|
Usage : $blast_result = blast_sequence($seq) |
345
|
|
|
|
|
|
|
$blast_result = blast_sequence('MFVEGGTFASEDDDSASAEDE'); |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
Function: If the computer has Internet accessibility, blasts |
348
|
|
|
|
|
|
|
the sequence using the NCBI BLAST server against nrdb. |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
It chooses the flavour of BLAST on the basis of the sequence. |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
This function uses Bio::Tools::Run::RemoteBlast, which itself |
353
|
|
|
|
|
|
|
use Bio::SearchIO - as soon as you want to know more, check out |
354
|
|
|
|
|
|
|
these modules |
355
|
|
|
|
|
|
|
Returns : Bio::Search::Result::GenericResult.pm |
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
Args : Either a string of protein letters or nucleotides, or a |
358
|
|
|
|
|
|
|
Bio::Seq object |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
=cut |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
sub blast_sequence { |
363
|
0
|
|
|
0
|
1
|
0
|
my ($seq,$verbose) = @_; |
364
|
|
|
|
|
|
|
|
365
|
0
|
0
|
|
|
|
0
|
if( !defined $verbose ) { |
366
|
0
|
|
|
|
|
0
|
$verbose = 1; |
367
|
|
|
|
|
|
|
} |
368
|
|
|
|
|
|
|
|
369
|
0
|
0
|
|
|
|
0
|
if( !ref $seq ) { |
|
|
0
|
|
|
|
|
|
370
|
0
|
|
|
|
|
0
|
$seq = Bio::Seq->new( -seq => $seq, -id => 'blast-sequence-temp-id'); |
371
|
|
|
|
|
|
|
} elsif ( !$seq->isa('Bio::PrimarySeqI') ) { |
372
|
0
|
|
|
|
|
0
|
croak("[$seq] is an object, but not a Bio::Seq object, cannot be blasted"); |
373
|
|
|
|
|
|
|
} |
374
|
|
|
|
|
|
|
|
375
|
0
|
|
|
|
|
0
|
require Bio::Tools::Run::RemoteBlast; |
376
|
|
|
|
|
|
|
|
377
|
0
|
0
|
|
|
|
0
|
my $prog = ( $seq->alphabet eq 'protein' ) ? 'blastp' : 'blastn'; |
378
|
0
|
|
|
|
|
0
|
my $e_val= '1e-10'; |
379
|
|
|
|
|
|
|
|
380
|
0
|
|
|
|
|
0
|
my @params = ( '-prog' => $prog, |
381
|
|
|
|
|
|
|
'-expect' => $e_val, |
382
|
|
|
|
|
|
|
'-readmethod' => 'SearchIO' ); |
383
|
|
|
|
|
|
|
|
384
|
0
|
|
|
|
|
0
|
my $factory = Bio::Tools::Run::RemoteBlast->new(@params); |
385
|
|
|
|
|
|
|
|
386
|
0
|
|
|
|
|
0
|
my $r = $factory->submit_blast($seq); |
387
|
0
|
0
|
|
|
|
0
|
if( $verbose ) { |
388
|
0
|
|
|
|
|
0
|
print STDERR "Submitted Blast for [".$seq->id."] "; |
389
|
|
|
|
|
|
|
} |
390
|
0
|
|
|
|
|
0
|
sleep 5; |
391
|
|
|
|
|
|
|
|
392
|
0
|
|
|
|
|
0
|
my $result; |
393
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
LOOP : |
395
|
0
|
|
|
|
|
0
|
while( my @rids = $factory->each_rid) { |
396
|
0
|
|
|
|
|
0
|
foreach my $rid ( @rids ) { |
397
|
0
|
|
|
|
|
0
|
my $rc = $factory->retrieve_blast($rid); |
398
|
0
|
0
|
|
|
|
0
|
if( !ref($rc) ) { |
399
|
0
|
0
|
|
|
|
0
|
if( $rc < 0 ) { |
400
|
0
|
|
|
|
|
0
|
$factory->remove_rid($rid); |
401
|
|
|
|
|
|
|
} |
402
|
0
|
0
|
|
|
|
0
|
if( $verbose ) { |
403
|
0
|
|
|
|
|
0
|
print STDERR "."; |
404
|
|
|
|
|
|
|
} |
405
|
0
|
|
|
|
|
0
|
sleep 10; |
406
|
|
|
|
|
|
|
} else { |
407
|
0
|
|
|
|
|
0
|
$result = $rc->next_result(); |
408
|
0
|
|
|
|
|
0
|
$factory->remove_rid($rid); |
409
|
0
|
|
|
|
|
0
|
last LOOP; |
410
|
|
|
|
|
|
|
} |
411
|
|
|
|
|
|
|
} |
412
|
|
|
|
|
|
|
} |
413
|
|
|
|
|
|
|
|
414
|
0
|
0
|
|
|
|
0
|
if( $verbose ) { |
415
|
0
|
|
|
|
|
0
|
print STDERR "\n"; |
416
|
|
|
|
|
|
|
} |
417
|
0
|
|
|
|
|
0
|
return $result; |
418
|
|
|
|
|
|
|
} |
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
=head2 write_blast |
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
Title : write_blast |
423
|
|
|
|
|
|
|
Usage : write_blast($filename,$blast_report); |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
Function: Writes a BLAST result object (or more formally |
426
|
|
|
|
|
|
|
a SearchIO result object) out to a filename |
427
|
|
|
|
|
|
|
in BLAST-like format |
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
Returns : none |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
Args : filename as a string |
432
|
|
|
|
|
|
|
Bio::SearchIO::Results object |
433
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
=cut |
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
sub write_blast { |
437
|
0
|
|
|
0
|
1
|
0
|
my ($filename,$blast) = @_; |
438
|
|
|
|
|
|
|
|
439
|
0
|
0
|
0
|
|
|
0
|
if( $filename !~ /^\>/ && $filename !~ /^|/ ) { |
440
|
0
|
|
|
|
|
0
|
$filename = ">".$filename; |
441
|
|
|
|
|
|
|
} |
442
|
|
|
|
|
|
|
|
443
|
0
|
|
|
|
|
0
|
my $output = Bio::SearchIO->new( -output_format => 'blast', -file => $filename); |
444
|
|
|
|
|
|
|
|
445
|
0
|
|
|
|
|
0
|
$output->write_result($blast); |
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
} |
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
=head2 get_sequence |
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
Title : get_sequence |
452
|
|
|
|
|
|
|
Usage : $seq_object = get_sequence('swiss',"ROA1_HUMAN"); |
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
Function: If the computer has Internet access this method gets |
455
|
|
|
|
|
|
|
the sequence from Internet accessible databases. Currently |
456
|
|
|
|
|
|
|
this supports Swissprot ('swiss'), EMBL ('embl'), GenBank |
457
|
|
|
|
|
|
|
('genbank'), GenPept ('genpept'), and RefSeq ('refseq'). |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
Swissprot and EMBL are more robust than GenBank fetching. |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
If the user is trying to retrieve a RefSeq entry from |
462
|
|
|
|
|
|
|
GenBank/EMBL, the query is silently redirected. |
463
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
Returns : A Bio::Seq object |
465
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
Args : database type - one of swiss, embl, genbank, genpept, or |
467
|
|
|
|
|
|
|
refseq |
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
=cut |
470
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
my $genbank_db = undef; |
472
|
|
|
|
|
|
|
my $genpept_db = undef; |
473
|
|
|
|
|
|
|
my $embl_db = undef; |
474
|
|
|
|
|
|
|
my $swiss_db = undef; |
475
|
|
|
|
|
|
|
my $refseq_db = undef; |
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
sub get_sequence{ |
478
|
0
|
|
|
0
|
1
|
0
|
my ($db_type,$identifier) = @_; |
479
|
0
|
0
|
|
|
|
0
|
if( ! $DBOKAY ) { |
480
|
0
|
|
|
|
|
0
|
confess ("Your system does not have one of LWP, HTTP::Request::Common, IO::String\n". |
481
|
|
|
|
|
|
|
"installed so the DB retrieval method is not available.\n". |
482
|
|
|
|
|
|
|
"Full error message is:\n $!\n"); |
483
|
0
|
|
|
|
|
0
|
return; |
484
|
|
|
|
|
|
|
} |
485
|
0
|
|
|
|
|
0
|
$db_type = lc($db_type); |
486
|
|
|
|
|
|
|
|
487
|
0
|
|
|
|
|
0
|
my $db; |
488
|
|
|
|
|
|
|
|
489
|
0
|
0
|
|
|
|
0
|
if( $db_type =~ /genbank/ ) { |
490
|
0
|
0
|
|
|
|
0
|
if( !defined $genbank_db ) { |
491
|
0
|
|
|
|
|
0
|
$genbank_db = Bio::DB::GenBank->new(); |
492
|
|
|
|
|
|
|
} |
493
|
0
|
|
|
|
|
0
|
$db = $genbank_db; |
494
|
|
|
|
|
|
|
} |
495
|
0
|
0
|
|
|
|
0
|
if( $db_type =~ /genpept/ ) { |
496
|
0
|
0
|
|
|
|
0
|
if( !defined $genpept_db ) { |
497
|
0
|
|
|
|
|
0
|
$genpept_db = Bio::DB::GenPept->new(); |
498
|
|
|
|
|
|
|
} |
499
|
0
|
|
|
|
|
0
|
$db = $genpept_db; |
500
|
|
|
|
|
|
|
} |
501
|
|
|
|
|
|
|
|
502
|
0
|
0
|
|
|
|
0
|
if( $db_type =~ /swiss/ ) { |
503
|
0
|
0
|
|
|
|
0
|
if( !defined $swiss_db ) { |
504
|
0
|
|
|
|
|
0
|
$swiss_db = Bio::DB::SwissProt->new(); |
505
|
|
|
|
|
|
|
} |
506
|
0
|
|
|
|
|
0
|
$db = $swiss_db; |
507
|
|
|
|
|
|
|
} |
508
|
|
|
|
|
|
|
|
509
|
0
|
0
|
|
|
|
0
|
if( $db_type =~ /embl/ ) { |
510
|
0
|
0
|
|
|
|
0
|
if( !defined $embl_db ) { |
511
|
0
|
|
|
|
|
0
|
$embl_db = Bio::DB::EMBL->new(); |
512
|
|
|
|
|
|
|
} |
513
|
0
|
|
|
|
|
0
|
$db = $embl_db; |
514
|
|
|
|
|
|
|
} |
515
|
|
|
|
|
|
|
|
516
|
0
|
0
|
0
|
|
|
0
|
if( $db_type =~ /refseq/ or ($db_type !~ /swiss/ and |
|
|
|
0
|
|
|
|
|
517
|
|
|
|
|
|
|
$identifier =~ /^\s*N\S+_/)) { |
518
|
0
|
0
|
|
|
|
0
|
if( !defined $refseq_db ) { |
519
|
0
|
|
|
|
|
0
|
$refseq_db = Bio::DB::RefSeq->new(); |
520
|
|
|
|
|
|
|
} |
521
|
0
|
|
|
|
|
0
|
$db = $refseq_db; |
522
|
|
|
|
|
|
|
} |
523
|
|
|
|
|
|
|
|
524
|
0
|
|
|
|
|
0
|
my $seq; |
525
|
|
|
|
|
|
|
|
526
|
0
|
0
|
|
|
|
0
|
if( $identifier =~ /^\w+\d+$/ ) { |
527
|
0
|
|
|
|
|
0
|
$seq = $db->get_Seq_by_acc($identifier); |
528
|
|
|
|
|
|
|
} else { |
529
|
0
|
|
|
|
|
0
|
$seq = $db->get_Seq_by_id($identifier); |
530
|
|
|
|
|
|
|
} |
531
|
|
|
|
|
|
|
|
532
|
0
|
|
|
|
|
0
|
return $seq; |
533
|
|
|
|
|
|
|
} |
534
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
=head2 translate |
537
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
Title : translate |
539
|
|
|
|
|
|
|
Usage : $seqobj = translate($seq_or_string_scalar) |
540
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
Function: translates a DNA sequence object OR just a plain |
542
|
|
|
|
|
|
|
string of DNA to amino acids |
543
|
|
|
|
|
|
|
Returns : A Bio::Seq object |
544
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
Args : Either a sequence object or a string of |
546
|
|
|
|
|
|
|
just DNA sequence characters |
547
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
=cut |
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
sub translate { |
551
|
4
|
|
|
4
|
1
|
6
|
my ($scalar) = shift; |
552
|
|
|
|
|
|
|
|
553
|
4
|
|
|
|
|
3
|
my $obj; |
554
|
4
|
100
|
|
|
|
8
|
if( ref $scalar ) { |
555
|
2
|
50
|
|
|
|
8
|
if( !$scalar->isa("Bio::PrimarySeqI") ) { |
556
|
0
|
|
|
|
|
0
|
confess("Expecting a sequence object not a $scalar"); |
557
|
|
|
|
|
|
|
} else { |
558
|
2
|
|
|
|
|
2
|
$obj= $scalar; |
559
|
|
|
|
|
|
|
} |
560
|
|
|
|
|
|
|
} else { |
561
|
|
|
|
|
|
|
# check this looks vaguely like DNA |
562
|
2
|
|
|
|
|
4
|
my $n = ( $scalar =~ tr/ATGCNatgcn/ATGCNatgcn/ ); |
563
|
2
|
50
|
|
|
|
6
|
if( $n < length($scalar) * 0.85 ) { |
564
|
0
|
|
|
|
|
0
|
confess("Sequence [$scalar] is less than 85% ATGCN, which doesn't look very DNA to me"); |
565
|
|
|
|
|
|
|
} |
566
|
2
|
|
|
|
|
8
|
$obj = Bio::PrimarySeq->new(-id => 'internalbioperlseq',-seq => $scalar); |
567
|
|
|
|
|
|
|
} |
568
|
4
|
|
|
|
|
23
|
return $obj->translate(); |
569
|
|
|
|
|
|
|
} |
570
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
=head2 translate_as_string |
573
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
Title : translate_as_string |
575
|
|
|
|
|
|
|
Usage : $seqstring = translate_as_string($seq_or_string_scalar) |
576
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
Function: translates a DNA sequence object OR just a plain |
578
|
|
|
|
|
|
|
string of DNA to amino acids |
579
|
|
|
|
|
|
|
Returns : A string of just amino acids |
580
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
Args : Either a sequence object or a string of |
582
|
|
|
|
|
|
|
just DNA sequence characters |
583
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
=cut |
585
|
|
|
|
|
|
|
|
586
|
|
|
|
|
|
|
sub translate_as_string { |
587
|
2
|
|
|
2
|
1
|
4
|
my ($scalar) = shift; |
588
|
2
|
|
|
|
|
2
|
my $obj = Bio::Perl::translate($scalar); |
589
|
2
|
|
|
|
|
4
|
return $obj->seq; |
590
|
|
|
|
|
|
|
} |
591
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
|
593
|
|
|
|
|
|
|
=head2 reverse_complement |
594
|
|
|
|
|
|
|
|
595
|
|
|
|
|
|
|
Title : reverse_complement |
596
|
|
|
|
|
|
|
Usage : $seqobj = reverse_complement($seq_or_string_scalar) |
597
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
Function: reverse complements a string or sequence argument |
599
|
|
|
|
|
|
|
producing a Bio::Seq - if you want a string, you |
600
|
|
|
|
|
|
|
can use reverse_complement_as_string |
601
|
|
|
|
|
|
|
Returns : A Bio::Seq object |
602
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
Args : Either a sequence object or a string of |
604
|
|
|
|
|
|
|
just DNA sequence characters |
605
|
|
|
|
|
|
|
|
606
|
|
|
|
|
|
|
=cut |
607
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
sub reverse_complement { |
609
|
0
|
|
|
0
|
1
|
|
my ($scalar) = shift; |
610
|
|
|
|
|
|
|
|
611
|
0
|
|
|
|
|
|
my $obj; |
612
|
|
|
|
|
|
|
|
613
|
0
|
0
|
|
|
|
|
if( ref $scalar ) { |
614
|
0
|
0
|
|
|
|
|
if( !$scalar->isa("Bio::PrimarySeqI") ) { |
615
|
0
|
|
|
|
|
|
confess("Expecting a sequence object not a $scalar"); |
616
|
|
|
|
|
|
|
} else { |
617
|
0
|
|
|
|
|
|
$obj= $scalar; |
618
|
|
|
|
|
|
|
} |
619
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
} else { |
621
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
# check this looks vaguely like DNA |
623
|
0
|
|
|
|
|
|
my $n = ( $scalar =~ tr/ATGCNatgcn/ATGCNatgcn/ ); |
624
|
|
|
|
|
|
|
|
625
|
0
|
0
|
|
|
|
|
if( $n < length($scalar) * 0.85 ) { |
626
|
0
|
|
|
|
|
|
confess("Sequence [$scalar] is less than 85% ATGCN, which doesn't look very DNA to me"); |
627
|
|
|
|
|
|
|
} |
628
|
|
|
|
|
|
|
|
629
|
0
|
|
|
|
|
|
$obj = Bio::PrimarySeq->new(-id => 'internalbioperlseq',-seq => $scalar); |
630
|
|
|
|
|
|
|
} |
631
|
|
|
|
|
|
|
|
632
|
0
|
|
|
|
|
|
return $obj->revcom(); |
633
|
|
|
|
|
|
|
} |
634
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
=head2 revcom |
636
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
Title : revcom |
638
|
|
|
|
|
|
|
Usage : $seqobj = revcom($seq_or_string_scalar) |
639
|
|
|
|
|
|
|
|
640
|
|
|
|
|
|
|
Function: reverse complements a string or sequence argument |
641
|
|
|
|
|
|
|
producing a Bio::Seq - if you want a string, you |
642
|
|
|
|
|
|
|
can use reverse_complement_as_string |
643
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
This is an alias for reverse_complement |
645
|
|
|
|
|
|
|
Returns : A Bio::Seq object |
646
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
Args : Either a sequence object or a string of |
648
|
|
|
|
|
|
|
just DNA sequence characters |
649
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
=cut |
651
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
sub revcom { |
653
|
0
|
|
|
0
|
1
|
|
return &Bio::Perl::reverse_complement(@_); |
654
|
|
|
|
|
|
|
} |
655
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
|
657
|
|
|
|
|
|
|
=head2 reverse_complement_as_string |
658
|
|
|
|
|
|
|
|
659
|
|
|
|
|
|
|
Title : reverse_complement_as_string |
660
|
|
|
|
|
|
|
Usage : $string = reverse_complement_as_string($seq_or_string_scalar) |
661
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
Function: reverse complements a string or sequence argument |
663
|
|
|
|
|
|
|
producing a string |
664
|
|
|
|
|
|
|
Returns : A string of DNA letters |
665
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
Args : Either a sequence object or a string of |
667
|
|
|
|
|
|
|
just DNA sequence characters |
668
|
|
|
|
|
|
|
|
669
|
|
|
|
|
|
|
=cut |
670
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
sub reverse_complement_as_string { |
672
|
0
|
|
|
0
|
1
|
|
my ($scalar) = shift; |
673
|
0
|
|
|
|
|
|
my $obj = &Bio::Perl::reverse_complement($scalar); |
674
|
0
|
|
|
|
|
|
return $obj->seq; |
675
|
|
|
|
|
|
|
} |
676
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
=head2 revcom_as_string |
679
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
Title : revcom_as_string |
681
|
|
|
|
|
|
|
Usage : $string = revcom_as_string($seq_or_string_scalar) |
682
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
Function: reverse complements a string or sequence argument |
684
|
|
|
|
|
|
|
producing a string |
685
|
|
|
|
|
|
|
Returns : A string of DNA letters |
686
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
Args : Either a sequence object or a string of |
688
|
|
|
|
|
|
|
just DNA sequence characters |
689
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
=cut |
691
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
sub revcom_as_string { |
693
|
0
|
|
|
0
|
1
|
|
my ($scalar) = shift; |
694
|
0
|
|
|
|
|
|
my $obj = &Bio::Perl::reverse_complement($scalar); |
695
|
0
|
|
|
|
|
|
return $obj->seq; |
696
|
|
|
|
|
|
|
} |
697
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
1; |