| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# |
|
2
|
|
|
|
|
|
|
# BioPerl module for Bio::SeqIO::EMBL |
|
3
|
|
|
|
|
|
|
# |
|
4
|
|
|
|
|
|
|
# Please direct questions and support issues to |
|
5
|
|
|
|
|
|
|
# |
|
6
|
|
|
|
|
|
|
# Cared for by Ewan Birney |
|
7
|
|
|
|
|
|
|
# |
|
8
|
|
|
|
|
|
|
# Copyright Ewan Birney |
|
9
|
|
|
|
|
|
|
# |
|
10
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Bio::SeqIO::embl - EMBL sequence input/output stream |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
It is probably best not to use this object directly, but |
|
21
|
|
|
|
|
|
|
rather go through the SeqIO handler system. Go: |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
$stream = Bio::SeqIO->new(-file => $filename, -format => 'EMBL'); |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
while ( (my $seq = $stream->next_seq()) ) { |
|
26
|
|
|
|
|
|
|
# do something with $seq |
|
27
|
|
|
|
|
|
|
} |
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
This object can transform Bio::Seq objects to and from EMBL flat |
|
32
|
|
|
|
|
|
|
file databases. |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
There is a lot of flexibility here about how to dump things which |
|
35
|
|
|
|
|
|
|
should be documented more fully. |
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
There should be a common object that this and Genbank share (probably |
|
38
|
|
|
|
|
|
|
with Swissprot). Too much of the magic is identical. |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=head2 Optional functions |
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
=over 3 |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
=item _show_dna() |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
(output only) shows the dna or not |
|
47
|
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
=item _post_sort() |
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
(output only) provides a sorting func which is applied to the FTHelpers |
|
51
|
|
|
|
|
|
|
before printing |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
=item _id_generation_func() |
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
This is function which is called as |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
print "ID ", $func($annseq), "\n"; |
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
To generate the ID line. If it is not there, it generates a sensible ID |
|
60
|
|
|
|
|
|
|
line using a number of tools. |
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
If you want to output annotations in EMBL format they need to be |
|
63
|
|
|
|
|
|
|
stored in a Bio::Annotation::Collection object which is accessible |
|
64
|
|
|
|
|
|
|
through the Bio::SeqI interface method L. |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
The following are the names of the keys which are polled from a |
|
67
|
|
|
|
|
|
|
L object. |
|
68
|
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
reference - Should contain Bio::Annotation::Reference objects |
|
70
|
|
|
|
|
|
|
comment - Should contain Bio::Annotation::Comment objects |
|
71
|
|
|
|
|
|
|
dblink - Should contain Bio::Annotation::DBLink objects |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=back |
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=head1 FEEDBACK |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=head2 Mailing Lists |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
|
80
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to one |
|
81
|
|
|
|
|
|
|
of the Bioperl mailing lists. Your participation is much appreciated. |
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
|
84
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=head2 Support |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
I |
|
91
|
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
|
93
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
|
94
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
|
95
|
|
|
|
|
|
|
with code and data examples if at all possible. |
|
96
|
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
=head2 Reporting Bugs |
|
98
|
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
|
100
|
|
|
|
|
|
|
the bugs and their resolution. Bug reports can be submitted via |
|
101
|
|
|
|
|
|
|
the web: |
|
102
|
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
|
104
|
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
=head1 AUTHOR - Ewan Birney |
|
106
|
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
Email birney@ebi.ac.uk |
|
108
|
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
=head1 APPENDIX |
|
110
|
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
The rest of the documentation details each of the object |
|
112
|
|
|
|
|
|
|
methods. Internal methods are usually preceded with a _ |
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=cut |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
# Let the code begin... |
|
118
|
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
package Bio::SeqIO::embl; |
|
121
|
7
|
|
|
7
|
|
514
|
use vars qw(%FTQUAL_NO_QUOTE); |
|
|
7
|
|
|
|
|
9
|
|
|
|
7
|
|
|
|
|
291
|
|
|
122
|
7
|
|
|
7
|
|
24
|
use strict; |
|
|
7
|
|
|
|
|
7
|
|
|
|
7
|
|
|
|
|
111
|
|
|
123
|
7
|
|
|
7
|
|
1372
|
use Bio::SeqIO::FTHelper; |
|
|
7
|
|
|
|
|
10
|
|
|
|
7
|
|
|
|
|
127
|
|
|
124
|
7
|
|
|
7
|
|
28
|
use Bio::SeqFeature::Generic; |
|
|
7
|
|
|
|
|
8
|
|
|
|
7
|
|
|
|
|
98
|
|
|
125
|
7
|
|
|
7
|
|
1401
|
use Bio::Species; |
|
|
7
|
|
|
|
|
11
|
|
|
|
7
|
|
|
|
|
139
|
|
|
126
|
7
|
|
|
7
|
|
26
|
use Bio::Seq::SeqFactory; |
|
|
7
|
|
|
|
|
7
|
|
|
|
7
|
|
|
|
|
113
|
|
|
127
|
7
|
|
|
7
|
|
21
|
use Bio::Annotation::Collection; |
|
|
7
|
|
|
|
|
7
|
|
|
|
7
|
|
|
|
|
111
|
|
|
128
|
7
|
|
|
7
|
|
1114
|
use Bio::Annotation::Comment; |
|
|
7
|
|
|
|
|
10
|
|
|
|
7
|
|
|
|
|
122
|
|
|
129
|
7
|
|
|
7
|
|
1500
|
use Bio::Annotation::Reference; |
|
|
7
|
|
|
|
|
10
|
|
|
|
7
|
|
|
|
|
143
|
|
|
130
|
7
|
|
|
7
|
|
26
|
use Bio::Annotation::DBLink; |
|
|
7
|
|
|
|
|
8
|
|
|
|
7
|
|
|
|
|
110
|
|
|
131
|
|
|
|
|
|
|
|
|
132
|
7
|
|
|
7
|
|
20
|
use base qw(Bio::SeqIO); |
|
|
7
|
|
|
|
|
8
|
|
|
|
7
|
|
|
|
|
33181
|
|
|
133
|
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
# Note that a qualifier that exceeds one line (i.e. a long label) will |
|
135
|
|
|
|
|
|
|
# automatically be quoted regardless: |
|
136
|
|
|
|
|
|
|
%FTQUAL_NO_QUOTE=( |
|
137
|
|
|
|
|
|
|
'anticodon'=>1, |
|
138
|
|
|
|
|
|
|
'citation'=>1, |
|
139
|
|
|
|
|
|
|
'codon'=>1, |
|
140
|
|
|
|
|
|
|
'codon_start'=>1, |
|
141
|
|
|
|
|
|
|
'cons_splice'=>1, |
|
142
|
|
|
|
|
|
|
'direction'=>1, |
|
143
|
|
|
|
|
|
|
'evidence'=>1, |
|
144
|
|
|
|
|
|
|
'label'=>1, |
|
145
|
|
|
|
|
|
|
'mod_base'=> 1, |
|
146
|
|
|
|
|
|
|
'number'=> 1, |
|
147
|
|
|
|
|
|
|
'rpt_type'=> 1, |
|
148
|
|
|
|
|
|
|
'rpt_unit'=> 1, |
|
149
|
|
|
|
|
|
|
'transl_except'=> 1, |
|
150
|
|
|
|
|
|
|
'transl_table'=> 1, |
|
151
|
|
|
|
|
|
|
'usedin'=> 1, |
|
152
|
|
|
|
|
|
|
); |
|
153
|
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
sub _initialize { |
|
155
|
31
|
|
|
31
|
|
49
|
my($self,@args) = @_; |
|
156
|
|
|
|
|
|
|
|
|
157
|
31
|
|
|
|
|
110
|
$self->SUPER::_initialize(@args); |
|
158
|
|
|
|
|
|
|
# hash for functions for decoding keys. |
|
159
|
31
|
|
|
|
|
83
|
$self->{'_func_ftunit_hash'} = {}; |
|
160
|
|
|
|
|
|
|
# sets this to one by default. People can change it |
|
161
|
31
|
|
|
|
|
83
|
$self->_show_dna(1); |
|
162
|
31
|
50
|
|
|
|
84
|
if ( ! defined $self->sequence_factory ) { |
|
163
|
31
|
|
|
|
|
87
|
$self->sequence_factory(Bio::Seq::SeqFactory->new |
|
164
|
|
|
|
|
|
|
(-verbose => $self->verbose(), |
|
165
|
|
|
|
|
|
|
-type => 'Bio::Seq::RichSeq')); |
|
166
|
|
|
|
|
|
|
} |
|
167
|
|
|
|
|
|
|
} |
|
168
|
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
=head2 next_seq |
|
170
|
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
Title : next_seq |
|
172
|
|
|
|
|
|
|
Usage : $seq = $stream->next_seq() |
|
173
|
|
|
|
|
|
|
Function: returns the next sequence in the stream |
|
174
|
|
|
|
|
|
|
Returns : Bio::Seq object |
|
175
|
|
|
|
|
|
|
Args : |
|
176
|
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
=cut |
|
178
|
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
sub next_seq { |
|
180
|
25
|
|
|
25
|
1
|
66
|
my ($self,@args) = @_; |
|
181
|
25
|
|
|
|
|
27
|
my ($pseq,$c,$line,$name,$desc,$acc,$seqc,$mol,$div, |
|
182
|
|
|
|
|
|
|
$date, $comment, @date_arr); |
|
183
|
|
|
|
|
|
|
|
|
184
|
25
|
|
|
|
|
139
|
my ($annotation, %params, @features) = |
|
185
|
|
|
|
|
|
|
Bio::Annotation::Collection->new(); |
|
186
|
|
|
|
|
|
|
|
|
187
|
25
|
|
|
|
|
88
|
$line = $self->_readline; |
|
188
|
|
|
|
|
|
|
# This needs to be before the first eof() test |
|
189
|
|
|
|
|
|
|
|
|
190
|
25
|
100
|
|
|
|
60
|
if ( !defined $line ) { |
|
191
|
1
|
|
|
|
|
4
|
return; # no throws - end of file |
|
192
|
|
|
|
|
|
|
} |
|
193
|
|
|
|
|
|
|
|
|
194
|
24
|
100
|
|
|
|
109
|
if ( $line =~ /^\s+$/ ) { |
|
195
|
1
|
|
|
|
|
4
|
while ( defined ($line = $self->_readline) ) { |
|
196
|
1
|
50
|
|
|
|
4
|
$line =~/^\S/ && last; |
|
197
|
|
|
|
|
|
|
} |
|
198
|
|
|
|
|
|
|
# return without error if the whole next sequence was just a single |
|
199
|
|
|
|
|
|
|
# blank line and then eof |
|
200
|
1
|
50
|
|
|
|
4
|
return unless $line; |
|
201
|
|
|
|
|
|
|
} |
|
202
|
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
# no ID as 1st non-blank line, need short circuit and exit routine |
|
204
|
24
|
50
|
|
|
|
98
|
$self->throw("EMBL stream with no ID. Not embl in my book") |
|
205
|
|
|
|
|
|
|
unless $line =~ /^ID\s+\S+/; |
|
206
|
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
# At this point we are sure that $line contains an ID header line |
|
208
|
24
|
|
|
|
|
26
|
my $alphabet; |
|
209
|
24
|
100
|
|
|
|
62
|
if ( $line =~ tr/;/;/ == 6) { # New style headers contain exactly six semicolons. |
|
210
|
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
# New style header (EMBL Release >= 87, after June 2006) |
|
212
|
10
|
|
|
|
|
13
|
my $topology; |
|
213
|
|
|
|
|
|
|
my $sv; |
|
214
|
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
# ID DQ299383; SV 1; linear; mRNA; STD; MAM; 431 BP. |
|
216
|
|
|
|
|
|
|
# This regexp comes from the new2old.pl conversion script, from EBI |
|
217
|
10
|
100
|
|
|
|
46
|
if ($line =~ m/^ID (\S+);\s+SV (\d+); (\w+); ([^;]+); (\w{3}); (\w{3}); (\d+) BP./) { |
|
218
|
7
|
|
|
|
|
38
|
($name, $sv, $topology, $mol, $div) = ($1, $2, $3, $4, $6); |
|
219
|
|
|
|
|
|
|
} |
|
220
|
10
|
100
|
|
|
|
19
|
if (defined $sv) { |
|
221
|
7
|
|
|
|
|
13
|
$params{'-seq_version'} = $sv; |
|
222
|
7
|
|
|
|
|
12
|
$params{'-version'} = $sv; |
|
223
|
|
|
|
|
|
|
} |
|
224
|
|
|
|
|
|
|
|
|
225
|
10
|
50
|
66
|
|
|
38
|
if (defined $topology && $topology eq 'circular') { |
|
226
|
0
|
|
|
|
|
0
|
$params{'-is_circular'} = 1; |
|
227
|
|
|
|
|
|
|
} |
|
228
|
|
|
|
|
|
|
|
|
229
|
10
|
100
|
|
|
|
20
|
if (defined $mol ) { |
|
230
|
7
|
100
|
|
|
|
20
|
if ($mol =~ /DNA/) { |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
231
|
6
|
|
|
|
|
12
|
$alphabet = 'dna'; |
|
232
|
|
|
|
|
|
|
} elsif ($mol =~ /RNA/) { |
|
233
|
1
|
|
|
|
|
2
|
$alphabet = 'rna'; |
|
234
|
|
|
|
|
|
|
} elsif ($mol =~ /AA/) { |
|
235
|
0
|
|
|
|
|
0
|
$alphabet = 'protein'; |
|
236
|
|
|
|
|
|
|
} |
|
237
|
|
|
|
|
|
|
} |
|
238
|
|
|
|
|
|
|
} else { |
|
239
|
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
# Old style header (EMBL Release < 87, before June 2006) |
|
241
|
14
|
100
|
|
|
|
92
|
if ($line =~ /^ID\s+(\S+)[^;]*;\s+(\S+)[^;]*;\s+(\S+)[^;]*;/) { |
|
242
|
12
|
|
|
|
|
42
|
($name, $mol, $div) = ($1, $2, $3); |
|
243
|
|
|
|
|
|
|
} |
|
244
|
|
|
|
|
|
|
|
|
245
|
14
|
100
|
|
|
|
30
|
if ($mol) { |
|
246
|
12
|
50
|
|
|
|
24
|
if ( $mol =~ /circular/ ) { |
|
247
|
0
|
|
|
|
|
0
|
$params{'-is_circular'} = 1; |
|
248
|
0
|
|
|
|
|
0
|
$mol =~ s|circular ||; |
|
249
|
|
|
|
|
|
|
} |
|
250
|
12
|
50
|
|
|
|
26
|
if (defined $mol ) { |
|
251
|
12
|
100
|
|
|
|
36
|
if ($mol =~ /DNA/) { |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
252
|
10
|
|
|
|
|
12
|
$alphabet='dna'; |
|
253
|
|
|
|
|
|
|
} elsif ($mol =~ /RNA/) { |
|
254
|
2
|
|
|
|
|
3
|
$alphabet='rna'; |
|
255
|
|
|
|
|
|
|
} elsif ($mol =~ /AA/) { |
|
256
|
0
|
|
|
|
|
0
|
$alphabet='protein'; |
|
257
|
|
|
|
|
|
|
} |
|
258
|
|
|
|
|
|
|
} |
|
259
|
|
|
|
|
|
|
} |
|
260
|
|
|
|
|
|
|
} |
|
261
|
|
|
|
|
|
|
|
|
262
|
24
|
100
|
66
|
|
|
96
|
unless( defined $name && length($name) ) { |
|
263
|
5
|
|
|
|
|
9
|
$name = "unknown_id"; |
|
264
|
|
|
|
|
|
|
} |
|
265
|
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
# $self->warn("not parsing upper annotation in EMBL file yet!"); |
|
267
|
24
|
|
|
|
|
28
|
my $buffer = $line; |
|
268
|
24
|
|
|
|
|
24
|
local $_; |
|
269
|
24
|
|
|
|
|
26
|
BEFORE_FEATURE_TABLE : |
|
270
|
|
|
|
|
|
|
my $ncbi_taxid; |
|
271
|
24
|
|
|
|
|
52
|
until ( !defined $buffer ) { |
|
272
|
499
|
|
|
|
|
399
|
$_ = $buffer; |
|
273
|
|
|
|
|
|
|
# Exit at start of Feature table |
|
274
|
499
|
100
|
|
|
|
957
|
if ( /^(F[HT]|SQ)/ ) { |
|
275
|
24
|
100
|
100
|
|
|
134
|
$self->_pushback($_) if( $1 eq 'SQ' || $1 eq 'FT'); |
|
276
|
24
|
|
|
|
|
26
|
last; |
|
277
|
|
|
|
|
|
|
} |
|
278
|
|
|
|
|
|
|
# Description line(s) |
|
279
|
475
|
100
|
|
|
|
617
|
if (/^DE\s+(\S.*\S)/) { |
|
280
|
22
|
100
|
|
|
|
95
|
$desc .= $desc ? " $1" : $1; |
|
281
|
|
|
|
|
|
|
} |
|
282
|
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
#accession number |
|
284
|
475
|
100
|
100
|
|
|
1303
|
if ( /^AC\s+(.*)?/ || /^PA\s+(.*)?/) { |
|
285
|
24
|
|
|
|
|
102
|
my @accs = split(/[; ]+/, $1); # allow space in addition |
|
286
|
|
|
|
|
|
|
$params{'-accession_number'} = shift @accs |
|
287
|
24
|
100
|
|
|
|
76
|
unless defined $params{'-accession_number'}; |
|
288
|
24
|
|
|
|
|
25
|
push @{$params{'-secondary_accessions'}}, @accs; |
|
|
24
|
|
|
|
|
61
|
|
|
289
|
|
|
|
|
|
|
} |
|
290
|
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
#version number |
|
292
|
475
|
100
|
|
|
|
611
|
if ( /^SV\s+\S+\.(\d+);?/ ) { |
|
293
|
6
|
|
|
|
|
9
|
my $sv = $1; |
|
294
|
|
|
|
|
|
|
#$sv =~ s/\;//; |
|
295
|
6
|
|
|
|
|
13
|
$params{'-seq_version'} = $sv; |
|
296
|
6
|
|
|
|
|
9
|
$params{'-version'} = $sv; |
|
297
|
|
|
|
|
|
|
} |
|
298
|
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
#date (NOTE: takes last date line) |
|
300
|
475
|
100
|
|
|
|
633
|
if ( /^DT\s+(.+)$/ ) { |
|
301
|
26
|
|
|
|
|
44
|
my $line = $1; |
|
302
|
26
|
|
|
|
|
56
|
my ($date, $version) = split(' ', $line, 2); |
|
303
|
26
|
|
|
|
|
32
|
$date =~ tr/,//d; # remove comma if new version |
|
304
|
26
|
100
|
|
|
|
42
|
if ($version) { |
|
305
|
24
|
100
|
|
|
|
106
|
if ($version =~ /\(Rel\. (\d+), Created\)/ms ) { |
|
|
|
100
|
|
|
|
|
|
|
306
|
11
|
|
|
|
|
73
|
my $release = Bio::Annotation::SimpleValue->new( |
|
307
|
|
|
|
|
|
|
-tagname => 'creation_release', |
|
308
|
|
|
|
|
|
|
-value => $1 |
|
309
|
|
|
|
|
|
|
); |
|
310
|
11
|
|
|
|
|
31
|
$annotation->add_Annotation($release); |
|
311
|
|
|
|
|
|
|
} elsif ($version =~ /\(Rel\. (\d+), Last updated, Version (\d+)\)/ms ) { |
|
312
|
11
|
|
|
|
|
44
|
my $release = Bio::Annotation::SimpleValue->new( |
|
313
|
|
|
|
|
|
|
-tagname => 'update_release', |
|
314
|
|
|
|
|
|
|
-value => $1 |
|
315
|
|
|
|
|
|
|
); |
|
316
|
11
|
|
|
|
|
27
|
$annotation->add_Annotation($release); |
|
317
|
|
|
|
|
|
|
|
|
318
|
11
|
|
|
|
|
39
|
my $update = Bio::Annotation::SimpleValue->new( |
|
319
|
|
|
|
|
|
|
-tagname => 'update_version', |
|
320
|
|
|
|
|
|
|
-value => $2 |
|
321
|
|
|
|
|
|
|
); |
|
322
|
11
|
|
|
|
|
25
|
$annotation->add_Annotation($update); |
|
323
|
|
|
|
|
|
|
} |
|
324
|
|
|
|
|
|
|
} |
|
325
|
26
|
|
|
|
|
22
|
push @{$params{'-dates'}}, $date; |
|
|
26
|
|
|
|
|
49
|
|
|
326
|
|
|
|
|
|
|
} |
|
327
|
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
#keywords |
|
329
|
475
|
100
|
|
|
|
1633
|
if ( /^KW (.*)\S*$/ ) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
330
|
29
|
|
|
|
|
138
|
my @kw = split(/\s*\;\s*/,$1); |
|
331
|
29
|
|
|
|
|
35
|
push @{$params{'-keywords'}}, @kw; |
|
|
29
|
|
|
|
|
91
|
|
|
332
|
|
|
|
|
|
|
} |
|
333
|
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
# Organism name and phylogenetic information |
|
335
|
|
|
|
|
|
|
elsif (/^O[SC]/) { |
|
336
|
|
|
|
|
|
|
# pass the accession number so we can give an informative throw message if necessary |
|
337
|
21
|
|
|
|
|
76
|
my $species = $self->_read_EMBL_Species(\$buffer, $params{'-accession_number'}); |
|
338
|
21
|
|
|
|
|
46
|
$params{'-species'}= $species; |
|
339
|
|
|
|
|
|
|
} |
|
340
|
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
# NCBI TaxID Xref |
|
342
|
|
|
|
|
|
|
elsif (/^OX/) { |
|
343
|
3
|
50
|
|
|
|
19
|
if (/NCBI_TaxID=(\d+)/) { |
|
344
|
3
|
|
|
|
|
14
|
$ncbi_taxid=$1; |
|
345
|
|
|
|
|
|
|
} |
|
346
|
|
|
|
|
|
|
|
|
347
|
3
|
|
|
|
|
12
|
my @links = $self->_read_EMBL_TaxID_DBLink(\$buffer); |
|
348
|
3
|
|
|
|
|
6
|
foreach my $dblink ( @links ) { |
|
349
|
1
|
|
|
|
|
6
|
$annotation->add_Annotation('dblink',$dblink); |
|
350
|
|
|
|
|
|
|
} |
|
351
|
|
|
|
|
|
|
} |
|
352
|
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
# References |
|
354
|
|
|
|
|
|
|
elsif (/^R/) { |
|
355
|
99
|
|
|
|
|
187
|
my @refs = $self->_read_EMBL_References(\$buffer); |
|
356
|
99
|
|
|
|
|
122
|
foreach my $ref ( @refs ) { |
|
357
|
99
|
|
|
|
|
211
|
$annotation->add_Annotation('reference',$ref); |
|
358
|
|
|
|
|
|
|
} |
|
359
|
|
|
|
|
|
|
} |
|
360
|
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
# DB Xrefs |
|
362
|
|
|
|
|
|
|
elsif (/^DR/) { |
|
363
|
5
|
|
|
|
|
32
|
my @links = $self->_read_EMBL_DBLink(\$buffer); |
|
364
|
5
|
|
|
|
|
10
|
foreach my $dblink ( @links ) { |
|
365
|
25
|
|
|
|
|
39
|
$annotation->add_Annotation('dblink',$dblink); |
|
366
|
|
|
|
|
|
|
} |
|
367
|
|
|
|
|
|
|
} |
|
368
|
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
# Comments |
|
370
|
|
|
|
|
|
|
elsif (/^CC\s+(.*)/) { |
|
371
|
15
|
|
|
|
|
40
|
$comment .= $1; |
|
372
|
15
|
|
|
|
|
18
|
$comment .= " "; |
|
373
|
15
|
|
|
|
|
37
|
while (defined ($_ = $self->_readline) ) { |
|
374
|
257
|
100
|
|
|
|
519
|
if (/^CC\s+(.*)/) { |
|
375
|
242
|
|
|
|
|
288
|
$comment .= $1; |
|
376
|
242
|
|
|
|
|
325
|
$comment .= " "; |
|
377
|
|
|
|
|
|
|
} else { |
|
378
|
15
|
|
|
|
|
21
|
last; |
|
379
|
|
|
|
|
|
|
} |
|
380
|
|
|
|
|
|
|
} |
|
381
|
15
|
|
|
|
|
96
|
my $commobj = Bio::Annotation::Comment->new(); |
|
382
|
15
|
|
|
|
|
45
|
$commobj->text($comment); |
|
383
|
15
|
|
|
|
|
33
|
$annotation->add_Annotation('comment',$commobj); |
|
384
|
15
|
|
|
|
|
31
|
$comment = ""; |
|
385
|
|
|
|
|
|
|
} |
|
386
|
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
# Get next line. |
|
388
|
475
|
|
|
|
|
744
|
$buffer = $self->_readline; |
|
389
|
|
|
|
|
|
|
} |
|
390
|
|
|
|
|
|
|
|
|
391
|
24
|
|
|
|
|
50
|
while ( defined ($_ = $self->_readline) ) { |
|
392
|
46
|
100
|
|
|
|
114
|
/^FT\s{3}\w/ && last; |
|
393
|
24
|
100
|
|
|
|
56
|
/^SQ / && last; |
|
394
|
22
|
50
|
|
|
|
55
|
/^CO / && last; |
|
395
|
|
|
|
|
|
|
} |
|
396
|
24
|
|
|
|
|
27
|
$buffer = $_; |
|
397
|
|
|
|
|
|
|
|
|
398
|
24
|
100
|
66
|
|
|
116
|
if (defined($buffer) && $buffer =~ /^FT /) { |
|
399
|
22
|
|
|
|
|
46
|
until ( !defined ($buffer) ) { |
|
400
|
256
|
|
|
|
|
468
|
my $ftunit = $self->_read_FTHelper_EMBL(\$buffer); |
|
401
|
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
# process ftunit |
|
403
|
256
|
|
|
|
|
530
|
my $feat = |
|
404
|
|
|
|
|
|
|
$ftunit->_generic_seqfeature($self->location_factory(), $name); |
|
405
|
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
# add taxon_id from source if available |
|
407
|
|
|
|
|
|
|
# Notice, this will override what is found in the OX line. |
|
408
|
|
|
|
|
|
|
# this is by design as this seems to be the official way |
|
409
|
|
|
|
|
|
|
# of specifying a TaxID |
|
410
|
256
|
100
|
100
|
|
|
673
|
if ($params{'-species'} && ($feat->primary_tag eq 'source') |
|
|
|
|
100
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
411
|
|
|
|
|
|
|
&& $feat->has_tag('db_xref') |
|
412
|
|
|
|
|
|
|
&& (! $params{'-species'}->ncbi_taxid())) { |
|
413
|
12
|
|
|
|
|
32
|
foreach my $tagval ($feat->get_tag_values('db_xref')) { |
|
414
|
12
|
50
|
|
|
|
46
|
if (index($tagval,"taxon:") == 0) { |
|
415
|
12
|
|
|
|
|
38
|
$params{'-species'}->ncbi_taxid(substr($tagval,6)); |
|
416
|
12
|
|
|
|
|
24
|
last; |
|
417
|
|
|
|
|
|
|
} |
|
418
|
|
|
|
|
|
|
} |
|
419
|
|
|
|
|
|
|
} |
|
420
|
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
# add feature to list of features |
|
422
|
256
|
|
|
|
|
305
|
push(@features, $feat); |
|
423
|
|
|
|
|
|
|
|
|
424
|
256
|
100
|
|
|
|
1126
|
if ( $buffer !~ /^FT/ ) { |
|
425
|
22
|
|
|
|
|
69
|
last; |
|
426
|
|
|
|
|
|
|
} |
|
427
|
|
|
|
|
|
|
} |
|
428
|
|
|
|
|
|
|
} |
|
429
|
|
|
|
|
|
|
# Set taxid found in OX line |
|
430
|
24
|
100
|
100
|
|
|
129
|
if ($params{'-species'} && defined $ncbi_taxid |
|
|
|
|
100
|
|
|
|
|
|
431
|
|
|
|
|
|
|
&& (! $params{'-species'}->ncbi_taxid())) { |
|
432
|
2
|
|
|
|
|
7
|
$params{'-species'}->ncbi_taxid($ncbi_taxid); |
|
433
|
|
|
|
|
|
|
} |
|
434
|
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
# skip comments |
|
436
|
24
|
|
66
|
|
|
147
|
while ( defined ($buffer) && $buffer =~ /^XX/ ) { |
|
437
|
20
|
|
|
|
|
49
|
$buffer = $self->_readline(); |
|
438
|
|
|
|
|
|
|
} |
|
439
|
|
|
|
|
|
|
|
|
440
|
24
|
100
|
|
|
|
82
|
if ( $buffer =~ /^CO/ ) { |
|
441
|
|
|
|
|
|
|
# bug#2982 |
|
442
|
|
|
|
|
|
|
# special : create contig as annotation |
|
443
|
4
|
|
|
|
|
11
|
while ( defined ($buffer) ) { |
|
444
|
4
|
|
|
|
|
16
|
$annotation->add_Annotation($_) for $self->_read_EMBL_Contig(\$buffer); |
|
445
|
4
|
50
|
66
|
|
|
23
|
if ( !$buffer || $buffer !~ /^CO/ ) { |
|
446
|
4
|
|
|
|
|
5
|
last; |
|
447
|
|
|
|
|
|
|
} |
|
448
|
|
|
|
|
|
|
} |
|
449
|
4
|
|
100
|
|
|
11
|
$buffer ||= ''; |
|
450
|
|
|
|
|
|
|
} |
|
451
|
24
|
100
|
|
|
|
66
|
if ($buffer !~ /^\/\//) { # if no SQ lines following CO (bug#2958) |
|
452
|
20
|
100
|
|
|
|
63
|
if ( $buffer !~ /^SQ/ ) { |
|
453
|
1
|
|
|
|
|
3
|
while ( defined ($_ = $self->_readline) ) { |
|
454
|
0
|
0
|
|
|
|
0
|
/^SQ/ && last; |
|
455
|
|
|
|
|
|
|
} |
|
456
|
|
|
|
|
|
|
} |
|
457
|
20
|
|
|
|
|
24
|
$seqc = ""; |
|
458
|
20
|
|
|
|
|
48
|
while ( defined ($_ = $self->_readline) ) { |
|
459
|
2924
|
100
|
|
|
|
3631
|
m{^//} && last; |
|
460
|
2906
|
|
|
|
|
2431
|
$_ = uc($_); |
|
461
|
2906
|
|
|
|
|
20861
|
s/[^A-Za-z]//g; |
|
462
|
2906
|
|
|
|
|
4620
|
$seqc .= $_; |
|
463
|
|
|
|
|
|
|
} |
|
464
|
|
|
|
|
|
|
} |
|
465
|
24
|
|
|
|
|
84
|
my $seq = $self->sequence_factory->create |
|
466
|
|
|
|
|
|
|
(-verbose => $self->verbose(), |
|
467
|
|
|
|
|
|
|
-division => $div, |
|
468
|
|
|
|
|
|
|
-seq => $seqc, |
|
469
|
|
|
|
|
|
|
-desc => $desc, |
|
470
|
|
|
|
|
|
|
-display_id => $name, |
|
471
|
|
|
|
|
|
|
-annotation => $annotation, |
|
472
|
|
|
|
|
|
|
-molecule => $mol, |
|
473
|
|
|
|
|
|
|
-alphabet => $alphabet, |
|
474
|
|
|
|
|
|
|
-features => \@features, |
|
475
|
|
|
|
|
|
|
%params); |
|
476
|
24
|
|
|
|
|
197
|
return $seq; |
|
477
|
|
|
|
|
|
|
} |
|
478
|
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
=head2 _write_ID_line |
|
482
|
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
Title : _write_ID_line |
|
484
|
|
|
|
|
|
|
Usage : $self->_write_ID_line($seq); |
|
485
|
|
|
|
|
|
|
Function: Writes the EMBL Release 87 format ID line to the stream, unless |
|
486
|
|
|
|
|
|
|
: there is a user-supplied ID line generation function in which |
|
487
|
|
|
|
|
|
|
: case that is used instead. |
|
488
|
|
|
|
|
|
|
: ( See Bio::SeqIO::embl::_id_generation_function(). ) |
|
489
|
|
|
|
|
|
|
Returns : nothing |
|
490
|
|
|
|
|
|
|
Args : Bio::Seq object |
|
491
|
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
=cut |
|
493
|
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
sub _write_ID_line { |
|
495
|
|
|
|
|
|
|
|
|
496
|
11
|
|
|
11
|
|
17
|
my ($self, $seq) = @_; |
|
497
|
|
|
|
|
|
|
|
|
498
|
11
|
|
|
|
|
14
|
my $id_line; |
|
499
|
|
|
|
|
|
|
# If there is a user-supplied ID generation function, use it. |
|
500
|
11
|
50
|
|
|
|
27
|
if ( $self->_id_generation_func ) { |
|
501
|
0
|
|
|
|
|
0
|
$id_line = "ID " . &{$self->_id_generation_func}($seq) . "\nXX\n"; |
|
|
0
|
|
|
|
|
0
|
|
|
502
|
|
|
|
|
|
|
} |
|
503
|
|
|
|
|
|
|
# Otherwise, generate a standard EMBL release 87 (June 2006) ID line. |
|
504
|
|
|
|
|
|
|
else { |
|
505
|
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
# The sequence name is supposed to be the primary accession number, |
|
507
|
11
|
|
|
|
|
36
|
my $name = $seq->accession_number(); |
|
508
|
11
|
100
|
66
|
|
|
57
|
if ( not(defined $name) || $name eq 'unknown') { |
|
509
|
|
|
|
|
|
|
# but if it is not present, use the sequence ID or the empty string |
|
510
|
5
|
|
100
|
|
|
14
|
$name = $seq->id() || ''; |
|
511
|
|
|
|
|
|
|
} |
|
512
|
|
|
|
|
|
|
|
|
513
|
11
|
50
|
|
|
|
35
|
$self->warn("No whitespace allowed in EMBL id [". $name. "]") if $name =~ /\s/; |
|
514
|
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
# Use the sequence version, or default to 1. |
|
516
|
11
|
|
50
|
|
|
32
|
my $version = $seq->version() || 1; |
|
517
|
|
|
|
|
|
|
|
|
518
|
11
|
|
|
|
|
32
|
my $len = $seq->length(); |
|
519
|
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
# Taxonomic division. |
|
521
|
11
|
|
|
|
|
16
|
my $div; |
|
522
|
11
|
100
|
100
|
|
|
122
|
if ( $seq->can('division') && defined($seq->division) && |
|
|
|
|
66
|
|
|
|
|
|
523
|
|
|
|
|
|
|
$self->_is_valid_division($seq->division) ) { |
|
524
|
2
|
|
|
|
|
5
|
$div = $seq->division(); |
|
525
|
|
|
|
|
|
|
} else { |
|
526
|
9
|
|
50
|
|
|
39
|
$div ||= 'UNC'; # 'UNC' is the EMBL division code for 'unclassified'. |
|
527
|
|
|
|
|
|
|
} |
|
528
|
|
|
|
|
|
|
|
|
529
|
11
|
|
|
|
|
16
|
my $mol; |
|
530
|
|
|
|
|
|
|
# If the molecule type is a valid EMBL type, use it. |
|
531
|
11
|
50
|
100
|
|
|
86
|
if ( $seq->can('molecule') |
|
|
|
100
|
66
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
532
|
|
|
|
|
|
|
&& defined($seq->molecule) |
|
533
|
|
|
|
|
|
|
&& $self->_is_valid_molecule_type($seq->molecule) |
|
534
|
|
|
|
|
|
|
) { |
|
535
|
0
|
|
|
|
|
0
|
$mol = $seq->molecule(); |
|
536
|
|
|
|
|
|
|
} |
|
537
|
|
|
|
|
|
|
# Otherwise, choose unassigned DNA or RNA based on the alphabet. |
|
538
|
|
|
|
|
|
|
elsif ($seq->can('primary_seq') && defined $seq->primary_seq->alphabet) { |
|
539
|
6
|
|
|
|
|
10
|
my $alphabet =$seq->primary_seq->alphabet; |
|
540
|
6
|
50
|
|
|
|
12
|
if ($alphabet eq 'dna') { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
541
|
6
|
|
|
|
|
8
|
$mol ='unassigned DNA'; |
|
542
|
|
|
|
|
|
|
} elsif ($alphabet eq 'rna') { |
|
543
|
0
|
|
|
|
|
0
|
$mol='unassigned RNA'; |
|
544
|
|
|
|
|
|
|
} elsif ($alphabet eq 'protein') { |
|
545
|
0
|
|
|
|
|
0
|
$self->warn("Protein sequence found; EMBL is a nucleotide format."); |
|
546
|
0
|
|
|
|
|
0
|
$mol='AA'; # AA is not a valid EMBL molecule type. |
|
547
|
|
|
|
|
|
|
} |
|
548
|
|
|
|
|
|
|
} |
|
549
|
|
|
|
|
|
|
|
|
550
|
11
|
|
|
|
|
18
|
my $topology = 'linear'; |
|
551
|
11
|
50
|
|
|
|
31
|
if ($seq->is_circular) { |
|
552
|
0
|
|
|
|
|
0
|
$topology = 'circular'; |
|
553
|
|
|
|
|
|
|
} |
|
554
|
|
|
|
|
|
|
|
|
555
|
11
|
|
100
|
|
|
34
|
$mol ||= ''; # 'unassigned'; ? |
|
556
|
11
|
|
|
|
|
46
|
$id_line = "ID $name; SV $version; $topology; $mol; STD; $div; $len BP.\nXX\n"; |
|
557
|
11
|
|
|
|
|
48
|
$self->_print($id_line); |
|
558
|
|
|
|
|
|
|
} |
|
559
|
|
|
|
|
|
|
} |
|
560
|
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
=head2 _is_valid_division |
|
562
|
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
Title : _is_valid_division |
|
564
|
|
|
|
|
|
|
Usage : $self->_is_valid_division($div) |
|
565
|
|
|
|
|
|
|
Function: tests division code for validity |
|
566
|
|
|
|
|
|
|
Returns : true if $div is a valid EMBL release 87 taxonomic division. |
|
567
|
|
|
|
|
|
|
Args : taxonomic division code string |
|
568
|
|
|
|
|
|
|
|
|
569
|
|
|
|
|
|
|
=cut |
|
570
|
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
sub _is_valid_division { |
|
572
|
2
|
|
|
2
|
|
4
|
my ($self, $division) = @_; |
|
573
|
|
|
|
|
|
|
|
|
574
|
2
|
|
|
|
|
25
|
my %EMBL_divisions = ( |
|
575
|
|
|
|
|
|
|
"PHG" => 1, # Bacteriophage |
|
576
|
|
|
|
|
|
|
"ENV" => 1, # Environmental Sample |
|
577
|
|
|
|
|
|
|
"FUN" => 1, # Fungal |
|
578
|
|
|
|
|
|
|
"HUM" => 1, # Human |
|
579
|
|
|
|
|
|
|
"INV" => 1, # Invertebrate |
|
580
|
|
|
|
|
|
|
"MAM" => 1, # Other Mammal |
|
581
|
|
|
|
|
|
|
"VRT" => 1, # Other Vertebrate |
|
582
|
|
|
|
|
|
|
"MUS" => 1, # Mus musculus |
|
583
|
|
|
|
|
|
|
"PLN" => 1, # Plant |
|
584
|
|
|
|
|
|
|
"PRO" => 1, # Prokaryote |
|
585
|
|
|
|
|
|
|
"ROD" => 1, # Other Rodent |
|
586
|
|
|
|
|
|
|
"SYN" => 1, # Synthetic |
|
587
|
|
|
|
|
|
|
"UNC" => 1, # Unclassified |
|
588
|
|
|
|
|
|
|
"VRL" => 1 # Viral |
|
589
|
|
|
|
|
|
|
); |
|
590
|
|
|
|
|
|
|
|
|
591
|
2
|
|
|
|
|
10
|
return exists($EMBL_divisions{$division}); |
|
592
|
|
|
|
|
|
|
} |
|
593
|
|
|
|
|
|
|
|
|
594
|
|
|
|
|
|
|
=head2 _is_valid_molecule_type |
|
595
|
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
Title : _is_valid_molecule_type |
|
597
|
|
|
|
|
|
|
Usage : $self->_is_valid_molecule_type($mol) |
|
598
|
|
|
|
|
|
|
Function: tests molecule type for validity |
|
599
|
|
|
|
|
|
|
Returns : true if $mol is a valid EMBL release 87 molecule type. |
|
600
|
|
|
|
|
|
|
Args : molecule type string |
|
601
|
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
=cut |
|
603
|
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
sub _is_valid_molecule_type { |
|
605
|
2
|
|
|
2
|
|
3
|
my ($self, $moltype) = @_; |
|
606
|
|
|
|
|
|
|
|
|
607
|
2
|
|
|
|
|
15
|
my %EMBL_molecule_types = ( |
|
608
|
|
|
|
|
|
|
"genomic DNA" => 1, |
|
609
|
|
|
|
|
|
|
"genomic RNA" => 1, |
|
610
|
|
|
|
|
|
|
"mRNA" => 1, |
|
611
|
|
|
|
|
|
|
"tRNA" => 1, |
|
612
|
|
|
|
|
|
|
"rRNA" => 1, |
|
613
|
|
|
|
|
|
|
"snoRNA" => 1, |
|
614
|
|
|
|
|
|
|
"snRNA" => 1, |
|
615
|
|
|
|
|
|
|
"scRNA" => 1, |
|
616
|
|
|
|
|
|
|
"pre-RNA" => 1, |
|
617
|
|
|
|
|
|
|
"other RNA" => 1, |
|
618
|
|
|
|
|
|
|
"other DNA" => 1, |
|
619
|
|
|
|
|
|
|
"unassigned DNA" => 1, |
|
620
|
|
|
|
|
|
|
"unassigned RNA" => 1 |
|
621
|
|
|
|
|
|
|
); |
|
622
|
|
|
|
|
|
|
|
|
623
|
2
|
|
|
|
|
29
|
return exists($EMBL_molecule_types{$moltype}); |
|
624
|
|
|
|
|
|
|
} |
|
625
|
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
=head2 write_seq |
|
627
|
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
Title : write_seq |
|
629
|
|
|
|
|
|
|
Usage : $stream->write_seq($seq) |
|
630
|
|
|
|
|
|
|
Function: writes the $seq object (must be seq) to the stream |
|
631
|
|
|
|
|
|
|
Returns : 1 for success and undef for error |
|
632
|
|
|
|
|
|
|
Args : array of 1 to n Bio::SeqI objects |
|
633
|
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
=cut |
|
636
|
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
sub write_seq { |
|
638
|
13
|
|
|
13
|
1
|
55
|
my ($self,@seqs) = @_; |
|
639
|
|
|
|
|
|
|
|
|
640
|
13
|
|
|
|
|
27
|
foreach my $seq ( @seqs ) { |
|
641
|
13
|
50
|
|
|
|
32
|
$self->throw("Attempting to write with no seq!") unless defined $seq; |
|
642
|
13
|
100
|
100
|
|
|
87
|
unless ( ref $seq && $seq->isa('Bio::SeqI' ) ) { |
|
643
|
4
|
50
|
|
|
|
12
|
$self->warn("$seq is not a SeqI compliant sequence object!") |
|
644
|
|
|
|
|
|
|
if $self->verbose >= 0; |
|
645
|
4
|
100
|
66
|
|
|
22
|
unless ( ref $seq && $seq->isa('Bio::PrimarySeqI' ) ) { |
|
646
|
2
|
|
|
|
|
27
|
$self->throw("$seq is not a PrimarySeqI compliant sequence object!"); |
|
647
|
|
|
|
|
|
|
} |
|
648
|
|
|
|
|
|
|
} |
|
649
|
11
|
|
100
|
|
|
38
|
my $str = $seq->seq || ''; |
|
650
|
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
# Write the ID line. |
|
652
|
11
|
|
|
|
|
30
|
$self->_write_ID_line($seq); |
|
653
|
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
|
|
655
|
|
|
|
|
|
|
# Write the accession line if present |
|
656
|
11
|
|
|
|
|
13
|
my( $acc ); |
|
657
|
|
|
|
|
|
|
{ |
|
658
|
11
|
50
|
66
|
|
|
10
|
if ( my $func = $self->_ac_generation_func ) { |
|
|
11
|
100
|
|
|
|
26
|
|
|
|
|
50
|
|
|
|
|
|
|
659
|
0
|
|
|
|
|
0
|
$acc = &{$func}($seq); |
|
|
0
|
|
|
|
|
0
|
|
|
660
|
|
|
|
|
|
|
} elsif ( $seq->isa('Bio::Seq::RichSeqI') && |
|
661
|
|
|
|
|
|
|
defined($seq->accession_number) ) { |
|
662
|
4
|
|
|
|
|
7
|
$acc = $seq->accession_number; |
|
663
|
4
|
|
|
|
|
10
|
$acc = join("; ", $acc, $seq->get_secondary_accessions); |
|
664
|
|
|
|
|
|
|
} elsif ( $seq->can('accession_number') ) { |
|
665
|
7
|
|
|
|
|
15
|
$acc = $seq->accession_number; |
|
666
|
|
|
|
|
|
|
} |
|
667
|
|
|
|
|
|
|
|
|
668
|
11
|
50
|
|
|
|
22
|
if (defined $acc) { |
|
669
|
11
|
50
|
|
|
|
37
|
$self->_print("AC $acc;\n", |
|
670
|
|
|
|
|
|
|
"XX\n") || return; |
|
671
|
|
|
|
|
|
|
} |
|
672
|
|
|
|
|
|
|
} |
|
673
|
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
# Date lines |
|
675
|
11
|
|
|
|
|
12
|
my $switch=0; |
|
676
|
11
|
100
|
|
|
|
58
|
if ( $seq->can('get_dates') ) { |
|
677
|
4
|
|
|
|
|
8
|
my @dates = $seq->get_dates(); |
|
678
|
4
|
|
|
|
|
4
|
my $ct = 1; |
|
679
|
4
|
|
|
|
|
3
|
my $date_flag = 0; |
|
680
|
4
|
|
|
|
|
9
|
my ($cr) = $seq->annotation->get_Annotations("creation_release"); |
|
681
|
4
|
|
|
|
|
12
|
my ($ur) = $seq->annotation->get_Annotations("update_release"); |
|
682
|
4
|
|
|
|
|
8
|
my ($uv) = $seq->annotation->get_Annotations("update_version"); |
|
683
|
|
|
|
|
|
|
|
|
684
|
4
|
0
|
33
|
|
|
11
|
unless ($cr && $ur && $ur) { |
|
|
|
|
33
|
|
|
|
|
|
685
|
4
|
|
|
|
|
5
|
$date_flag = 1; |
|
686
|
|
|
|
|
|
|
} |
|
687
|
|
|
|
|
|
|
|
|
688
|
4
|
|
|
|
|
7
|
foreach my $dt (@dates) { |
|
689
|
0
|
0
|
|
|
|
0
|
if (!$date_flag) { |
|
690
|
0
|
0
|
|
|
|
0
|
$self->_write_line_EMBL_regex("DT ","DT ", |
|
691
|
|
|
|
|
|
|
$dt." (Rel. ".($cr->value()).", Created)", |
|
692
|
|
|
|
|
|
|
'\s+|$',80) if $ct == 1; |
|
693
|
0
|
0
|
|
|
|
0
|
$self->_write_line_EMBL_regex("DT ","DT ", |
|
694
|
|
|
|
|
|
|
$dt." (Rel. ".($ur->value()).", Last updated, Version ".($uv->value()).")", |
|
695
|
|
|
|
|
|
|
'\s+|$',80) if $ct == 2; |
|
696
|
|
|
|
|
|
|
} else { # other formats? |
|
697
|
0
|
|
|
|
|
0
|
$self->_write_line_EMBL_regex("DT ","DT ", |
|
698
|
|
|
|
|
|
|
$dt,'\s+|$',80); |
|
699
|
|
|
|
|
|
|
} |
|
700
|
0
|
|
|
|
|
0
|
$switch =1; |
|
701
|
0
|
|
|
|
|
0
|
$ct++; |
|
702
|
|
|
|
|
|
|
} |
|
703
|
4
|
50
|
|
|
|
8
|
if ($switch == 1) { |
|
704
|
0
|
0
|
|
|
|
0
|
$self->_print("XX\n") || return; |
|
705
|
|
|
|
|
|
|
} |
|
706
|
|
|
|
|
|
|
} |
|
707
|
|
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
# Description lines |
|
709
|
11
|
50
|
|
|
|
31
|
$self->_write_line_EMBL_regex("DE ","DE ",$seq->desc(),'\s+|$',80) || return; #' |
|
710
|
11
|
50
|
|
|
|
28
|
$self->_print( "XX\n") || return; |
|
711
|
|
|
|
|
|
|
|
|
712
|
|
|
|
|
|
|
# if there, write the kw line |
|
713
|
|
|
|
|
|
|
{ |
|
714
|
11
|
|
|
|
|
17
|
my( $kw ); |
|
|
11
|
|
|
|
|
563
|
|
|
715
|
11
|
50
|
|
|
|
28
|
if ( my $func = $self->_kw_generation_func ) { |
|
|
|
100
|
|
|
|
|
|
|
716
|
0
|
|
|
|
|
0
|
$kw = &{$func}($seq); |
|
|
0
|
|
|
|
|
0
|
|
|
717
|
|
|
|
|
|
|
} elsif ( $seq->can('keywords') ) { |
|
718
|
4
|
|
|
|
|
9
|
$kw = $seq->keywords; |
|
719
|
|
|
|
|
|
|
} |
|
720
|
11
|
100
|
|
|
|
25
|
if (defined $kw) { |
|
721
|
4
|
50
|
|
|
|
7
|
$self->_write_line_EMBL_regex("KW ", "KW ", $kw, '\s+|$', 80) || return; #' |
|
722
|
4
|
50
|
|
|
|
7
|
$self->_print( "XX\n") || return; |
|
723
|
|
|
|
|
|
|
} |
|
724
|
|
|
|
|
|
|
} |
|
725
|
|
|
|
|
|
|
|
|
726
|
|
|
|
|
|
|
# Organism lines |
|
727
|
|
|
|
|
|
|
|
|
728
|
11
|
100
|
100
|
|
|
53
|
if ($seq->can('species') && (my $spec = $seq->species)) { |
|
729
|
6
|
|
|
|
|
21
|
my @class = $spec->classification(); |
|
730
|
6
|
|
|
|
|
9
|
shift @class; # get rid of species name. Some embl files include |
|
731
|
|
|
|
|
|
|
# the species name in the OC lines, but this seems |
|
732
|
|
|
|
|
|
|
# more like an error than something we need to |
|
733
|
|
|
|
|
|
|
# emulate |
|
734
|
6
|
|
|
|
|
24
|
my $OS = $spec->scientific_name; |
|
735
|
6
|
50
|
|
|
|
15
|
if ($spec->common_name) { |
|
736
|
0
|
|
|
|
|
0
|
$OS .= ' ('.$spec->common_name.')'; |
|
737
|
|
|
|
|
|
|
} |
|
738
|
6
|
50
|
|
|
|
20
|
$self->_print("OS $OS\n") || return; |
|
739
|
6
|
|
|
|
|
19
|
my $OC = join('; ', reverse(@class)) .'.'; |
|
740
|
6
|
50
|
|
|
|
13
|
$self->_write_line_EMBL_regex("OC ","OC ",$OC,'; |$',80) || return; |
|
741
|
6
|
50
|
|
|
|
20
|
if ($spec->organelle) { |
|
742
|
0
|
0
|
|
|
|
0
|
$self->_write_line_EMBL_regex("OG ","OG ",$spec->organelle,'; |$',80) || return; |
|
743
|
|
|
|
|
|
|
} |
|
744
|
6
|
|
|
|
|
15
|
my $ncbi_taxid = $spec->ncbi_taxid; |
|
745
|
6
|
100
|
|
|
|
12
|
if ($ncbi_taxid) { |
|
746
|
2
|
50
|
|
|
|
6
|
$self->_print("OX NCBI_TaxID=$ncbi_taxid\n") || return; |
|
747
|
|
|
|
|
|
|
} |
|
748
|
6
|
50
|
|
|
|
12
|
$self->_print("XX\n") || return; |
|
749
|
|
|
|
|
|
|
} |
|
750
|
|
|
|
|
|
|
# Reference lines |
|
751
|
11
|
|
|
|
|
12
|
my $t = 1; |
|
752
|
11
|
100
|
66
|
|
|
63
|
if ( $seq->can('annotation') && defined $seq->annotation ) { |
|
753
|
9
|
|
|
|
|
18
|
foreach my $ref ( $seq->annotation->get_Annotations('reference') ) { |
|
754
|
6
|
50
|
|
|
|
15
|
$self->_print( "RN [$t]\n") || return; |
|
755
|
|
|
|
|
|
|
|
|
756
|
|
|
|
|
|
|
# Having no RP line is legal, but we need both |
|
757
|
|
|
|
|
|
|
# start and end for a valid location. |
|
758
|
6
|
50
|
|
|
|
14
|
if ($ref->comment) { |
|
759
|
0
|
0
|
|
|
|
0
|
$self->_write_line_EMBL_regex("RC ", "RC ", $ref->comment, '\s+|$', 80) || return; #' |
|
760
|
|
|
|
|
|
|
} |
|
761
|
6
|
|
|
|
|
15
|
my $start = $ref->start; |
|
762
|
6
|
|
|
|
|
15
|
my $end = $ref->end; |
|
763
|
6
|
50
|
33
|
|
|
21
|
if ($start and $end) { |
|
|
|
0
|
0
|
|
|
|
|
|
764
|
6
|
50
|
|
|
|
16
|
$self->_print( "RP $start-$end\n") || return; |
|
765
|
|
|
|
|
|
|
} elsif ($start or $end) { |
|
766
|
0
|
|
|
|
|
0
|
$self->throw("Both start and end are needed for a valid RP line.". |
|
767
|
|
|
|
|
|
|
" Got: start='$start' end='$end'"); |
|
768
|
|
|
|
|
|
|
} |
|
769
|
|
|
|
|
|
|
|
|
770
|
6
|
50
|
|
|
|
11
|
if (my $med = $ref->medline) { |
|
771
|
0
|
0
|
|
|
|
0
|
$self->_print( "RX MEDLINE; $med.\n") || return; |
|
772
|
|
|
|
|
|
|
} |
|
773
|
6
|
50
|
|
|
|
9
|
if (my $pm = $ref->pubmed) { |
|
774
|
0
|
0
|
|
|
|
0
|
$self->_print( "RX PUBMED; $pm.\n") || return; |
|
775
|
|
|
|
|
|
|
} |
|
776
|
6
|
|
|
|
|
19
|
my $authors = $ref->authors; |
|
777
|
6
|
|
|
|
|
59
|
$authors =~ s/([\w\.]) (\w)/$1#$2/g; # add word wrap protection char '#' |
|
778
|
|
|
|
|
|
|
|
|
779
|
6
|
50
|
|
|
|
16
|
$self->_write_line_EMBL_regex("RA ", "RA ", |
|
780
|
|
|
|
|
|
|
$authors . ";", |
|
781
|
|
|
|
|
|
|
'\s+|$', 80) || return; #' |
|
782
|
|
|
|
|
|
|
|
|
783
|
|
|
|
|
|
|
# If there is no title to the reference, it appears |
|
784
|
|
|
|
|
|
|
# as a single semi-colon. All titles must end in |
|
785
|
|
|
|
|
|
|
# a semi-colon. |
|
786
|
6
|
|
100
|
|
|
13
|
my $ref_title = $ref->title || ''; |
|
787
|
6
|
|
|
|
|
37
|
$ref_title =~ s/[\s;]*$/;/; |
|
788
|
6
|
50
|
|
|
|
12
|
$self->_write_line_EMBL_regex("RT ", "RT ", $ref_title, '\s+|$', 80) || return; #' |
|
789
|
6
|
50
|
|
|
|
12
|
$self->_write_line_EMBL_regex("RL ", "RL ", $ref->location, '\s+|$', 80) || return; #' |
|
790
|
6
|
50
|
|
|
|
9
|
$self->_print("XX\n") || return; |
|
791
|
6
|
|
|
|
|
10
|
$t++; |
|
792
|
|
|
|
|
|
|
} |
|
793
|
|
|
|
|
|
|
|
|
794
|
|
|
|
|
|
|
# DB Xref lines |
|
795
|
9
|
100
|
|
|
|
23
|
if (my @db_xref = $seq->annotation->get_Annotations('dblink') ) { |
|
796
|
2
|
|
|
|
|
2
|
for my $dr (@db_xref) { |
|
797
|
43
|
|
|
|
|
59
|
my $db_name = $dr->database; |
|
798
|
43
|
|
|
|
|
49
|
my $prim = $dr->primary_id; |
|
799
|
|
|
|
|
|
|
|
|
800
|
43
|
|
50
|
|
|
47
|
my $opt = $dr->optional_id || ''; |
|
801
|
43
|
50
|
|
|
|
69
|
my $line = $opt ? "$db_name; $prim; $opt." : "$db_name; $prim."; |
|
802
|
43
|
50
|
|
|
|
47
|
$self->_write_line_EMBL_regex("DR ", "DR ", $line, '\s+|$', 80) || return; #' |
|
803
|
|
|
|
|
|
|
} |
|
804
|
2
|
50
|
|
|
|
4
|
$self->_print("XX\n") || return; |
|
805
|
|
|
|
|
|
|
} |
|
806
|
|
|
|
|
|
|
|
|
807
|
|
|
|
|
|
|
# Comment lines |
|
808
|
9
|
|
|
|
|
20
|
foreach my $comment ( $seq->annotation->get_Annotations('comment') ) { |
|
809
|
4
|
50
|
|
|
|
10
|
$self->_write_line_EMBL_regex("CC ", "CC ", $comment->text, '\s+|$', 80) || return; #' |
|
810
|
4
|
50
|
|
|
|
8
|
$self->_print("XX\n") || return; |
|
811
|
|
|
|
|
|
|
} |
|
812
|
|
|
|
|
|
|
} |
|
813
|
|
|
|
|
|
|
# "\\s\+\|\$" |
|
814
|
|
|
|
|
|
|
|
|
815
|
|
|
|
|
|
|
## FEATURE TABLE |
|
816
|
|
|
|
|
|
|
|
|
817
|
11
|
50
|
|
|
|
26
|
$self->_print("FH Key Location/Qualifiers\n") || return; |
|
818
|
11
|
50
|
|
|
|
23
|
$self->_print("FH\n") || return; |
|
819
|
|
|
|
|
|
|
|
|
820
|
11
|
100
|
|
|
|
65
|
my @feats = $seq->can('top_SeqFeatures') ? $seq->top_SeqFeatures : (); |
|
821
|
11
|
100
|
|
|
|
24
|
if ($feats[0]) { |
|
822
|
6
|
50
|
|
|
|
14
|
if ( defined $self->_post_sort ) { |
|
823
|
|
|
|
|
|
|
# we need to read things into an array. |
|
824
|
|
|
|
|
|
|
# Process. Sort them. Print 'em |
|
825
|
|
|
|
|
|
|
|
|
826
|
0
|
|
|
|
|
0
|
my $post_sort_func = $self->_post_sort(); |
|
827
|
0
|
|
|
|
|
0
|
my @fth; |
|
828
|
|
|
|
|
|
|
|
|
829
|
0
|
|
|
|
|
0
|
foreach my $sf ( @feats ) { |
|
830
|
0
|
|
|
|
|
0
|
push(@fth,Bio::SeqIO::FTHelper::from_SeqFeature($sf,$seq)); |
|
831
|
|
|
|
|
|
|
} |
|
832
|
|
|
|
|
|
|
|
|
833
|
0
|
|
|
|
|
0
|
@fth = sort { &$post_sort_func($a,$b) } @fth; |
|
|
0
|
|
|
|
|
0
|
|
|
834
|
|
|
|
|
|
|
|
|
835
|
0
|
|
|
|
|
0
|
foreach my $fth ( @fth ) { |
|
836
|
0
|
0
|
|
|
|
0
|
$self->_print_EMBL_FTHelper($fth) || return; |
|
837
|
|
|
|
|
|
|
} |
|
838
|
|
|
|
|
|
|
} else { |
|
839
|
|
|
|
|
|
|
# not post sorted. And so we can print as we get them. |
|
840
|
|
|
|
|
|
|
# lower memory load... |
|
841
|
|
|
|
|
|
|
|
|
842
|
6
|
|
|
|
|
10
|
foreach my $sf ( @feats ) { |
|
843
|
38
|
|
|
|
|
65
|
my @fth = Bio::SeqIO::FTHelper::from_SeqFeature($sf,$seq); |
|
844
|
38
|
|
|
|
|
34
|
foreach my $fth ( @fth ) { |
|
845
|
38
|
50
|
|
|
|
52
|
if ( $fth->key eq 'CONTIG') { |
|
846
|
0
|
|
|
|
|
0
|
$self->_show_dna(0); |
|
847
|
|
|
|
|
|
|
} |
|
848
|
38
|
50
|
|
|
|
66
|
$self->_print_EMBL_FTHelper($fth) || return; |
|
849
|
|
|
|
|
|
|
} |
|
850
|
|
|
|
|
|
|
} |
|
851
|
|
|
|
|
|
|
} |
|
852
|
|
|
|
|
|
|
} |
|
853
|
|
|
|
|
|
|
|
|
854
|
11
|
50
|
|
|
|
25
|
if ( $self->_show_dna() == 0 ) { |
|
855
|
0
|
0
|
|
|
|
0
|
$self->_print( "//\n") || return; |
|
856
|
0
|
|
|
|
|
0
|
return; |
|
857
|
|
|
|
|
|
|
} |
|
858
|
11
|
50
|
|
|
|
24
|
$self->_print( "XX\n") || return; |
|
859
|
|
|
|
|
|
|
|
|
860
|
|
|
|
|
|
|
# finished printing features. |
|
861
|
|
|
|
|
|
|
|
|
862
|
|
|
|
|
|
|
# print contig if present : bug#2982 |
|
863
|
11
|
100
|
66
|
|
|
62
|
if ( $seq->can('annotation') && defined $seq->annotation) { |
|
864
|
9
|
|
|
|
|
19
|
foreach my $ctg ( $seq->annotation->get_Annotations('contig') ) { |
|
865
|
0
|
0
|
|
|
|
0
|
if ($ctg->value) { |
|
866
|
0
|
0
|
|
|
|
0
|
$self->_write_line_EMBL_regex("CO ","CO ", $ctg->value, |
|
867
|
|
|
|
|
|
|
'[,]|$', 80) || return; |
|
868
|
|
|
|
|
|
|
} |
|
869
|
|
|
|
|
|
|
} |
|
870
|
|
|
|
|
|
|
} |
|
871
|
|
|
|
|
|
|
# print sequence lines only if sequence is present! bug#2982 |
|
872
|
11
|
100
|
|
|
|
26
|
if (length($str)) { |
|
873
|
8
|
|
|
|
|
24
|
$str =~ tr/A-Z/a-z/; |
|
874
|
|
|
|
|
|
|
|
|
875
|
|
|
|
|
|
|
# Count each nucleotide |
|
876
|
8
|
|
|
|
|
19
|
my $alen = $str =~ tr/a/a/; |
|
877
|
8
|
|
|
|
|
15
|
my $clen = $str =~ tr/c/c/; |
|
878
|
8
|
|
|
|
|
15
|
my $glen = $str =~ tr/g/g/; |
|
879
|
8
|
|
|
|
|
16
|
my $tlen = $str =~ tr/t/t/; |
|
880
|
|
|
|
|
|
|
|
|
881
|
8
|
|
|
|
|
15
|
my $len = $seq->length(); |
|
882
|
8
|
|
|
|
|
13
|
my $olen = $seq->length() - ($alen + $tlen + $clen + $glen); |
|
883
|
8
|
50
|
|
|
|
19
|
if ( $olen < 0 ) { |
|
884
|
0
|
|
|
|
|
0
|
$self->warn("Weird. More atgc than bases. Problem!"); |
|
885
|
|
|
|
|
|
|
} |
|
886
|
|
|
|
|
|
|
|
|
887
|
8
|
50
|
|
|
|
37
|
$self->_print("SQ Sequence $len BP; $alen A; $clen C; $glen G; $tlen T; $olen other;\n") || return; |
|
888
|
|
|
|
|
|
|
|
|
889
|
8
|
|
|
|
|
12
|
my $nuc = 60; # Number of nucleotides per line |
|
890
|
8
|
|
|
|
|
14
|
my $whole_pat = 'a10' x 6; # Pattern for unpacking a whole line |
|
891
|
8
|
|
|
|
|
9
|
my $out_pat = 'A11' x 6; # Pattern for packing a line |
|
892
|
8
|
|
|
|
|
7
|
my $length = length($str); |
|
893
|
|
|
|
|
|
|
|
|
894
|
|
|
|
|
|
|
# Calculate the number of nucleotides which fit on whole lines |
|
895
|
8
|
|
|
|
|
25
|
my $whole = int($length / $nuc) * $nuc; |
|
896
|
|
|
|
|
|
|
|
|
897
|
|
|
|
|
|
|
# Print the whole lines |
|
898
|
8
|
|
|
|
|
6
|
my( $i ); |
|
899
|
8
|
|
|
|
|
19
|
for ($i = 0; $i < $whole; $i += $nuc) { |
|
900
|
162
|
|
|
|
|
366
|
my $blocks = pack $out_pat, |
|
901
|
|
|
|
|
|
|
unpack $whole_pat, |
|
902
|
|
|
|
|
|
|
substr($str, $i, $nuc); |
|
903
|
162
|
50
|
|
|
|
347
|
$self->_print(sprintf(" $blocks%9d\n", $i + $nuc)) || return; |
|
904
|
|
|
|
|
|
|
} |
|
905
|
|
|
|
|
|
|
|
|
906
|
|
|
|
|
|
|
# Print the last line |
|
907
|
8
|
50
|
|
|
|
26
|
if (my $last = substr($str, $i)) { |
|
908
|
8
|
|
|
|
|
8
|
my $last_len = length($last); |
|
909
|
8
|
|
|
|
|
31
|
my $last_pat = 'a10' x int($last_len / 10) .'a'. $last_len % 10; |
|
910
|
8
|
|
|
|
|
62
|
my $blocks = pack $out_pat, |
|
911
|
|
|
|
|
|
|
unpack($last_pat, $last); |
|
912
|
8
|
50
|
|
|
|
41
|
$self->_print(sprintf(" $blocks%9d\n", $length)) || |
|
913
|
|
|
|
|
|
|
return; # Add the length to the end |
|
914
|
|
|
|
|
|
|
} |
|
915
|
|
|
|
|
|
|
} |
|
916
|
|
|
|
|
|
|
|
|
917
|
11
|
50
|
|
|
|
27
|
$self->_print( "//\n") || return; |
|
918
|
|
|
|
|
|
|
|
|
919
|
11
|
50
|
33
|
|
|
24
|
$self->flush if $self->_flush_on_write && defined $self->_fh; |
|
920
|
|
|
|
|
|
|
} |
|
921
|
11
|
|
|
|
|
41
|
return 1; |
|
922
|
|
|
|
|
|
|
} |
|
923
|
|
|
|
|
|
|
|
|
924
|
|
|
|
|
|
|
=head2 _print_EMBL_FTHelper |
|
925
|
|
|
|
|
|
|
|
|
926
|
|
|
|
|
|
|
Title : _print_EMBL_FTHelper |
|
927
|
|
|
|
|
|
|
Usage : |
|
928
|
|
|
|
|
|
|
Function: Internal function |
|
929
|
|
|
|
|
|
|
Returns : 1 if writing suceeded, otherwise undef |
|
930
|
|
|
|
|
|
|
Args : |
|
931
|
|
|
|
|
|
|
|
|
932
|
|
|
|
|
|
|
|
|
933
|
|
|
|
|
|
|
=cut |
|
934
|
|
|
|
|
|
|
|
|
935
|
|
|
|
|
|
|
sub _print_EMBL_FTHelper { |
|
936
|
38
|
|
|
38
|
|
34
|
my ($self,$fth) = @_; |
|
937
|
|
|
|
|
|
|
|
|
938
|
38
|
50
|
33
|
|
|
119
|
if ( ! ref $fth || ! $fth->isa('Bio::SeqIO::FTHelper') ) { |
|
939
|
0
|
|
|
|
|
0
|
$fth->warn("$fth is not a FTHelper class. Attempting to print, but there could be tears!"); |
|
940
|
|
|
|
|
|
|
} |
|
941
|
|
|
|
|
|
|
|
|
942
|
|
|
|
|
|
|
|
|
943
|
|
|
|
|
|
|
#$self->_print( "FH Key Location/Qualifiers\n"); |
|
944
|
|
|
|
|
|
|
#$self->_print( sprintf("FT %-15s %s\n",$fth->key,$fth->loc)); |
|
945
|
|
|
|
|
|
|
# let |
|
946
|
38
|
50
|
|
|
|
48
|
if ( $fth->key eq 'CONTIG' ) { |
|
947
|
0
|
0
|
|
|
|
0
|
$self->_print("XX\n") || return; |
|
948
|
0
|
0
|
|
|
|
0
|
$self->_write_line_EMBL_regex("CO ", |
|
949
|
|
|
|
|
|
|
"CO ",$fth->loc, |
|
950
|
|
|
|
|
|
|
'\,|$',80) || return; #' |
|
951
|
0
|
|
|
|
|
0
|
return 1; |
|
952
|
|
|
|
|
|
|
} |
|
953
|
38
|
50
|
|
|
|
56
|
$self->_write_line_EMBL_regex(sprintf("FT %-15s ",$fth->key), |
|
954
|
|
|
|
|
|
|
"FT ",$fth->loc, |
|
955
|
|
|
|
|
|
|
'\,|$',80) || return; #' |
|
956
|
38
|
|
|
|
|
37
|
foreach my $tag ( keys %{$fth->field} ) { |
|
|
38
|
|
|
|
|
64
|
|
|
957
|
108
|
50
|
|
|
|
147
|
if ( ! defined $fth->field->{$tag} ) { |
|
958
|
0
|
|
|
|
|
0
|
next; |
|
959
|
|
|
|
|
|
|
} |
|
960
|
108
|
|
|
|
|
91
|
foreach my $value ( @{$fth->field->{$tag}} ) { |
|
|
108
|
|
|
|
|
121
|
|
|
961
|
139
|
|
|
|
|
141
|
$value =~ s/\"/\"\"/g; |
|
962
|
139
|
50
|
66
|
|
|
346
|
if ($value eq "_no_value") { |
|
|
|
50
|
|
|
|
|
|
|
963
|
0
|
0
|
|
|
|
0
|
$self->_write_line_EMBL_regex("FT ", |
|
964
|
|
|
|
|
|
|
"FT ", |
|
965
|
|
|
|
|
|
|
"/$tag",'.|$',80) || return; #' |
|
966
|
|
|
|
|
|
|
} |
|
967
|
|
|
|
|
|
|
# there are almost 3x more quoted qualifier values and they |
|
968
|
|
|
|
|
|
|
# are more common too so we take quoted ones first |
|
969
|
|
|
|
|
|
|
# |
|
970
|
|
|
|
|
|
|
# Long qualifiers, that will be line wrapped, are always quoted |
|
971
|
|
|
|
|
|
|
elsif (!$FTQUAL_NO_QUOTE{$tag} or length("/$tag=$value")>=60) { |
|
972
|
139
|
100
|
|
|
|
264
|
my $pat = $value =~ /\s+/ ? '\s+|\-|$' : '.|\-|$'; |
|
973
|
139
|
50
|
|
|
|
278
|
$self->_write_line_EMBL_regex("FT ", |
|
974
|
|
|
|
|
|
|
"FT ", |
|
975
|
|
|
|
|
|
|
"/$tag=\"$value\"",$pat,80) || return; |
|
976
|
|
|
|
|
|
|
} else { |
|
977
|
0
|
0
|
|
|
|
0
|
$self->_write_line_EMBL_regex("FT ", |
|
978
|
|
|
|
|
|
|
"FT ", |
|
979
|
|
|
|
|
|
|
"/$tag=$value",'.|$',80) || return; #' |
|
980
|
|
|
|
|
|
|
} |
|
981
|
|
|
|
|
|
|
} |
|
982
|
|
|
|
|
|
|
} |
|
983
|
|
|
|
|
|
|
|
|
984
|
38
|
|
|
|
|
136
|
return 1; |
|
985
|
|
|
|
|
|
|
} |
|
986
|
|
|
|
|
|
|
|
|
987
|
|
|
|
|
|
|
|
|
988
|
|
|
|
|
|
|
|
|
989
|
|
|
|
|
|
|
=head2 _read_EMBL_Contig() |
|
990
|
|
|
|
|
|
|
|
|
991
|
|
|
|
|
|
|
Title : _read_EMBL_Contig |
|
992
|
|
|
|
|
|
|
Usage : |
|
993
|
|
|
|
|
|
|
Function: convert CO lines into annotations |
|
994
|
|
|
|
|
|
|
Returns : |
|
995
|
|
|
|
|
|
|
Args : |
|
996
|
|
|
|
|
|
|
|
|
997
|
|
|
|
|
|
|
=cut |
|
998
|
|
|
|
|
|
|
|
|
999
|
|
|
|
|
|
|
sub _read_EMBL_Contig { |
|
1000
|
4
|
|
|
4
|
|
5
|
my ($self, $buffer) = @_; |
|
1001
|
4
|
|
|
|
|
7
|
my @ret; |
|
1002
|
4
|
50
|
|
|
|
11
|
if ( $$buffer !~ /^CO/ ) { |
|
1003
|
0
|
|
|
|
|
0
|
warn("Not parsing line '$$buffer' which maybe important"); |
|
1004
|
|
|
|
|
|
|
} |
|
1005
|
4
|
|
|
|
|
11
|
$self->_pushback($$buffer); |
|
1006
|
4
|
|
|
|
|
6
|
while ( defined ($_ = $self->_readline) ) { |
|
1007
|
356
|
100
|
|
|
|
752
|
/^C/ || last; |
|
1008
|
353
|
50
|
|
|
|
775
|
/^CO\s+(.*)/ && do { |
|
1009
|
353
|
|
|
|
|
767
|
push @ret, Bio::Annotation::SimpleValue->new( -tagname => 'contig', |
|
1010
|
|
|
|
|
|
|
-value => $1); |
|
1011
|
|
|
|
|
|
|
}; |
|
1012
|
|
|
|
|
|
|
} |
|
1013
|
4
|
|
|
|
|
7
|
$$buffer = $_; |
|
1014
|
4
|
|
|
|
|
48
|
return @ret; |
|
1015
|
|
|
|
|
|
|
|
|
1016
|
|
|
|
|
|
|
} |
|
1017
|
|
|
|
|
|
|
#' |
|
1018
|
|
|
|
|
|
|
=head2 _read_EMBL_References |
|
1019
|
|
|
|
|
|
|
|
|
1020
|
|
|
|
|
|
|
Title : _read_EMBL_References |
|
1021
|
|
|
|
|
|
|
Usage : |
|
1022
|
|
|
|
|
|
|
Function: Reads references from EMBL format. Internal function really |
|
1023
|
|
|
|
|
|
|
Example : |
|
1024
|
|
|
|
|
|
|
Returns : |
|
1025
|
|
|
|
|
|
|
Args : |
|
1026
|
|
|
|
|
|
|
|
|
1027
|
|
|
|
|
|
|
|
|
1028
|
|
|
|
|
|
|
=cut |
|
1029
|
|
|
|
|
|
|
|
|
1030
|
|
|
|
|
|
|
sub _read_EMBL_References { |
|
1031
|
99
|
|
|
99
|
|
90
|
my ($self,$buffer) = @_; |
|
1032
|
99
|
|
|
|
|
74
|
my (@refs); |
|
1033
|
|
|
|
|
|
|
|
|
1034
|
|
|
|
|
|
|
# assume things are starting with RN |
|
1035
|
|
|
|
|
|
|
|
|
1036
|
99
|
50
|
|
|
|
198
|
if ( $$buffer !~ /^RN/ ) { |
|
1037
|
0
|
|
|
|
|
0
|
warn("Not parsing line '$$buffer' which maybe important"); |
|
1038
|
|
|
|
|
|
|
} |
|
1039
|
99
|
|
|
|
|
76
|
my $b1; |
|
1040
|
|
|
|
|
|
|
my $b2; |
|
1041
|
0
|
|
|
|
|
0
|
my $title; |
|
1042
|
0
|
|
|
|
|
0
|
my $loc; |
|
1043
|
0
|
|
|
|
|
0
|
my $au; |
|
1044
|
0
|
|
|
|
|
0
|
my $med; |
|
1045
|
0
|
|
|
|
|
0
|
my $pm; |
|
1046
|
0
|
|
|
|
|
0
|
my $com; |
|
1047
|
|
|
|
|
|
|
|
|
1048
|
99
|
|
|
|
|
160
|
while ( defined ($_ = $self->_readline) ) { |
|
1049
|
727
|
100
|
|
|
|
1230
|
/^R/ || last; |
|
1050
|
628
|
100
|
|
|
|
806
|
/^RP (\d+)-(\d+)/ && do {$b1=$1;$b2=$2;}; |
|
|
40
|
|
|
|
|
83
|
|
|
|
40
|
|
|
|
|
54
|
|
|
1051
|
628
|
100
|
|
|
|
797
|
/^RX MEDLINE;\s+(\d+)/ && do {$med=$1}; |
|
|
55
|
|
|
|
|
81
|
|
|
1052
|
628
|
100
|
|
|
|
730
|
/^RX PUBMED;\s+(\d+)/ && do {$pm=$1}; |
|
|
10
|
|
|
|
|
15
|
|
|
1053
|
628
|
100
|
|
|
|
832
|
/^RA (.*)/ && do { |
|
1054
|
193
|
|
|
|
|
341
|
$au = $self->_concatenate_lines($au,$1); next; |
|
|
193
|
|
|
|
|
297
|
|
|
1055
|
|
|
|
|
|
|
}; |
|
1056
|
435
|
100
|
|
|
|
615
|
/^RT (.*)/ && do { |
|
1057
|
164
|
|
|
|
|
213
|
$title = $self->_concatenate_lines($title,$1); next; |
|
|
164
|
|
|
|
|
220
|
|
|
1058
|
|
|
|
|
|
|
}; |
|
1059
|
271
|
100
|
|
|
|
425
|
/^RL (.*)/ && do { |
|
1060
|
156
|
|
|
|
|
224
|
$loc = $self->_concatenate_lines($loc,$1); next; |
|
|
156
|
|
|
|
|
238
|
|
|
1061
|
|
|
|
|
|
|
}; |
|
1062
|
115
|
100
|
|
|
|
227
|
/^RC (.*)/ && do { |
|
1063
|
3
|
|
|
|
|
10
|
$com = $self->_concatenate_lines($com,$1); next; |
|
|
3
|
|
|
|
|
4
|
|
|
1064
|
|
|
|
|
|
|
}; |
|
1065
|
|
|
|
|
|
|
} |
|
1066
|
|
|
|
|
|
|
|
|
1067
|
99
|
|
|
|
|
302
|
my $ref = Bio::Annotation::Reference->new(); |
|
1068
|
99
|
|
|
|
|
317
|
$au =~ s/;\s*$//g; |
|
1069
|
99
|
|
|
|
|
198
|
$title =~ s/;\s*$//g; |
|
1070
|
|
|
|
|
|
|
|
|
1071
|
99
|
|
|
|
|
188
|
$ref->start($b1); |
|
1072
|
99
|
|
|
|
|
166
|
$ref->end($b2); |
|
1073
|
99
|
|
|
|
|
135
|
$ref->authors($au); |
|
1074
|
99
|
|
|
|
|
144
|
$ref->title($title); |
|
1075
|
99
|
|
|
|
|
124
|
$ref->location($loc); |
|
1076
|
99
|
|
|
|
|
136
|
$ref->medline($med); |
|
1077
|
99
|
|
|
|
|
145
|
$ref->comment($com); |
|
1078
|
99
|
|
|
|
|
130
|
$ref->pubmed($pm); |
|
1079
|
|
|
|
|
|
|
|
|
1080
|
99
|
|
|
|
|
91
|
push(@refs,$ref); |
|
1081
|
99
|
|
|
|
|
101
|
$$buffer = $_; |
|
1082
|
|
|
|
|
|
|
|
|
1083
|
99
|
|
|
|
|
194
|
return @refs; |
|
1084
|
|
|
|
|
|
|
} |
|
1085
|
|
|
|
|
|
|
|
|
1086
|
|
|
|
|
|
|
=head2 _read_EMBL_Species |
|
1087
|
|
|
|
|
|
|
|
|
1088
|
|
|
|
|
|
|
Title : _read_EMBL_Species |
|
1089
|
|
|
|
|
|
|
Usage : |
|
1090
|
|
|
|
|
|
|
Function: Reads the EMBL Organism species and classification |
|
1091
|
|
|
|
|
|
|
lines. |
|
1092
|
|
|
|
|
|
|
Example : |
|
1093
|
|
|
|
|
|
|
Returns : A Bio::Species object |
|
1094
|
|
|
|
|
|
|
Args : a reference to the current line buffer, accession number |
|
1095
|
|
|
|
|
|
|
|
|
1096
|
|
|
|
|
|
|
=cut |
|
1097
|
|
|
|
|
|
|
|
|
1098
|
|
|
|
|
|
|
sub _read_EMBL_Species { |
|
1099
|
21
|
|
|
21
|
|
30
|
my( $self, $buffer, $acc ) = @_; |
|
1100
|
21
|
|
|
|
|
20
|
my $org; |
|
1101
|
|
|
|
|
|
|
|
|
1102
|
21
|
|
|
|
|
26
|
$_ = $$buffer; |
|
1103
|
21
|
|
|
|
|
26
|
my( $sub_species, $species, $genus, $common, $sci_name, $class_lines ); |
|
1104
|
21
|
|
66
|
|
|
77
|
while (defined( $_ ||= $self->_readline )) { |
|
1105
|
92
|
100
|
|
|
|
349
|
if (/^OS\s+(.+)/) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
1106
|
21
|
50
|
|
|
|
70
|
$sci_name .= ($sci_name) ? ' '.$1 : $1; |
|
1107
|
|
|
|
|
|
|
} elsif (s/^OC\s+(.+)$//) { |
|
1108
|
50
|
|
|
|
|
102
|
$class_lines .= $1; |
|
1109
|
|
|
|
|
|
|
} elsif (/^OG\s+(.*)/) { |
|
1110
|
0
|
|
|
|
|
0
|
$org = $1; |
|
1111
|
|
|
|
|
|
|
} else { |
|
1112
|
21
|
|
|
|
|
38
|
last; |
|
1113
|
|
|
|
|
|
|
} |
|
1114
|
|
|
|
|
|
|
|
|
1115
|
71
|
|
|
|
|
191
|
$_ = undef; # Empty $_ to trigger read of next line |
|
1116
|
|
|
|
|
|
|
} |
|
1117
|
|
|
|
|
|
|
|
|
1118
|
|
|
|
|
|
|
# $$buffer = $_; |
|
1119
|
21
|
|
|
|
|
69
|
$self->_pushback($_); |
|
1120
|
|
|
|
|
|
|
|
|
1121
|
21
|
|
|
|
|
32
|
$sci_name =~ s{\.$}{}; |
|
1122
|
21
|
50
|
|
|
|
39
|
$sci_name || return; |
|
1123
|
|
|
|
|
|
|
|
|
1124
|
|
|
|
|
|
|
# Convert data in classification lines into classification array. |
|
1125
|
|
|
|
|
|
|
# only split on ';' or '.' so that classification that is 2 or more words |
|
1126
|
|
|
|
|
|
|
# will still get matched, use map() to remove trailing/leading/intervening |
|
1127
|
|
|
|
|
|
|
# spaces |
|
1128
|
21
|
|
|
|
|
161
|
my @class = map { s/^\s+//; s/\s+$//; s/\s{2,}/ /g; $_; } split /(?
|
|
|
224
|
|
|
|
|
258
|
|
|
|
224
|
|
|
|
|
194
|
|
|
|
224
|
|
|
|
|
164
|
|
|
|
224
|
|
|
|
|
213
|
|
|
1129
|
|
|
|
|
|
|
|
|
1130
|
|
|
|
|
|
|
# do we have a genus? |
|
1131
|
21
|
|
|
|
|
42
|
my $possible_genus = $class[-1]; |
|
1132
|
21
|
50
|
|
|
|
61
|
$possible_genus .= "|$class[-2]" if $class[-2]; |
|
1133
|
21
|
50
|
|
|
|
511
|
if ($sci_name =~ /^($possible_genus)/) { |
|
1134
|
21
|
|
|
|
|
42
|
$genus = $1; |
|
1135
|
21
|
|
|
|
|
468
|
($species) = $sci_name =~ /^$genus\s+(.+)/; |
|
1136
|
|
|
|
|
|
|
} else { |
|
1137
|
0
|
|
|
|
|
0
|
$species = $sci_name; |
|
1138
|
|
|
|
|
|
|
} |
|
1139
|
|
|
|
|
|
|
|
|
1140
|
|
|
|
|
|
|
# Don't make a species object if it is "Unknown" or "None" |
|
1141
|
21
|
50
|
|
|
|
62
|
if ($genus) { |
|
1142
|
21
|
50
|
|
|
|
75
|
return if $genus =~ /^(Unknown|None)$/i; |
|
1143
|
|
|
|
|
|
|
} |
|
1144
|
|
|
|
|
|
|
|
|
1145
|
|
|
|
|
|
|
# is this organism of rank species or is it lower? |
|
1146
|
|
|
|
|
|
|
# (doesn't catch everything, but at least the guess isn't dangerous) |
|
1147
|
21
|
50
|
|
|
|
85
|
if ($species =~ /subsp\.|var\./) { |
|
1148
|
0
|
|
|
|
|
0
|
($species, $sub_species) = $species =~ /(.+)\s+((?:subsp\.|var\.).+)/; |
|
1149
|
|
|
|
|
|
|
} |
|
1150
|
|
|
|
|
|
|
|
|
1151
|
|
|
|
|
|
|
# sometimes things have common name in brackets, like |
|
1152
|
|
|
|
|
|
|
# Schizosaccharomyces pombe (fission yeast), so get rid of the common |
|
1153
|
|
|
|
|
|
|
# name bit. Probably dangerous if real scientific species name ends in |
|
1154
|
|
|
|
|
|
|
# bracketed bit. |
|
1155
|
21
|
50
|
|
|
|
54
|
unless ($class[-1] eq 'Viruses') { |
|
1156
|
21
|
|
|
|
|
70
|
($species, $common) = $species =~ /^(.+)\s+\((.+)\)$/; |
|
1157
|
21
|
100
|
|
|
|
69
|
$sci_name =~ s/\s+\(.+\)$// if $common; |
|
1158
|
|
|
|
|
|
|
} |
|
1159
|
|
|
|
|
|
|
|
|
1160
|
|
|
|
|
|
|
# Bio::Species array needs array in Species -> Kingdom direction |
|
1161
|
21
|
50
|
|
|
|
54
|
unless ($class[-1] eq $sci_name) { |
|
1162
|
21
|
|
|
|
|
30
|
push(@class, $sci_name); |
|
1163
|
|
|
|
|
|
|
} |
|
1164
|
21
|
|
|
|
|
27
|
@class = reverse @class; |
|
1165
|
|
|
|
|
|
|
|
|
1166
|
|
|
|
|
|
|
# do minimal sanity checks before we hand off to Bio::Species which won't |
|
1167
|
|
|
|
|
|
|
# be able to give informative throw messages if it has to throw because |
|
1168
|
|
|
|
|
|
|
# of problems here |
|
1169
|
21
|
50
|
|
|
|
40
|
$self->throw("$acc seems to be missing its OS line: invalid.") unless $sci_name; |
|
1170
|
21
|
|
|
|
|
24
|
my %names; |
|
1171
|
21
|
|
|
|
|
68
|
foreach my $i (0..$#class) { |
|
1172
|
245
|
|
|
|
|
166
|
my $name = $class[$i]; |
|
1173
|
245
|
|
|
|
|
272
|
$names{$name}++; |
|
1174
|
|
|
|
|
|
|
# this code breaks examples like: Xenopus (Silurana) tropicalis |
|
1175
|
|
|
|
|
|
|
# commenting out, see bug 3158 |
|
1176
|
|
|
|
|
|
|
|
|
1177
|
|
|
|
|
|
|
#if ($names{$name} > 1 && ($name ne $class[$i - 1])) { |
|
1178
|
|
|
|
|
|
|
# $self->warn("$acc seems to have an invalid species classification:$name ne $class[$i - 1]"); |
|
1179
|
|
|
|
|
|
|
#} |
|
1180
|
|
|
|
|
|
|
} |
|
1181
|
21
|
|
|
|
|
129
|
my $make = Bio::Species->new(); |
|
1182
|
21
|
|
|
|
|
53
|
$make->scientific_name($sci_name); |
|
1183
|
21
|
|
|
|
|
60
|
$make->classification(@class); |
|
1184
|
21
|
50
|
|
|
|
56
|
unless ($class[-1] eq 'Viruses') { |
|
1185
|
21
|
50
|
|
|
|
96
|
$make->genus($genus) if $genus; |
|
1186
|
21
|
100
|
|
|
|
63
|
$make->species($species) if $species; |
|
1187
|
21
|
50
|
|
|
|
42
|
$make->sub_species($sub_species) if $sub_species; |
|
1188
|
21
|
100
|
|
|
|
61
|
$make->common_name($common) if $common; |
|
1189
|
|
|
|
|
|
|
} |
|
1190
|
21
|
50
|
|
|
|
38
|
$make->organelle($org) if $org; |
|
1191
|
21
|
|
|
|
|
103
|
return $make; |
|
1192
|
|
|
|
|
|
|
} |
|
1193
|
|
|
|
|
|
|
|
|
1194
|
|
|
|
|
|
|
=head2 _read_EMBL_DBLink |
|
1195
|
|
|
|
|
|
|
|
|
1196
|
|
|
|
|
|
|
Title : _read_EMBL_DBLink |
|
1197
|
|
|
|
|
|
|
Usage : |
|
1198
|
|
|
|
|
|
|
Function: Reads the EMBL database cross reference ("DR") lines |
|
1199
|
|
|
|
|
|
|
Example : |
|
1200
|
|
|
|
|
|
|
Returns : A list of Bio::Annotation::DBLink objects |
|
1201
|
|
|
|
|
|
|
Args : |
|
1202
|
|
|
|
|
|
|
|
|
1203
|
|
|
|
|
|
|
=cut |
|
1204
|
|
|
|
|
|
|
|
|
1205
|
|
|
|
|
|
|
sub _read_EMBL_DBLink { |
|
1206
|
5
|
|
|
5
|
|
6
|
my( $self,$buffer ) = @_; |
|
1207
|
5
|
|
|
|
|
5
|
my( @db_link ); |
|
1208
|
|
|
|
|
|
|
|
|
1209
|
5
|
|
|
|
|
5
|
$_ = $$buffer; |
|
1210
|
5
|
|
66
|
|
|
21
|
while (defined( $_ ||= $self->_readline )) { |
|
1211
|
30
|
100
|
|
|
|
118
|
if ( /^DR ([^\s;]+);\s*([^\s;]+);?\s*([^\s;]+)?\.$/) { |
|
1212
|
25
|
|
|
|
|
54
|
my ($databse, $prim_id, $sec_id) = ($1,$2,$3); |
|
1213
|
25
|
|
|
|
|
82
|
my $link = Bio::Annotation::DBLink->new(-database => $databse, |
|
1214
|
|
|
|
|
|
|
-primary_id => $prim_id, |
|
1215
|
|
|
|
|
|
|
-optional_id => $sec_id); |
|
1216
|
|
|
|
|
|
|
|
|
1217
|
25
|
|
|
|
|
41
|
push(@db_link, $link); |
|
1218
|
|
|
|
|
|
|
} else { |
|
1219
|
5
|
|
|
|
|
7
|
last; |
|
1220
|
|
|
|
|
|
|
} |
|
1221
|
25
|
|
|
|
|
67
|
$_ = undef; # Empty $_ to trigger read of next line |
|
1222
|
|
|
|
|
|
|
} |
|
1223
|
|
|
|
|
|
|
|
|
1224
|
5
|
|
|
|
|
8
|
$$buffer = $_; |
|
1225
|
5
|
|
|
|
|
12
|
return @db_link; |
|
1226
|
|
|
|
|
|
|
} |
|
1227
|
|
|
|
|
|
|
|
|
1228
|
|
|
|
|
|
|
=head2 _read_EMBL_TaxID_DBLink |
|
1229
|
|
|
|
|
|
|
|
|
1230
|
|
|
|
|
|
|
Title : _read_EMBL_TaxID_DBLink |
|
1231
|
|
|
|
|
|
|
Usage : |
|
1232
|
|
|
|
|
|
|
Function: Reads the EMBL database cross reference to NCBI TaxID ("OX") lines |
|
1233
|
|
|
|
|
|
|
Example : |
|
1234
|
|
|
|
|
|
|
Returns : A list of Bio::Annotation::DBLink objects |
|
1235
|
|
|
|
|
|
|
Args : |
|
1236
|
|
|
|
|
|
|
|
|
1237
|
|
|
|
|
|
|
=cut |
|
1238
|
|
|
|
|
|
|
|
|
1239
|
|
|
|
|
|
|
sub _read_EMBL_TaxID_DBLink { |
|
1240
|
3
|
|
|
3
|
|
5
|
my( $self,$buffer ) = @_; |
|
1241
|
3
|
|
|
|
|
3
|
my( @db_link ); |
|
1242
|
|
|
|
|
|
|
|
|
1243
|
3
|
|
|
|
|
5
|
$_ = $$buffer; |
|
1244
|
3
|
|
66
|
|
|
13
|
while (defined( $_ ||= $self->_readline )) { |
|
1245
|
4
|
100
|
|
|
|
19
|
if ( /^OX (\S+)=(\d+);$/ ) { |
|
1246
|
1
|
|
|
|
|
4
|
my ($databse, $prim_id) = ($1,$2); |
|
1247
|
1
|
|
|
|
|
11
|
my $link = Bio::Annotation::DBLink->new(-database => $databse, |
|
1248
|
|
|
|
|
|
|
-primary_id => $prim_id,); |
|
1249
|
1
|
|
|
|
|
4
|
push(@db_link, $link); |
|
1250
|
|
|
|
|
|
|
} else { |
|
1251
|
3
|
|
|
|
|
4
|
last; |
|
1252
|
|
|
|
|
|
|
} |
|
1253
|
1
|
|
|
|
|
6
|
$_ = undef; # Empty $_ to trigger read of next line |
|
1254
|
|
|
|
|
|
|
} |
|
1255
|
|
|
|
|
|
|
|
|
1256
|
3
|
|
|
|
|
5
|
$$buffer = $_; |
|
1257
|
3
|
|
|
|
|
9
|
return @db_link; |
|
1258
|
|
|
|
|
|
|
} |
|
1259
|
|
|
|
|
|
|
|
|
1260
|
|
|
|
|
|
|
=head2 _filehandle |
|
1261
|
|
|
|
|
|
|
|
|
1262
|
|
|
|
|
|
|
Title : _filehandle |
|
1263
|
|
|
|
|
|
|
Usage : $obj->_filehandle($newval) |
|
1264
|
|
|
|
|
|
|
Function: |
|
1265
|
|
|
|
|
|
|
Example : |
|
1266
|
|
|
|
|
|
|
Returns : value of _filehandle |
|
1267
|
|
|
|
|
|
|
Args : newvalue (optional) |
|
1268
|
|
|
|
|
|
|
|
|
1269
|
|
|
|
|
|
|
|
|
1270
|
|
|
|
|
|
|
=cut |
|
1271
|
|
|
|
|
|
|
|
|
1272
|
|
|
|
|
|
|
sub _filehandle{ |
|
1273
|
0
|
|
|
0
|
|
0
|
my ($obj,$value) = @_; |
|
1274
|
0
|
0
|
|
|
|
0
|
if ( defined $value) { |
|
1275
|
0
|
|
|
|
|
0
|
$obj->{'_filehandle'} = $value; |
|
1276
|
|
|
|
|
|
|
} |
|
1277
|
0
|
|
|
|
|
0
|
return $obj->{'_filehandle'}; |
|
1278
|
|
|
|
|
|
|
|
|
1279
|
|
|
|
|
|
|
} |
|
1280
|
|
|
|
|
|
|
|
|
1281
|
|
|
|
|
|
|
=head2 _read_FTHelper_EMBL |
|
1282
|
|
|
|
|
|
|
|
|
1283
|
|
|
|
|
|
|
Title : _read_FTHelper_EMBL |
|
1284
|
|
|
|
|
|
|
Usage : _read_FTHelper_EMBL($buffer) |
|
1285
|
|
|
|
|
|
|
Function: reads the next FT key line |
|
1286
|
|
|
|
|
|
|
Example : |
|
1287
|
|
|
|
|
|
|
Returns : Bio::SeqIO::FTHelper object |
|
1288
|
|
|
|
|
|
|
Args : filehandle and reference to a scalar |
|
1289
|
|
|
|
|
|
|
|
|
1290
|
|
|
|
|
|
|
|
|
1291
|
|
|
|
|
|
|
=cut |
|
1292
|
|
|
|
|
|
|
|
|
1293
|
|
|
|
|
|
|
sub _read_FTHelper_EMBL { |
|
1294
|
256
|
|
|
256
|
|
216
|
my ($self,$buffer) = @_; |
|
1295
|
|
|
|
|
|
|
|
|
1296
|
256
|
|
|
|
|
187
|
my ($key, # The key of the feature |
|
1297
|
|
|
|
|
|
|
$loc, # The location line from the feature |
|
1298
|
|
|
|
|
|
|
@qual, # An arrray of lines making up the qualifiers |
|
1299
|
|
|
|
|
|
|
); |
|
1300
|
|
|
|
|
|
|
|
|
1301
|
256
|
50
|
|
|
|
814
|
if ($$buffer =~ /^FT\s{3}(\S+)\s+(\S+)/ ) { |
|
|
|
0
|
|
|
|
|
|
|
1302
|
256
|
|
|
|
|
386
|
$key = $1; |
|
1303
|
256
|
|
|
|
|
274
|
$loc = $2; |
|
1304
|
|
|
|
|
|
|
# Read all the lines up to the next feature |
|
1305
|
256
|
|
|
|
|
481
|
while ( defined($_ = $self->_readline) ) { |
|
1306
|
2038
|
100
|
|
|
|
9036
|
if (/^FT(\s+)(.+?)\s*$/) { |
|
1307
|
|
|
|
|
|
|
# Lines inside features are preceeded by 19 spaces |
|
1308
|
|
|
|
|
|
|
# A new feature is preceeded by 3 spaces |
|
1309
|
2016
|
100
|
|
|
|
2608
|
if (length($1) > 4) { |
|
1310
|
|
|
|
|
|
|
# Add to qualifiers if we're in the qualifiers |
|
1311
|
1782
|
100
|
|
|
|
2024
|
if (@qual) { |
|
|
|
100
|
|
|
|
|
|
|
1312
|
1523
|
|
|
|
|
2949
|
push(@qual, $2); |
|
1313
|
|
|
|
|
|
|
} |
|
1314
|
|
|
|
|
|
|
# Start the qualifier list if it's the first qualifier |
|
1315
|
|
|
|
|
|
|
elsif (substr($2, 0, 1) eq '/') { |
|
1316
|
254
|
|
|
|
|
642
|
@qual = ($2); |
|
1317
|
|
|
|
|
|
|
} |
|
1318
|
|
|
|
|
|
|
# We're still in the location line, so append to location |
|
1319
|
|
|
|
|
|
|
else { |
|
1320
|
5
|
|
|
|
|
18
|
$loc .= $2; |
|
1321
|
|
|
|
|
|
|
} |
|
1322
|
|
|
|
|
|
|
} else { |
|
1323
|
|
|
|
|
|
|
# We've reached the start of the next feature |
|
1324
|
234
|
|
|
|
|
243
|
last; |
|
1325
|
|
|
|
|
|
|
} |
|
1326
|
|
|
|
|
|
|
} else { |
|
1327
|
|
|
|
|
|
|
# We're at the end of the feature table |
|
1328
|
22
|
|
|
|
|
29
|
last; |
|
1329
|
|
|
|
|
|
|
} |
|
1330
|
|
|
|
|
|
|
} |
|
1331
|
|
|
|
|
|
|
} elsif ( $$buffer =~ /^CO\s+(\S+)/) { |
|
1332
|
0
|
|
|
|
|
0
|
$key = 'CONTIG'; |
|
1333
|
0
|
|
|
|
|
0
|
$loc = $1; |
|
1334
|
|
|
|
|
|
|
# Read all the lines up to the next feature |
|
1335
|
0
|
|
|
|
|
0
|
while ( defined($_ = $self->_readline) ) { |
|
1336
|
0
|
0
|
|
|
|
0
|
if (/^CO\s+(\S+)\s*$/) { |
|
1337
|
0
|
|
|
|
|
0
|
$loc .= $1; |
|
1338
|
|
|
|
|
|
|
} else { |
|
1339
|
|
|
|
|
|
|
# We've reached the start of the next feature |
|
1340
|
0
|
|
|
|
|
0
|
last; |
|
1341
|
|
|
|
|
|
|
} |
|
1342
|
|
|
|
|
|
|
} |
|
1343
|
|
|
|
|
|
|
} else { |
|
1344
|
|
|
|
|
|
|
# No feature key |
|
1345
|
0
|
|
|
|
|
0
|
return; |
|
1346
|
|
|
|
|
|
|
} |
|
1347
|
|
|
|
|
|
|
|
|
1348
|
|
|
|
|
|
|
# Put the first line of the next feature into the buffer |
|
1349
|
256
|
|
|
|
|
230
|
$$buffer = $_; |
|
1350
|
|
|
|
|
|
|
|
|
1351
|
|
|
|
|
|
|
# Make the new FTHelper object |
|
1352
|
256
|
|
|
|
|
625
|
my $out = Bio::SeqIO::FTHelper->new(); |
|
1353
|
256
|
|
|
|
|
440
|
$out->verbose($self->verbose()); |
|
1354
|
256
|
|
|
|
|
393
|
$out->key($key); |
|
1355
|
256
|
|
|
|
|
381
|
$out->loc($loc); |
|
1356
|
|
|
|
|
|
|
|
|
1357
|
|
|
|
|
|
|
# Now parse and add any qualifiers. (@qual is kept |
|
1358
|
|
|
|
|
|
|
# intact to provide informative error messages.) |
|
1359
|
256
|
|
|
|
|
181
|
my $last_unquoted_qualifier; |
|
1360
|
|
|
|
|
|
|
QUAL: |
|
1361
|
256
|
|
|
|
|
449
|
for (my $i = 0; $i < @qual; $i++) { |
|
1362
|
1091
|
|
|
|
|
901
|
my $data = $qual[$i]; |
|
1363
|
1091
|
|
|
|
|
674
|
my ( $qualifier, $value ); |
|
1364
|
|
|
|
|
|
|
|
|
1365
|
1091
|
50
|
|
|
|
3513
|
unless (( $qualifier, $value ) = ($data =~ m{^/([^=]+)(?:=\s*(.*))?})) { |
|
1366
|
0
|
0
|
|
|
|
0
|
if ( defined $last_unquoted_qualifier ) { |
|
1367
|
|
|
|
|
|
|
# handle case of unquoted multiline - read up everything until the next qualifier |
|
1368
|
0
|
|
0
|
|
|
0
|
do { |
|
1369
|
|
|
|
|
|
|
# Protein sequence translations need to be joined without spaces, |
|
1370
|
|
|
|
|
|
|
# other qualifiers need those. |
|
1371
|
0
|
0
|
|
|
|
0
|
$value .= ' ' if $qualifier ne "translation"; |
|
1372
|
0
|
|
|
|
|
0
|
$value .= $data; |
|
1373
|
|
|
|
|
|
|
} while defined($data = $qual[++$i]) && $data !~ m[^/]; |
|
1374
|
0
|
|
|
|
|
0
|
$i--; |
|
1375
|
0
|
|
|
|
|
0
|
$out->field->{$last_unquoted_qualifier}->[-1] .= $value; |
|
1376
|
0
|
|
|
|
|
0
|
$last_unquoted_qualifier = undef; |
|
1377
|
0
|
|
|
|
|
0
|
next QUAL; |
|
1378
|
|
|
|
|
|
|
} else { |
|
1379
|
0
|
|
|
|
|
0
|
$self->throw("Can't see new qualifier in: $_\nfrom:\n" |
|
1380
|
|
|
|
|
|
|
. join('', map "$_\n", @qual)); |
|
1381
|
|
|
|
|
|
|
} |
|
1382
|
|
|
|
|
|
|
} |
|
1383
|
1091
|
50
|
|
|
|
1351
|
$qualifier = '' if not defined $qualifier; |
|
1384
|
|
|
|
|
|
|
|
|
1385
|
1091
|
|
|
|
|
670
|
$last_unquoted_qualifier = undef; |
|
1386
|
1091
|
50
|
|
|
|
1064
|
if (defined $value) { |
|
1387
|
|
|
|
|
|
|
# Do we have a quoted value? |
|
1388
|
1091
|
100
|
|
|
|
1331
|
if (substr($value, 0, 1) eq '"') { |
|
1389
|
|
|
|
|
|
|
# Keep adding to value until we find the trailing quote |
|
1390
|
|
|
|
|
|
|
# and the quotes are balanced |
|
1391
|
|
|
|
|
|
|
QUOTES: |
|
1392
|
971
|
|
66
|
|
|
3337
|
while ($value !~ /"$/ or $value =~ tr/"/"/ % 2) { #" |
|
1393
|
686
|
|
|
|
|
430
|
$i++; |
|
1394
|
686
|
|
|
|
|
454
|
my $next = $qual[$i]; |
|
1395
|
686
|
50
|
|
|
|
701
|
if (!defined($next)) { |
|
1396
|
0
|
|
|
|
|
0
|
$self->warn("Unbalanced quote in:\n".join("\n", @qual). |
|
1397
|
|
|
|
|
|
|
"\nAdding quote to close...". |
|
1398
|
|
|
|
|
|
|
"Check sequence quality!"); |
|
1399
|
0
|
|
|
|
|
0
|
$value .= '"'; |
|
1400
|
0
|
|
|
|
|
0
|
last QUOTES; |
|
1401
|
|
|
|
|
|
|
} |
|
1402
|
|
|
|
|
|
|
|
|
1403
|
|
|
|
|
|
|
# Protein sequence translations need to be joined without spaces, |
|
1404
|
|
|
|
|
|
|
# other qualifiers need those. |
|
1405
|
686
|
100
|
|
|
|
659
|
if ($qualifier eq "translation") { |
|
1406
|
569
|
|
|
|
|
1241
|
$value .= $next; |
|
1407
|
|
|
|
|
|
|
} else { |
|
1408
|
117
|
|
|
|
|
454
|
$value .= " $next"; |
|
1409
|
|
|
|
|
|
|
} |
|
1410
|
|
|
|
|
|
|
} |
|
1411
|
|
|
|
|
|
|
# Trim leading and trailing quotes |
|
1412
|
971
|
|
|
|
|
6297
|
$value =~ s/^"|"$//g; |
|
1413
|
|
|
|
|
|
|
# Undouble internal quotes |
|
1414
|
971
|
|
|
|
|
902
|
$value =~ s/""/"/g; #" |
|
1415
|
|
|
|
|
|
|
} else { |
|
1416
|
120
|
|
|
|
|
122
|
$last_unquoted_qualifier = $qualifier; |
|
1417
|
|
|
|
|
|
|
} |
|
1418
|
|
|
|
|
|
|
} else { |
|
1419
|
0
|
|
|
|
|
0
|
$value = '_no_value'; |
|
1420
|
|
|
|
|
|
|
} |
|
1421
|
|
|
|
|
|
|
|
|
1422
|
|
|
|
|
|
|
# Store the qualifier |
|
1423
|
1091
|
|
100
|
|
|
1465
|
$out->field->{$qualifier} ||= []; |
|
1424
|
1091
|
|
|
|
|
813
|
push(@{$out->field->{$qualifier}},$value); |
|
|
1091
|
|
|
|
|
1193
|
|
|
1425
|
|
|
|
|
|
|
} |
|
1426
|
|
|
|
|
|
|
|
|
1427
|
256
|
|
|
|
|
456
|
return $out; |
|
1428
|
|
|
|
|
|
|
} |
|
1429
|
|
|
|
|
|
|
|
|
1430
|
|
|
|
|
|
|
=head2 _write_line_EMBL |
|
1431
|
|
|
|
|
|
|
|
|
1432
|
|
|
|
|
|
|
Title : _write_line_EMBL |
|
1433
|
|
|
|
|
|
|
Usage : |
|
1434
|
|
|
|
|
|
|
Function: internal function |
|
1435
|
|
|
|
|
|
|
Example : |
|
1436
|
|
|
|
|
|
|
Returns : 1 if writing suceeded, else undef |
|
1437
|
|
|
|
|
|
|
Args : |
|
1438
|
|
|
|
|
|
|
|
|
1439
|
|
|
|
|
|
|
|
|
1440
|
|
|
|
|
|
|
=cut |
|
1441
|
|
|
|
|
|
|
|
|
1442
|
|
|
|
|
|
|
sub _write_line_EMBL { |
|
1443
|
0
|
|
|
0
|
|
0
|
my ($self,$pre1,$pre2,$line,$length) = @_; |
|
1444
|
|
|
|
|
|
|
|
|
1445
|
0
|
0
|
|
|
|
0
|
$length || $self->throw("Miscalled write_line_EMBL without length. Programming error!"); |
|
1446
|
0
|
|
|
|
|
0
|
my $subl = $length - length $pre2; |
|
1447
|
0
|
|
|
|
|
0
|
my $linel = length $line; |
|
1448
|
0
|
|
|
|
|
0
|
my $i; |
|
1449
|
|
|
|
|
|
|
|
|
1450
|
0
|
|
|
|
|
0
|
my $sub = substr($line,0,$length - length $pre1); |
|
1451
|
|
|
|
|
|
|
|
|
1452
|
0
|
0
|
|
|
|
0
|
$self->_print( "$pre1$sub\n") || return; |
|
1453
|
|
|
|
|
|
|
|
|
1454
|
0
|
|
|
|
|
0
|
for ($i= ($length - length $pre1);$i < $linel;) { |
|
1455
|
0
|
|
|
|
|
0
|
$sub = substr($line,$i,($subl)); |
|
1456
|
0
|
0
|
|
|
|
0
|
$self->_print( "$pre2$sub\n") || return; |
|
1457
|
0
|
|
|
|
|
0
|
$i += $subl; |
|
1458
|
|
|
|
|
|
|
} |
|
1459
|
|
|
|
|
|
|
|
|
1460
|
0
|
|
|
|
|
0
|
return 1; |
|
1461
|
|
|
|
|
|
|
} |
|
1462
|
|
|
|
|
|
|
|
|
1463
|
|
|
|
|
|
|
=head2 _write_line_EMBL_regex |
|
1464
|
|
|
|
|
|
|
|
|
1465
|
|
|
|
|
|
|
Title : _write_line_EMBL_regex |
|
1466
|
|
|
|
|
|
|
Usage : |
|
1467
|
|
|
|
|
|
|
Function: internal function for writing lines of specified |
|
1468
|
|
|
|
|
|
|
length, with different first and the next line |
|
1469
|
|
|
|
|
|
|
left hand headers and split at specific points in the |
|
1470
|
|
|
|
|
|
|
text |
|
1471
|
|
|
|
|
|
|
Example : |
|
1472
|
|
|
|
|
|
|
Returns : nothing |
|
1473
|
|
|
|
|
|
|
Args : file handle, first header, second header, text-line, regex for line breaks, total line length |
|
1474
|
|
|
|
|
|
|
|
|
1475
|
|
|
|
|
|
|
|
|
1476
|
|
|
|
|
|
|
=cut |
|
1477
|
|
|
|
|
|
|
|
|
1478
|
|
|
|
|
|
|
sub _write_line_EMBL_regex { |
|
1479
|
263
|
|
|
263
|
|
269
|
my ($self,$pre1,$pre2,$line,$regex,$length) = @_; |
|
1480
|
|
|
|
|
|
|
|
|
1481
|
|
|
|
|
|
|
#print STDOUT "Going to print with $line!\n"; |
|
1482
|
|
|
|
|
|
|
|
|
1483
|
263
|
50
|
|
|
|
310
|
$length || $self->throw("Programming error - called write_line_EMBL_regex without length."); |
|
1484
|
|
|
|
|
|
|
|
|
1485
|
263
|
|
|
|
|
244
|
my $subl = $length - (length $pre1) -1 ; |
|
1486
|
263
|
|
|
|
|
157
|
my( @lines ); |
|
1487
|
|
|
|
|
|
|
|
|
1488
|
263
|
|
|
|
|
308
|
CHUNK: while($line) { |
|
1489
|
401
|
|
|
|
|
419
|
foreach my $pat ($regex, '[,;\.\/-]\s|'.$regex, '[,;\.\/-]|'.$regex) { |
|
1490
|
401
|
50
|
|
|
|
2879
|
if ($line =~ m/^(.{1,$subl})($pat)(.*)/ ) { |
|
1491
|
401
|
|
|
|
|
644
|
my $l = $1.$2; |
|
1492
|
401
|
100
|
|
|
|
505
|
$l =~ s/#/ /g # remove word wrap protection char '#' |
|
1493
|
|
|
|
|
|
|
if $pre1 eq "RA "; |
|
1494
|
401
|
|
|
|
|
377
|
my $newl = $3; |
|
1495
|
401
|
|
|
|
|
423
|
$line = substr($line,length($l)); |
|
1496
|
|
|
|
|
|
|
# be strict about not padding spaces according to |
|
1497
|
|
|
|
|
|
|
# genbank format |
|
1498
|
401
|
|
|
|
|
783
|
$l =~ s/\s+$//; |
|
1499
|
401
|
50
|
|
|
|
477
|
next CHUNK if ($l eq ''); |
|
1500
|
401
|
|
|
|
|
327
|
push(@lines, $l); |
|
1501
|
401
|
|
|
|
|
703
|
next CHUNK; |
|
1502
|
|
|
|
|
|
|
} |
|
1503
|
|
|
|
|
|
|
} |
|
1504
|
|
|
|
|
|
|
# if we get here none of the patterns matched $subl or less chars |
|
1505
|
0
|
|
|
|
|
0
|
$self->warn("trouble dissecting \"$line\"\n into chunks ". |
|
1506
|
|
|
|
|
|
|
"of $subl chars or less - this tag won't print right"); |
|
1507
|
|
|
|
|
|
|
# insert a space char to prevent infinite loops |
|
1508
|
0
|
|
|
|
|
0
|
$line = substr($line,0,$subl) . " " . substr($line,$subl); |
|
1509
|
|
|
|
|
|
|
} |
|
1510
|
263
|
|
|
|
|
226
|
my $s = shift @lines; |
|
1511
|
263
|
100
|
50
|
|
|
683
|
($self->_print("$pre1$s\n") || return) if $s; |
|
1512
|
263
|
|
|
|
|
309
|
foreach my $s ( @lines ) { |
|
1513
|
145
|
50
|
|
|
|
212
|
$self->_print("$pre2$s\n") || return; |
|
1514
|
|
|
|
|
|
|
} |
|
1515
|
|
|
|
|
|
|
|
|
1516
|
263
|
|
|
|
|
531
|
return 1; |
|
1517
|
|
|
|
|
|
|
} |
|
1518
|
|
|
|
|
|
|
|
|
1519
|
|
|
|
|
|
|
=head2 _post_sort |
|
1520
|
|
|
|
|
|
|
|
|
1521
|
|
|
|
|
|
|
Title : _post_sort |
|
1522
|
|
|
|
|
|
|
Usage : $obj->_post_sort($newval) |
|
1523
|
|
|
|
|
|
|
Function: |
|
1524
|
|
|
|
|
|
|
Returns : value of _post_sort |
|
1525
|
|
|
|
|
|
|
Args : newvalue (optional) |
|
1526
|
|
|
|
|
|
|
|
|
1527
|
|
|
|
|
|
|
|
|
1528
|
|
|
|
|
|
|
=cut |
|
1529
|
|
|
|
|
|
|
|
|
1530
|
|
|
|
|
|
|
sub _post_sort{ |
|
1531
|
6
|
|
|
6
|
|
7
|
my $obj = shift; |
|
1532
|
6
|
50
|
|
|
|
13
|
if ( @_ ) { |
|
1533
|
0
|
|
|
|
|
0
|
my $value = shift; |
|
1534
|
0
|
|
|
|
|
0
|
$obj->{'_post_sort'} = $value; |
|
1535
|
|
|
|
|
|
|
} |
|
1536
|
6
|
|
|
|
|
14
|
return $obj->{'_post_sort'}; |
|
1537
|
|
|
|
|
|
|
|
|
1538
|
|
|
|
|
|
|
} |
|
1539
|
|
|
|
|
|
|
|
|
1540
|
|
|
|
|
|
|
=head2 _show_dna |
|
1541
|
|
|
|
|
|
|
|
|
1542
|
|
|
|
|
|
|
Title : _show_dna |
|
1543
|
|
|
|
|
|
|
Usage : $obj->_show_dna($newval) |
|
1544
|
|
|
|
|
|
|
Function: |
|
1545
|
|
|
|
|
|
|
Returns : value of _show_dna |
|
1546
|
|
|
|
|
|
|
Args : newvalue (optional) |
|
1547
|
|
|
|
|
|
|
|
|
1548
|
|
|
|
|
|
|
|
|
1549
|
|
|
|
|
|
|
=cut |
|
1550
|
|
|
|
|
|
|
|
|
1551
|
|
|
|
|
|
|
sub _show_dna{ |
|
1552
|
42
|
|
|
42
|
|
50
|
my $obj = shift; |
|
1553
|
42
|
100
|
|
|
|
89
|
if ( @_ ) { |
|
1554
|
31
|
|
|
|
|
34
|
my $value = shift; |
|
1555
|
31
|
|
|
|
|
49
|
$obj->{'_show_dna'} = $value; |
|
1556
|
|
|
|
|
|
|
} |
|
1557
|
42
|
|
|
|
|
59
|
return $obj->{'_show_dna'}; |
|
1558
|
|
|
|
|
|
|
|
|
1559
|
|
|
|
|
|
|
} |
|
1560
|
|
|
|
|
|
|
|
|
1561
|
|
|
|
|
|
|
=head2 _id_generation_func |
|
1562
|
|
|
|
|
|
|
|
|
1563
|
|
|
|
|
|
|
Title : _id_generation_func |
|
1564
|
|
|
|
|
|
|
Usage : $obj->_id_generation_func($newval) |
|
1565
|
|
|
|
|
|
|
Function: |
|
1566
|
|
|
|
|
|
|
Returns : value of _id_generation_func |
|
1567
|
|
|
|
|
|
|
Args : newvalue (optional) |
|
1568
|
|
|
|
|
|
|
|
|
1569
|
|
|
|
|
|
|
|
|
1570
|
|
|
|
|
|
|
=cut |
|
1571
|
|
|
|
|
|
|
|
|
1572
|
|
|
|
|
|
|
sub _id_generation_func{ |
|
1573
|
11
|
|
|
11
|
|
11
|
my $obj = shift; |
|
1574
|
11
|
50
|
|
|
|
25
|
if ( @_ ) { |
|
1575
|
0
|
|
|
|
|
0
|
my $value = shift; |
|
1576
|
0
|
|
|
|
|
0
|
$obj->{'_id_generation_func'} = $value; |
|
1577
|
|
|
|
|
|
|
} |
|
1578
|
11
|
|
|
|
|
24
|
return $obj->{'_id_generation_func'}; |
|
1579
|
|
|
|
|
|
|
|
|
1580
|
|
|
|
|
|
|
} |
|
1581
|
|
|
|
|
|
|
|
|
1582
|
|
|
|
|
|
|
=head2 _ac_generation_func |
|
1583
|
|
|
|
|
|
|
|
|
1584
|
|
|
|
|
|
|
Title : _ac_generation_func |
|
1585
|
|
|
|
|
|
|
Usage : $obj->_ac_generation_func($newval) |
|
1586
|
|
|
|
|
|
|
Function: |
|
1587
|
|
|
|
|
|
|
Returns : value of _ac_generation_func |
|
1588
|
|
|
|
|
|
|
Args : newvalue (optional) |
|
1589
|
|
|
|
|
|
|
|
|
1590
|
|
|
|
|
|
|
|
|
1591
|
|
|
|
|
|
|
=cut |
|
1592
|
|
|
|
|
|
|
|
|
1593
|
|
|
|
|
|
|
sub _ac_generation_func{ |
|
1594
|
11
|
|
|
11
|
|
23
|
my $obj = shift; |
|
1595
|
11
|
50
|
|
|
|
25
|
if ( @_ ) { |
|
1596
|
0
|
|
|
|
|
0
|
my $value = shift; |
|
1597
|
0
|
|
|
|
|
0
|
$obj->{'_ac_generation_func'} = $value; |
|
1598
|
|
|
|
|
|
|
} |
|
1599
|
11
|
|
|
|
|
101
|
return $obj->{'_ac_generation_func'}; |
|
1600
|
|
|
|
|
|
|
|
|
1601
|
|
|
|
|
|
|
} |
|
1602
|
|
|
|
|
|
|
|
|
1603
|
|
|
|
|
|
|
=head2 _sv_generation_func |
|
1604
|
|
|
|
|
|
|
|
|
1605
|
|
|
|
|
|
|
Title : _sv_generation_func |
|
1606
|
|
|
|
|
|
|
Usage : $obj->_sv_generation_func($newval) |
|
1607
|
|
|
|
|
|
|
Function: |
|
1608
|
|
|
|
|
|
|
Returns : value of _sv_generation_func |
|
1609
|
|
|
|
|
|
|
Args : newvalue (optional) |
|
1610
|
|
|
|
|
|
|
|
|
1611
|
|
|
|
|
|
|
|
|
1612
|
|
|
|
|
|
|
=cut |
|
1613
|
|
|
|
|
|
|
|
|
1614
|
|
|
|
|
|
|
sub _sv_generation_func{ |
|
1615
|
0
|
|
|
0
|
|
0
|
my $obj = shift; |
|
1616
|
0
|
0
|
|
|
|
0
|
if ( @_ ) { |
|
1617
|
0
|
|
|
|
|
0
|
my $value = shift; |
|
1618
|
0
|
|
|
|
|
0
|
$obj->{'_sv_generation_func'} = $value; |
|
1619
|
|
|
|
|
|
|
} |
|
1620
|
0
|
|
|
|
|
0
|
return $obj->{'_sv_generation_func'}; |
|
1621
|
|
|
|
|
|
|
|
|
1622
|
|
|
|
|
|
|
} |
|
1623
|
|
|
|
|
|
|
|
|
1624
|
|
|
|
|
|
|
=head2 _kw_generation_func |
|
1625
|
|
|
|
|
|
|
|
|
1626
|
|
|
|
|
|
|
Title : _kw_generation_func |
|
1627
|
|
|
|
|
|
|
Usage : $obj->_kw_generation_func($newval) |
|
1628
|
|
|
|
|
|
|
Function: |
|
1629
|
|
|
|
|
|
|
Returns : value of _kw_generation_func |
|
1630
|
|
|
|
|
|
|
Args : newvalue (optional) |
|
1631
|
|
|
|
|
|
|
|
|
1632
|
|
|
|
|
|
|
|
|
1633
|
|
|
|
|
|
|
=cut |
|
1634
|
|
|
|
|
|
|
|
|
1635
|
|
|
|
|
|
|
sub _kw_generation_func{ |
|
1636
|
11
|
|
|
11
|
|
15
|
my $obj = shift; |
|
1637
|
11
|
50
|
|
|
|
21
|
if ( @_ ) { |
|
1638
|
0
|
|
|
|
|
0
|
my $value = shift; |
|
1639
|
0
|
|
|
|
|
0
|
$obj->{'_kw_generation_func'} = $value; |
|
1640
|
|
|
|
|
|
|
} |
|
1641
|
11
|
|
|
|
|
61
|
return $obj->{'_kw_generation_func'}; |
|
1642
|
|
|
|
|
|
|
|
|
1643
|
|
|
|
|
|
|
} |
|
1644
|
|
|
|
|
|
|
|
|
1645
|
|
|
|
|
|
|
1; |