line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# BioPerl module for Bio::SeqIO::EMBL |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Please direct questions and support issues to |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Cared for by Ewan Birney |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# Copyright Ewan Birney |
9
|
|
|
|
|
|
|
# |
10
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Bio::SeqIO::embl - EMBL sequence input/output stream |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
It is probably best not to use this object directly, but |
21
|
|
|
|
|
|
|
rather go through the SeqIO handler system. Go: |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
$stream = Bio::SeqIO->new(-file => $filename, -format => 'EMBL'); |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
while ( (my $seq = $stream->next_seq()) ) { |
26
|
|
|
|
|
|
|
# do something with $seq |
27
|
|
|
|
|
|
|
} |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
=head1 DESCRIPTION |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
This object can transform Bio::Seq objects to and from EMBL flat |
32
|
|
|
|
|
|
|
file databases. |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
There is a lot of flexibility here about how to dump things which |
35
|
|
|
|
|
|
|
should be documented more fully. |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
There should be a common object that this and Genbank share (probably |
38
|
|
|
|
|
|
|
with Swissprot). Too much of the magic is identical. |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
=head2 Optional functions |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
=over 3 |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
=item _show_dna() |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
(output only) shows the dna or not |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
=item _post_sort() |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
(output only) provides a sorting func which is applied to the FTHelpers |
51
|
|
|
|
|
|
|
before printing |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
=item _id_generation_func() |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
This is function which is called as |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
print "ID ", $func($annseq), "\n"; |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
To generate the ID line. If it is not there, it generates a sensible ID |
60
|
|
|
|
|
|
|
line using a number of tools. |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
If you want to output annotations in EMBL format they need to be |
63
|
|
|
|
|
|
|
stored in a Bio::Annotation::Collection object which is accessible |
64
|
|
|
|
|
|
|
through the Bio::SeqI interface method L. |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
The following are the names of the keys which are polled from a |
67
|
|
|
|
|
|
|
L object. |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
reference - Should contain Bio::Annotation::Reference objects |
70
|
|
|
|
|
|
|
comment - Should contain Bio::Annotation::Comment objects |
71
|
|
|
|
|
|
|
dblink - Should contain Bio::Annotation::DBLink objects |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=back |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=head1 FEEDBACK |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=head2 Mailing Lists |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
80
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to one |
81
|
|
|
|
|
|
|
of the Bioperl mailing lists. Your participation is much appreciated. |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
84
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=head2 Support |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
I |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
93
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
94
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
95
|
|
|
|
|
|
|
with code and data examples if at all possible. |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
=head2 Reporting Bugs |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
100
|
|
|
|
|
|
|
the bugs and their resolution. Bug reports can be submitted via |
101
|
|
|
|
|
|
|
the web: |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
=head1 AUTHOR - Ewan Birney |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
Email birney@ebi.ac.uk |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
=head1 APPENDIX |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
The rest of the documentation details each of the object |
112
|
|
|
|
|
|
|
methods. Internal methods are usually preceded with a _ |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=cut |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
# Let the code begin... |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
package Bio::SeqIO::embl; |
121
|
7
|
|
|
7
|
|
514
|
use vars qw(%FTQUAL_NO_QUOTE); |
|
7
|
|
|
|
|
9
|
|
|
7
|
|
|
|
|
291
|
|
122
|
7
|
|
|
7
|
|
24
|
use strict; |
|
7
|
|
|
|
|
7
|
|
|
7
|
|
|
|
|
111
|
|
123
|
7
|
|
|
7
|
|
1372
|
use Bio::SeqIO::FTHelper; |
|
7
|
|
|
|
|
10
|
|
|
7
|
|
|
|
|
127
|
|
124
|
7
|
|
|
7
|
|
28
|
use Bio::SeqFeature::Generic; |
|
7
|
|
|
|
|
8
|
|
|
7
|
|
|
|
|
98
|
|
125
|
7
|
|
|
7
|
|
1401
|
use Bio::Species; |
|
7
|
|
|
|
|
11
|
|
|
7
|
|
|
|
|
139
|
|
126
|
7
|
|
|
7
|
|
26
|
use Bio::Seq::SeqFactory; |
|
7
|
|
|
|
|
7
|
|
|
7
|
|
|
|
|
113
|
|
127
|
7
|
|
|
7
|
|
21
|
use Bio::Annotation::Collection; |
|
7
|
|
|
|
|
7
|
|
|
7
|
|
|
|
|
111
|
|
128
|
7
|
|
|
7
|
|
1114
|
use Bio::Annotation::Comment; |
|
7
|
|
|
|
|
10
|
|
|
7
|
|
|
|
|
122
|
|
129
|
7
|
|
|
7
|
|
1500
|
use Bio::Annotation::Reference; |
|
7
|
|
|
|
|
10
|
|
|
7
|
|
|
|
|
143
|
|
130
|
7
|
|
|
7
|
|
26
|
use Bio::Annotation::DBLink; |
|
7
|
|
|
|
|
8
|
|
|
7
|
|
|
|
|
110
|
|
131
|
|
|
|
|
|
|
|
132
|
7
|
|
|
7
|
|
20
|
use base qw(Bio::SeqIO); |
|
7
|
|
|
|
|
8
|
|
|
7
|
|
|
|
|
33181
|
|
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
# Note that a qualifier that exceeds one line (i.e. a long label) will |
135
|
|
|
|
|
|
|
# automatically be quoted regardless: |
136
|
|
|
|
|
|
|
%FTQUAL_NO_QUOTE=( |
137
|
|
|
|
|
|
|
'anticodon'=>1, |
138
|
|
|
|
|
|
|
'citation'=>1, |
139
|
|
|
|
|
|
|
'codon'=>1, |
140
|
|
|
|
|
|
|
'codon_start'=>1, |
141
|
|
|
|
|
|
|
'cons_splice'=>1, |
142
|
|
|
|
|
|
|
'direction'=>1, |
143
|
|
|
|
|
|
|
'evidence'=>1, |
144
|
|
|
|
|
|
|
'label'=>1, |
145
|
|
|
|
|
|
|
'mod_base'=> 1, |
146
|
|
|
|
|
|
|
'number'=> 1, |
147
|
|
|
|
|
|
|
'rpt_type'=> 1, |
148
|
|
|
|
|
|
|
'rpt_unit'=> 1, |
149
|
|
|
|
|
|
|
'transl_except'=> 1, |
150
|
|
|
|
|
|
|
'transl_table'=> 1, |
151
|
|
|
|
|
|
|
'usedin'=> 1, |
152
|
|
|
|
|
|
|
); |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
sub _initialize { |
155
|
31
|
|
|
31
|
|
49
|
my($self,@args) = @_; |
156
|
|
|
|
|
|
|
|
157
|
31
|
|
|
|
|
110
|
$self->SUPER::_initialize(@args); |
158
|
|
|
|
|
|
|
# hash for functions for decoding keys. |
159
|
31
|
|
|
|
|
83
|
$self->{'_func_ftunit_hash'} = {}; |
160
|
|
|
|
|
|
|
# sets this to one by default. People can change it |
161
|
31
|
|
|
|
|
83
|
$self->_show_dna(1); |
162
|
31
|
50
|
|
|
|
84
|
if ( ! defined $self->sequence_factory ) { |
163
|
31
|
|
|
|
|
87
|
$self->sequence_factory(Bio::Seq::SeqFactory->new |
164
|
|
|
|
|
|
|
(-verbose => $self->verbose(), |
165
|
|
|
|
|
|
|
-type => 'Bio::Seq::RichSeq')); |
166
|
|
|
|
|
|
|
} |
167
|
|
|
|
|
|
|
} |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
=head2 next_seq |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
Title : next_seq |
172
|
|
|
|
|
|
|
Usage : $seq = $stream->next_seq() |
173
|
|
|
|
|
|
|
Function: returns the next sequence in the stream |
174
|
|
|
|
|
|
|
Returns : Bio::Seq object |
175
|
|
|
|
|
|
|
Args : |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
=cut |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
sub next_seq { |
180
|
25
|
|
|
25
|
1
|
66
|
my ($self,@args) = @_; |
181
|
25
|
|
|
|
|
27
|
my ($pseq,$c,$line,$name,$desc,$acc,$seqc,$mol,$div, |
182
|
|
|
|
|
|
|
$date, $comment, @date_arr); |
183
|
|
|
|
|
|
|
|
184
|
25
|
|
|
|
|
139
|
my ($annotation, %params, @features) = |
185
|
|
|
|
|
|
|
Bio::Annotation::Collection->new(); |
186
|
|
|
|
|
|
|
|
187
|
25
|
|
|
|
|
88
|
$line = $self->_readline; |
188
|
|
|
|
|
|
|
# This needs to be before the first eof() test |
189
|
|
|
|
|
|
|
|
190
|
25
|
100
|
|
|
|
60
|
if ( !defined $line ) { |
191
|
1
|
|
|
|
|
4
|
return; # no throws - end of file |
192
|
|
|
|
|
|
|
} |
193
|
|
|
|
|
|
|
|
194
|
24
|
100
|
|
|
|
109
|
if ( $line =~ /^\s+$/ ) { |
195
|
1
|
|
|
|
|
4
|
while ( defined ($line = $self->_readline) ) { |
196
|
1
|
50
|
|
|
|
4
|
$line =~/^\S/ && last; |
197
|
|
|
|
|
|
|
} |
198
|
|
|
|
|
|
|
# return without error if the whole next sequence was just a single |
199
|
|
|
|
|
|
|
# blank line and then eof |
200
|
1
|
50
|
|
|
|
4
|
return unless $line; |
201
|
|
|
|
|
|
|
} |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
# no ID as 1st non-blank line, need short circuit and exit routine |
204
|
24
|
50
|
|
|
|
98
|
$self->throw("EMBL stream with no ID. Not embl in my book") |
205
|
|
|
|
|
|
|
unless $line =~ /^ID\s+\S+/; |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
# At this point we are sure that $line contains an ID header line |
208
|
24
|
|
|
|
|
26
|
my $alphabet; |
209
|
24
|
100
|
|
|
|
62
|
if ( $line =~ tr/;/;/ == 6) { # New style headers contain exactly six semicolons. |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
# New style header (EMBL Release >= 87, after June 2006) |
212
|
10
|
|
|
|
|
13
|
my $topology; |
213
|
|
|
|
|
|
|
my $sv; |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
# ID DQ299383; SV 1; linear; mRNA; STD; MAM; 431 BP. |
216
|
|
|
|
|
|
|
# This regexp comes from the new2old.pl conversion script, from EBI |
217
|
10
|
100
|
|
|
|
46
|
if ($line =~ m/^ID (\S+);\s+SV (\d+); (\w+); ([^;]+); (\w{3}); (\w{3}); (\d+) BP./) { |
218
|
7
|
|
|
|
|
38
|
($name, $sv, $topology, $mol, $div) = ($1, $2, $3, $4, $6); |
219
|
|
|
|
|
|
|
} |
220
|
10
|
100
|
|
|
|
19
|
if (defined $sv) { |
221
|
7
|
|
|
|
|
13
|
$params{'-seq_version'} = $sv; |
222
|
7
|
|
|
|
|
12
|
$params{'-version'} = $sv; |
223
|
|
|
|
|
|
|
} |
224
|
|
|
|
|
|
|
|
225
|
10
|
50
|
66
|
|
|
38
|
if (defined $topology && $topology eq 'circular') { |
226
|
0
|
|
|
|
|
0
|
$params{'-is_circular'} = 1; |
227
|
|
|
|
|
|
|
} |
228
|
|
|
|
|
|
|
|
229
|
10
|
100
|
|
|
|
20
|
if (defined $mol ) { |
230
|
7
|
100
|
|
|
|
20
|
if ($mol =~ /DNA/) { |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
231
|
6
|
|
|
|
|
12
|
$alphabet = 'dna'; |
232
|
|
|
|
|
|
|
} elsif ($mol =~ /RNA/) { |
233
|
1
|
|
|
|
|
2
|
$alphabet = 'rna'; |
234
|
|
|
|
|
|
|
} elsif ($mol =~ /AA/) { |
235
|
0
|
|
|
|
|
0
|
$alphabet = 'protein'; |
236
|
|
|
|
|
|
|
} |
237
|
|
|
|
|
|
|
} |
238
|
|
|
|
|
|
|
} else { |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
# Old style header (EMBL Release < 87, before June 2006) |
241
|
14
|
100
|
|
|
|
92
|
if ($line =~ /^ID\s+(\S+)[^;]*;\s+(\S+)[^;]*;\s+(\S+)[^;]*;/) { |
242
|
12
|
|
|
|
|
42
|
($name, $mol, $div) = ($1, $2, $3); |
243
|
|
|
|
|
|
|
} |
244
|
|
|
|
|
|
|
|
245
|
14
|
100
|
|
|
|
30
|
if ($mol) { |
246
|
12
|
50
|
|
|
|
24
|
if ( $mol =~ /circular/ ) { |
247
|
0
|
|
|
|
|
0
|
$params{'-is_circular'} = 1; |
248
|
0
|
|
|
|
|
0
|
$mol =~ s|circular ||; |
249
|
|
|
|
|
|
|
} |
250
|
12
|
50
|
|
|
|
26
|
if (defined $mol ) { |
251
|
12
|
100
|
|
|
|
36
|
if ($mol =~ /DNA/) { |
|
|
50
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
252
|
10
|
|
|
|
|
12
|
$alphabet='dna'; |
253
|
|
|
|
|
|
|
} elsif ($mol =~ /RNA/) { |
254
|
2
|
|
|
|
|
3
|
$alphabet='rna'; |
255
|
|
|
|
|
|
|
} elsif ($mol =~ /AA/) { |
256
|
0
|
|
|
|
|
0
|
$alphabet='protein'; |
257
|
|
|
|
|
|
|
} |
258
|
|
|
|
|
|
|
} |
259
|
|
|
|
|
|
|
} |
260
|
|
|
|
|
|
|
} |
261
|
|
|
|
|
|
|
|
262
|
24
|
100
|
66
|
|
|
96
|
unless( defined $name && length($name) ) { |
263
|
5
|
|
|
|
|
9
|
$name = "unknown_id"; |
264
|
|
|
|
|
|
|
} |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
# $self->warn("not parsing upper annotation in EMBL file yet!"); |
267
|
24
|
|
|
|
|
28
|
my $buffer = $line; |
268
|
24
|
|
|
|
|
24
|
local $_; |
269
|
24
|
|
|
|
|
26
|
BEFORE_FEATURE_TABLE : |
270
|
|
|
|
|
|
|
my $ncbi_taxid; |
271
|
24
|
|
|
|
|
52
|
until ( !defined $buffer ) { |
272
|
499
|
|
|
|
|
399
|
$_ = $buffer; |
273
|
|
|
|
|
|
|
# Exit at start of Feature table |
274
|
499
|
100
|
|
|
|
957
|
if ( /^(F[HT]|SQ)/ ) { |
275
|
24
|
100
|
100
|
|
|
134
|
$self->_pushback($_) if( $1 eq 'SQ' || $1 eq 'FT'); |
276
|
24
|
|
|
|
|
26
|
last; |
277
|
|
|
|
|
|
|
} |
278
|
|
|
|
|
|
|
# Description line(s) |
279
|
475
|
100
|
|
|
|
617
|
if (/^DE\s+(\S.*\S)/) { |
280
|
22
|
100
|
|
|
|
95
|
$desc .= $desc ? " $1" : $1; |
281
|
|
|
|
|
|
|
} |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
#accession number |
284
|
475
|
100
|
100
|
|
|
1303
|
if ( /^AC\s+(.*)?/ || /^PA\s+(.*)?/) { |
285
|
24
|
|
|
|
|
102
|
my @accs = split(/[; ]+/, $1); # allow space in addition |
286
|
|
|
|
|
|
|
$params{'-accession_number'} = shift @accs |
287
|
24
|
100
|
|
|
|
76
|
unless defined $params{'-accession_number'}; |
288
|
24
|
|
|
|
|
25
|
push @{$params{'-secondary_accessions'}}, @accs; |
|
24
|
|
|
|
|
61
|
|
289
|
|
|
|
|
|
|
} |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
#version number |
292
|
475
|
100
|
|
|
|
611
|
if ( /^SV\s+\S+\.(\d+);?/ ) { |
293
|
6
|
|
|
|
|
9
|
my $sv = $1; |
294
|
|
|
|
|
|
|
#$sv =~ s/\;//; |
295
|
6
|
|
|
|
|
13
|
$params{'-seq_version'} = $sv; |
296
|
6
|
|
|
|
|
9
|
$params{'-version'} = $sv; |
297
|
|
|
|
|
|
|
} |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
#date (NOTE: takes last date line) |
300
|
475
|
100
|
|
|
|
633
|
if ( /^DT\s+(.+)$/ ) { |
301
|
26
|
|
|
|
|
44
|
my $line = $1; |
302
|
26
|
|
|
|
|
56
|
my ($date, $version) = split(' ', $line, 2); |
303
|
26
|
|
|
|
|
32
|
$date =~ tr/,//d; # remove comma if new version |
304
|
26
|
100
|
|
|
|
42
|
if ($version) { |
305
|
24
|
100
|
|
|
|
106
|
if ($version =~ /\(Rel\. (\d+), Created\)/ms ) { |
|
|
100
|
|
|
|
|
|
306
|
11
|
|
|
|
|
73
|
my $release = Bio::Annotation::SimpleValue->new( |
307
|
|
|
|
|
|
|
-tagname => 'creation_release', |
308
|
|
|
|
|
|
|
-value => $1 |
309
|
|
|
|
|
|
|
); |
310
|
11
|
|
|
|
|
31
|
$annotation->add_Annotation($release); |
311
|
|
|
|
|
|
|
} elsif ($version =~ /\(Rel\. (\d+), Last updated, Version (\d+)\)/ms ) { |
312
|
11
|
|
|
|
|
44
|
my $release = Bio::Annotation::SimpleValue->new( |
313
|
|
|
|
|
|
|
-tagname => 'update_release', |
314
|
|
|
|
|
|
|
-value => $1 |
315
|
|
|
|
|
|
|
); |
316
|
11
|
|
|
|
|
27
|
$annotation->add_Annotation($release); |
317
|
|
|
|
|
|
|
|
318
|
11
|
|
|
|
|
39
|
my $update = Bio::Annotation::SimpleValue->new( |
319
|
|
|
|
|
|
|
-tagname => 'update_version', |
320
|
|
|
|
|
|
|
-value => $2 |
321
|
|
|
|
|
|
|
); |
322
|
11
|
|
|
|
|
25
|
$annotation->add_Annotation($update); |
323
|
|
|
|
|
|
|
} |
324
|
|
|
|
|
|
|
} |
325
|
26
|
|
|
|
|
22
|
push @{$params{'-dates'}}, $date; |
|
26
|
|
|
|
|
49
|
|
326
|
|
|
|
|
|
|
} |
327
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
#keywords |
329
|
475
|
100
|
|
|
|
1633
|
if ( /^KW (.*)\S*$/ ) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
330
|
29
|
|
|
|
|
138
|
my @kw = split(/\s*\;\s*/,$1); |
331
|
29
|
|
|
|
|
35
|
push @{$params{'-keywords'}}, @kw; |
|
29
|
|
|
|
|
91
|
|
332
|
|
|
|
|
|
|
} |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
# Organism name and phylogenetic information |
335
|
|
|
|
|
|
|
elsif (/^O[SC]/) { |
336
|
|
|
|
|
|
|
# pass the accession number so we can give an informative throw message if necessary |
337
|
21
|
|
|
|
|
76
|
my $species = $self->_read_EMBL_Species(\$buffer, $params{'-accession_number'}); |
338
|
21
|
|
|
|
|
46
|
$params{'-species'}= $species; |
339
|
|
|
|
|
|
|
} |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
# NCBI TaxID Xref |
342
|
|
|
|
|
|
|
elsif (/^OX/) { |
343
|
3
|
50
|
|
|
|
19
|
if (/NCBI_TaxID=(\d+)/) { |
344
|
3
|
|
|
|
|
14
|
$ncbi_taxid=$1; |
345
|
|
|
|
|
|
|
} |
346
|
|
|
|
|
|
|
|
347
|
3
|
|
|
|
|
12
|
my @links = $self->_read_EMBL_TaxID_DBLink(\$buffer); |
348
|
3
|
|
|
|
|
6
|
foreach my $dblink ( @links ) { |
349
|
1
|
|
|
|
|
6
|
$annotation->add_Annotation('dblink',$dblink); |
350
|
|
|
|
|
|
|
} |
351
|
|
|
|
|
|
|
} |
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
# References |
354
|
|
|
|
|
|
|
elsif (/^R/) { |
355
|
99
|
|
|
|
|
187
|
my @refs = $self->_read_EMBL_References(\$buffer); |
356
|
99
|
|
|
|
|
122
|
foreach my $ref ( @refs ) { |
357
|
99
|
|
|
|
|
211
|
$annotation->add_Annotation('reference',$ref); |
358
|
|
|
|
|
|
|
} |
359
|
|
|
|
|
|
|
} |
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
# DB Xrefs |
362
|
|
|
|
|
|
|
elsif (/^DR/) { |
363
|
5
|
|
|
|
|
32
|
my @links = $self->_read_EMBL_DBLink(\$buffer); |
364
|
5
|
|
|
|
|
10
|
foreach my $dblink ( @links ) { |
365
|
25
|
|
|
|
|
39
|
$annotation->add_Annotation('dblink',$dblink); |
366
|
|
|
|
|
|
|
} |
367
|
|
|
|
|
|
|
} |
368
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
# Comments |
370
|
|
|
|
|
|
|
elsif (/^CC\s+(.*)/) { |
371
|
15
|
|
|
|
|
40
|
$comment .= $1; |
372
|
15
|
|
|
|
|
18
|
$comment .= " "; |
373
|
15
|
|
|
|
|
37
|
while (defined ($_ = $self->_readline) ) { |
374
|
257
|
100
|
|
|
|
519
|
if (/^CC\s+(.*)/) { |
375
|
242
|
|
|
|
|
288
|
$comment .= $1; |
376
|
242
|
|
|
|
|
325
|
$comment .= " "; |
377
|
|
|
|
|
|
|
} else { |
378
|
15
|
|
|
|
|
21
|
last; |
379
|
|
|
|
|
|
|
} |
380
|
|
|
|
|
|
|
} |
381
|
15
|
|
|
|
|
96
|
my $commobj = Bio::Annotation::Comment->new(); |
382
|
15
|
|
|
|
|
45
|
$commobj->text($comment); |
383
|
15
|
|
|
|
|
33
|
$annotation->add_Annotation('comment',$commobj); |
384
|
15
|
|
|
|
|
31
|
$comment = ""; |
385
|
|
|
|
|
|
|
} |
386
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
# Get next line. |
388
|
475
|
|
|
|
|
744
|
$buffer = $self->_readline; |
389
|
|
|
|
|
|
|
} |
390
|
|
|
|
|
|
|
|
391
|
24
|
|
|
|
|
50
|
while ( defined ($_ = $self->_readline) ) { |
392
|
46
|
100
|
|
|
|
114
|
/^FT\s{3}\w/ && last; |
393
|
24
|
100
|
|
|
|
56
|
/^SQ / && last; |
394
|
22
|
50
|
|
|
|
55
|
/^CO / && last; |
395
|
|
|
|
|
|
|
} |
396
|
24
|
|
|
|
|
27
|
$buffer = $_; |
397
|
|
|
|
|
|
|
|
398
|
24
|
100
|
66
|
|
|
116
|
if (defined($buffer) && $buffer =~ /^FT /) { |
399
|
22
|
|
|
|
|
46
|
until ( !defined ($buffer) ) { |
400
|
256
|
|
|
|
|
468
|
my $ftunit = $self->_read_FTHelper_EMBL(\$buffer); |
401
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
# process ftunit |
403
|
256
|
|
|
|
|
530
|
my $feat = |
404
|
|
|
|
|
|
|
$ftunit->_generic_seqfeature($self->location_factory(), $name); |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
# add taxon_id from source if available |
407
|
|
|
|
|
|
|
# Notice, this will override what is found in the OX line. |
408
|
|
|
|
|
|
|
# this is by design as this seems to be the official way |
409
|
|
|
|
|
|
|
# of specifying a TaxID |
410
|
256
|
100
|
100
|
|
|
673
|
if ($params{'-species'} && ($feat->primary_tag eq 'source') |
|
|
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
411
|
|
|
|
|
|
|
&& $feat->has_tag('db_xref') |
412
|
|
|
|
|
|
|
&& (! $params{'-species'}->ncbi_taxid())) { |
413
|
12
|
|
|
|
|
32
|
foreach my $tagval ($feat->get_tag_values('db_xref')) { |
414
|
12
|
50
|
|
|
|
46
|
if (index($tagval,"taxon:") == 0) { |
415
|
12
|
|
|
|
|
38
|
$params{'-species'}->ncbi_taxid(substr($tagval,6)); |
416
|
12
|
|
|
|
|
24
|
last; |
417
|
|
|
|
|
|
|
} |
418
|
|
|
|
|
|
|
} |
419
|
|
|
|
|
|
|
} |
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
# add feature to list of features |
422
|
256
|
|
|
|
|
305
|
push(@features, $feat); |
423
|
|
|
|
|
|
|
|
424
|
256
|
100
|
|
|
|
1126
|
if ( $buffer !~ /^FT/ ) { |
425
|
22
|
|
|
|
|
69
|
last; |
426
|
|
|
|
|
|
|
} |
427
|
|
|
|
|
|
|
} |
428
|
|
|
|
|
|
|
} |
429
|
|
|
|
|
|
|
# Set taxid found in OX line |
430
|
24
|
100
|
100
|
|
|
129
|
if ($params{'-species'} && defined $ncbi_taxid |
|
|
|
100
|
|
|
|
|
431
|
|
|
|
|
|
|
&& (! $params{'-species'}->ncbi_taxid())) { |
432
|
2
|
|
|
|
|
7
|
$params{'-species'}->ncbi_taxid($ncbi_taxid); |
433
|
|
|
|
|
|
|
} |
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
# skip comments |
436
|
24
|
|
66
|
|
|
147
|
while ( defined ($buffer) && $buffer =~ /^XX/ ) { |
437
|
20
|
|
|
|
|
49
|
$buffer = $self->_readline(); |
438
|
|
|
|
|
|
|
} |
439
|
|
|
|
|
|
|
|
440
|
24
|
100
|
|
|
|
82
|
if ( $buffer =~ /^CO/ ) { |
441
|
|
|
|
|
|
|
# bug#2982 |
442
|
|
|
|
|
|
|
# special : create contig as annotation |
443
|
4
|
|
|
|
|
11
|
while ( defined ($buffer) ) { |
444
|
4
|
|
|
|
|
16
|
$annotation->add_Annotation($_) for $self->_read_EMBL_Contig(\$buffer); |
445
|
4
|
50
|
66
|
|
|
23
|
if ( !$buffer || $buffer !~ /^CO/ ) { |
446
|
4
|
|
|
|
|
5
|
last; |
447
|
|
|
|
|
|
|
} |
448
|
|
|
|
|
|
|
} |
449
|
4
|
|
100
|
|
|
11
|
$buffer ||= ''; |
450
|
|
|
|
|
|
|
} |
451
|
24
|
100
|
|
|
|
66
|
if ($buffer !~ /^\/\//) { # if no SQ lines following CO (bug#2958) |
452
|
20
|
100
|
|
|
|
63
|
if ( $buffer !~ /^SQ/ ) { |
453
|
1
|
|
|
|
|
3
|
while ( defined ($_ = $self->_readline) ) { |
454
|
0
|
0
|
|
|
|
0
|
/^SQ/ && last; |
455
|
|
|
|
|
|
|
} |
456
|
|
|
|
|
|
|
} |
457
|
20
|
|
|
|
|
24
|
$seqc = ""; |
458
|
20
|
|
|
|
|
48
|
while ( defined ($_ = $self->_readline) ) { |
459
|
2924
|
100
|
|
|
|
3631
|
m{^//} && last; |
460
|
2906
|
|
|
|
|
2431
|
$_ = uc($_); |
461
|
2906
|
|
|
|
|
20861
|
s/[^A-Za-z]//g; |
462
|
2906
|
|
|
|
|
4620
|
$seqc .= $_; |
463
|
|
|
|
|
|
|
} |
464
|
|
|
|
|
|
|
} |
465
|
24
|
|
|
|
|
84
|
my $seq = $self->sequence_factory->create |
466
|
|
|
|
|
|
|
(-verbose => $self->verbose(), |
467
|
|
|
|
|
|
|
-division => $div, |
468
|
|
|
|
|
|
|
-seq => $seqc, |
469
|
|
|
|
|
|
|
-desc => $desc, |
470
|
|
|
|
|
|
|
-display_id => $name, |
471
|
|
|
|
|
|
|
-annotation => $annotation, |
472
|
|
|
|
|
|
|
-molecule => $mol, |
473
|
|
|
|
|
|
|
-alphabet => $alphabet, |
474
|
|
|
|
|
|
|
-features => \@features, |
475
|
|
|
|
|
|
|
%params); |
476
|
24
|
|
|
|
|
197
|
return $seq; |
477
|
|
|
|
|
|
|
} |
478
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
=head2 _write_ID_line |
482
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
Title : _write_ID_line |
484
|
|
|
|
|
|
|
Usage : $self->_write_ID_line($seq); |
485
|
|
|
|
|
|
|
Function: Writes the EMBL Release 87 format ID line to the stream, unless |
486
|
|
|
|
|
|
|
: there is a user-supplied ID line generation function in which |
487
|
|
|
|
|
|
|
: case that is used instead. |
488
|
|
|
|
|
|
|
: ( See Bio::SeqIO::embl::_id_generation_function(). ) |
489
|
|
|
|
|
|
|
Returns : nothing |
490
|
|
|
|
|
|
|
Args : Bio::Seq object |
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
=cut |
493
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
sub _write_ID_line { |
495
|
|
|
|
|
|
|
|
496
|
11
|
|
|
11
|
|
17
|
my ($self, $seq) = @_; |
497
|
|
|
|
|
|
|
|
498
|
11
|
|
|
|
|
14
|
my $id_line; |
499
|
|
|
|
|
|
|
# If there is a user-supplied ID generation function, use it. |
500
|
11
|
50
|
|
|
|
27
|
if ( $self->_id_generation_func ) { |
501
|
0
|
|
|
|
|
0
|
$id_line = "ID " . &{$self->_id_generation_func}($seq) . "\nXX\n"; |
|
0
|
|
|
|
|
0
|
|
502
|
|
|
|
|
|
|
} |
503
|
|
|
|
|
|
|
# Otherwise, generate a standard EMBL release 87 (June 2006) ID line. |
504
|
|
|
|
|
|
|
else { |
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
# The sequence name is supposed to be the primary accession number, |
507
|
11
|
|
|
|
|
36
|
my $name = $seq->accession_number(); |
508
|
11
|
100
|
66
|
|
|
57
|
if ( not(defined $name) || $name eq 'unknown') { |
509
|
|
|
|
|
|
|
# but if it is not present, use the sequence ID or the empty string |
510
|
5
|
|
100
|
|
|
14
|
$name = $seq->id() || ''; |
511
|
|
|
|
|
|
|
} |
512
|
|
|
|
|
|
|
|
513
|
11
|
50
|
|
|
|
35
|
$self->warn("No whitespace allowed in EMBL id [". $name. "]") if $name =~ /\s/; |
514
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
# Use the sequence version, or default to 1. |
516
|
11
|
|
50
|
|
|
32
|
my $version = $seq->version() || 1; |
517
|
|
|
|
|
|
|
|
518
|
11
|
|
|
|
|
32
|
my $len = $seq->length(); |
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
# Taxonomic division. |
521
|
11
|
|
|
|
|
16
|
my $div; |
522
|
11
|
100
|
100
|
|
|
122
|
if ( $seq->can('division') && defined($seq->division) && |
|
|
|
66
|
|
|
|
|
523
|
|
|
|
|
|
|
$self->_is_valid_division($seq->division) ) { |
524
|
2
|
|
|
|
|
5
|
$div = $seq->division(); |
525
|
|
|
|
|
|
|
} else { |
526
|
9
|
|
50
|
|
|
39
|
$div ||= 'UNC'; # 'UNC' is the EMBL division code for 'unclassified'. |
527
|
|
|
|
|
|
|
} |
528
|
|
|
|
|
|
|
|
529
|
11
|
|
|
|
|
16
|
my $mol; |
530
|
|
|
|
|
|
|
# If the molecule type is a valid EMBL type, use it. |
531
|
11
|
50
|
100
|
|
|
86
|
if ( $seq->can('molecule') |
|
|
100
|
66
|
|
|
|
|
|
|
|
100
|
|
|
|
|
532
|
|
|
|
|
|
|
&& defined($seq->molecule) |
533
|
|
|
|
|
|
|
&& $self->_is_valid_molecule_type($seq->molecule) |
534
|
|
|
|
|
|
|
) { |
535
|
0
|
|
|
|
|
0
|
$mol = $seq->molecule(); |
536
|
|
|
|
|
|
|
} |
537
|
|
|
|
|
|
|
# Otherwise, choose unassigned DNA or RNA based on the alphabet. |
538
|
|
|
|
|
|
|
elsif ($seq->can('primary_seq') && defined $seq->primary_seq->alphabet) { |
539
|
6
|
|
|
|
|
10
|
my $alphabet =$seq->primary_seq->alphabet; |
540
|
6
|
50
|
|
|
|
12
|
if ($alphabet eq 'dna') { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
541
|
6
|
|
|
|
|
8
|
$mol ='unassigned DNA'; |
542
|
|
|
|
|
|
|
} elsif ($alphabet eq 'rna') { |
543
|
0
|
|
|
|
|
0
|
$mol='unassigned RNA'; |
544
|
|
|
|
|
|
|
} elsif ($alphabet eq 'protein') { |
545
|
0
|
|
|
|
|
0
|
$self->warn("Protein sequence found; EMBL is a nucleotide format."); |
546
|
0
|
|
|
|
|
0
|
$mol='AA'; # AA is not a valid EMBL molecule type. |
547
|
|
|
|
|
|
|
} |
548
|
|
|
|
|
|
|
} |
549
|
|
|
|
|
|
|
|
550
|
11
|
|
|
|
|
18
|
my $topology = 'linear'; |
551
|
11
|
50
|
|
|
|
31
|
if ($seq->is_circular) { |
552
|
0
|
|
|
|
|
0
|
$topology = 'circular'; |
553
|
|
|
|
|
|
|
} |
554
|
|
|
|
|
|
|
|
555
|
11
|
|
100
|
|
|
34
|
$mol ||= ''; # 'unassigned'; ? |
556
|
11
|
|
|
|
|
46
|
$id_line = "ID $name; SV $version; $topology; $mol; STD; $div; $len BP.\nXX\n"; |
557
|
11
|
|
|
|
|
48
|
$self->_print($id_line); |
558
|
|
|
|
|
|
|
} |
559
|
|
|
|
|
|
|
} |
560
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
=head2 _is_valid_division |
562
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
Title : _is_valid_division |
564
|
|
|
|
|
|
|
Usage : $self->_is_valid_division($div) |
565
|
|
|
|
|
|
|
Function: tests division code for validity |
566
|
|
|
|
|
|
|
Returns : true if $div is a valid EMBL release 87 taxonomic division. |
567
|
|
|
|
|
|
|
Args : taxonomic division code string |
568
|
|
|
|
|
|
|
|
569
|
|
|
|
|
|
|
=cut |
570
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
sub _is_valid_division { |
572
|
2
|
|
|
2
|
|
4
|
my ($self, $division) = @_; |
573
|
|
|
|
|
|
|
|
574
|
2
|
|
|
|
|
25
|
my %EMBL_divisions = ( |
575
|
|
|
|
|
|
|
"PHG" => 1, # Bacteriophage |
576
|
|
|
|
|
|
|
"ENV" => 1, # Environmental Sample |
577
|
|
|
|
|
|
|
"FUN" => 1, # Fungal |
578
|
|
|
|
|
|
|
"HUM" => 1, # Human |
579
|
|
|
|
|
|
|
"INV" => 1, # Invertebrate |
580
|
|
|
|
|
|
|
"MAM" => 1, # Other Mammal |
581
|
|
|
|
|
|
|
"VRT" => 1, # Other Vertebrate |
582
|
|
|
|
|
|
|
"MUS" => 1, # Mus musculus |
583
|
|
|
|
|
|
|
"PLN" => 1, # Plant |
584
|
|
|
|
|
|
|
"PRO" => 1, # Prokaryote |
585
|
|
|
|
|
|
|
"ROD" => 1, # Other Rodent |
586
|
|
|
|
|
|
|
"SYN" => 1, # Synthetic |
587
|
|
|
|
|
|
|
"UNC" => 1, # Unclassified |
588
|
|
|
|
|
|
|
"VRL" => 1 # Viral |
589
|
|
|
|
|
|
|
); |
590
|
|
|
|
|
|
|
|
591
|
2
|
|
|
|
|
10
|
return exists($EMBL_divisions{$division}); |
592
|
|
|
|
|
|
|
} |
593
|
|
|
|
|
|
|
|
594
|
|
|
|
|
|
|
=head2 _is_valid_molecule_type |
595
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
Title : _is_valid_molecule_type |
597
|
|
|
|
|
|
|
Usage : $self->_is_valid_molecule_type($mol) |
598
|
|
|
|
|
|
|
Function: tests molecule type for validity |
599
|
|
|
|
|
|
|
Returns : true if $mol is a valid EMBL release 87 molecule type. |
600
|
|
|
|
|
|
|
Args : molecule type string |
601
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
=cut |
603
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
sub _is_valid_molecule_type { |
605
|
2
|
|
|
2
|
|
3
|
my ($self, $moltype) = @_; |
606
|
|
|
|
|
|
|
|
607
|
2
|
|
|
|
|
15
|
my %EMBL_molecule_types = ( |
608
|
|
|
|
|
|
|
"genomic DNA" => 1, |
609
|
|
|
|
|
|
|
"genomic RNA" => 1, |
610
|
|
|
|
|
|
|
"mRNA" => 1, |
611
|
|
|
|
|
|
|
"tRNA" => 1, |
612
|
|
|
|
|
|
|
"rRNA" => 1, |
613
|
|
|
|
|
|
|
"snoRNA" => 1, |
614
|
|
|
|
|
|
|
"snRNA" => 1, |
615
|
|
|
|
|
|
|
"scRNA" => 1, |
616
|
|
|
|
|
|
|
"pre-RNA" => 1, |
617
|
|
|
|
|
|
|
"other RNA" => 1, |
618
|
|
|
|
|
|
|
"other DNA" => 1, |
619
|
|
|
|
|
|
|
"unassigned DNA" => 1, |
620
|
|
|
|
|
|
|
"unassigned RNA" => 1 |
621
|
|
|
|
|
|
|
); |
622
|
|
|
|
|
|
|
|
623
|
2
|
|
|
|
|
29
|
return exists($EMBL_molecule_types{$moltype}); |
624
|
|
|
|
|
|
|
} |
625
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
=head2 write_seq |
627
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
Title : write_seq |
629
|
|
|
|
|
|
|
Usage : $stream->write_seq($seq) |
630
|
|
|
|
|
|
|
Function: writes the $seq object (must be seq) to the stream |
631
|
|
|
|
|
|
|
Returns : 1 for success and undef for error |
632
|
|
|
|
|
|
|
Args : array of 1 to n Bio::SeqI objects |
633
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
=cut |
636
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
sub write_seq { |
638
|
13
|
|
|
13
|
1
|
55
|
my ($self,@seqs) = @_; |
639
|
|
|
|
|
|
|
|
640
|
13
|
|
|
|
|
27
|
foreach my $seq ( @seqs ) { |
641
|
13
|
50
|
|
|
|
32
|
$self->throw("Attempting to write with no seq!") unless defined $seq; |
642
|
13
|
100
|
100
|
|
|
87
|
unless ( ref $seq && $seq->isa('Bio::SeqI' ) ) { |
643
|
4
|
50
|
|
|
|
12
|
$self->warn("$seq is not a SeqI compliant sequence object!") |
644
|
|
|
|
|
|
|
if $self->verbose >= 0; |
645
|
4
|
100
|
66
|
|
|
22
|
unless ( ref $seq && $seq->isa('Bio::PrimarySeqI' ) ) { |
646
|
2
|
|
|
|
|
27
|
$self->throw("$seq is not a PrimarySeqI compliant sequence object!"); |
647
|
|
|
|
|
|
|
} |
648
|
|
|
|
|
|
|
} |
649
|
11
|
|
100
|
|
|
38
|
my $str = $seq->seq || ''; |
650
|
|
|
|
|
|
|
|
651
|
|
|
|
|
|
|
# Write the ID line. |
652
|
11
|
|
|
|
|
30
|
$self->_write_ID_line($seq); |
653
|
|
|
|
|
|
|
|
654
|
|
|
|
|
|
|
|
655
|
|
|
|
|
|
|
# Write the accession line if present |
656
|
11
|
|
|
|
|
13
|
my( $acc ); |
657
|
|
|
|
|
|
|
{ |
658
|
11
|
50
|
66
|
|
|
10
|
if ( my $func = $self->_ac_generation_func ) { |
|
11
|
100
|
|
|
|
26
|
|
|
|
50
|
|
|
|
|
|
659
|
0
|
|
|
|
|
0
|
$acc = &{$func}($seq); |
|
0
|
|
|
|
|
0
|
|
660
|
|
|
|
|
|
|
} elsif ( $seq->isa('Bio::Seq::RichSeqI') && |
661
|
|
|
|
|
|
|
defined($seq->accession_number) ) { |
662
|
4
|
|
|
|
|
7
|
$acc = $seq->accession_number; |
663
|
4
|
|
|
|
|
10
|
$acc = join("; ", $acc, $seq->get_secondary_accessions); |
664
|
|
|
|
|
|
|
} elsif ( $seq->can('accession_number') ) { |
665
|
7
|
|
|
|
|
15
|
$acc = $seq->accession_number; |
666
|
|
|
|
|
|
|
} |
667
|
|
|
|
|
|
|
|
668
|
11
|
50
|
|
|
|
22
|
if (defined $acc) { |
669
|
11
|
50
|
|
|
|
37
|
$self->_print("AC $acc;\n", |
670
|
|
|
|
|
|
|
"XX\n") || return; |
671
|
|
|
|
|
|
|
} |
672
|
|
|
|
|
|
|
} |
673
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
# Date lines |
675
|
11
|
|
|
|
|
12
|
my $switch=0; |
676
|
11
|
100
|
|
|
|
58
|
if ( $seq->can('get_dates') ) { |
677
|
4
|
|
|
|
|
8
|
my @dates = $seq->get_dates(); |
678
|
4
|
|
|
|
|
4
|
my $ct = 1; |
679
|
4
|
|
|
|
|
3
|
my $date_flag = 0; |
680
|
4
|
|
|
|
|
9
|
my ($cr) = $seq->annotation->get_Annotations("creation_release"); |
681
|
4
|
|
|
|
|
12
|
my ($ur) = $seq->annotation->get_Annotations("update_release"); |
682
|
4
|
|
|
|
|
8
|
my ($uv) = $seq->annotation->get_Annotations("update_version"); |
683
|
|
|
|
|
|
|
|
684
|
4
|
0
|
33
|
|
|
11
|
unless ($cr && $ur && $ur) { |
|
|
|
33
|
|
|
|
|
685
|
4
|
|
|
|
|
5
|
$date_flag = 1; |
686
|
|
|
|
|
|
|
} |
687
|
|
|
|
|
|
|
|
688
|
4
|
|
|
|
|
7
|
foreach my $dt (@dates) { |
689
|
0
|
0
|
|
|
|
0
|
if (!$date_flag) { |
690
|
0
|
0
|
|
|
|
0
|
$self->_write_line_EMBL_regex("DT ","DT ", |
691
|
|
|
|
|
|
|
$dt." (Rel. ".($cr->value()).", Created)", |
692
|
|
|
|
|
|
|
'\s+|$',80) if $ct == 1; |
693
|
0
|
0
|
|
|
|
0
|
$self->_write_line_EMBL_regex("DT ","DT ", |
694
|
|
|
|
|
|
|
$dt." (Rel. ".($ur->value()).", Last updated, Version ".($uv->value()).")", |
695
|
|
|
|
|
|
|
'\s+|$',80) if $ct == 2; |
696
|
|
|
|
|
|
|
} else { # other formats? |
697
|
0
|
|
|
|
|
0
|
$self->_write_line_EMBL_regex("DT ","DT ", |
698
|
|
|
|
|
|
|
$dt,'\s+|$',80); |
699
|
|
|
|
|
|
|
} |
700
|
0
|
|
|
|
|
0
|
$switch =1; |
701
|
0
|
|
|
|
|
0
|
$ct++; |
702
|
|
|
|
|
|
|
} |
703
|
4
|
50
|
|
|
|
8
|
if ($switch == 1) { |
704
|
0
|
0
|
|
|
|
0
|
$self->_print("XX\n") || return; |
705
|
|
|
|
|
|
|
} |
706
|
|
|
|
|
|
|
} |
707
|
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
# Description lines |
709
|
11
|
50
|
|
|
|
31
|
$self->_write_line_EMBL_regex("DE ","DE ",$seq->desc(),'\s+|$',80) || return; #' |
710
|
11
|
50
|
|
|
|
28
|
$self->_print( "XX\n") || return; |
711
|
|
|
|
|
|
|
|
712
|
|
|
|
|
|
|
# if there, write the kw line |
713
|
|
|
|
|
|
|
{ |
714
|
11
|
|
|
|
|
17
|
my( $kw ); |
|
11
|
|
|
|
|
563
|
|
715
|
11
|
50
|
|
|
|
28
|
if ( my $func = $self->_kw_generation_func ) { |
|
|
100
|
|
|
|
|
|
716
|
0
|
|
|
|
|
0
|
$kw = &{$func}($seq); |
|
0
|
|
|
|
|
0
|
|
717
|
|
|
|
|
|
|
} elsif ( $seq->can('keywords') ) { |
718
|
4
|
|
|
|
|
9
|
$kw = $seq->keywords; |
719
|
|
|
|
|
|
|
} |
720
|
11
|
100
|
|
|
|
25
|
if (defined $kw) { |
721
|
4
|
50
|
|
|
|
7
|
$self->_write_line_EMBL_regex("KW ", "KW ", $kw, '\s+|$', 80) || return; #' |
722
|
4
|
50
|
|
|
|
7
|
$self->_print( "XX\n") || return; |
723
|
|
|
|
|
|
|
} |
724
|
|
|
|
|
|
|
} |
725
|
|
|
|
|
|
|
|
726
|
|
|
|
|
|
|
# Organism lines |
727
|
|
|
|
|
|
|
|
728
|
11
|
100
|
100
|
|
|
53
|
if ($seq->can('species') && (my $spec = $seq->species)) { |
729
|
6
|
|
|
|
|
21
|
my @class = $spec->classification(); |
730
|
6
|
|
|
|
|
9
|
shift @class; # get rid of species name. Some embl files include |
731
|
|
|
|
|
|
|
# the species name in the OC lines, but this seems |
732
|
|
|
|
|
|
|
# more like an error than something we need to |
733
|
|
|
|
|
|
|
# emulate |
734
|
6
|
|
|
|
|
24
|
my $OS = $spec->scientific_name; |
735
|
6
|
50
|
|
|
|
15
|
if ($spec->common_name) { |
736
|
0
|
|
|
|
|
0
|
$OS .= ' ('.$spec->common_name.')'; |
737
|
|
|
|
|
|
|
} |
738
|
6
|
50
|
|
|
|
20
|
$self->_print("OS $OS\n") || return; |
739
|
6
|
|
|
|
|
19
|
my $OC = join('; ', reverse(@class)) .'.'; |
740
|
6
|
50
|
|
|
|
13
|
$self->_write_line_EMBL_regex("OC ","OC ",$OC,'; |$',80) || return; |
741
|
6
|
50
|
|
|
|
20
|
if ($spec->organelle) { |
742
|
0
|
0
|
|
|
|
0
|
$self->_write_line_EMBL_regex("OG ","OG ",$spec->organelle,'; |$',80) || return; |
743
|
|
|
|
|
|
|
} |
744
|
6
|
|
|
|
|
15
|
my $ncbi_taxid = $spec->ncbi_taxid; |
745
|
6
|
100
|
|
|
|
12
|
if ($ncbi_taxid) { |
746
|
2
|
50
|
|
|
|
6
|
$self->_print("OX NCBI_TaxID=$ncbi_taxid\n") || return; |
747
|
|
|
|
|
|
|
} |
748
|
6
|
50
|
|
|
|
12
|
$self->_print("XX\n") || return; |
749
|
|
|
|
|
|
|
} |
750
|
|
|
|
|
|
|
# Reference lines |
751
|
11
|
|
|
|
|
12
|
my $t = 1; |
752
|
11
|
100
|
66
|
|
|
63
|
if ( $seq->can('annotation') && defined $seq->annotation ) { |
753
|
9
|
|
|
|
|
18
|
foreach my $ref ( $seq->annotation->get_Annotations('reference') ) { |
754
|
6
|
50
|
|
|
|
15
|
$self->_print( "RN [$t]\n") || return; |
755
|
|
|
|
|
|
|
|
756
|
|
|
|
|
|
|
# Having no RP line is legal, but we need both |
757
|
|
|
|
|
|
|
# start and end for a valid location. |
758
|
6
|
50
|
|
|
|
14
|
if ($ref->comment) { |
759
|
0
|
0
|
|
|
|
0
|
$self->_write_line_EMBL_regex("RC ", "RC ", $ref->comment, '\s+|$', 80) || return; #' |
760
|
|
|
|
|
|
|
} |
761
|
6
|
|
|
|
|
15
|
my $start = $ref->start; |
762
|
6
|
|
|
|
|
15
|
my $end = $ref->end; |
763
|
6
|
50
|
33
|
|
|
21
|
if ($start and $end) { |
|
|
0
|
0
|
|
|
|
|
764
|
6
|
50
|
|
|
|
16
|
$self->_print( "RP $start-$end\n") || return; |
765
|
|
|
|
|
|
|
} elsif ($start or $end) { |
766
|
0
|
|
|
|
|
0
|
$self->throw("Both start and end are needed for a valid RP line.". |
767
|
|
|
|
|
|
|
" Got: start='$start' end='$end'"); |
768
|
|
|
|
|
|
|
} |
769
|
|
|
|
|
|
|
|
770
|
6
|
50
|
|
|
|
11
|
if (my $med = $ref->medline) { |
771
|
0
|
0
|
|
|
|
0
|
$self->_print( "RX MEDLINE; $med.\n") || return; |
772
|
|
|
|
|
|
|
} |
773
|
6
|
50
|
|
|
|
9
|
if (my $pm = $ref->pubmed) { |
774
|
0
|
0
|
|
|
|
0
|
$self->_print( "RX PUBMED; $pm.\n") || return; |
775
|
|
|
|
|
|
|
} |
776
|
6
|
|
|
|
|
19
|
my $authors = $ref->authors; |
777
|
6
|
|
|
|
|
59
|
$authors =~ s/([\w\.]) (\w)/$1#$2/g; # add word wrap protection char '#' |
778
|
|
|
|
|
|
|
|
779
|
6
|
50
|
|
|
|
16
|
$self->_write_line_EMBL_regex("RA ", "RA ", |
780
|
|
|
|
|
|
|
$authors . ";", |
781
|
|
|
|
|
|
|
'\s+|$', 80) || return; #' |
782
|
|
|
|
|
|
|
|
783
|
|
|
|
|
|
|
# If there is no title to the reference, it appears |
784
|
|
|
|
|
|
|
# as a single semi-colon. All titles must end in |
785
|
|
|
|
|
|
|
# a semi-colon. |
786
|
6
|
|
100
|
|
|
13
|
my $ref_title = $ref->title || ''; |
787
|
6
|
|
|
|
|
37
|
$ref_title =~ s/[\s;]*$/;/; |
788
|
6
|
50
|
|
|
|
12
|
$self->_write_line_EMBL_regex("RT ", "RT ", $ref_title, '\s+|$', 80) || return; #' |
789
|
6
|
50
|
|
|
|
12
|
$self->_write_line_EMBL_regex("RL ", "RL ", $ref->location, '\s+|$', 80) || return; #' |
790
|
6
|
50
|
|
|
|
9
|
$self->_print("XX\n") || return; |
791
|
6
|
|
|
|
|
10
|
$t++; |
792
|
|
|
|
|
|
|
} |
793
|
|
|
|
|
|
|
|
794
|
|
|
|
|
|
|
# DB Xref lines |
795
|
9
|
100
|
|
|
|
23
|
if (my @db_xref = $seq->annotation->get_Annotations('dblink') ) { |
796
|
2
|
|
|
|
|
2
|
for my $dr (@db_xref) { |
797
|
43
|
|
|
|
|
59
|
my $db_name = $dr->database; |
798
|
43
|
|
|
|
|
49
|
my $prim = $dr->primary_id; |
799
|
|
|
|
|
|
|
|
800
|
43
|
|
50
|
|
|
47
|
my $opt = $dr->optional_id || ''; |
801
|
43
|
50
|
|
|
|
69
|
my $line = $opt ? "$db_name; $prim; $opt." : "$db_name; $prim."; |
802
|
43
|
50
|
|
|
|
47
|
$self->_write_line_EMBL_regex("DR ", "DR ", $line, '\s+|$', 80) || return; #' |
803
|
|
|
|
|
|
|
} |
804
|
2
|
50
|
|
|
|
4
|
$self->_print("XX\n") || return; |
805
|
|
|
|
|
|
|
} |
806
|
|
|
|
|
|
|
|
807
|
|
|
|
|
|
|
# Comment lines |
808
|
9
|
|
|
|
|
20
|
foreach my $comment ( $seq->annotation->get_Annotations('comment') ) { |
809
|
4
|
50
|
|
|
|
10
|
$self->_write_line_EMBL_regex("CC ", "CC ", $comment->text, '\s+|$', 80) || return; #' |
810
|
4
|
50
|
|
|
|
8
|
$self->_print("XX\n") || return; |
811
|
|
|
|
|
|
|
} |
812
|
|
|
|
|
|
|
} |
813
|
|
|
|
|
|
|
# "\\s\+\|\$" |
814
|
|
|
|
|
|
|
|
815
|
|
|
|
|
|
|
## FEATURE TABLE |
816
|
|
|
|
|
|
|
|
817
|
11
|
50
|
|
|
|
26
|
$self->_print("FH Key Location/Qualifiers\n") || return; |
818
|
11
|
50
|
|
|
|
23
|
$self->_print("FH\n") || return; |
819
|
|
|
|
|
|
|
|
820
|
11
|
100
|
|
|
|
65
|
my @feats = $seq->can('top_SeqFeatures') ? $seq->top_SeqFeatures : (); |
821
|
11
|
100
|
|
|
|
24
|
if ($feats[0]) { |
822
|
6
|
50
|
|
|
|
14
|
if ( defined $self->_post_sort ) { |
823
|
|
|
|
|
|
|
# we need to read things into an array. |
824
|
|
|
|
|
|
|
# Process. Sort them. Print 'em |
825
|
|
|
|
|
|
|
|
826
|
0
|
|
|
|
|
0
|
my $post_sort_func = $self->_post_sort(); |
827
|
0
|
|
|
|
|
0
|
my @fth; |
828
|
|
|
|
|
|
|
|
829
|
0
|
|
|
|
|
0
|
foreach my $sf ( @feats ) { |
830
|
0
|
|
|
|
|
0
|
push(@fth,Bio::SeqIO::FTHelper::from_SeqFeature($sf,$seq)); |
831
|
|
|
|
|
|
|
} |
832
|
|
|
|
|
|
|
|
833
|
0
|
|
|
|
|
0
|
@fth = sort { &$post_sort_func($a,$b) } @fth; |
|
0
|
|
|
|
|
0
|
|
834
|
|
|
|
|
|
|
|
835
|
0
|
|
|
|
|
0
|
foreach my $fth ( @fth ) { |
836
|
0
|
0
|
|
|
|
0
|
$self->_print_EMBL_FTHelper($fth) || return; |
837
|
|
|
|
|
|
|
} |
838
|
|
|
|
|
|
|
} else { |
839
|
|
|
|
|
|
|
# not post sorted. And so we can print as we get them. |
840
|
|
|
|
|
|
|
# lower memory load... |
841
|
|
|
|
|
|
|
|
842
|
6
|
|
|
|
|
10
|
foreach my $sf ( @feats ) { |
843
|
38
|
|
|
|
|
65
|
my @fth = Bio::SeqIO::FTHelper::from_SeqFeature($sf,$seq); |
844
|
38
|
|
|
|
|
34
|
foreach my $fth ( @fth ) { |
845
|
38
|
50
|
|
|
|
52
|
if ( $fth->key eq 'CONTIG') { |
846
|
0
|
|
|
|
|
0
|
$self->_show_dna(0); |
847
|
|
|
|
|
|
|
} |
848
|
38
|
50
|
|
|
|
66
|
$self->_print_EMBL_FTHelper($fth) || return; |
849
|
|
|
|
|
|
|
} |
850
|
|
|
|
|
|
|
} |
851
|
|
|
|
|
|
|
} |
852
|
|
|
|
|
|
|
} |
853
|
|
|
|
|
|
|
|
854
|
11
|
50
|
|
|
|
25
|
if ( $self->_show_dna() == 0 ) { |
855
|
0
|
0
|
|
|
|
0
|
$self->_print( "//\n") || return; |
856
|
0
|
|
|
|
|
0
|
return; |
857
|
|
|
|
|
|
|
} |
858
|
11
|
50
|
|
|
|
24
|
$self->_print( "XX\n") || return; |
859
|
|
|
|
|
|
|
|
860
|
|
|
|
|
|
|
# finished printing features. |
861
|
|
|
|
|
|
|
|
862
|
|
|
|
|
|
|
# print contig if present : bug#2982 |
863
|
11
|
100
|
66
|
|
|
62
|
if ( $seq->can('annotation') && defined $seq->annotation) { |
864
|
9
|
|
|
|
|
19
|
foreach my $ctg ( $seq->annotation->get_Annotations('contig') ) { |
865
|
0
|
0
|
|
|
|
0
|
if ($ctg->value) { |
866
|
0
|
0
|
|
|
|
0
|
$self->_write_line_EMBL_regex("CO ","CO ", $ctg->value, |
867
|
|
|
|
|
|
|
'[,]|$', 80) || return; |
868
|
|
|
|
|
|
|
} |
869
|
|
|
|
|
|
|
} |
870
|
|
|
|
|
|
|
} |
871
|
|
|
|
|
|
|
# print sequence lines only if sequence is present! bug#2982 |
872
|
11
|
100
|
|
|
|
26
|
if (length($str)) { |
873
|
8
|
|
|
|
|
24
|
$str =~ tr/A-Z/a-z/; |
874
|
|
|
|
|
|
|
|
875
|
|
|
|
|
|
|
# Count each nucleotide |
876
|
8
|
|
|
|
|
19
|
my $alen = $str =~ tr/a/a/; |
877
|
8
|
|
|
|
|
15
|
my $clen = $str =~ tr/c/c/; |
878
|
8
|
|
|
|
|
15
|
my $glen = $str =~ tr/g/g/; |
879
|
8
|
|
|
|
|
16
|
my $tlen = $str =~ tr/t/t/; |
880
|
|
|
|
|
|
|
|
881
|
8
|
|
|
|
|
15
|
my $len = $seq->length(); |
882
|
8
|
|
|
|
|
13
|
my $olen = $seq->length() - ($alen + $tlen + $clen + $glen); |
883
|
8
|
50
|
|
|
|
19
|
if ( $olen < 0 ) { |
884
|
0
|
|
|
|
|
0
|
$self->warn("Weird. More atgc than bases. Problem!"); |
885
|
|
|
|
|
|
|
} |
886
|
|
|
|
|
|
|
|
887
|
8
|
50
|
|
|
|
37
|
$self->_print("SQ Sequence $len BP; $alen A; $clen C; $glen G; $tlen T; $olen other;\n") || return; |
888
|
|
|
|
|
|
|
|
889
|
8
|
|
|
|
|
12
|
my $nuc = 60; # Number of nucleotides per line |
890
|
8
|
|
|
|
|
14
|
my $whole_pat = 'a10' x 6; # Pattern for unpacking a whole line |
891
|
8
|
|
|
|
|
9
|
my $out_pat = 'A11' x 6; # Pattern for packing a line |
892
|
8
|
|
|
|
|
7
|
my $length = length($str); |
893
|
|
|
|
|
|
|
|
894
|
|
|
|
|
|
|
# Calculate the number of nucleotides which fit on whole lines |
895
|
8
|
|
|
|
|
25
|
my $whole = int($length / $nuc) * $nuc; |
896
|
|
|
|
|
|
|
|
897
|
|
|
|
|
|
|
# Print the whole lines |
898
|
8
|
|
|
|
|
6
|
my( $i ); |
899
|
8
|
|
|
|
|
19
|
for ($i = 0; $i < $whole; $i += $nuc) { |
900
|
162
|
|
|
|
|
366
|
my $blocks = pack $out_pat, |
901
|
|
|
|
|
|
|
unpack $whole_pat, |
902
|
|
|
|
|
|
|
substr($str, $i, $nuc); |
903
|
162
|
50
|
|
|
|
347
|
$self->_print(sprintf(" $blocks%9d\n", $i + $nuc)) || return; |
904
|
|
|
|
|
|
|
} |
905
|
|
|
|
|
|
|
|
906
|
|
|
|
|
|
|
# Print the last line |
907
|
8
|
50
|
|
|
|
26
|
if (my $last = substr($str, $i)) { |
908
|
8
|
|
|
|
|
8
|
my $last_len = length($last); |
909
|
8
|
|
|
|
|
31
|
my $last_pat = 'a10' x int($last_len / 10) .'a'. $last_len % 10; |
910
|
8
|
|
|
|
|
62
|
my $blocks = pack $out_pat, |
911
|
|
|
|
|
|
|
unpack($last_pat, $last); |
912
|
8
|
50
|
|
|
|
41
|
$self->_print(sprintf(" $blocks%9d\n", $length)) || |
913
|
|
|
|
|
|
|
return; # Add the length to the end |
914
|
|
|
|
|
|
|
} |
915
|
|
|
|
|
|
|
} |
916
|
|
|
|
|
|
|
|
917
|
11
|
50
|
|
|
|
27
|
$self->_print( "//\n") || return; |
918
|
|
|
|
|
|
|
|
919
|
11
|
50
|
33
|
|
|
24
|
$self->flush if $self->_flush_on_write && defined $self->_fh; |
920
|
|
|
|
|
|
|
} |
921
|
11
|
|
|
|
|
41
|
return 1; |
922
|
|
|
|
|
|
|
} |
923
|
|
|
|
|
|
|
|
924
|
|
|
|
|
|
|
=head2 _print_EMBL_FTHelper |
925
|
|
|
|
|
|
|
|
926
|
|
|
|
|
|
|
Title : _print_EMBL_FTHelper |
927
|
|
|
|
|
|
|
Usage : |
928
|
|
|
|
|
|
|
Function: Internal function |
929
|
|
|
|
|
|
|
Returns : 1 if writing suceeded, otherwise undef |
930
|
|
|
|
|
|
|
Args : |
931
|
|
|
|
|
|
|
|
932
|
|
|
|
|
|
|
|
933
|
|
|
|
|
|
|
=cut |
934
|
|
|
|
|
|
|
|
935
|
|
|
|
|
|
|
sub _print_EMBL_FTHelper { |
936
|
38
|
|
|
38
|
|
34
|
my ($self,$fth) = @_; |
937
|
|
|
|
|
|
|
|
938
|
38
|
50
|
33
|
|
|
119
|
if ( ! ref $fth || ! $fth->isa('Bio::SeqIO::FTHelper') ) { |
939
|
0
|
|
|
|
|
0
|
$fth->warn("$fth is not a FTHelper class. Attempting to print, but there could be tears!"); |
940
|
|
|
|
|
|
|
} |
941
|
|
|
|
|
|
|
|
942
|
|
|
|
|
|
|
|
943
|
|
|
|
|
|
|
#$self->_print( "FH Key Location/Qualifiers\n"); |
944
|
|
|
|
|
|
|
#$self->_print( sprintf("FT %-15s %s\n",$fth->key,$fth->loc)); |
945
|
|
|
|
|
|
|
# let |
946
|
38
|
50
|
|
|
|
48
|
if ( $fth->key eq 'CONTIG' ) { |
947
|
0
|
0
|
|
|
|
0
|
$self->_print("XX\n") || return; |
948
|
0
|
0
|
|
|
|
0
|
$self->_write_line_EMBL_regex("CO ", |
949
|
|
|
|
|
|
|
"CO ",$fth->loc, |
950
|
|
|
|
|
|
|
'\,|$',80) || return; #' |
951
|
0
|
|
|
|
|
0
|
return 1; |
952
|
|
|
|
|
|
|
} |
953
|
38
|
50
|
|
|
|
56
|
$self->_write_line_EMBL_regex(sprintf("FT %-15s ",$fth->key), |
954
|
|
|
|
|
|
|
"FT ",$fth->loc, |
955
|
|
|
|
|
|
|
'\,|$',80) || return; #' |
956
|
38
|
|
|
|
|
37
|
foreach my $tag ( keys %{$fth->field} ) { |
|
38
|
|
|
|
|
64
|
|
957
|
108
|
50
|
|
|
|
147
|
if ( ! defined $fth->field->{$tag} ) { |
958
|
0
|
|
|
|
|
0
|
next; |
959
|
|
|
|
|
|
|
} |
960
|
108
|
|
|
|
|
91
|
foreach my $value ( @{$fth->field->{$tag}} ) { |
|
108
|
|
|
|
|
121
|
|
961
|
139
|
|
|
|
|
141
|
$value =~ s/\"/\"\"/g; |
962
|
139
|
50
|
66
|
|
|
346
|
if ($value eq "_no_value") { |
|
|
50
|
|
|
|
|
|
963
|
0
|
0
|
|
|
|
0
|
$self->_write_line_EMBL_regex("FT ", |
964
|
|
|
|
|
|
|
"FT ", |
965
|
|
|
|
|
|
|
"/$tag",'.|$',80) || return; #' |
966
|
|
|
|
|
|
|
} |
967
|
|
|
|
|
|
|
# there are almost 3x more quoted qualifier values and they |
968
|
|
|
|
|
|
|
# are more common too so we take quoted ones first |
969
|
|
|
|
|
|
|
# |
970
|
|
|
|
|
|
|
# Long qualifiers, that will be line wrapped, are always quoted |
971
|
|
|
|
|
|
|
elsif (!$FTQUAL_NO_QUOTE{$tag} or length("/$tag=$value")>=60) { |
972
|
139
|
100
|
|
|
|
264
|
my $pat = $value =~ /\s+/ ? '\s+|\-|$' : '.|\-|$'; |
973
|
139
|
50
|
|
|
|
278
|
$self->_write_line_EMBL_regex("FT ", |
974
|
|
|
|
|
|
|
"FT ", |
975
|
|
|
|
|
|
|
"/$tag=\"$value\"",$pat,80) || return; |
976
|
|
|
|
|
|
|
} else { |
977
|
0
|
0
|
|
|
|
0
|
$self->_write_line_EMBL_regex("FT ", |
978
|
|
|
|
|
|
|
"FT ", |
979
|
|
|
|
|
|
|
"/$tag=$value",'.|$',80) || return; #' |
980
|
|
|
|
|
|
|
} |
981
|
|
|
|
|
|
|
} |
982
|
|
|
|
|
|
|
} |
983
|
|
|
|
|
|
|
|
984
|
38
|
|
|
|
|
136
|
return 1; |
985
|
|
|
|
|
|
|
} |
986
|
|
|
|
|
|
|
|
987
|
|
|
|
|
|
|
|
988
|
|
|
|
|
|
|
|
989
|
|
|
|
|
|
|
=head2 _read_EMBL_Contig() |
990
|
|
|
|
|
|
|
|
991
|
|
|
|
|
|
|
Title : _read_EMBL_Contig |
992
|
|
|
|
|
|
|
Usage : |
993
|
|
|
|
|
|
|
Function: convert CO lines into annotations |
994
|
|
|
|
|
|
|
Returns : |
995
|
|
|
|
|
|
|
Args : |
996
|
|
|
|
|
|
|
|
997
|
|
|
|
|
|
|
=cut |
998
|
|
|
|
|
|
|
|
999
|
|
|
|
|
|
|
sub _read_EMBL_Contig { |
1000
|
4
|
|
|
4
|
|
5
|
my ($self, $buffer) = @_; |
1001
|
4
|
|
|
|
|
7
|
my @ret; |
1002
|
4
|
50
|
|
|
|
11
|
if ( $$buffer !~ /^CO/ ) { |
1003
|
0
|
|
|
|
|
0
|
warn("Not parsing line '$$buffer' which maybe important"); |
1004
|
|
|
|
|
|
|
} |
1005
|
4
|
|
|
|
|
11
|
$self->_pushback($$buffer); |
1006
|
4
|
|
|
|
|
6
|
while ( defined ($_ = $self->_readline) ) { |
1007
|
356
|
100
|
|
|
|
752
|
/^C/ || last; |
1008
|
353
|
50
|
|
|
|
775
|
/^CO\s+(.*)/ && do { |
1009
|
353
|
|
|
|
|
767
|
push @ret, Bio::Annotation::SimpleValue->new( -tagname => 'contig', |
1010
|
|
|
|
|
|
|
-value => $1); |
1011
|
|
|
|
|
|
|
}; |
1012
|
|
|
|
|
|
|
} |
1013
|
4
|
|
|
|
|
7
|
$$buffer = $_; |
1014
|
4
|
|
|
|
|
48
|
return @ret; |
1015
|
|
|
|
|
|
|
|
1016
|
|
|
|
|
|
|
} |
1017
|
|
|
|
|
|
|
#' |
1018
|
|
|
|
|
|
|
=head2 _read_EMBL_References |
1019
|
|
|
|
|
|
|
|
1020
|
|
|
|
|
|
|
Title : _read_EMBL_References |
1021
|
|
|
|
|
|
|
Usage : |
1022
|
|
|
|
|
|
|
Function: Reads references from EMBL format. Internal function really |
1023
|
|
|
|
|
|
|
Example : |
1024
|
|
|
|
|
|
|
Returns : |
1025
|
|
|
|
|
|
|
Args : |
1026
|
|
|
|
|
|
|
|
1027
|
|
|
|
|
|
|
|
1028
|
|
|
|
|
|
|
=cut |
1029
|
|
|
|
|
|
|
|
1030
|
|
|
|
|
|
|
sub _read_EMBL_References { |
1031
|
99
|
|
|
99
|
|
90
|
my ($self,$buffer) = @_; |
1032
|
99
|
|
|
|
|
74
|
my (@refs); |
1033
|
|
|
|
|
|
|
|
1034
|
|
|
|
|
|
|
# assume things are starting with RN |
1035
|
|
|
|
|
|
|
|
1036
|
99
|
50
|
|
|
|
198
|
if ( $$buffer !~ /^RN/ ) { |
1037
|
0
|
|
|
|
|
0
|
warn("Not parsing line '$$buffer' which maybe important"); |
1038
|
|
|
|
|
|
|
} |
1039
|
99
|
|
|
|
|
76
|
my $b1; |
1040
|
|
|
|
|
|
|
my $b2; |
1041
|
0
|
|
|
|
|
0
|
my $title; |
1042
|
0
|
|
|
|
|
0
|
my $loc; |
1043
|
0
|
|
|
|
|
0
|
my $au; |
1044
|
0
|
|
|
|
|
0
|
my $med; |
1045
|
0
|
|
|
|
|
0
|
my $pm; |
1046
|
0
|
|
|
|
|
0
|
my $com; |
1047
|
|
|
|
|
|
|
|
1048
|
99
|
|
|
|
|
160
|
while ( defined ($_ = $self->_readline) ) { |
1049
|
727
|
100
|
|
|
|
1230
|
/^R/ || last; |
1050
|
628
|
100
|
|
|
|
806
|
/^RP (\d+)-(\d+)/ && do {$b1=$1;$b2=$2;}; |
|
40
|
|
|
|
|
83
|
|
|
40
|
|
|
|
|
54
|
|
1051
|
628
|
100
|
|
|
|
797
|
/^RX MEDLINE;\s+(\d+)/ && do {$med=$1}; |
|
55
|
|
|
|
|
81
|
|
1052
|
628
|
100
|
|
|
|
730
|
/^RX PUBMED;\s+(\d+)/ && do {$pm=$1}; |
|
10
|
|
|
|
|
15
|
|
1053
|
628
|
100
|
|
|
|
832
|
/^RA (.*)/ && do { |
1054
|
193
|
|
|
|
|
341
|
$au = $self->_concatenate_lines($au,$1); next; |
|
193
|
|
|
|
|
297
|
|
1055
|
|
|
|
|
|
|
}; |
1056
|
435
|
100
|
|
|
|
615
|
/^RT (.*)/ && do { |
1057
|
164
|
|
|
|
|
213
|
$title = $self->_concatenate_lines($title,$1); next; |
|
164
|
|
|
|
|
220
|
|
1058
|
|
|
|
|
|
|
}; |
1059
|
271
|
100
|
|
|
|
425
|
/^RL (.*)/ && do { |
1060
|
156
|
|
|
|
|
224
|
$loc = $self->_concatenate_lines($loc,$1); next; |
|
156
|
|
|
|
|
238
|
|
1061
|
|
|
|
|
|
|
}; |
1062
|
115
|
100
|
|
|
|
227
|
/^RC (.*)/ && do { |
1063
|
3
|
|
|
|
|
10
|
$com = $self->_concatenate_lines($com,$1); next; |
|
3
|
|
|
|
|
4
|
|
1064
|
|
|
|
|
|
|
}; |
1065
|
|
|
|
|
|
|
} |
1066
|
|
|
|
|
|
|
|
1067
|
99
|
|
|
|
|
302
|
my $ref = Bio::Annotation::Reference->new(); |
1068
|
99
|
|
|
|
|
317
|
$au =~ s/;\s*$//g; |
1069
|
99
|
|
|
|
|
198
|
$title =~ s/;\s*$//g; |
1070
|
|
|
|
|
|
|
|
1071
|
99
|
|
|
|
|
188
|
$ref->start($b1); |
1072
|
99
|
|
|
|
|
166
|
$ref->end($b2); |
1073
|
99
|
|
|
|
|
135
|
$ref->authors($au); |
1074
|
99
|
|
|
|
|
144
|
$ref->title($title); |
1075
|
99
|
|
|
|
|
124
|
$ref->location($loc); |
1076
|
99
|
|
|
|
|
136
|
$ref->medline($med); |
1077
|
99
|
|
|
|
|
145
|
$ref->comment($com); |
1078
|
99
|
|
|
|
|
130
|
$ref->pubmed($pm); |
1079
|
|
|
|
|
|
|
|
1080
|
99
|
|
|
|
|
91
|
push(@refs,$ref); |
1081
|
99
|
|
|
|
|
101
|
$$buffer = $_; |
1082
|
|
|
|
|
|
|
|
1083
|
99
|
|
|
|
|
194
|
return @refs; |
1084
|
|
|
|
|
|
|
} |
1085
|
|
|
|
|
|
|
|
1086
|
|
|
|
|
|
|
=head2 _read_EMBL_Species |
1087
|
|
|
|
|
|
|
|
1088
|
|
|
|
|
|
|
Title : _read_EMBL_Species |
1089
|
|
|
|
|
|
|
Usage : |
1090
|
|
|
|
|
|
|
Function: Reads the EMBL Organism species and classification |
1091
|
|
|
|
|
|
|
lines. |
1092
|
|
|
|
|
|
|
Example : |
1093
|
|
|
|
|
|
|
Returns : A Bio::Species object |
1094
|
|
|
|
|
|
|
Args : a reference to the current line buffer, accession number |
1095
|
|
|
|
|
|
|
|
1096
|
|
|
|
|
|
|
=cut |
1097
|
|
|
|
|
|
|
|
1098
|
|
|
|
|
|
|
sub _read_EMBL_Species { |
1099
|
21
|
|
|
21
|
|
30
|
my( $self, $buffer, $acc ) = @_; |
1100
|
21
|
|
|
|
|
20
|
my $org; |
1101
|
|
|
|
|
|
|
|
1102
|
21
|
|
|
|
|
26
|
$_ = $$buffer; |
1103
|
21
|
|
|
|
|
26
|
my( $sub_species, $species, $genus, $common, $sci_name, $class_lines ); |
1104
|
21
|
|
66
|
|
|
77
|
while (defined( $_ ||= $self->_readline )) { |
1105
|
92
|
100
|
|
|
|
349
|
if (/^OS\s+(.+)/) { |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
1106
|
21
|
50
|
|
|
|
70
|
$sci_name .= ($sci_name) ? ' '.$1 : $1; |
1107
|
|
|
|
|
|
|
} elsif (s/^OC\s+(.+)$//) { |
1108
|
50
|
|
|
|
|
102
|
$class_lines .= $1; |
1109
|
|
|
|
|
|
|
} elsif (/^OG\s+(.*)/) { |
1110
|
0
|
|
|
|
|
0
|
$org = $1; |
1111
|
|
|
|
|
|
|
} else { |
1112
|
21
|
|
|
|
|
38
|
last; |
1113
|
|
|
|
|
|
|
} |
1114
|
|
|
|
|
|
|
|
1115
|
71
|
|
|
|
|
191
|
$_ = undef; # Empty $_ to trigger read of next line |
1116
|
|
|
|
|
|
|
} |
1117
|
|
|
|
|
|
|
|
1118
|
|
|
|
|
|
|
# $$buffer = $_; |
1119
|
21
|
|
|
|
|
69
|
$self->_pushback($_); |
1120
|
|
|
|
|
|
|
|
1121
|
21
|
|
|
|
|
32
|
$sci_name =~ s{\.$}{}; |
1122
|
21
|
50
|
|
|
|
39
|
$sci_name || return; |
1123
|
|
|
|
|
|
|
|
1124
|
|
|
|
|
|
|
# Convert data in classification lines into classification array. |
1125
|
|
|
|
|
|
|
# only split on ';' or '.' so that classification that is 2 or more words |
1126
|
|
|
|
|
|
|
# will still get matched, use map() to remove trailing/leading/intervening |
1127
|
|
|
|
|
|
|
# spaces |
1128
|
21
|
|
|
|
|
161
|
my @class = map { s/^\s+//; s/\s+$//; s/\s{2,}/ /g; $_; } split /(?
|
|
224
|
|
|
|
|
258
|
|
|
224
|
|
|
|
|
194
|
|
|
224
|
|
|
|
|
164
|
|
|
224
|
|
|
|
|
213
|
|
1129
|
|
|
|
|
|
|
|
1130
|
|
|
|
|
|
|
# do we have a genus? |
1131
|
21
|
|
|
|
|
42
|
my $possible_genus = $class[-1]; |
1132
|
21
|
50
|
|
|
|
61
|
$possible_genus .= "|$class[-2]" if $class[-2]; |
1133
|
21
|
50
|
|
|
|
511
|
if ($sci_name =~ /^($possible_genus)/) { |
1134
|
21
|
|
|
|
|
42
|
$genus = $1; |
1135
|
21
|
|
|
|
|
468
|
($species) = $sci_name =~ /^$genus\s+(.+)/; |
1136
|
|
|
|
|
|
|
} else { |
1137
|
0
|
|
|
|
|
0
|
$species = $sci_name; |
1138
|
|
|
|
|
|
|
} |
1139
|
|
|
|
|
|
|
|
1140
|
|
|
|
|
|
|
# Don't make a species object if it is "Unknown" or "None" |
1141
|
21
|
50
|
|
|
|
62
|
if ($genus) { |
1142
|
21
|
50
|
|
|
|
75
|
return if $genus =~ /^(Unknown|None)$/i; |
1143
|
|
|
|
|
|
|
} |
1144
|
|
|
|
|
|
|
|
1145
|
|
|
|
|
|
|
# is this organism of rank species or is it lower? |
1146
|
|
|
|
|
|
|
# (doesn't catch everything, but at least the guess isn't dangerous) |
1147
|
21
|
50
|
|
|
|
85
|
if ($species =~ /subsp\.|var\./) { |
1148
|
0
|
|
|
|
|
0
|
($species, $sub_species) = $species =~ /(.+)\s+((?:subsp\.|var\.).+)/; |
1149
|
|
|
|
|
|
|
} |
1150
|
|
|
|
|
|
|
|
1151
|
|
|
|
|
|
|
# sometimes things have common name in brackets, like |
1152
|
|
|
|
|
|
|
# Schizosaccharomyces pombe (fission yeast), so get rid of the common |
1153
|
|
|
|
|
|
|
# name bit. Probably dangerous if real scientific species name ends in |
1154
|
|
|
|
|
|
|
# bracketed bit. |
1155
|
21
|
50
|
|
|
|
54
|
unless ($class[-1] eq 'Viruses') { |
1156
|
21
|
|
|
|
|
70
|
($species, $common) = $species =~ /^(.+)\s+\((.+)\)$/; |
1157
|
21
|
100
|
|
|
|
69
|
$sci_name =~ s/\s+\(.+\)$// if $common; |
1158
|
|
|
|
|
|
|
} |
1159
|
|
|
|
|
|
|
|
1160
|
|
|
|
|
|
|
# Bio::Species array needs array in Species -> Kingdom direction |
1161
|
21
|
50
|
|
|
|
54
|
unless ($class[-1] eq $sci_name) { |
1162
|
21
|
|
|
|
|
30
|
push(@class, $sci_name); |
1163
|
|
|
|
|
|
|
} |
1164
|
21
|
|
|
|
|
27
|
@class = reverse @class; |
1165
|
|
|
|
|
|
|
|
1166
|
|
|
|
|
|
|
# do minimal sanity checks before we hand off to Bio::Species which won't |
1167
|
|
|
|
|
|
|
# be able to give informative throw messages if it has to throw because |
1168
|
|
|
|
|
|
|
# of problems here |
1169
|
21
|
50
|
|
|
|
40
|
$self->throw("$acc seems to be missing its OS line: invalid.") unless $sci_name; |
1170
|
21
|
|
|
|
|
24
|
my %names; |
1171
|
21
|
|
|
|
|
68
|
foreach my $i (0..$#class) { |
1172
|
245
|
|
|
|
|
166
|
my $name = $class[$i]; |
1173
|
245
|
|
|
|
|
272
|
$names{$name}++; |
1174
|
|
|
|
|
|
|
# this code breaks examples like: Xenopus (Silurana) tropicalis |
1175
|
|
|
|
|
|
|
# commenting out, see bug 3158 |
1176
|
|
|
|
|
|
|
|
1177
|
|
|
|
|
|
|
#if ($names{$name} > 1 && ($name ne $class[$i - 1])) { |
1178
|
|
|
|
|
|
|
# $self->warn("$acc seems to have an invalid species classification:$name ne $class[$i - 1]"); |
1179
|
|
|
|
|
|
|
#} |
1180
|
|
|
|
|
|
|
} |
1181
|
21
|
|
|
|
|
129
|
my $make = Bio::Species->new(); |
1182
|
21
|
|
|
|
|
53
|
$make->scientific_name($sci_name); |
1183
|
21
|
|
|
|
|
60
|
$make->classification(@class); |
1184
|
21
|
50
|
|
|
|
56
|
unless ($class[-1] eq 'Viruses') { |
1185
|
21
|
50
|
|
|
|
96
|
$make->genus($genus) if $genus; |
1186
|
21
|
100
|
|
|
|
63
|
$make->species($species) if $species; |
1187
|
21
|
50
|
|
|
|
42
|
$make->sub_species($sub_species) if $sub_species; |
1188
|
21
|
100
|
|
|
|
61
|
$make->common_name($common) if $common; |
1189
|
|
|
|
|
|
|
} |
1190
|
21
|
50
|
|
|
|
38
|
$make->organelle($org) if $org; |
1191
|
21
|
|
|
|
|
103
|
return $make; |
1192
|
|
|
|
|
|
|
} |
1193
|
|
|
|
|
|
|
|
1194
|
|
|
|
|
|
|
=head2 _read_EMBL_DBLink |
1195
|
|
|
|
|
|
|
|
1196
|
|
|
|
|
|
|
Title : _read_EMBL_DBLink |
1197
|
|
|
|
|
|
|
Usage : |
1198
|
|
|
|
|
|
|
Function: Reads the EMBL database cross reference ("DR") lines |
1199
|
|
|
|
|
|
|
Example : |
1200
|
|
|
|
|
|
|
Returns : A list of Bio::Annotation::DBLink objects |
1201
|
|
|
|
|
|
|
Args : |
1202
|
|
|
|
|
|
|
|
1203
|
|
|
|
|
|
|
=cut |
1204
|
|
|
|
|
|
|
|
1205
|
|
|
|
|
|
|
sub _read_EMBL_DBLink { |
1206
|
5
|
|
|
5
|
|
6
|
my( $self,$buffer ) = @_; |
1207
|
5
|
|
|
|
|
5
|
my( @db_link ); |
1208
|
|
|
|
|
|
|
|
1209
|
5
|
|
|
|
|
5
|
$_ = $$buffer; |
1210
|
5
|
|
66
|
|
|
21
|
while (defined( $_ ||= $self->_readline )) { |
1211
|
30
|
100
|
|
|
|
118
|
if ( /^DR ([^\s;]+);\s*([^\s;]+);?\s*([^\s;]+)?\.$/) { |
1212
|
25
|
|
|
|
|
54
|
my ($databse, $prim_id, $sec_id) = ($1,$2,$3); |
1213
|
25
|
|
|
|
|
82
|
my $link = Bio::Annotation::DBLink->new(-database => $databse, |
1214
|
|
|
|
|
|
|
-primary_id => $prim_id, |
1215
|
|
|
|
|
|
|
-optional_id => $sec_id); |
1216
|
|
|
|
|
|
|
|
1217
|
25
|
|
|
|
|
41
|
push(@db_link, $link); |
1218
|
|
|
|
|
|
|
} else { |
1219
|
5
|
|
|
|
|
7
|
last; |
1220
|
|
|
|
|
|
|
} |
1221
|
25
|
|
|
|
|
67
|
$_ = undef; # Empty $_ to trigger read of next line |
1222
|
|
|
|
|
|
|
} |
1223
|
|
|
|
|
|
|
|
1224
|
5
|
|
|
|
|
8
|
$$buffer = $_; |
1225
|
5
|
|
|
|
|
12
|
return @db_link; |
1226
|
|
|
|
|
|
|
} |
1227
|
|
|
|
|
|
|
|
1228
|
|
|
|
|
|
|
=head2 _read_EMBL_TaxID_DBLink |
1229
|
|
|
|
|
|
|
|
1230
|
|
|
|
|
|
|
Title : _read_EMBL_TaxID_DBLink |
1231
|
|
|
|
|
|
|
Usage : |
1232
|
|
|
|
|
|
|
Function: Reads the EMBL database cross reference to NCBI TaxID ("OX") lines |
1233
|
|
|
|
|
|
|
Example : |
1234
|
|
|
|
|
|
|
Returns : A list of Bio::Annotation::DBLink objects |
1235
|
|
|
|
|
|
|
Args : |
1236
|
|
|
|
|
|
|
|
1237
|
|
|
|
|
|
|
=cut |
1238
|
|
|
|
|
|
|
|
1239
|
|
|
|
|
|
|
sub _read_EMBL_TaxID_DBLink { |
1240
|
3
|
|
|
3
|
|
5
|
my( $self,$buffer ) = @_; |
1241
|
3
|
|
|
|
|
3
|
my( @db_link ); |
1242
|
|
|
|
|
|
|
|
1243
|
3
|
|
|
|
|
5
|
$_ = $$buffer; |
1244
|
3
|
|
66
|
|
|
13
|
while (defined( $_ ||= $self->_readline )) { |
1245
|
4
|
100
|
|
|
|
19
|
if ( /^OX (\S+)=(\d+);$/ ) { |
1246
|
1
|
|
|
|
|
4
|
my ($databse, $prim_id) = ($1,$2); |
1247
|
1
|
|
|
|
|
11
|
my $link = Bio::Annotation::DBLink->new(-database => $databse, |
1248
|
|
|
|
|
|
|
-primary_id => $prim_id,); |
1249
|
1
|
|
|
|
|
4
|
push(@db_link, $link); |
1250
|
|
|
|
|
|
|
} else { |
1251
|
3
|
|
|
|
|
4
|
last; |
1252
|
|
|
|
|
|
|
} |
1253
|
1
|
|
|
|
|
6
|
$_ = undef; # Empty $_ to trigger read of next line |
1254
|
|
|
|
|
|
|
} |
1255
|
|
|
|
|
|
|
|
1256
|
3
|
|
|
|
|
5
|
$$buffer = $_; |
1257
|
3
|
|
|
|
|
9
|
return @db_link; |
1258
|
|
|
|
|
|
|
} |
1259
|
|
|
|
|
|
|
|
1260
|
|
|
|
|
|
|
=head2 _filehandle |
1261
|
|
|
|
|
|
|
|
1262
|
|
|
|
|
|
|
Title : _filehandle |
1263
|
|
|
|
|
|
|
Usage : $obj->_filehandle($newval) |
1264
|
|
|
|
|
|
|
Function: |
1265
|
|
|
|
|
|
|
Example : |
1266
|
|
|
|
|
|
|
Returns : value of _filehandle |
1267
|
|
|
|
|
|
|
Args : newvalue (optional) |
1268
|
|
|
|
|
|
|
|
1269
|
|
|
|
|
|
|
|
1270
|
|
|
|
|
|
|
=cut |
1271
|
|
|
|
|
|
|
|
1272
|
|
|
|
|
|
|
sub _filehandle{ |
1273
|
0
|
|
|
0
|
|
0
|
my ($obj,$value) = @_; |
1274
|
0
|
0
|
|
|
|
0
|
if ( defined $value) { |
1275
|
0
|
|
|
|
|
0
|
$obj->{'_filehandle'} = $value; |
1276
|
|
|
|
|
|
|
} |
1277
|
0
|
|
|
|
|
0
|
return $obj->{'_filehandle'}; |
1278
|
|
|
|
|
|
|
|
1279
|
|
|
|
|
|
|
} |
1280
|
|
|
|
|
|
|
|
1281
|
|
|
|
|
|
|
=head2 _read_FTHelper_EMBL |
1282
|
|
|
|
|
|
|
|
1283
|
|
|
|
|
|
|
Title : _read_FTHelper_EMBL |
1284
|
|
|
|
|
|
|
Usage : _read_FTHelper_EMBL($buffer) |
1285
|
|
|
|
|
|
|
Function: reads the next FT key line |
1286
|
|
|
|
|
|
|
Example : |
1287
|
|
|
|
|
|
|
Returns : Bio::SeqIO::FTHelper object |
1288
|
|
|
|
|
|
|
Args : filehandle and reference to a scalar |
1289
|
|
|
|
|
|
|
|
1290
|
|
|
|
|
|
|
|
1291
|
|
|
|
|
|
|
=cut |
1292
|
|
|
|
|
|
|
|
1293
|
|
|
|
|
|
|
sub _read_FTHelper_EMBL { |
1294
|
256
|
|
|
256
|
|
216
|
my ($self,$buffer) = @_; |
1295
|
|
|
|
|
|
|
|
1296
|
256
|
|
|
|
|
187
|
my ($key, # The key of the feature |
1297
|
|
|
|
|
|
|
$loc, # The location line from the feature |
1298
|
|
|
|
|
|
|
@qual, # An arrray of lines making up the qualifiers |
1299
|
|
|
|
|
|
|
); |
1300
|
|
|
|
|
|
|
|
1301
|
256
|
50
|
|
|
|
814
|
if ($$buffer =~ /^FT\s{3}(\S+)\s+(\S+)/ ) { |
|
|
0
|
|
|
|
|
|
1302
|
256
|
|
|
|
|
386
|
$key = $1; |
1303
|
256
|
|
|
|
|
274
|
$loc = $2; |
1304
|
|
|
|
|
|
|
# Read all the lines up to the next feature |
1305
|
256
|
|
|
|
|
481
|
while ( defined($_ = $self->_readline) ) { |
1306
|
2038
|
100
|
|
|
|
9036
|
if (/^FT(\s+)(.+?)\s*$/) { |
1307
|
|
|
|
|
|
|
# Lines inside features are preceeded by 19 spaces |
1308
|
|
|
|
|
|
|
# A new feature is preceeded by 3 spaces |
1309
|
2016
|
100
|
|
|
|
2608
|
if (length($1) > 4) { |
1310
|
|
|
|
|
|
|
# Add to qualifiers if we're in the qualifiers |
1311
|
1782
|
100
|
|
|
|
2024
|
if (@qual) { |
|
|
100
|
|
|
|
|
|
1312
|
1523
|
|
|
|
|
2949
|
push(@qual, $2); |
1313
|
|
|
|
|
|
|
} |
1314
|
|
|
|
|
|
|
# Start the qualifier list if it's the first qualifier |
1315
|
|
|
|
|
|
|
elsif (substr($2, 0, 1) eq '/') { |
1316
|
254
|
|
|
|
|
642
|
@qual = ($2); |
1317
|
|
|
|
|
|
|
} |
1318
|
|
|
|
|
|
|
# We're still in the location line, so append to location |
1319
|
|
|
|
|
|
|
else { |
1320
|
5
|
|
|
|
|
18
|
$loc .= $2; |
1321
|
|
|
|
|
|
|
} |
1322
|
|
|
|
|
|
|
} else { |
1323
|
|
|
|
|
|
|
# We've reached the start of the next feature |
1324
|
234
|
|
|
|
|
243
|
last; |
1325
|
|
|
|
|
|
|
} |
1326
|
|
|
|
|
|
|
} else { |
1327
|
|
|
|
|
|
|
# We're at the end of the feature table |
1328
|
22
|
|
|
|
|
29
|
last; |
1329
|
|
|
|
|
|
|
} |
1330
|
|
|
|
|
|
|
} |
1331
|
|
|
|
|
|
|
} elsif ( $$buffer =~ /^CO\s+(\S+)/) { |
1332
|
0
|
|
|
|
|
0
|
$key = 'CONTIG'; |
1333
|
0
|
|
|
|
|
0
|
$loc = $1; |
1334
|
|
|
|
|
|
|
# Read all the lines up to the next feature |
1335
|
0
|
|
|
|
|
0
|
while ( defined($_ = $self->_readline) ) { |
1336
|
0
|
0
|
|
|
|
0
|
if (/^CO\s+(\S+)\s*$/) { |
1337
|
0
|
|
|
|
|
0
|
$loc .= $1; |
1338
|
|
|
|
|
|
|
} else { |
1339
|
|
|
|
|
|
|
# We've reached the start of the next feature |
1340
|
0
|
|
|
|
|
0
|
last; |
1341
|
|
|
|
|
|
|
} |
1342
|
|
|
|
|
|
|
} |
1343
|
|
|
|
|
|
|
} else { |
1344
|
|
|
|
|
|
|
# No feature key |
1345
|
0
|
|
|
|
|
0
|
return; |
1346
|
|
|
|
|
|
|
} |
1347
|
|
|
|
|
|
|
|
1348
|
|
|
|
|
|
|
# Put the first line of the next feature into the buffer |
1349
|
256
|
|
|
|
|
230
|
$$buffer = $_; |
1350
|
|
|
|
|
|
|
|
1351
|
|
|
|
|
|
|
# Make the new FTHelper object |
1352
|
256
|
|
|
|
|
625
|
my $out = Bio::SeqIO::FTHelper->new(); |
1353
|
256
|
|
|
|
|
440
|
$out->verbose($self->verbose()); |
1354
|
256
|
|
|
|
|
393
|
$out->key($key); |
1355
|
256
|
|
|
|
|
381
|
$out->loc($loc); |
1356
|
|
|
|
|
|
|
|
1357
|
|
|
|
|
|
|
# Now parse and add any qualifiers. (@qual is kept |
1358
|
|
|
|
|
|
|
# intact to provide informative error messages.) |
1359
|
256
|
|
|
|
|
181
|
my $last_unquoted_qualifier; |
1360
|
|
|
|
|
|
|
QUAL: |
1361
|
256
|
|
|
|
|
449
|
for (my $i = 0; $i < @qual; $i++) { |
1362
|
1091
|
|
|
|
|
901
|
my $data = $qual[$i]; |
1363
|
1091
|
|
|
|
|
674
|
my ( $qualifier, $value ); |
1364
|
|
|
|
|
|
|
|
1365
|
1091
|
50
|
|
|
|
3513
|
unless (( $qualifier, $value ) = ($data =~ m{^/([^=]+)(?:=\s*(.*))?})) { |
1366
|
0
|
0
|
|
|
|
0
|
if ( defined $last_unquoted_qualifier ) { |
1367
|
|
|
|
|
|
|
# handle case of unquoted multiline - read up everything until the next qualifier |
1368
|
0
|
|
0
|
|
|
0
|
do { |
1369
|
|
|
|
|
|
|
# Protein sequence translations need to be joined without spaces, |
1370
|
|
|
|
|
|
|
# other qualifiers need those. |
1371
|
0
|
0
|
|
|
|
0
|
$value .= ' ' if $qualifier ne "translation"; |
1372
|
0
|
|
|
|
|
0
|
$value .= $data; |
1373
|
|
|
|
|
|
|
} while defined($data = $qual[++$i]) && $data !~ m[^/]; |
1374
|
0
|
|
|
|
|
0
|
$i--; |
1375
|
0
|
|
|
|
|
0
|
$out->field->{$last_unquoted_qualifier}->[-1] .= $value; |
1376
|
0
|
|
|
|
|
0
|
$last_unquoted_qualifier = undef; |
1377
|
0
|
|
|
|
|
0
|
next QUAL; |
1378
|
|
|
|
|
|
|
} else { |
1379
|
0
|
|
|
|
|
0
|
$self->throw("Can't see new qualifier in: $_\nfrom:\n" |
1380
|
|
|
|
|
|
|
. join('', map "$_\n", @qual)); |
1381
|
|
|
|
|
|
|
} |
1382
|
|
|
|
|
|
|
} |
1383
|
1091
|
50
|
|
|
|
1351
|
$qualifier = '' if not defined $qualifier; |
1384
|
|
|
|
|
|
|
|
1385
|
1091
|
|
|
|
|
670
|
$last_unquoted_qualifier = undef; |
1386
|
1091
|
50
|
|
|
|
1064
|
if (defined $value) { |
1387
|
|
|
|
|
|
|
# Do we have a quoted value? |
1388
|
1091
|
100
|
|
|
|
1331
|
if (substr($value, 0, 1) eq '"') { |
1389
|
|
|
|
|
|
|
# Keep adding to value until we find the trailing quote |
1390
|
|
|
|
|
|
|
# and the quotes are balanced |
1391
|
|
|
|
|
|
|
QUOTES: |
1392
|
971
|
|
66
|
|
|
3337
|
while ($value !~ /"$/ or $value =~ tr/"/"/ % 2) { #" |
1393
|
686
|
|
|
|
|
430
|
$i++; |
1394
|
686
|
|
|
|
|
454
|
my $next = $qual[$i]; |
1395
|
686
|
50
|
|
|
|
701
|
if (!defined($next)) { |
1396
|
0
|
|
|
|
|
0
|
$self->warn("Unbalanced quote in:\n".join("\n", @qual). |
1397
|
|
|
|
|
|
|
"\nAdding quote to close...". |
1398
|
|
|
|
|
|
|
"Check sequence quality!"); |
1399
|
0
|
|
|
|
|
0
|
$value .= '"'; |
1400
|
0
|
|
|
|
|
0
|
last QUOTES; |
1401
|
|
|
|
|
|
|
} |
1402
|
|
|
|
|
|
|
|
1403
|
|
|
|
|
|
|
# Protein sequence translations need to be joined without spaces, |
1404
|
|
|
|
|
|
|
# other qualifiers need those. |
1405
|
686
|
100
|
|
|
|
659
|
if ($qualifier eq "translation") { |
1406
|
569
|
|
|
|
|
1241
|
$value .= $next; |
1407
|
|
|
|
|
|
|
} else { |
1408
|
117
|
|
|
|
|
454
|
$value .= " $next"; |
1409
|
|
|
|
|
|
|
} |
1410
|
|
|
|
|
|
|
} |
1411
|
|
|
|
|
|
|
# Trim leading and trailing quotes |
1412
|
971
|
|
|
|
|
6297
|
$value =~ s/^"|"$//g; |
1413
|
|
|
|
|
|
|
# Undouble internal quotes |
1414
|
971
|
|
|
|
|
902
|
$value =~ s/""/"/g; #" |
1415
|
|
|
|
|
|
|
} else { |
1416
|
120
|
|
|
|
|
122
|
$last_unquoted_qualifier = $qualifier; |
1417
|
|
|
|
|
|
|
} |
1418
|
|
|
|
|
|
|
} else { |
1419
|
0
|
|
|
|
|
0
|
$value = '_no_value'; |
1420
|
|
|
|
|
|
|
} |
1421
|
|
|
|
|
|
|
|
1422
|
|
|
|
|
|
|
# Store the qualifier |
1423
|
1091
|
|
100
|
|
|
1465
|
$out->field->{$qualifier} ||= []; |
1424
|
1091
|
|
|
|
|
813
|
push(@{$out->field->{$qualifier}},$value); |
|
1091
|
|
|
|
|
1193
|
|
1425
|
|
|
|
|
|
|
} |
1426
|
|
|
|
|
|
|
|
1427
|
256
|
|
|
|
|
456
|
return $out; |
1428
|
|
|
|
|
|
|
} |
1429
|
|
|
|
|
|
|
|
1430
|
|
|
|
|
|
|
=head2 _write_line_EMBL |
1431
|
|
|
|
|
|
|
|
1432
|
|
|
|
|
|
|
Title : _write_line_EMBL |
1433
|
|
|
|
|
|
|
Usage : |
1434
|
|
|
|
|
|
|
Function: internal function |
1435
|
|
|
|
|
|
|
Example : |
1436
|
|
|
|
|
|
|
Returns : 1 if writing suceeded, else undef |
1437
|
|
|
|
|
|
|
Args : |
1438
|
|
|
|
|
|
|
|
1439
|
|
|
|
|
|
|
|
1440
|
|
|
|
|
|
|
=cut |
1441
|
|
|
|
|
|
|
|
1442
|
|
|
|
|
|
|
sub _write_line_EMBL { |
1443
|
0
|
|
|
0
|
|
0
|
my ($self,$pre1,$pre2,$line,$length) = @_; |
1444
|
|
|
|
|
|
|
|
1445
|
0
|
0
|
|
|
|
0
|
$length || $self->throw("Miscalled write_line_EMBL without length. Programming error!"); |
1446
|
0
|
|
|
|
|
0
|
my $subl = $length - length $pre2; |
1447
|
0
|
|
|
|
|
0
|
my $linel = length $line; |
1448
|
0
|
|
|
|
|
0
|
my $i; |
1449
|
|
|
|
|
|
|
|
1450
|
0
|
|
|
|
|
0
|
my $sub = substr($line,0,$length - length $pre1); |
1451
|
|
|
|
|
|
|
|
1452
|
0
|
0
|
|
|
|
0
|
$self->_print( "$pre1$sub\n") || return; |
1453
|
|
|
|
|
|
|
|
1454
|
0
|
|
|
|
|
0
|
for ($i= ($length - length $pre1);$i < $linel;) { |
1455
|
0
|
|
|
|
|
0
|
$sub = substr($line,$i,($subl)); |
1456
|
0
|
0
|
|
|
|
0
|
$self->_print( "$pre2$sub\n") || return; |
1457
|
0
|
|
|
|
|
0
|
$i += $subl; |
1458
|
|
|
|
|
|
|
} |
1459
|
|
|
|
|
|
|
|
1460
|
0
|
|
|
|
|
0
|
return 1; |
1461
|
|
|
|
|
|
|
} |
1462
|
|
|
|
|
|
|
|
1463
|
|
|
|
|
|
|
=head2 _write_line_EMBL_regex |
1464
|
|
|
|
|
|
|
|
1465
|
|
|
|
|
|
|
Title : _write_line_EMBL_regex |
1466
|
|
|
|
|
|
|
Usage : |
1467
|
|
|
|
|
|
|
Function: internal function for writing lines of specified |
1468
|
|
|
|
|
|
|
length, with different first and the next line |
1469
|
|
|
|
|
|
|
left hand headers and split at specific points in the |
1470
|
|
|
|
|
|
|
text |
1471
|
|
|
|
|
|
|
Example : |
1472
|
|
|
|
|
|
|
Returns : nothing |
1473
|
|
|
|
|
|
|
Args : file handle, first header, second header, text-line, regex for line breaks, total line length |
1474
|
|
|
|
|
|
|
|
1475
|
|
|
|
|
|
|
|
1476
|
|
|
|
|
|
|
=cut |
1477
|
|
|
|
|
|
|
|
1478
|
|
|
|
|
|
|
sub _write_line_EMBL_regex { |
1479
|
263
|
|
|
263
|
|
269
|
my ($self,$pre1,$pre2,$line,$regex,$length) = @_; |
1480
|
|
|
|
|
|
|
|
1481
|
|
|
|
|
|
|
#print STDOUT "Going to print with $line!\n"; |
1482
|
|
|
|
|
|
|
|
1483
|
263
|
50
|
|
|
|
310
|
$length || $self->throw("Programming error - called write_line_EMBL_regex without length."); |
1484
|
|
|
|
|
|
|
|
1485
|
263
|
|
|
|
|
244
|
my $subl = $length - (length $pre1) -1 ; |
1486
|
263
|
|
|
|
|
157
|
my( @lines ); |
1487
|
|
|
|
|
|
|
|
1488
|
263
|
|
|
|
|
308
|
CHUNK: while($line) { |
1489
|
401
|
|
|
|
|
419
|
foreach my $pat ($regex, '[,;\.\/-]\s|'.$regex, '[,;\.\/-]|'.$regex) { |
1490
|
401
|
50
|
|
|
|
2879
|
if ($line =~ m/^(.{1,$subl})($pat)(.*)/ ) { |
1491
|
401
|
|
|
|
|
644
|
my $l = $1.$2; |
1492
|
401
|
100
|
|
|
|
505
|
$l =~ s/#/ /g # remove word wrap protection char '#' |
1493
|
|
|
|
|
|
|
if $pre1 eq "RA "; |
1494
|
401
|
|
|
|
|
377
|
my $newl = $3; |
1495
|
401
|
|
|
|
|
423
|
$line = substr($line,length($l)); |
1496
|
|
|
|
|
|
|
# be strict about not padding spaces according to |
1497
|
|
|
|
|
|
|
# genbank format |
1498
|
401
|
|
|
|
|
783
|
$l =~ s/\s+$//; |
1499
|
401
|
50
|
|
|
|
477
|
next CHUNK if ($l eq ''); |
1500
|
401
|
|
|
|
|
327
|
push(@lines, $l); |
1501
|
401
|
|
|
|
|
703
|
next CHUNK; |
1502
|
|
|
|
|
|
|
} |
1503
|
|
|
|
|
|
|
} |
1504
|
|
|
|
|
|
|
# if we get here none of the patterns matched $subl or less chars |
1505
|
0
|
|
|
|
|
0
|
$self->warn("trouble dissecting \"$line\"\n into chunks ". |
1506
|
|
|
|
|
|
|
"of $subl chars or less - this tag won't print right"); |
1507
|
|
|
|
|
|
|
# insert a space char to prevent infinite loops |
1508
|
0
|
|
|
|
|
0
|
$line = substr($line,0,$subl) . " " . substr($line,$subl); |
1509
|
|
|
|
|
|
|
} |
1510
|
263
|
|
|
|
|
226
|
my $s = shift @lines; |
1511
|
263
|
100
|
50
|
|
|
683
|
($self->_print("$pre1$s\n") || return) if $s; |
1512
|
263
|
|
|
|
|
309
|
foreach my $s ( @lines ) { |
1513
|
145
|
50
|
|
|
|
212
|
$self->_print("$pre2$s\n") || return; |
1514
|
|
|
|
|
|
|
} |
1515
|
|
|
|
|
|
|
|
1516
|
263
|
|
|
|
|
531
|
return 1; |
1517
|
|
|
|
|
|
|
} |
1518
|
|
|
|
|
|
|
|
1519
|
|
|
|
|
|
|
=head2 _post_sort |
1520
|
|
|
|
|
|
|
|
1521
|
|
|
|
|
|
|
Title : _post_sort |
1522
|
|
|
|
|
|
|
Usage : $obj->_post_sort($newval) |
1523
|
|
|
|
|
|
|
Function: |
1524
|
|
|
|
|
|
|
Returns : value of _post_sort |
1525
|
|
|
|
|
|
|
Args : newvalue (optional) |
1526
|
|
|
|
|
|
|
|
1527
|
|
|
|
|
|
|
|
1528
|
|
|
|
|
|
|
=cut |
1529
|
|
|
|
|
|
|
|
1530
|
|
|
|
|
|
|
sub _post_sort{ |
1531
|
6
|
|
|
6
|
|
7
|
my $obj = shift; |
1532
|
6
|
50
|
|
|
|
13
|
if ( @_ ) { |
1533
|
0
|
|
|
|
|
0
|
my $value = shift; |
1534
|
0
|
|
|
|
|
0
|
$obj->{'_post_sort'} = $value; |
1535
|
|
|
|
|
|
|
} |
1536
|
6
|
|
|
|
|
14
|
return $obj->{'_post_sort'}; |
1537
|
|
|
|
|
|
|
|
1538
|
|
|
|
|
|
|
} |
1539
|
|
|
|
|
|
|
|
1540
|
|
|
|
|
|
|
=head2 _show_dna |
1541
|
|
|
|
|
|
|
|
1542
|
|
|
|
|
|
|
Title : _show_dna |
1543
|
|
|
|
|
|
|
Usage : $obj->_show_dna($newval) |
1544
|
|
|
|
|
|
|
Function: |
1545
|
|
|
|
|
|
|
Returns : value of _show_dna |
1546
|
|
|
|
|
|
|
Args : newvalue (optional) |
1547
|
|
|
|
|
|
|
|
1548
|
|
|
|
|
|
|
|
1549
|
|
|
|
|
|
|
=cut |
1550
|
|
|
|
|
|
|
|
1551
|
|
|
|
|
|
|
sub _show_dna{ |
1552
|
42
|
|
|
42
|
|
50
|
my $obj = shift; |
1553
|
42
|
100
|
|
|
|
89
|
if ( @_ ) { |
1554
|
31
|
|
|
|
|
34
|
my $value = shift; |
1555
|
31
|
|
|
|
|
49
|
$obj->{'_show_dna'} = $value; |
1556
|
|
|
|
|
|
|
} |
1557
|
42
|
|
|
|
|
59
|
return $obj->{'_show_dna'}; |
1558
|
|
|
|
|
|
|
|
1559
|
|
|
|
|
|
|
} |
1560
|
|
|
|
|
|
|
|
1561
|
|
|
|
|
|
|
=head2 _id_generation_func |
1562
|
|
|
|
|
|
|
|
1563
|
|
|
|
|
|
|
Title : _id_generation_func |
1564
|
|
|
|
|
|
|
Usage : $obj->_id_generation_func($newval) |
1565
|
|
|
|
|
|
|
Function: |
1566
|
|
|
|
|
|
|
Returns : value of _id_generation_func |
1567
|
|
|
|
|
|
|
Args : newvalue (optional) |
1568
|
|
|
|
|
|
|
|
1569
|
|
|
|
|
|
|
|
1570
|
|
|
|
|
|
|
=cut |
1571
|
|
|
|
|
|
|
|
1572
|
|
|
|
|
|
|
sub _id_generation_func{ |
1573
|
11
|
|
|
11
|
|
11
|
my $obj = shift; |
1574
|
11
|
50
|
|
|
|
25
|
if ( @_ ) { |
1575
|
0
|
|
|
|
|
0
|
my $value = shift; |
1576
|
0
|
|
|
|
|
0
|
$obj->{'_id_generation_func'} = $value; |
1577
|
|
|
|
|
|
|
} |
1578
|
11
|
|
|
|
|
24
|
return $obj->{'_id_generation_func'}; |
1579
|
|
|
|
|
|
|
|
1580
|
|
|
|
|
|
|
} |
1581
|
|
|
|
|
|
|
|
1582
|
|
|
|
|
|
|
=head2 _ac_generation_func |
1583
|
|
|
|
|
|
|
|
1584
|
|
|
|
|
|
|
Title : _ac_generation_func |
1585
|
|
|
|
|
|
|
Usage : $obj->_ac_generation_func($newval) |
1586
|
|
|
|
|
|
|
Function: |
1587
|
|
|
|
|
|
|
Returns : value of _ac_generation_func |
1588
|
|
|
|
|
|
|
Args : newvalue (optional) |
1589
|
|
|
|
|
|
|
|
1590
|
|
|
|
|
|
|
|
1591
|
|
|
|
|
|
|
=cut |
1592
|
|
|
|
|
|
|
|
1593
|
|
|
|
|
|
|
sub _ac_generation_func{ |
1594
|
11
|
|
|
11
|
|
23
|
my $obj = shift; |
1595
|
11
|
50
|
|
|
|
25
|
if ( @_ ) { |
1596
|
0
|
|
|
|
|
0
|
my $value = shift; |
1597
|
0
|
|
|
|
|
0
|
$obj->{'_ac_generation_func'} = $value; |
1598
|
|
|
|
|
|
|
} |
1599
|
11
|
|
|
|
|
101
|
return $obj->{'_ac_generation_func'}; |
1600
|
|
|
|
|
|
|
|
1601
|
|
|
|
|
|
|
} |
1602
|
|
|
|
|
|
|
|
1603
|
|
|
|
|
|
|
=head2 _sv_generation_func |
1604
|
|
|
|
|
|
|
|
1605
|
|
|
|
|
|
|
Title : _sv_generation_func |
1606
|
|
|
|
|
|
|
Usage : $obj->_sv_generation_func($newval) |
1607
|
|
|
|
|
|
|
Function: |
1608
|
|
|
|
|
|
|
Returns : value of _sv_generation_func |
1609
|
|
|
|
|
|
|
Args : newvalue (optional) |
1610
|
|
|
|
|
|
|
|
1611
|
|
|
|
|
|
|
|
1612
|
|
|
|
|
|
|
=cut |
1613
|
|
|
|
|
|
|
|
1614
|
|
|
|
|
|
|
sub _sv_generation_func{ |
1615
|
0
|
|
|
0
|
|
0
|
my $obj = shift; |
1616
|
0
|
0
|
|
|
|
0
|
if ( @_ ) { |
1617
|
0
|
|
|
|
|
0
|
my $value = shift; |
1618
|
0
|
|
|
|
|
0
|
$obj->{'_sv_generation_func'} = $value; |
1619
|
|
|
|
|
|
|
} |
1620
|
0
|
|
|
|
|
0
|
return $obj->{'_sv_generation_func'}; |
1621
|
|
|
|
|
|
|
|
1622
|
|
|
|
|
|
|
} |
1623
|
|
|
|
|
|
|
|
1624
|
|
|
|
|
|
|
=head2 _kw_generation_func |
1625
|
|
|
|
|
|
|
|
1626
|
|
|
|
|
|
|
Title : _kw_generation_func |
1627
|
|
|
|
|
|
|
Usage : $obj->_kw_generation_func($newval) |
1628
|
|
|
|
|
|
|
Function: |
1629
|
|
|
|
|
|
|
Returns : value of _kw_generation_func |
1630
|
|
|
|
|
|
|
Args : newvalue (optional) |
1631
|
|
|
|
|
|
|
|
1632
|
|
|
|
|
|
|
|
1633
|
|
|
|
|
|
|
=cut |
1634
|
|
|
|
|
|
|
|
1635
|
|
|
|
|
|
|
sub _kw_generation_func{ |
1636
|
11
|
|
|
11
|
|
15
|
my $obj = shift; |
1637
|
11
|
50
|
|
|
|
21
|
if ( @_ ) { |
1638
|
0
|
|
|
|
|
0
|
my $value = shift; |
1639
|
0
|
|
|
|
|
0
|
$obj->{'_kw_generation_func'} = $value; |
1640
|
|
|
|
|
|
|
} |
1641
|
11
|
|
|
|
|
61
|
return $obj->{'_kw_generation_func'}; |
1642
|
|
|
|
|
|
|
|
1643
|
|
|
|
|
|
|
} |
1644
|
|
|
|
|
|
|
|
1645
|
|
|
|
|
|
|
1; |