line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# BioPerl module for Bio::SeqIO::genbank |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Please direct questions and support issues to |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Cared for by Bioperl project bioperl-l(at)bioperl.org |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# Copyright Elia Stupka and contributors see AUTHORS section |
9
|
|
|
|
|
|
|
# |
10
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Bio::SeqIO::genbank - GenBank sequence input/output stream |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
It is probably best not to use this object directly, but |
21
|
|
|
|
|
|
|
rather go through the SeqIO handler: |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
$stream = Bio::SeqIO->new(-file => $filename, |
24
|
|
|
|
|
|
|
-format => 'GenBank'); |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
while ( my $seq = $stream->next_seq ) { |
27
|
|
|
|
|
|
|
# do something with $seq |
28
|
|
|
|
|
|
|
} |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=head1 DESCRIPTION |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
This object can transform Bio::Seq objects to and from GenBank flat |
34
|
|
|
|
|
|
|
file databases. |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
There is some flexibility here about how to write GenBank output |
37
|
|
|
|
|
|
|
that is not fully documented. |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
=head2 Optional functions |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
=over 3 |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
=item _show_dna() |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
(output only) shows the dna or not |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
=item _post_sort() |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
(output only) provides a sorting func which is applied to the FTHelpers |
50
|
|
|
|
|
|
|
before printing |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
=item _id_generation_func() |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
This is function which is called as |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
print "ID ", $func($seq), "\n"; |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
To generate the ID line. If it is not there, it generates a sensible ID |
59
|
|
|
|
|
|
|
line using a number of tools. |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
If you want to output annotations in Genbank format they need to be |
62
|
|
|
|
|
|
|
stored in a Bio::Annotation::Collection object which is accessible |
63
|
|
|
|
|
|
|
through the Bio::SeqI interface method L. |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
The following are the names of the keys which are pulled from a |
66
|
|
|
|
|
|
|
L object: |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
reference - Should contain Bio::Annotation::Reference objects |
69
|
|
|
|
|
|
|
comment - Should contain Bio::Annotation::Comment objects |
70
|
|
|
|
|
|
|
dblink - Should contain a Bio::Annotation::DBLink object |
71
|
|
|
|
|
|
|
segment - Should contain a Bio::Annotation::SimpleValue object |
72
|
|
|
|
|
|
|
origin - Should contain a Bio::Annotation::SimpleValue object |
73
|
|
|
|
|
|
|
wgs - Should contain a Bio::Annotation::SimpleValue object |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
=back |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=head1 Where does the data go? |
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
Data parsed in Bio::SeqIO::genbank is stored in a variety of data |
80
|
|
|
|
|
|
|
fields in the sequence object that is returned. Here is a partial list |
81
|
|
|
|
|
|
|
of fields. |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
Items listed as RichSeq or Seq or PrimarySeq and then NAME() tell you |
84
|
|
|
|
|
|
|
the top level object which defines a function called NAME() which |
85
|
|
|
|
|
|
|
stores this information. |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
Items listed as Annotation 'NAME' tell you the data is stored the |
88
|
|
|
|
|
|
|
associated Bio::AnnotationCollectionI object which is associated with |
89
|
|
|
|
|
|
|
Bio::Seq objects. If it is explicitly requested that no annotations |
90
|
|
|
|
|
|
|
should be stored when parsing a record of course they will not be |
91
|
|
|
|
|
|
|
available when you try and get them. If you are having this problem |
92
|
|
|
|
|
|
|
look at the type of SeqBuilder that is being used to construct your |
93
|
|
|
|
|
|
|
sequence object. |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
Comments Annotation 'comment' |
96
|
|
|
|
|
|
|
References Annotation 'reference' |
97
|
|
|
|
|
|
|
Segment Annotation 'segment' |
98
|
|
|
|
|
|
|
Origin Annotation 'origin' |
99
|
|
|
|
|
|
|
Dbsource Annotation 'dblink' |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
Accessions PrimarySeq accession_number() |
102
|
|
|
|
|
|
|
Secondary accessions RichSeq get_secondary_accessions() |
103
|
|
|
|
|
|
|
GI number PrimarySeq primary_id() |
104
|
|
|
|
|
|
|
LOCUS PrimarySeq display_id() |
105
|
|
|
|
|
|
|
Keywords RichSeq get_keywords() |
106
|
|
|
|
|
|
|
Dates RichSeq get_dates() |
107
|
|
|
|
|
|
|
Molecule RichSeq molecule() |
108
|
|
|
|
|
|
|
Seq Version RichSeq seq_version() |
109
|
|
|
|
|
|
|
PID RichSeq pid() |
110
|
|
|
|
|
|
|
Division RichSeq division() |
111
|
|
|
|
|
|
|
Features Seq get_SeqFeatures() |
112
|
|
|
|
|
|
|
Alphabet PrimarySeq alphabet() |
113
|
|
|
|
|
|
|
Definition PrimarySeq description() or desc() |
114
|
|
|
|
|
|
|
Version PrimarySeq version() |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
Sequence PrimarySeq seq() |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
There is more information in the Feature-Annotation HOWTO about each |
119
|
|
|
|
|
|
|
field and how it is mapped to the Sequence object |
120
|
|
|
|
|
|
|
L. |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=head1 FEEDBACK |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=head2 Mailing Lists |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
127
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to one |
128
|
|
|
|
|
|
|
of the Bioperl mailing lists. Your participation is much appreciated. |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
131
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
=head2 Support |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
I |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
140
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
141
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
142
|
|
|
|
|
|
|
with code and data examples if at all possible. |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
=head2 Reporting Bugs |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
147
|
|
|
|
|
|
|
the bugs and their resolution. Bug reports can be submitted via the web: |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
=head1 AUTHOR - Bioperl Project |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
bioperl-l at bioperl.org |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
Original author Elia Stupka, elia -at- tigem.it |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
=head1 CONTRIBUTORS |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
Ewan Birney birney at ebi.ac.uk |
160
|
|
|
|
|
|
|
Jason Stajich jason at bioperl.org |
161
|
|
|
|
|
|
|
Chris Mungall cjm at fruitfly.bdgp.berkeley.edu |
162
|
|
|
|
|
|
|
Lincoln Stein lstein at cshl.org |
163
|
|
|
|
|
|
|
Heikki Lehvaslaiho, heikki at ebi.ac.uk |
164
|
|
|
|
|
|
|
Hilmar Lapp, hlapp at gmx.net |
165
|
|
|
|
|
|
|
Donald G. Jackson, donald.jackson at bms.com |
166
|
|
|
|
|
|
|
James Wasmuth, james.wasmuth at ed.ac.uk |
167
|
|
|
|
|
|
|
Brian Osborne, bosborne at alum.mit.edu |
168
|
|
|
|
|
|
|
Chris Fields, cjfields at bioperl dot org |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
=head1 APPENDIX |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
The rest of the documentation details each of the object |
173
|
|
|
|
|
|
|
methods. Internal methods are usually preceded with a _ |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
=cut |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
# Let the code begin... |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
package Bio::SeqIO::genbank; |
180
|
16
|
|
|
16
|
|
758
|
use strict; |
|
16
|
|
|
|
|
28
|
|
|
16
|
|
|
|
|
478
|
|
181
|
|
|
|
|
|
|
|
182
|
16
|
|
|
16
|
|
3800
|
use Bio::SeqIO::FTHelper; |
|
16
|
|
|
|
|
45
|
|
|
16
|
|
|
|
|
391
|
|
183
|
16
|
|
|
16
|
|
92
|
use Bio::SeqFeature::Generic; |
|
16
|
|
|
|
|
23
|
|
|
16
|
|
|
|
|
278
|
|
184
|
16
|
|
|
16
|
|
5687
|
use Bio::Species; |
|
16
|
|
|
|
|
38
|
|
|
16
|
|
|
|
|
407
|
|
185
|
16
|
|
|
16
|
|
90
|
use Bio::Seq::SeqFactory; |
|
16
|
|
|
|
|
214
|
|
|
16
|
|
|
|
|
327
|
|
186
|
16
|
|
|
16
|
|
68
|
use Bio::Annotation::Collection; |
|
16
|
|
|
|
|
25
|
|
|
16
|
|
|
|
|
405
|
|
187
|
16
|
|
|
16
|
|
4520
|
use Bio::Annotation::Comment; |
|
16
|
|
|
|
|
32
|
|
|
16
|
|
|
|
|
427
|
|
188
|
16
|
|
|
16
|
|
4014
|
use Bio::Annotation::Reference; |
|
16
|
|
|
|
|
32
|
|
|
16
|
|
|
|
|
436
|
|
189
|
16
|
|
|
16
|
|
93
|
use Bio::Annotation::DBLink; |
|
16
|
|
|
|
|
29
|
|
|
16
|
|
|
|
|
335
|
|
190
|
|
|
|
|
|
|
|
191
|
16
|
|
|
16
|
|
69
|
use base qw(Bio::SeqIO); |
|
16
|
|
|
|
|
26
|
|
|
16
|
|
|
|
|
115043
|
|
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
# Note that a qualifier that exceeds one line (i.e. a long label) will |
194
|
|
|
|
|
|
|
# automatically be quoted regardless: |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
our $FTQUAL_LINE_LENGTH = 60; |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
our %FTQUAL_NO_QUOTE = map {$_ => 1} qw( |
199
|
|
|
|
|
|
|
anticodon citation |
200
|
|
|
|
|
|
|
codon codon_start |
201
|
|
|
|
|
|
|
cons_splice direction |
202
|
|
|
|
|
|
|
evidence label |
203
|
|
|
|
|
|
|
mod_base number |
204
|
|
|
|
|
|
|
rpt_type rpt_unit |
205
|
|
|
|
|
|
|
transl_except transl_table |
206
|
|
|
|
|
|
|
usedin |
207
|
|
|
|
|
|
|
); |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
our %DBSOURCE = map {$_ => 1} qw( |
210
|
|
|
|
|
|
|
EchoBASE IntAct SWISS-2DPAGE ECO2DBASE ECOGENE TIGRFAMs |
211
|
|
|
|
|
|
|
TIGR GO InterPro Pfam PROSITE SGD GermOnline |
212
|
|
|
|
|
|
|
HSSP PhosSite Ensembl RGD AGD ArrayExpress KEGG |
213
|
|
|
|
|
|
|
H-InvDB HGNC LinkHub PANTHER PRINTS SMART SMR |
214
|
|
|
|
|
|
|
MGI MIM RZPD-ProtExp ProDom MEROPS TRANSFAC Reactome |
215
|
|
|
|
|
|
|
UniGene GlycoSuiteDB PIRSF HSC-2DPAGE PHCI-2DPAGE |
216
|
|
|
|
|
|
|
PMMA-2DPAGE Siena-2DPAGE Rat-heart-2DPAGE Aarhus/Ghent-2DPAGE |
217
|
|
|
|
|
|
|
Biocyc MetaCyc Biocyc:Metacyc GenomeReviews FlyBase |
218
|
|
|
|
|
|
|
TMHOBP COMPLUYEAST-2DPAGE OGP DictyBase HAMAP |
219
|
|
|
|
|
|
|
PhotoList Gramene WormBase WormPep Genew ZFIN |
220
|
|
|
|
|
|
|
PeroxiBase MaizeDB TAIR DrugBank REBASE HPA |
221
|
|
|
|
|
|
|
swissprot GenBank GenPept REFSEQ embl PDB UniProtKB |
222
|
|
|
|
|
|
|
DIP PeptideAtlas PRIDE CYGD HOGENOME Gene3D |
223
|
|
|
|
|
|
|
Project); |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
our %VALID_MOLTYPE = map {$_ => 1} qw(NA DNA RNA tRNA rRNA cDNA cRNA ms-DNA |
226
|
|
|
|
|
|
|
mRNA uRNA ss-RNA ss-DNA snRNA snoRNA PRT); |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
our %VALID_ALPHABET = ( |
229
|
|
|
|
|
|
|
'bp' => 'dna', |
230
|
|
|
|
|
|
|
'aa' => 'protein', |
231
|
|
|
|
|
|
|
'rc' => '' # rc = release candidate; file has no sequences |
232
|
|
|
|
|
|
|
); |
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
sub _initialize { |
235
|
115
|
|
|
115
|
|
392
|
my($self, @args) = @_; |
236
|
|
|
|
|
|
|
|
237
|
115
|
|
|
|
|
551
|
$self->SUPER::_initialize(@args); |
238
|
|
|
|
|
|
|
# hash for functions for decoding keys. |
239
|
115
|
|
|
|
|
437
|
$self->{'_func_ftunit_hash'} = {}; |
240
|
115
|
|
|
|
|
465
|
$self->_show_dna(1); # sets this to one by default. People can change it |
241
|
115
|
50
|
|
|
|
563
|
if ( not defined $self->sequence_factory ) { |
242
|
115
|
|
|
|
|
407
|
$self->sequence_factory |
243
|
|
|
|
|
|
|
(Bio::Seq::SeqFactory->new(-verbose => $self->verbose, |
244
|
|
|
|
|
|
|
-type => 'Bio::Seq::RichSeq')); |
245
|
|
|
|
|
|
|
} |
246
|
|
|
|
|
|
|
} |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
=head2 next_seq |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
Title : next_seq |
251
|
|
|
|
|
|
|
Usage : $seq = $stream->next_seq() |
252
|
|
|
|
|
|
|
Function: returns the next sequence in the stream |
253
|
|
|
|
|
|
|
Returns : Bio::Seq object |
254
|
|
|
|
|
|
|
Args : |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
=cut |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
sub next_seq { |
259
|
117
|
|
|
117
|
1
|
5914
|
my ($self, @args) = @_; |
260
|
117
|
|
|
|
|
280
|
my %args = @args; |
261
|
117
|
|
|
|
|
399
|
my $builder = $self->sequence_builder; |
262
|
117
|
|
|
|
|
280
|
my $seq; |
263
|
|
|
|
|
|
|
my %params; |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
RECORDSTART: |
266
|
117
|
|
|
|
|
162
|
while (1) { |
267
|
119
|
|
|
|
|
162
|
my $buffer; |
268
|
119
|
|
|
|
|
502
|
my ( @acc, @features ); |
269
|
119
|
|
|
|
|
0
|
my ( $display_id, $annotation ); |
270
|
119
|
|
|
|
|
0
|
my $species; |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
# initialize; we may come here because of starting over |
273
|
119
|
|
|
|
|
210
|
@features = (); |
274
|
119
|
|
|
|
|
224
|
$annotation = undef; |
275
|
119
|
|
|
|
|
207
|
@acc = (); |
276
|
119
|
|
|
|
|
163
|
$species = undef; |
277
|
119
|
|
|
|
|
366
|
%params = ( -verbose => $self->verbose ); # reset hash |
278
|
119
|
|
|
|
|
661
|
local ($/) = "\n"; |
279
|
119
|
|
|
|
|
551
|
while ( defined( $buffer = $self->_readline ) ) { |
280
|
189
|
100
|
|
|
|
716
|
last if index( $buffer, 'LOCUS ' ) == 0; |
281
|
|
|
|
|
|
|
} |
282
|
119
|
100
|
|
|
|
397
|
return unless defined $buffer; # end of file |
283
|
113
|
50
|
|
|
|
706
|
$buffer =~ /^LOCUS\s+(\S.*)$/o |
284
|
|
|
|
|
|
|
or $self->throw( "GenBank stream with bad LOCUS line. " |
285
|
|
|
|
|
|
|
. "Not GenBank in my book. Got '$buffer'"); |
286
|
113
|
|
|
|
|
793
|
my @tokens = split( ' ', $1 ); |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
# this is important to have the id for display in e.g. FTHelper, |
289
|
|
|
|
|
|
|
# otherwise you won't know which entry caused an error |
290
|
113
|
|
|
|
|
249
|
$display_id = shift @tokens; |
291
|
113
|
|
|
|
|
296
|
$params{'-display_id'} = $display_id; |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
# may still be useful if we don't want the seq |
294
|
113
|
|
|
|
|
268
|
my $seqlength = shift @tokens; |
295
|
113
|
50
|
|
|
|
461
|
if ( exists $VALID_ALPHABET{$seqlength} ) { |
296
|
|
|
|
|
|
|
# moved one token too far. No locus name? |
297
|
0
|
|
|
|
|
0
|
$self->warn( "Bad LOCUS name? Changing [$params{'-display_id'}] " |
298
|
|
|
|
|
|
|
. "to 'unknown' and length to '$display_id'" |
299
|
|
|
|
|
|
|
); |
300
|
0
|
|
|
|
|
0
|
$params{'-display_id'} = 'unknown'; |
301
|
0
|
|
|
|
|
0
|
$params{'-length'} = $display_id; |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
# add token back... |
304
|
0
|
|
|
|
|
0
|
unshift @tokens, $seqlength; |
305
|
|
|
|
|
|
|
} |
306
|
|
|
|
|
|
|
else { |
307
|
113
|
|
|
|
|
279
|
$params{'-length'} = $seqlength; |
308
|
|
|
|
|
|
|
} |
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
# the alphabet of the entry |
311
|
|
|
|
|
|
|
# shouldn't assign alphabet unless one |
312
|
|
|
|
|
|
|
# is specifically designated (such as for rc files) |
313
|
113
|
|
|
|
|
287
|
my $alphabet = lc( shift @tokens ); |
314
|
|
|
|
|
|
|
$params{'-alphabet'} = |
315
|
|
|
|
|
|
|
( exists $VALID_ALPHABET{$alphabet} ) |
316
|
113
|
50
|
|
|
|
481
|
? $VALID_ALPHABET{$alphabet} |
317
|
|
|
|
|
|
|
: $self->warn("Unknown alphabet: $alphabet"); |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
# for aa there is usually no 'molecule' (mRNA etc) |
320
|
113
|
100
|
|
|
|
352
|
if ( $params{'-alphabet'} eq 'protein' ) { |
321
|
10
|
|
|
|
|
28
|
$params{'-molecule'} = 'PRT'; |
322
|
|
|
|
|
|
|
} |
323
|
|
|
|
|
|
|
else { |
324
|
103
|
|
|
|
|
217
|
$params{'-molecule'} = shift(@tokens); |
325
|
|
|
|
|
|
|
} |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
# take care of lower case issues |
328
|
113
|
100
|
66
|
|
|
711
|
if ( $params{'-molecule'} eq 'dna' or $params{'-molecule'} eq 'rna' ) { |
329
|
4
|
|
|
|
|
14
|
$params{'-molecule'} = uc $params{'-molecule'}; |
330
|
|
|
|
|
|
|
} |
331
|
|
|
|
|
|
|
$self->debug( "Unrecognized molecule type: " . $params{'-molecule'} ) |
332
|
113
|
50
|
|
|
|
488
|
if not exists( $VALID_MOLTYPE{ $params{'-molecule'} } ); |
333
|
|
|
|
|
|
|
|
334
|
113
|
|
|
|
|
226
|
my $circ = shift @tokens; |
335
|
113
|
100
|
|
|
|
280
|
if ( $circ eq 'circular' ) { |
336
|
8
|
|
|
|
|
27
|
$params{'-is_circular'} = 1; |
337
|
8
|
|
|
|
|
26
|
$params{'-division'} = shift @tokens; |
338
|
|
|
|
|
|
|
} |
339
|
|
|
|
|
|
|
else { |
340
|
|
|
|
|
|
|
# 'linear' or 'circular' may actually be omitted altogether |
341
|
105
|
100
|
|
|
|
385
|
$params{'-division'} = |
342
|
|
|
|
|
|
|
( CORE::length($circ) == 3 ) ? $circ : shift @tokens; |
343
|
|
|
|
|
|
|
} |
344
|
113
|
|
|
|
|
355
|
my $date = join( ' ', @tokens ); # we lump together the rest |
345
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
# this is per request bug #1513 |
347
|
|
|
|
|
|
|
# we can handle: |
348
|
|
|
|
|
|
|
# 9-10-2003 |
349
|
|
|
|
|
|
|
# 9-10-03 |
350
|
|
|
|
|
|
|
# 09-10-2003 |
351
|
|
|
|
|
|
|
# 09-10-03 |
352
|
113
|
100
|
|
|
|
949
|
if ( $date =~ s/\s*((\d{1,2})-(\w{3})-(\d{2,4})).*/$1/ ) { |
353
|
107
|
50
|
|
|
|
338
|
if ( length($date) < 11 ) { |
354
|
|
|
|
|
|
|
# improperly formatted date |
355
|
|
|
|
|
|
|
# But we'll be nice and fix it for them |
356
|
0
|
|
|
|
|
0
|
my ( $d, $m, $y ) = ( $2, $3, $4 ); |
357
|
0
|
0
|
|
|
|
0
|
if ( length($d) == 1 ) { |
358
|
0
|
|
|
|
|
0
|
$d = "0$d"; |
359
|
|
|
|
|
|
|
} |
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
# guess the century here |
362
|
0
|
0
|
|
|
|
0
|
if ( length($y) == 2 ) { |
363
|
0
|
0
|
|
|
|
0
|
if ( $y > 60 ) { # arbitrarily guess that '60' means 1960 |
364
|
0
|
|
|
|
|
0
|
$y = "19$y"; |
365
|
|
|
|
|
|
|
} |
366
|
|
|
|
|
|
|
else { |
367
|
0
|
|
|
|
|
0
|
$y = "20$y"; |
368
|
|
|
|
|
|
|
} |
369
|
0
|
|
|
|
|
0
|
$self->warn( "Date was malformed, guessing the " |
370
|
|
|
|
|
|
|
. "century for $date to be $y\n" |
371
|
|
|
|
|
|
|
); |
372
|
|
|
|
|
|
|
} |
373
|
0
|
|
|
|
|
0
|
$params{'-dates'} = [ join( '-', $d, $m, $y ) ]; |
374
|
|
|
|
|
|
|
} |
375
|
|
|
|
|
|
|
else { |
376
|
107
|
|
|
|
|
313
|
$params{'-dates'} = [$date]; |
377
|
|
|
|
|
|
|
} |
378
|
|
|
|
|
|
|
} |
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
# set them all at once |
381
|
113
|
|
|
|
|
703
|
$builder->add_slot_value(%params); |
382
|
113
|
|
|
|
|
369
|
%params = (); |
383
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
# parse the rest if desired, otherwise start over |
385
|
113
|
100
|
|
|
|
368
|
if ( not $builder->want_object ) { |
386
|
2
|
|
|
|
|
5
|
$builder->make_object; |
387
|
2
|
|
|
|
|
8
|
next RECORDSTART; |
388
|
|
|
|
|
|
|
} |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
# set up annotation depending on what the builder wants |
391
|
111
|
100
|
|
|
|
407
|
if ( $builder->want_slot('annotation') ) { |
392
|
106
|
|
|
|
|
787
|
$annotation = Bio::Annotation::Collection->new; |
393
|
|
|
|
|
|
|
} |
394
|
|
|
|
|
|
|
|
395
|
111
|
|
|
|
|
365
|
$buffer = $self->_readline; |
396
|
111
|
|
|
|
|
466
|
while ( defined( my $line = $buffer ) ) { |
397
|
|
|
|
|
|
|
# Description line(s) |
398
|
739
|
100
|
|
|
|
2126
|
if ($line =~ /^DEFINITION\s+(\S.*\S)/) { |
399
|
110
|
|
|
|
|
366
|
my @desc = ($1); |
400
|
110
|
|
|
|
|
332
|
while ( defined( $line = $self->_readline ) ) { |
401
|
172
|
100
|
|
|
|
614
|
if ($line =~ /^\s+(.*)/) { |
402
|
62
|
|
|
|
|
173
|
push( @desc, $1 ); |
403
|
62
|
|
|
|
|
161
|
next; |
404
|
|
|
|
|
|
|
} |
405
|
110
|
|
|
|
|
189
|
last; |
406
|
|
|
|
|
|
|
} |
407
|
110
|
|
|
|
|
688
|
$builder->add_slot_value( -desc => join( ' ', @desc ) ); |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
# we'll continue right here because DEFINITION |
410
|
|
|
|
|
|
|
# always comes at the top of the entry |
411
|
110
|
|
|
|
|
227
|
$buffer = $line; |
412
|
|
|
|
|
|
|
} |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
# accession number (there can be multiple accessions) |
415
|
739
|
100
|
|
|
|
5739
|
if ($line =~ /^ACCESSION\s+(\S.*\S)/) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
416
|
110
|
|
|
|
|
555
|
push( @acc, split( /\s+/, $1 ) ); |
417
|
110
|
|
|
|
|
359
|
while ( defined( $line = $self->_readline ) ) { |
418
|
113
|
100
|
|
|
|
448
|
if ($line =~ /^\s+(.*)/) { |
419
|
3
|
|
|
|
|
18
|
push( @acc, split( /\s+/, $1 ) ); |
420
|
3
|
|
|
|
|
9
|
next; |
421
|
|
|
|
|
|
|
} |
422
|
110
|
|
|
|
|
219
|
last; |
423
|
|
|
|
|
|
|
} |
424
|
110
|
|
|
|
|
187
|
$buffer = $line; |
425
|
110
|
|
|
|
|
293
|
next; |
426
|
|
|
|
|
|
|
} |
427
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
# PID |
429
|
|
|
|
|
|
|
elsif ($line =~ /^PID\s+(\S+)/) { |
430
|
1
|
|
|
|
|
5
|
$params{'-pid'} = $1; |
431
|
|
|
|
|
|
|
} |
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
# Version number |
434
|
|
|
|
|
|
|
elsif ($line =~ /^VERSION\s+(\S.+)$/) { |
435
|
91
|
|
|
|
|
402
|
my ( $acc, $gi ) = split( ' ', $1 ); |
436
|
91
|
100
|
|
|
|
496
|
if ( $acc =~ /^\w+\.(\d+)/ ) { |
437
|
87
|
|
|
|
|
310
|
$params{'-version'} = $1; |
438
|
87
|
|
|
|
|
248
|
$params{'-seq_version'} = $1; |
439
|
|
|
|
|
|
|
} |
440
|
91
|
100
|
66
|
|
|
598
|
if ( $gi && ( index( $gi, "GI:" ) == 0 ) ) { |
441
|
89
|
|
|
|
|
310
|
$params{'-primary_id'} = substr( $gi, 3 ); |
442
|
|
|
|
|
|
|
} |
443
|
|
|
|
|
|
|
} |
444
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
# Keywords |
446
|
|
|
|
|
|
|
elsif ($line =~ /^KEYWORDS\s+(\S.*)/) { |
447
|
107
|
|
|
|
|
500
|
my @kw = split( /\s*\;\s*/, $1 ); |
448
|
107
|
|
|
|
|
327
|
while ( defined( $line = $self->_readline ) ) { |
449
|
138
|
|
|
|
|
299
|
chomp $line; |
450
|
138
|
100
|
|
|
|
445
|
if ($line =~ /^\s+(.*)/) { |
451
|
31
|
|
|
|
|
1311
|
push( @kw, split( /\s*\;\s*/, $1 ) ); |
452
|
31
|
|
|
|
|
80
|
next; |
453
|
|
|
|
|
|
|
} |
454
|
107
|
|
|
|
|
177
|
last; |
455
|
|
|
|
|
|
|
} |
456
|
|
|
|
|
|
|
|
457
|
107
|
50
|
|
|
|
584
|
@kw && $kw[-1] =~ s/\.$//; |
458
|
107
|
|
|
|
|
333
|
$params{'-keywords'} = \@kw; |
459
|
107
|
|
|
|
|
221
|
$buffer = $line; |
460
|
107
|
|
|
|
|
274
|
next; |
461
|
|
|
|
|
|
|
} |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
# Organism name and phylogenetic information |
464
|
|
|
|
|
|
|
elsif ($line =~ /^SOURCE\s+\S/) { |
465
|
106
|
100
|
|
|
|
398
|
if ( $builder->want_slot('species') ) { |
466
|
101
|
|
|
|
|
461
|
$species = $self->_read_GenBank_Species( \$buffer ); |
467
|
101
|
|
|
|
|
771
|
$builder->add_slot_value( -species => $species ); |
468
|
|
|
|
|
|
|
} |
469
|
|
|
|
|
|
|
else { |
470
|
5
|
|
|
|
|
11
|
while ( defined( $buffer = $self->_readline ) ) { |
471
|
18
|
100
|
|
|
|
44
|
last if substr( $buffer, 0, 1 ) ne ' '; |
472
|
|
|
|
|
|
|
} |
473
|
|
|
|
|
|
|
} |
474
|
106
|
|
|
|
|
604
|
next; |
475
|
|
|
|
|
|
|
} |
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
# References |
478
|
|
|
|
|
|
|
elsif ($line =~ /^REFERENCE\s+\S/) { |
479
|
109
|
100
|
|
|
|
384
|
if ($annotation) { |
480
|
100
|
|
|
|
|
525
|
my @refs = $self->_read_GenBank_References( \$buffer ); |
481
|
100
|
|
|
|
|
266
|
foreach my $ref (@refs) { |
482
|
417
|
|
|
|
|
1122
|
$annotation->add_Annotation( 'reference', $ref ); |
483
|
|
|
|
|
|
|
} |
484
|
|
|
|
|
|
|
} |
485
|
|
|
|
|
|
|
else { |
486
|
9
|
|
|
|
|
16
|
while ( defined( $buffer = $self->_readline ) ) { |
487
|
47
|
100
|
|
|
|
99
|
last if substr( $buffer, 0, 1 ) ne ' '; |
488
|
|
|
|
|
|
|
} |
489
|
|
|
|
|
|
|
} |
490
|
109
|
|
|
|
|
325
|
next; |
491
|
|
|
|
|
|
|
} |
492
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
# Project |
494
|
|
|
|
|
|
|
elsif ($line =~ /^PROJECT\s+(\S.*)/) { |
495
|
2
|
50
|
|
|
|
9
|
if ($annotation) { |
496
|
2
|
|
|
|
|
14
|
my $project = |
497
|
|
|
|
|
|
|
Bio::Annotation::SimpleValue->new( -value => $1 ); |
498
|
2
|
|
|
|
|
9
|
$annotation->add_Annotation( 'project', $project ); |
499
|
|
|
|
|
|
|
} |
500
|
|
|
|
|
|
|
} |
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
# Comments may be plain text or Structured Comments. |
503
|
|
|
|
|
|
|
# Structured Comments are made up of tag/value pairs and have beginning |
504
|
|
|
|
|
|
|
# and end delimiters like ##*-Data-START## and ##*-Data-END## |
505
|
|
|
|
|
|
|
elsif ($line =~ /^COMMENT\s+(\S.*)/) { |
506
|
60
|
100
|
|
|
|
170
|
if ($annotation) { |
507
|
58
|
|
|
|
|
196
|
my $comment = $1; |
508
|
58
|
|
|
|
|
207
|
while ( defined( $line = $self->_readline ) ) { |
509
|
481
|
100
|
|
|
|
1106
|
last if ($line =~ /^\S/); |
510
|
423
|
|
|
|
|
970
|
$comment .= $line; |
511
|
|
|
|
|
|
|
} |
512
|
58
|
|
|
|
|
703
|
$comment =~ s/ +/ /g; |
513
|
|
|
|
|
|
|
# Structured Comment, do not remove returns in the tabular section |
514
|
58
|
100
|
|
|
|
264
|
if ( my ( $text, $table )= $comment |
515
|
|
|
|
|
|
|
=~ /([^#]*)(##\S+Data-START##.+?##\S+Data-END##)/is |
516
|
|
|
|
|
|
|
) { |
517
|
2
|
100
|
|
|
|
19
|
$text =~ s/\n/ /g if $text; |
518
|
2
|
|
|
|
|
15
|
$table =~ s/START##/START##\n/; |
519
|
2
|
|
|
|
|
25
|
$table =~ s/^\s+//gm; |
520
|
2
|
|
|
|
|
15
|
$comment = $text . "\n" . $table; |
521
|
|
|
|
|
|
|
} |
522
|
|
|
|
|
|
|
# Plain text, remove returns |
523
|
|
|
|
|
|
|
else { |
524
|
56
|
|
|
|
|
348
|
$comment =~ s/\n/ /g; |
525
|
|
|
|
|
|
|
} |
526
|
58
|
|
|
|
|
647
|
$annotation->add_Annotation( |
527
|
|
|
|
|
|
|
'comment', |
528
|
|
|
|
|
|
|
Bio::Annotation::Comment->new( |
529
|
|
|
|
|
|
|
-text => $comment, |
530
|
|
|
|
|
|
|
-tagname => 'comment' |
531
|
|
|
|
|
|
|
) |
532
|
|
|
|
|
|
|
); |
533
|
58
|
|
|
|
|
216
|
$buffer = $line; |
534
|
|
|
|
|
|
|
} |
535
|
|
|
|
|
|
|
else { |
536
|
2
|
|
|
|
|
6
|
while ( defined( $buffer = $self->_readline ) ) { |
537
|
7
|
100
|
|
|
|
18
|
last if substr( $buffer, 0, 1 ) ne ' '; |
538
|
|
|
|
|
|
|
} |
539
|
|
|
|
|
|
|
} |
540
|
60
|
|
|
|
|
164
|
next; |
541
|
|
|
|
|
|
|
} |
542
|
|
|
|
|
|
|
|
543
|
|
|
|
|
|
|
# Corresponding Genbank nucleotide id, Genpept only |
544
|
|
|
|
|
|
|
elsif ($line =~ /^DB(?:SOURCE|LINK)\s+(\S.+)/) { |
545
|
20
|
50
|
|
|
|
76
|
if ($annotation) { |
546
|
20
|
|
|
|
|
58
|
my $dbsource = $1; |
547
|
20
|
|
|
|
|
76
|
while ( defined( $line = $self->_readline ) ) { |
548
|
85
|
100
|
|
|
|
228
|
last if ($line =~ /^\S/); |
549
|
65
|
|
|
|
|
171
|
$dbsource .= $line; |
550
|
|
|
|
|
|
|
} |
551
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
# deal with UniProKB dbsources |
553
|
20
|
100
|
|
|
|
114
|
if ( $dbsource =~ |
554
|
|
|
|
|
|
|
s/(UniProt(?:KB)?|swissprot):\s+locus\s+(\S+)\,.+\n// |
555
|
|
|
|
|
|
|
) { |
556
|
5
|
|
|
|
|
61
|
$annotation->add_Annotation( |
557
|
|
|
|
|
|
|
'dblink', |
558
|
|
|
|
|
|
|
Bio::Annotation::DBLink->new( |
559
|
|
|
|
|
|
|
-primary_id => $2, |
560
|
|
|
|
|
|
|
-database => $1, |
561
|
|
|
|
|
|
|
-tagname => 'dblink' |
562
|
|
|
|
|
|
|
) |
563
|
|
|
|
|
|
|
); |
564
|
5
|
50
|
|
|
|
60
|
if ( $dbsource =~ s/\s+created:\s+([^\.]+)\.\n// ) { |
565
|
5
|
|
|
|
|
44
|
$annotation->add_Annotation( |
566
|
|
|
|
|
|
|
'swissprot_dates', |
567
|
|
|
|
|
|
|
Bio::Annotation::SimpleValue->new( |
568
|
|
|
|
|
|
|
-tagname => 'date_created', |
569
|
|
|
|
|
|
|
-value => $1 |
570
|
|
|
|
|
|
|
) |
571
|
|
|
|
|
|
|
); |
572
|
|
|
|
|
|
|
} |
573
|
5
|
|
|
|
|
91
|
while ( $dbsource =~ |
574
|
|
|
|
|
|
|
s/\s+(sequence|annotation)\s+updated:\s+([^\.]+)\.\n//g |
575
|
|
|
|
|
|
|
) { |
576
|
5
|
|
|
|
|
35
|
$annotation->add_Annotation( |
577
|
|
|
|
|
|
|
'swissprot_dates', |
578
|
|
|
|
|
|
|
Bio::Annotation::SimpleValue->new( |
579
|
|
|
|
|
|
|
-tagname => 'date_updated', |
580
|
|
|
|
|
|
|
-value => $2 |
581
|
|
|
|
|
|
|
) |
582
|
|
|
|
|
|
|
); |
583
|
|
|
|
|
|
|
} |
584
|
5
|
|
|
|
|
48
|
$dbsource =~ s/\n/ /g; |
585
|
5
|
50
|
|
|
|
78
|
if ( $dbsource =~ |
|
|
0
|
|
|
|
|
|
586
|
|
|
|
|
|
|
s/\s+xrefs:\s+((?:\S+,\s+)+\S+)\s+xrefs/xrefs/ |
587
|
|
|
|
|
|
|
) { |
588
|
|
|
|
|
|
|
# will use $i to determine even or odd |
589
|
|
|
|
|
|
|
# for swissprot the accessions are paired |
590
|
5
|
|
|
|
|
14
|
my $i = 0; |
591
|
5
|
|
|
|
|
51
|
for my $dbsrc ( split( /,\s+/, $1 ) ) { |
592
|
49
|
50
|
66
|
|
|
292
|
if ( $dbsrc =~ /(\S+)\.(\d+)/ |
593
|
|
|
|
|
|
|
or $dbsrc =~ /(\S+)/ |
594
|
|
|
|
|
|
|
) { |
595
|
49
|
|
|
|
|
143
|
my ( $id, $version ) = ( $1, $2 ); |
596
|
49
|
100
|
|
|
|
104
|
$version = '' unless defined $version; |
597
|
49
|
100
|
|
|
|
157
|
my $db = ( $id =~ /^\d\S{3}/ ) ? 'PDB' |
|
|
100
|
|
|
|
|
|
598
|
|
|
|
|
|
|
: ( $i++ % 2 ) ? 'GenPept' |
599
|
|
|
|
|
|
|
: 'GenBank'; |
600
|
|
|
|
|
|
|
|
601
|
49
|
|
|
|
|
179
|
$annotation->add_Annotation( |
602
|
|
|
|
|
|
|
'dblink', |
603
|
|
|
|
|
|
|
Bio::Annotation::DBLink->new( |
604
|
|
|
|
|
|
|
-primary_id => $id, |
605
|
|
|
|
|
|
|
-version => $version, |
606
|
|
|
|
|
|
|
-database => $db, |
607
|
|
|
|
|
|
|
-tagname => 'dblink' |
608
|
|
|
|
|
|
|
) |
609
|
|
|
|
|
|
|
); |
610
|
|
|
|
|
|
|
} |
611
|
|
|
|
|
|
|
} |
612
|
|
|
|
|
|
|
} |
613
|
|
|
|
|
|
|
elsif ( $dbsource =~ s/\s+xrefs:\s+(.+)\s+xrefs/xrefs/i ) { |
614
|
|
|
|
|
|
|
# download screwed up and ncbi didn't put acc in for gi numbers |
615
|
0
|
|
|
|
|
0
|
my $i = 0; |
616
|
0
|
|
|
|
|
0
|
for my $id ( split( /\,\s+/, $1 ) ) { |
617
|
0
|
|
|
|
|
0
|
my ( $acc, $db ); |
618
|
0
|
0
|
|
|
|
0
|
if ( $id =~ /gi:\s+(\d+)/ ) { |
|
|
0
|
|
|
|
|
|
619
|
0
|
|
|
|
|
0
|
$acc = $1; |
620
|
0
|
0
|
|
|
|
0
|
$db = ( $i++ % 2 ) ? 'GenPept' : 'GenBank'; |
621
|
|
|
|
|
|
|
} |
622
|
|
|
|
|
|
|
elsif ( $id =~ /pdb\s+accession\s+(\S+)/ ) { |
623
|
0
|
|
|
|
|
0
|
$acc = $1; |
624
|
0
|
|
|
|
|
0
|
$db = 'PDB'; |
625
|
|
|
|
|
|
|
} |
626
|
|
|
|
|
|
|
else { |
627
|
0
|
|
|
|
|
0
|
$acc = $id; |
628
|
0
|
|
|
|
|
0
|
$db = ''; |
629
|
|
|
|
|
|
|
} |
630
|
0
|
|
|
|
|
0
|
$annotation->add_Annotation( |
631
|
|
|
|
|
|
|
'dblink', |
632
|
|
|
|
|
|
|
Bio::Annotation::DBLink->new( |
633
|
|
|
|
|
|
|
-primary_id => $acc, |
634
|
|
|
|
|
|
|
-database => $db, |
635
|
|
|
|
|
|
|
-tagname => 'dblink' |
636
|
|
|
|
|
|
|
) |
637
|
|
|
|
|
|
|
); |
638
|
|
|
|
|
|
|
} |
639
|
|
|
|
|
|
|
} |
640
|
|
|
|
|
|
|
else { |
641
|
0
|
|
|
|
|
0
|
$self->debug("Cannot match $dbsource\n"); |
642
|
|
|
|
|
|
|
} |
643
|
5
|
50
|
|
|
|
102
|
if ( $dbsource =~ s/xrefs\s+ |
644
|
|
|
|
|
|
|
\(non\-sequence\s+databases\):\s+ |
645
|
|
|
|
|
|
|
((?:\S+,\s+)+\S+)//x |
646
|
|
|
|
|
|
|
) { |
647
|
5
|
|
|
|
|
83
|
for my $id ( split( /\,\s+/, $1 ) ) { |
648
|
86
|
|
|
|
|
143
|
my $db; |
649
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
# this is because GenBank dropped the spaces!!! |
651
|
|
|
|
|
|
|
# I'm sure we're not going to get this right |
652
|
|
|
|
|
|
|
##if ( $id =~ s/^://i ) { |
653
|
|
|
|
|
|
|
## $db = $1; |
654
|
|
|
|
|
|
|
##} |
655
|
86
|
|
|
|
|
186
|
$db = substr( $id, 0, index( $id, ':' ) ); |
656
|
86
|
100
|
|
|
|
207
|
if ( not exists $DBSOURCE{$db} ) { |
657
|
16
|
|
|
|
|
27
|
$db = ''; # do we want 'GenBank' here? |
658
|
|
|
|
|
|
|
} |
659
|
86
|
|
|
|
|
140
|
$id = substr( $id, index( $id, ':' ) + 1 ); |
660
|
86
|
|
|
|
|
266
|
$annotation->add_Annotation( |
661
|
|
|
|
|
|
|
'dblink', |
662
|
|
|
|
|
|
|
Bio::Annotation::DBLink->new( |
663
|
|
|
|
|
|
|
-primary_id => $id, |
664
|
|
|
|
|
|
|
-database => $db, |
665
|
|
|
|
|
|
|
-tagname => 'dblink' |
666
|
|
|
|
|
|
|
) |
667
|
|
|
|
|
|
|
); |
668
|
|
|
|
|
|
|
} |
669
|
|
|
|
|
|
|
} |
670
|
|
|
|
|
|
|
} |
671
|
|
|
|
|
|
|
else { |
672
|
15
|
100
|
|
|
|
133
|
if ( $dbsource =~ |
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
673
|
|
|
|
|
|
|
/^(\S*?):?\s*accession\s+(\S+)\.(\d+)/ |
674
|
|
|
|
|
|
|
) { |
675
|
4
|
|
|
|
|
21
|
my ( $db, $id, $version ) = ( $1, $2, $3 ); |
676
|
4
|
|
100
|
|
|
40
|
$annotation->add_Annotation( |
677
|
|
|
|
|
|
|
'dblink', |
678
|
|
|
|
|
|
|
Bio::Annotation::DBLink->new( |
679
|
|
|
|
|
|
|
-primary_id => $id, |
680
|
|
|
|
|
|
|
-version => $version, |
681
|
|
|
|
|
|
|
-database => $db || 'GenBank', |
682
|
|
|
|
|
|
|
-tagname => 'dblink' |
683
|
|
|
|
|
|
|
) |
684
|
|
|
|
|
|
|
); |
685
|
|
|
|
|
|
|
} |
686
|
|
|
|
|
|
|
elsif ( $dbsource =~ /^(\S*?):?\s*accession\s+(\S+)/ ) { |
687
|
1
|
|
|
|
|
4
|
my ( $db, $id ) = ( $1, $2 ); |
688
|
1
|
|
50
|
|
|
11
|
$annotation->add_Annotation( |
689
|
|
|
|
|
|
|
'dblink', |
690
|
|
|
|
|
|
|
Bio::Annotation::DBLink->new( |
691
|
|
|
|
|
|
|
-primary_id => $id, |
692
|
|
|
|
|
|
|
-database => $db || 'GenBank', |
693
|
|
|
|
|
|
|
-tagname => 'dblink' |
694
|
|
|
|
|
|
|
) |
695
|
|
|
|
|
|
|
); |
696
|
|
|
|
|
|
|
} |
697
|
|
|
|
|
|
|
elsif ( $dbsource =~ /(\S+)([\.:])\s*(\S+)/ ) { |
698
|
10
|
|
|
|
|
18
|
my ( $db, $version ); |
699
|
10
|
|
|
|
|
21
|
my @ids = (); |
700
|
10
|
100
|
|
|
|
41
|
if ( $2 eq ':' ) { |
701
|
9
|
|
|
|
|
18
|
$db = $1; |
702
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
# Genbank 192 release notes say this: "The second |
704
|
|
|
|
|
|
|
# field can consist of multiple comma-separated |
705
|
|
|
|
|
|
|
# identifiers, if a sequence record has multiple |
706
|
|
|
|
|
|
|
# DBLINK cross-references of a given type." |
707
|
|
|
|
|
|
|
# For example: DBLINK Project:100,200,300" |
708
|
9
|
|
|
|
|
45
|
@ids = split( /,/, $3 ); |
709
|
|
|
|
|
|
|
} |
710
|
|
|
|
|
|
|
else { |
711
|
1
|
|
|
|
|
4
|
( $db, $version ) = ( 'GenBank', $3 ); |
712
|
1
|
|
|
|
|
3
|
$ids[0] = $1; |
713
|
|
|
|
|
|
|
} |
714
|
|
|
|
|
|
|
|
715
|
10
|
|
|
|
|
26
|
foreach my $id (@ids) { |
716
|
12
|
|
|
|
|
104
|
$annotation->add_Annotation( |
717
|
|
|
|
|
|
|
'dblink', |
718
|
|
|
|
|
|
|
Bio::Annotation::DBLink->new( |
719
|
|
|
|
|
|
|
-primary_id => $id, |
720
|
|
|
|
|
|
|
-version => $version, |
721
|
|
|
|
|
|
|
-database => $db, |
722
|
|
|
|
|
|
|
-tagname => 'dblink' |
723
|
|
|
|
|
|
|
) |
724
|
|
|
|
|
|
|
); |
725
|
|
|
|
|
|
|
} |
726
|
|
|
|
|
|
|
} |
727
|
|
|
|
|
|
|
else { |
728
|
0
|
|
|
|
|
0
|
$self->warn( |
729
|
|
|
|
|
|
|
"Unrecognized DBSOURCE data: $dbsource\n"); |
730
|
|
|
|
|
|
|
} |
731
|
|
|
|
|
|
|
} |
732
|
20
|
|
|
|
|
88
|
$buffer = $line; |
733
|
|
|
|
|
|
|
} |
734
|
|
|
|
|
|
|
else { |
735
|
0
|
|
|
|
|
0
|
while ( defined( $buffer = $self->_readline ) ) { |
736
|
0
|
0
|
|
|
|
0
|
last if substr( $buffer, 0, 1 ) ne ' '; |
737
|
|
|
|
|
|
|
} |
738
|
|
|
|
|
|
|
} |
739
|
20
|
|
|
|
|
61
|
next; |
740
|
|
|
|
|
|
|
} |
741
|
|
|
|
|
|
|
|
742
|
|
|
|
|
|
|
# Exit at start of Feature table, or start of sequence |
743
|
227
|
100
|
|
|
|
1174
|
if ($line =~ /^(FEATURES|ORIGIN)/) { |
744
|
111
|
|
|
|
|
206
|
my $trap; |
745
|
|
|
|
|
|
|
} |
746
|
227
|
100
|
|
|
|
908
|
last if ($line =~ /^(FEATURES|ORIGIN)/); |
747
|
|
|
|
|
|
|
|
748
|
|
|
|
|
|
|
# Get next line and loop again |
749
|
116
|
|
|
|
|
335
|
$buffer = $self->_readline; |
750
|
|
|
|
|
|
|
} |
751
|
111
|
50
|
|
|
|
467
|
return unless defined $buffer; |
752
|
|
|
|
|
|
|
|
753
|
|
|
|
|
|
|
# add them all at once for efficiency |
754
|
111
|
|
|
|
|
965
|
$builder->add_slot_value( |
755
|
|
|
|
|
|
|
-accession_number => shift(@acc), |
756
|
|
|
|
|
|
|
-secondary_accessions => \@acc, |
757
|
|
|
|
|
|
|
%params |
758
|
|
|
|
|
|
|
); |
759
|
111
|
100
|
|
|
|
561
|
$builder->add_slot_value( -annotation => $annotation ) if $annotation; |
760
|
111
|
|
|
|
|
328
|
%params = (); # reset before possible re-use to avoid setting twice |
761
|
|
|
|
|
|
|
|
762
|
|
|
|
|
|
|
# start over if we don't want to continue with this entry |
763
|
111
|
50
|
|
|
|
432
|
if ( not $builder->want_object ) { |
764
|
0
|
|
|
|
|
0
|
$builder->make_object; |
765
|
0
|
|
|
|
|
0
|
next RECORDSTART; |
766
|
|
|
|
|
|
|
} |
767
|
|
|
|
|
|
|
|
768
|
|
|
|
|
|
|
# some "minimal" formats may not necessarily have a feature table |
769
|
111
|
100
|
66
|
|
|
445
|
if ( $builder->want_slot('features') |
|
|
|
100
|
|
|
|
|
770
|
|
|
|
|
|
|
and defined $buffer |
771
|
|
|
|
|
|
|
and $buffer =~ /^FEATURES/o |
772
|
|
|
|
|
|
|
) { |
773
|
|
|
|
|
|
|
# need to read the first line of the feature table |
774
|
104
|
|
|
|
|
394
|
$buffer = $self->_readline; |
775
|
|
|
|
|
|
|
|
776
|
|
|
|
|
|
|
# DO NOT read lines in the while condition -- this is done |
777
|
|
|
|
|
|
|
# as a side effect in _read_FTHelper_GenBank! |
778
|
|
|
|
|
|
|
|
779
|
|
|
|
|
|
|
# part of new circular spec: |
780
|
|
|
|
|
|
|
# commented out for now until kinks worked out |
781
|
|
|
|
|
|
|
#my $sourceEnd = 0; |
782
|
|
|
|
|
|
|
#$sourceEnd = $2 if ($buffer =~ /(\d+?)\.\.(\d+?)$/); |
783
|
|
|
|
|
|
|
|
784
|
104
|
|
|
|
|
335
|
while ( defined $buffer ) { |
785
|
|
|
|
|
|
|
# check immediately -- not at the end of the loop |
786
|
|
|
|
|
|
|
# note: GenPept entries obviously do not have a BASE line |
787
|
8384
|
100
|
|
|
|
58514
|
last if ( $buffer =~ /^BASE|ORIGIN|CONTIG|WGS/o ); |
788
|
|
|
|
|
|
|
|
789
|
|
|
|
|
|
|
# slurp in one feature at a time -- at return, the start of |
790
|
|
|
|
|
|
|
# the next feature will have been read already, so we need |
791
|
|
|
|
|
|
|
# to pass a reference, and the called method must set this |
792
|
|
|
|
|
|
|
# to the last line read before returning |
793
|
8280
|
|
|
|
|
19335
|
my $ftunit = $self->_read_FTHelper_GenBank( \$buffer ); |
794
|
|
|
|
|
|
|
|
795
|
|
|
|
|
|
|
# implement new circular spec: features that cross the origin are now |
796
|
|
|
|
|
|
|
# seamless instead of being 2 separate joined features |
797
|
|
|
|
|
|
|
# commented out until kinks get worked out |
798
|
|
|
|
|
|
|
#if ((! $args{'-nojoin'}) && $ftunit->{'loc'} =~ /^join\((\d+?)\.\.(\d+?),(\d+?)..(\d+?)\)$/ |
799
|
|
|
|
|
|
|
#&& $sourceEnd == $2 && $3 == 1) { |
800
|
|
|
|
|
|
|
#my $start = $1; |
801
|
|
|
|
|
|
|
#my $end = $2 + $4; |
802
|
|
|
|
|
|
|
#$ftunit->{'loc'} = "$start..$end"; |
803
|
|
|
|
|
|
|
#} |
804
|
|
|
|
|
|
|
|
805
|
|
|
|
|
|
|
# fix suggested by James Diggans |
806
|
|
|
|
|
|
|
|
807
|
8280
|
50
|
|
|
|
13220
|
if ( not defined $ftunit ) { |
808
|
|
|
|
|
|
|
# GRRRR. We have fallen over. Try to recover |
809
|
|
|
|
|
|
|
$self->warn( "Unexpected error in feature table for " |
810
|
0
|
|
|
|
|
0
|
. $params{'-display_id'} |
811
|
|
|
|
|
|
|
. " Skipping feature, attempting to recover" ); |
812
|
|
|
|
|
|
|
|
813
|
0
|
0
|
0
|
|
|
0
|
unless ( $buffer =~ /^\s{5,5}\S+/o |
814
|
|
|
|
|
|
|
or $buffer =~ /^\S+/o |
815
|
|
|
|
|
|
|
) { |
816
|
0
|
|
|
|
|
0
|
$buffer = $self->_readline; |
817
|
|
|
|
|
|
|
} |
818
|
0
|
|
|
|
|
0
|
next; # back to reading FTHelpers |
819
|
|
|
|
|
|
|
} |
820
|
|
|
|
|
|
|
|
821
|
|
|
|
|
|
|
# process ftunit |
822
|
8280
|
|
|
|
|
20579
|
my $feat = |
823
|
|
|
|
|
|
|
$ftunit->_generic_seqfeature( $self->location_factory, |
824
|
|
|
|
|
|
|
$display_id ); |
825
|
|
|
|
|
|
|
|
826
|
|
|
|
|
|
|
# add taxon_id from source if available |
827
|
8280
|
100
|
100
|
|
|
21167
|
if ( $species |
|
|
|
100
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
828
|
|
|
|
|
|
|
and $feat->primary_tag eq 'source' |
829
|
|
|
|
|
|
|
and $feat->has_tag('db_xref') |
830
|
|
|
|
|
|
|
and ( not $species->ncbi_taxid |
831
|
|
|
|
|
|
|
or ( $species->ncbi_taxid |
832
|
|
|
|
|
|
|
and $species->ncbi_taxid =~ /^list/ ) ) |
833
|
|
|
|
|
|
|
) { |
834
|
95
|
|
|
|
|
352
|
foreach my $tagval ( $feat->get_tag_values('db_xref') ) { |
835
|
96
|
100
|
|
|
|
368
|
if ( index( $tagval, "taxon:" ) == 0 ) { |
836
|
95
|
|
|
|
|
470
|
$species->ncbi_taxid( substr( $tagval, 6 ) ); |
837
|
95
|
|
|
|
|
308
|
last; |
838
|
|
|
|
|
|
|
} |
839
|
|
|
|
|
|
|
} |
840
|
|
|
|
|
|
|
} |
841
|
|
|
|
|
|
|
|
842
|
|
|
|
|
|
|
# add feature to list of features |
843
|
8280
|
|
|
|
|
28555
|
push( @features, $feat ); |
844
|
|
|
|
|
|
|
} |
845
|
104
|
|
|
|
|
628
|
$builder->add_slot_value( -features => \@features ); |
846
|
|
|
|
|
|
|
} |
847
|
|
|
|
|
|
|
|
848
|
111
|
50
|
|
|
|
380
|
if ( defined $buffer ) { |
849
|
|
|
|
|
|
|
# CONTIG lines: TODO, this needs to be cleaned up |
850
|
111
|
100
|
|
|
|
1321
|
if ($buffer =~/^CONTIG\s+(.*)/o) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
851
|
7
|
|
|
|
|
24
|
my $ctg = $1; |
852
|
7
|
|
|
|
|
29
|
while ( defined( $buffer = $self->_readline ) ) { |
853
|
29
|
100
|
|
|
|
174
|
last if $buffer =~ m{^ORIGIN|//}o; |
854
|
22
|
|
|
|
|
68
|
$buffer =~ s/\s+(.*)/$1/; |
855
|
22
|
|
|
|
|
45
|
$ctg .= $buffer; |
856
|
|
|
|
|
|
|
} |
857
|
7
|
50
|
|
|
|
27
|
if ($ctg) { |
858
|
7
|
|
|
|
|
71
|
$annotation->add_Annotation( |
859
|
|
|
|
|
|
|
Bio::Annotation::SimpleValue->new( |
860
|
|
|
|
|
|
|
-tagname => 'contig', |
861
|
|
|
|
|
|
|
-value => $ctg |
862
|
|
|
|
|
|
|
) |
863
|
|
|
|
|
|
|
); |
864
|
|
|
|
|
|
|
} |
865
|
|
|
|
|
|
|
} |
866
|
|
|
|
|
|
|
elsif ($buffer =~ /^WGS|WGS_SCAFLD\s+/o) { # catch WGS/WGS_SCAFLD lines |
867
|
1
|
|
|
|
|
6
|
while ( $buffer =~ s/(^WGS|WGS_SCAFLD)\s+// ) { # gulp lines |
868
|
3
|
|
|
|
|
8
|
chomp $buffer; |
869
|
3
|
|
|
|
|
20
|
$annotation->add_Annotation( |
870
|
|
|
|
|
|
|
Bio::Annotation::SimpleValue->new( |
871
|
|
|
|
|
|
|
-value => $buffer, |
872
|
|
|
|
|
|
|
-tagname => lc $1 |
873
|
|
|
|
|
|
|
) |
874
|
|
|
|
|
|
|
); |
875
|
3
|
|
|
|
|
12
|
$buffer = $self->_readline; |
876
|
|
|
|
|
|
|
} |
877
|
|
|
|
|
|
|
} |
878
|
|
|
|
|
|
|
elsif ( $buffer !~ m{^ORIGIN|//}o ) { # advance to the sequence, if any |
879
|
53
|
|
|
|
|
209
|
while ( defined( $buffer = $self->_readline ) ) { |
880
|
148
|
100
|
|
|
|
548
|
last if $buffer =~ m{^(ORIGIN|//)}; |
881
|
|
|
|
|
|
|
} |
882
|
|
|
|
|
|
|
} |
883
|
|
|
|
|
|
|
} |
884
|
111
|
50
|
|
|
|
469
|
if ( not $builder->want_object ) { |
885
|
0
|
|
|
|
|
0
|
$builder->make_object; # implicit end-of-object |
886
|
0
|
|
|
|
|
0
|
next RECORDSTART; |
887
|
|
|
|
|
|
|
} |
888
|
111
|
100
|
33
|
|
|
386
|
if ( $builder->want_slot('seq') ) { |
|
|
50
|
|
|
|
|
|
889
|
|
|
|
|
|
|
# the fact that we want a sequence does not necessarily mean that |
890
|
|
|
|
|
|
|
# there also is a sequence ... |
891
|
101
|
100
|
100
|
|
|
954
|
if ( defined $buffer and $buffer =~ s/^ORIGIN\s+// ) { |
892
|
93
|
100
|
66
|
|
|
574
|
if ( $annotation and length($buffer) > 0 ) { |
893
|
1
|
|
|
|
|
13
|
$annotation->add_Annotation( |
894
|
|
|
|
|
|
|
'origin', |
895
|
|
|
|
|
|
|
Bio::Annotation::SimpleValue->new( |
896
|
|
|
|
|
|
|
-tagname => 'origin', |
897
|
|
|
|
|
|
|
-value => $buffer |
898
|
|
|
|
|
|
|
) |
899
|
|
|
|
|
|
|
); |
900
|
|
|
|
|
|
|
} |
901
|
93
|
|
|
|
|
226
|
my $seqc = ''; |
902
|
93
|
|
|
|
|
324
|
while ( defined( $buffer = $self->_readline ) ) { |
903
|
32735
|
100
|
|
|
|
48518
|
last if $buffer =~ m{^//}; |
904
|
32643
|
|
|
|
|
38116
|
$buffer = uc $buffer; |
905
|
32643
|
|
|
|
|
214194
|
$buffer =~ s/[^A-Za-z]//g; |
906
|
32643
|
|
|
|
|
64543
|
$seqc .= $buffer; |
907
|
|
|
|
|
|
|
} |
908
|
|
|
|
|
|
|
|
909
|
93
|
|
|
|
|
390
|
$builder->add_slot_value( -seq => $seqc ); |
910
|
|
|
|
|
|
|
} |
911
|
|
|
|
|
|
|
} |
912
|
|
|
|
|
|
|
elsif ( defined($buffer) and ( substr( $buffer, 0, 2 ) ne '//' ) ) { |
913
|
|
|
|
|
|
|
# advance to the end of the record |
914
|
10
|
|
|
|
|
26
|
while ( defined( $buffer = $self->_readline ) ) { |
915
|
172
|
100
|
|
|
|
347
|
last if substr( $buffer, 0, 2 ) eq '//'; |
916
|
|
|
|
|
|
|
} |
917
|
|
|
|
|
|
|
} |
918
|
|
|
|
|
|
|
|
919
|
|
|
|
|
|
|
# Unlikely, but maybe the sequence is so weird that we don't want it |
920
|
|
|
|
|
|
|
# anymore. We don't want to return undef if the stream's not exhausted |
921
|
|
|
|
|
|
|
# yet. |
922
|
111
|
|
|
|
|
466
|
$seq = $builder->make_object; |
923
|
111
|
50
|
|
|
|
333
|
next RECORDSTART unless $seq; |
924
|
111
|
|
|
|
|
2408
|
last RECORDSTART; |
925
|
|
|
|
|
|
|
} # end while RECORDSTART |
926
|
|
|
|
|
|
|
|
927
|
111
|
|
|
|
|
872
|
return $seq; |
928
|
|
|
|
|
|
|
} |
929
|
|
|
|
|
|
|
|
930
|
|
|
|
|
|
|
=head2 write_seq |
931
|
|
|
|
|
|
|
|
932
|
|
|
|
|
|
|
Title : write_seq |
933
|
|
|
|
|
|
|
Usage : $stream->write_seq($seq) |
934
|
|
|
|
|
|
|
Function: writes the $seq object (must be seq) to the stream |
935
|
|
|
|
|
|
|
Returns : 1 for success and 0 for error |
936
|
|
|
|
|
|
|
Args : array of 1 to n Bio::SeqI objects |
937
|
|
|
|
|
|
|
|
938
|
|
|
|
|
|
|
=cut |
939
|
|
|
|
|
|
|
|
940
|
|
|
|
|
|
|
sub write_seq { |
941
|
25
|
|
|
25
|
1
|
198
|
my ($self,@seqs) = @_; |
942
|
|
|
|
|
|
|
|
943
|
25
|
|
|
|
|
77
|
foreach my $seq ( @seqs ) { |
944
|
25
|
50
|
|
|
|
115
|
$self->throw("Attempting to write with no seq!") unless defined $seq; |
945
|
|
|
|
|
|
|
|
946
|
25
|
50
|
33
|
|
|
228
|
if ( not ref $seq or not $seq->isa('Bio::SeqI') ) { |
947
|
0
|
|
|
|
|
0
|
$self->warn(" $seq is not a SeqI compliant module. Attempting to dump, but may fail!"); |
948
|
|
|
|
|
|
|
} |
949
|
|
|
|
|
|
|
|
950
|
25
|
|
|
|
|
124
|
my $str = $seq->seq; |
951
|
25
|
|
|
|
|
205
|
my $len = $seq->length; |
952
|
25
|
|
|
|
|
126
|
my $alpha = $seq->alphabet; |
953
|
|
|
|
|
|
|
|
954
|
25
|
|
|
|
|
122
|
my ($div, $mol); |
955
|
25
|
100
|
66
|
|
|
268
|
if ( not $seq->can('division') |
956
|
|
|
|
|
|
|
or not defined($div = $seq->division) |
957
|
|
|
|
|
|
|
) { |
958
|
2
|
|
|
|
|
6
|
$div = 'UNK'; |
959
|
|
|
|
|
|
|
} |
960
|
25
|
100
|
66
|
|
|
318
|
if ( not $seq->can('molecule') |
961
|
|
|
|
|
|
|
or not defined ($mol = $seq->molecule) |
962
|
|
|
|
|
|
|
) { |
963
|
2
|
|
100
|
|
|
10
|
$mol = $alpha || 'DNA'; |
964
|
|
|
|
|
|
|
} |
965
|
|
|
|
|
|
|
|
966
|
25
|
100
|
|
|
|
117
|
my $circular = ($seq->is_circular) ? 'circular' : 'linear '; |
967
|
|
|
|
|
|
|
|
968
|
25
|
|
|
|
|
153
|
local($^W) = 0; # suppressing warnings about uninitialized fields. |
969
|
|
|
|
|
|
|
|
970
|
25
|
|
|
|
|
55
|
my $temp_line; |
971
|
25
|
50
|
|
|
|
91
|
if ( $self->_id_generation_func ) { |
972
|
0
|
|
|
|
|
0
|
$temp_line = &{$self->_id_generation_func}($seq); |
|
0
|
|
|
|
|
0
|
|
973
|
|
|
|
|
|
|
} |
974
|
|
|
|
|
|
|
else { |
975
|
25
|
|
|
|
|
57
|
my $date = ''; |
976
|
25
|
100
|
|
|
|
168
|
if ( $seq->can('get_dates') ) { |
977
|
23
|
|
|
|
|
94
|
($date) = $seq->get_dates; |
978
|
|
|
|
|
|
|
} |
979
|
|
|
|
|
|
|
|
980
|
25
|
50
|
|
|
|
115
|
$self->warn("No whitespace allowed in GenBank display id [". $seq->display_id. "]") |
981
|
|
|
|
|
|
|
if $seq->display_id =~ /\s/; |
982
|
|
|
|
|
|
|
|
983
|
25
|
100
|
|
|
|
154
|
my @data = ( lc($alpha) eq 'protein' ) ? ('aa', '', '') : ('bp', '', $mol); |
984
|
25
|
|
|
|
|
114
|
$temp_line = sprintf ("%-12s%-15s%13s %s%4s%-8s%-8s %3s %-s\n", |
985
|
|
|
|
|
|
|
'LOCUS', $seq->id, $len, |
986
|
|
|
|
|
|
|
@data, $circular, $div, $date); |
987
|
|
|
|
|
|
|
} |
988
|
|
|
|
|
|
|
|
989
|
25
|
|
|
|
|
177
|
$self->_print($temp_line); |
990
|
25
|
|
|
|
|
112
|
$self->_write_line_GenBank_regex("DEFINITION ", " ", |
991
|
|
|
|
|
|
|
$seq->desc, "\\s\+\|\$",80); |
992
|
|
|
|
|
|
|
|
993
|
|
|
|
|
|
|
# if there, write the accession line |
994
|
|
|
|
|
|
|
|
995
|
25
|
50
|
|
|
|
95
|
if ( $self->_ac_generation_func ) { |
996
|
0
|
|
|
|
|
0
|
$temp_line = &{$self->_ac_generation_func}($seq); |
|
0
|
|
|
|
|
0
|
|
997
|
0
|
|
|
|
|
0
|
$self->_print("ACCESSION $temp_line\n"); |
998
|
|
|
|
|
|
|
} |
999
|
|
|
|
|
|
|
else { |
1000
|
25
|
|
|
|
|
64
|
my @acc = (); |
1001
|
25
|
|
|
|
|
113
|
push @acc, $seq->accession_number; |
1002
|
25
|
100
|
|
|
|
142
|
if ( $seq->isa('Bio::Seq::RichSeqI') ) { |
1003
|
23
|
|
|
|
|
99
|
push @acc, $seq->get_secondary_accessions; |
1004
|
|
|
|
|
|
|
} |
1005
|
25
|
|
|
|
|
135
|
$self->_print("ACCESSION ", join(" ", @acc), "\n"); |
1006
|
|
|
|
|
|
|
# otherwise - cannot print |
1007
|
|
|
|
|
|
|
} |
1008
|
|
|
|
|
|
|
|
1009
|
|
|
|
|
|
|
# if PID defined, print it |
1010
|
25
|
50
|
66
|
|
|
219
|
if ($seq->isa('Bio::Seq::RichSeqI') and $seq->pid) { |
1011
|
0
|
|
|
|
|
0
|
$self->_print("PID ", $seq->pid, "\n"); |
1012
|
|
|
|
|
|
|
} |
1013
|
|
|
|
|
|
|
|
1014
|
|
|
|
|
|
|
# if there, write the version line |
1015
|
25
|
50
|
100
|
|
|
95
|
if ( defined $self->_sv_generation_func ) { |
|
|
100
|
|
|
|
|
|
1016
|
0
|
|
|
|
|
0
|
$temp_line = &{$self->_sv_generation_func}($seq); |
|
0
|
|
|
|
|
0
|
|
1017
|
0
|
0
|
|
|
|
0
|
if ( $temp_line ) { |
1018
|
0
|
|
|
|
|
0
|
$self->_print("VERSION $temp_line\n"); |
1019
|
|
|
|
|
|
|
} |
1020
|
|
|
|
|
|
|
} |
1021
|
|
|
|
|
|
|
elsif ($seq->isa('Bio::Seq::RichSeqI') and defined($seq->seq_version)) { |
1022
|
18
|
|
|
|
|
72
|
my $id = $seq->primary_id; # this may be a GI number |
1023
|
18
|
50
|
33
|
|
|
212
|
my $data = (defined $id and $id =~ /^\d+$/) ? " GI:$id" : ""; |
1024
|
18
|
|
|
|
|
97
|
$self->_print("VERSION ", |
1025
|
|
|
|
|
|
|
$seq->accession_number, ".", |
1026
|
|
|
|
|
|
|
$seq->seq_version, $data, "\n"); |
1027
|
|
|
|
|
|
|
} |
1028
|
|
|
|
|
|
|
|
1029
|
|
|
|
|
|
|
# if there, write the PROJECT line |
1030
|
25
|
|
|
|
|
88
|
for my $proj ( $seq->annotation->get_Annotations('project') ) { |
1031
|
2
|
|
|
|
|
12
|
$self->_print("PROJECT ".$proj->value."\n"); |
1032
|
|
|
|
|
|
|
} |
1033
|
|
|
|
|
|
|
|
1034
|
|
|
|
|
|
|
# if there, write the DBSOURCE line |
1035
|
25
|
|
|
|
|
85
|
foreach my $ref ( $seq->annotation->get_Annotations('dblink') ) { |
1036
|
2
|
|
|
|
|
9
|
my ($db, $id) = ($ref->database, $ref->primary_id); |
1037
|
2
|
100
|
|
|
|
7
|
my $prefix = $db eq 'Project' ? 'DBLINK' : 'DBSOURCE'; |
1038
|
2
|
100
|
|
|
|
9
|
my $text = $db eq 'GenBank' ? '' |
|
|
50
|
|
|
|
|
|
1039
|
|
|
|
|
|
|
: $db eq 'Project' ? "$db:$id" |
1040
|
|
|
|
|
|
|
: "$db accession $id"; |
1041
|
2
|
|
|
|
|
11
|
$self->_print(sprintf ("%-11s %s\n", $prefix, $text)); |
1042
|
|
|
|
|
|
|
} |
1043
|
|
|
|
|
|
|
|
1044
|
|
|
|
|
|
|
# if there, write the keywords line |
1045
|
25
|
50
|
|
|
|
84
|
if ( defined $self->_kw_generation_func ) { |
|
|
100
|
|
|
|
|
|
1046
|
0
|
|
|
|
|
0
|
$temp_line = &{$self->_kw_generation_func}($seq); |
|
0
|
|
|
|
|
0
|
|
1047
|
0
|
|
|
|
|
0
|
$self->_print("KEYWORDS $temp_line\n"); |
1048
|
|
|
|
|
|
|
} |
1049
|
|
|
|
|
|
|
elsif ( $seq->can('keywords') ) { |
1050
|
23
|
|
|
|
|
112
|
my $kw = $seq->keywords; |
1051
|
23
|
50
|
|
|
|
101
|
$kw .= '.' if ( $kw !~ /\.$/ ); |
1052
|
23
|
|
|
|
|
108
|
$self->_print("KEYWORDS $kw\n"); |
1053
|
|
|
|
|
|
|
} |
1054
|
|
|
|
|
|
|
|
1055
|
|
|
|
|
|
|
# SEGMENT if it exists |
1056
|
25
|
|
|
|
|
78
|
foreach my $ref ( $seq->annotation->get_Annotations('segment') ) { |
1057
|
0
|
|
|
|
|
0
|
$self->_print(sprintf ("%-11s %s\n",'SEGMENT', |
1058
|
|
|
|
|
|
|
$ref->value)); |
1059
|
|
|
|
|
|
|
} |
1060
|
|
|
|
|
|
|
|
1061
|
|
|
|
|
|
|
# Organism lines |
1062
|
25
|
100
|
|
|
|
87
|
if (my $spec = $seq->species) { |
1063
|
22
|
50
|
|
|
|
218
|
my ($on, $sn, $cn) = ($spec->can('organelle') ? $spec->organelle : '', |
1064
|
|
|
|
|
|
|
$spec->scientific_name, |
1065
|
|
|
|
|
|
|
$spec->common_name); |
1066
|
22
|
|
|
|
|
57
|
my @classification; |
1067
|
22
|
50
|
|
|
|
122
|
if ($spec->isa('Bio::Species')) { |
1068
|
22
|
|
|
|
|
101
|
@classification = $spec->classification; |
1069
|
22
|
|
|
|
|
45
|
shift @classification; |
1070
|
|
|
|
|
|
|
} |
1071
|
|
|
|
|
|
|
else { |
1072
|
|
|
|
|
|
|
# Bio::Taxon should have a DB handle of some type attached, so |
1073
|
|
|
|
|
|
|
# derive the classification from that |
1074
|
0
|
|
|
|
|
0
|
my $node = $spec; |
1075
|
0
|
|
|
|
|
0
|
while ($node) { |
1076
|
0
|
|
0
|
|
|
0
|
$node = $node->ancestor || last; |
1077
|
0
|
|
|
|
|
0
|
unshift @classification, $node->node_name; |
1078
|
|
|
|
|
|
|
#$node eq $root && last; |
1079
|
|
|
|
|
|
|
} |
1080
|
0
|
|
|
|
|
0
|
@classification = reverse @classification; |
1081
|
|
|
|
|
|
|
} |
1082
|
22
|
50
|
|
|
|
87
|
my $abname = $spec->name('abbreviated') ? # from genbank file |
1083
|
|
|
|
|
|
|
$spec->name('abbreviated')->[0] : $sn; |
1084
|
22
|
100
|
|
|
|
80
|
my $sl = $on ? "$on " : ''; |
1085
|
22
|
100
|
|
|
|
80
|
$sl .= $cn ? "$abname ($cn)" : $abname; |
1086
|
|
|
|
|
|
|
|
1087
|
22
|
|
|
|
|
87
|
$self->_write_line_GenBank_regex("SOURCE ", ' 'x12, $sl, "\\s\+\|\$", 80); |
1088
|
22
|
|
|
|
|
66
|
$self->_print(" ORGANISM ", $spec->scientific_name, "\n"); |
1089
|
22
|
|
|
|
|
110
|
my $OC = join('; ', reverse @classification) . '.'; |
1090
|
22
|
|
|
|
|
72
|
$self->_write_line_GenBank_regex(' 'x12,' 'x12, $OC, "\\s\+\|\$", 80); |
1091
|
|
|
|
|
|
|
} |
1092
|
|
|
|
|
|
|
|
1093
|
|
|
|
|
|
|
# Reference lines |
1094
|
25
|
|
|
|
|
57
|
my $count = 1; |
1095
|
25
|
|
|
|
|
85
|
foreach my $ref ( $seq->annotation->get_Annotations('reference') ) { |
1096
|
47
|
|
|
|
|
112
|
$temp_line = "REFERENCE $count"; |
1097
|
47
|
50
|
|
|
|
198
|
if ($ref->start) { |
|
|
0
|
|
|
|
|
|
1098
|
47
|
100
|
|
|
|
133
|
$temp_line .= sprintf (" (%s %d to %d)", |
1099
|
|
|
|
|
|
|
($seq->alphabet() eq "protein" ? |
1100
|
|
|
|
|
|
|
"residues" : "bases"), |
1101
|
|
|
|
|
|
|
$ref->start, $ref->end); |
1102
|
|
|
|
|
|
|
} |
1103
|
|
|
|
|
|
|
elsif ($ref->gb_reference) { |
1104
|
0
|
|
|
|
|
0
|
$temp_line .= sprintf (" (%s)", $ref->gb_reference); |
1105
|
|
|
|
|
|
|
} |
1106
|
47
|
|
|
|
|
283
|
$self->_print("$temp_line\n"); |
1107
|
47
|
|
|
|
|
143
|
$self->_write_line_GenBank_regex(" AUTHORS ", ' 'x12, |
1108
|
|
|
|
|
|
|
$ref->authors, "\\s\+\|\$", 80); |
1109
|
47
|
100
|
|
|
|
152
|
$self->_write_line_GenBank_regex(" CONSRTM ", ' 'x12, |
1110
|
|
|
|
|
|
|
$ref->consortium, "\\s\+\|\$", 80) if $ref->consortium; |
1111
|
47
|
|
|
|
|
172
|
$self->_write_line_GenBank_regex(" TITLE ", ' 'x12, |
1112
|
|
|
|
|
|
|
$ref->title, "\\s\+\|\$", 80); |
1113
|
47
|
|
|
|
|
137
|
$self->_write_line_GenBank_regex(" JOURNAL ", ' 'x12, |
1114
|
|
|
|
|
|
|
$ref->location, "\\s\+\|\$", 80); |
1115
|
47
|
100
|
|
|
|
134
|
if ( $ref->medline) { |
1116
|
5
|
|
|
|
|
20
|
$self->_write_line_GenBank_regex(" MEDLINE ", ' 'x12, |
1117
|
|
|
|
|
|
|
$ref->medline, "\\s\+\|\$", 80); |
1118
|
|
|
|
|
|
|
# I am assuming that pubmed entries only exist when there |
1119
|
|
|
|
|
|
|
# are also MEDLINE entries due to the indentation |
1120
|
|
|
|
|
|
|
} |
1121
|
|
|
|
|
|
|
# This could be a wrong assumption |
1122
|
47
|
100
|
|
|
|
109
|
if ( $ref->pubmed ) { |
1123
|
13
|
|
|
|
|
46
|
$self->_write_line_GenBank_regex(" PUBMED ", ' 'x12, |
1124
|
|
|
|
|
|
|
$ref->pubmed, "\\s\+\|\$", 80); |
1125
|
|
|
|
|
|
|
} |
1126
|
|
|
|
|
|
|
# put remark at the end |
1127
|
47
|
50
|
|
|
|
175
|
if ($ref->comment) { |
1128
|
0
|
|
|
|
|
0
|
$self->_write_line_GenBank_regex(" REMARK ", ' 'x12, |
1129
|
|
|
|
|
|
|
$ref->comment, "\\s\+\|\$", 80); |
1130
|
|
|
|
|
|
|
} |
1131
|
47
|
|
|
|
|
93
|
$count++; |
1132
|
|
|
|
|
|
|
} |
1133
|
|
|
|
|
|
|
|
1134
|
|
|
|
|
|
|
# Comment lines |
1135
|
25
|
|
|
|
|
104
|
foreach my $comment ( $seq->annotation->get_Annotations('comment') ) { |
1136
|
13
|
|
|
|
|
41
|
$self->_write_line_GenBank_regex("COMMENT ", ' 'x12, |
1137
|
|
|
|
|
|
|
$comment->text, "\\s\+\|\$", 80); |
1138
|
|
|
|
|
|
|
} |
1139
|
|
|
|
|
|
|
|
1140
|
|
|
|
|
|
|
# FEATURES section |
1141
|
25
|
|
|
|
|
97
|
$self->_print("FEATURES Location/Qualifiers\n"); |
1142
|
|
|
|
|
|
|
|
1143
|
25
|
50
|
|
|
|
180
|
if ( defined $self->_post_sort ) { |
1144
|
|
|
|
|
|
|
# we need to read things into an array. Process. Sort them. Print 'em |
1145
|
0
|
|
|
|
|
0
|
my $post_sort_func = $self->_post_sort; |
1146
|
0
|
|
|
|
|
0
|
my @fth; |
1147
|
|
|
|
|
|
|
|
1148
|
0
|
|
|
|
|
0
|
foreach my $sf ( $seq->top_SeqFeatures ) { |
1149
|
0
|
|
|
|
|
0
|
push @fth, Bio::SeqIO::FTHelper::from_SeqFeature($sf, $seq); |
1150
|
|
|
|
|
|
|
} |
1151
|
|
|
|
|
|
|
|
1152
|
0
|
|
|
|
|
0
|
@fth = sort { &$post_sort_func($a, $b) } @fth; |
|
0
|
|
|
|
|
0
|
|
1153
|
|
|
|
|
|
|
|
1154
|
0
|
|
|
|
|
0
|
foreach my $fth ( @fth ) { |
1155
|
0
|
|
|
|
|
0
|
$self->_print_GenBank_FTHelper($fth); |
1156
|
|
|
|
|
|
|
} |
1157
|
|
|
|
|
|
|
} |
1158
|
|
|
|
|
|
|
else { |
1159
|
|
|
|
|
|
|
# not post sorted. And so we can print as we get them. |
1160
|
|
|
|
|
|
|
# lower memory load... |
1161
|
25
|
|
|
|
|
157
|
foreach my $sf ( $seq->top_SeqFeatures ) { |
1162
|
430
|
|
|
|
|
1113
|
my @fth = Bio::SeqIO::FTHelper::from_SeqFeature($sf, $seq); |
1163
|
430
|
|
|
|
|
564
|
foreach my $fth ( @fth ) { |
1164
|
430
|
50
|
|
|
|
865
|
if ( ! $fth->isa('Bio::SeqIO::FTHelper') ) { |
1165
|
0
|
|
|
|
|
0
|
$sf->throw("Cannot process FTHelper... $fth"); |
1166
|
|
|
|
|
|
|
} |
1167
|
430
|
|
|
|
|
908
|
$self->_print_GenBank_FTHelper($fth); |
1168
|
|
|
|
|
|
|
} |
1169
|
|
|
|
|
|
|
} |
1170
|
|
|
|
|
|
|
} |
1171
|
|
|
|
|
|
|
|
1172
|
|
|
|
|
|
|
# deal with WGS; WGS_SCAFLD present only if WGS is also present |
1173
|
25
|
50
|
|
|
|
210
|
if ($seq->annotation->get_Annotations('wgs')) { |
1174
|
0
|
|
|
|
|
0
|
foreach my $wgs (map {$seq->annotation->get_Annotations($_)} |
|
0
|
|
|
|
|
0
|
|
1175
|
|
|
|
|
|
|
qw(wgs wgs_scaffold) |
1176
|
|
|
|
|
|
|
) { |
1177
|
0
|
|
|
|
|
0
|
$self->_print(sprintf ("%-11s %s\n", |
1178
|
|
|
|
|
|
|
uc($wgs->tagname), |
1179
|
|
|
|
|
|
|
$wgs->value)); |
1180
|
|
|
|
|
|
|
} |
1181
|
0
|
|
|
|
|
0
|
$self->_show_dna(0); |
1182
|
|
|
|
|
|
|
} |
1183
|
25
|
100
|
|
|
|
109
|
if ($seq->annotation->get_Annotations('contig')) { |
1184
|
1
|
|
|
|
|
4
|
my $ct = 0; |
1185
|
1
|
|
|
|
|
4
|
my $cline; |
1186
|
1
|
|
|
|
|
6
|
foreach my $contig ($seq->annotation->get_Annotations('contig')) { |
1187
|
1
|
50
|
|
|
|
5
|
unless ($ct) { |
1188
|
1
|
|
|
|
|
3
|
$cline = uc($contig->tagname) . " " . $contig->value . "\n"; |
1189
|
|
|
|
|
|
|
} |
1190
|
|
|
|
|
|
|
else { |
1191
|
0
|
|
|
|
|
0
|
$cline = " " . $contig->value . "\n"; |
1192
|
|
|
|
|
|
|
} |
1193
|
1
|
|
|
|
|
6
|
$self->_print($cline); |
1194
|
1
|
|
|
|
|
3
|
$ct++; |
1195
|
|
|
|
|
|
|
} |
1196
|
|
|
|
|
|
|
} |
1197
|
25
|
100
|
|
|
|
107
|
if ( $seq->length == 0 ) { |
1198
|
1
|
|
|
|
|
3
|
$self->_show_dna(0); |
1199
|
|
|
|
|
|
|
} |
1200
|
|
|
|
|
|
|
|
1201
|
25
|
100
|
|
|
|
106
|
if ( $self->_show_dna == 0 ) { |
1202
|
1
|
|
|
|
|
3
|
$self->_print("\n//\n"); |
1203
|
1
|
|
|
|
|
5
|
return; |
1204
|
|
|
|
|
|
|
} |
1205
|
|
|
|
|
|
|
|
1206
|
|
|
|
|
|
|
# finished printing features. |
1207
|
|
|
|
|
|
|
|
1208
|
24
|
|
|
|
|
388
|
$str =~ tr/A-Z/a-z/; |
1209
|
|
|
|
|
|
|
|
1210
|
24
|
|
|
|
|
79
|
my ($o) = $seq->annotation->get_Annotations('origin'); |
1211
|
24
|
50
|
|
|
|
197
|
$self->_print(sprintf("%-12s%s\n", |
1212
|
|
|
|
|
|
|
'ORIGIN', $o ? $o->value : '')); |
1213
|
|
|
|
|
|
|
# print out the sequence |
1214
|
24
|
|
|
|
|
46
|
my $nuc = 60; # Number of nucleotides per line |
1215
|
24
|
|
|
|
|
53
|
my $whole_pat = 'a10' x 6; # Pattern for unpacking a whole line |
1216
|
24
|
|
|
|
|
53
|
my $out_pat = 'A11' x 6; # Pattern for packing a line |
1217
|
24
|
|
|
|
|
44
|
my $length = length $str; |
1218
|
|
|
|
|
|
|
|
1219
|
|
|
|
|
|
|
# Calculate the number of nucleotides which fit on whole lines |
1220
|
24
|
|
|
|
|
134
|
my $whole = int($length / $nuc) * $nuc; |
1221
|
|
|
|
|
|
|
|
1222
|
|
|
|
|
|
|
# Print the whole lines |
1223
|
24
|
|
|
|
|
37
|
my $i; |
1224
|
24
|
|
|
|
|
96
|
for ($i = 0; $i < $whole; $i += $nuc) { |
1225
|
1953
|
|
|
|
|
6604
|
my $blocks = pack $out_pat, |
1226
|
|
|
|
|
|
|
unpack $whole_pat, |
1227
|
|
|
|
|
|
|
substr($str, $i, $nuc); |
1228
|
1953
|
|
|
|
|
3024
|
chop $blocks; |
1229
|
1953
|
|
|
|
|
5501
|
$self->_print(sprintf("%9d $blocks\n", $i + $nuc - 59)); |
1230
|
|
|
|
|
|
|
} |
1231
|
|
|
|
|
|
|
|
1232
|
|
|
|
|
|
|
# Print the last line |
1233
|
24
|
100
|
|
|
|
93
|
if (my $last = substr($str, $i)) { |
1234
|
19
|
|
|
|
|
36
|
my $last_len = length($last); |
1235
|
19
|
|
|
|
|
114
|
my $last_pat = 'a10' x int($last_len / 10) |
1236
|
|
|
|
|
|
|
. 'a' . $last_len % 10; |
1237
|
19
|
|
|
|
|
107
|
my $blocks = pack $out_pat, |
1238
|
|
|
|
|
|
|
unpack($last_pat, $last); |
1239
|
19
|
|
|
|
|
143
|
$blocks =~ s/ +$//; |
1240
|
19
|
|
|
|
|
130
|
$self->_print(sprintf("%9d $blocks\n", |
1241
|
|
|
|
|
|
|
$length - $last_len + 1)); |
1242
|
|
|
|
|
|
|
} |
1243
|
|
|
|
|
|
|
|
1244
|
24
|
|
|
|
|
79
|
$self->_print("//\n"); |
1245
|
|
|
|
|
|
|
|
1246
|
24
|
50
|
33
|
|
|
100
|
$self->flush if $self->_flush_on_write && defined $self->_fh; |
1247
|
24
|
|
|
|
|
198
|
return 1; |
1248
|
|
|
|
|
|
|
} |
1249
|
|
|
|
|
|
|
} |
1250
|
|
|
|
|
|
|
|
1251
|
|
|
|
|
|
|
=head2 _print_GenBank_FTHelper |
1252
|
|
|
|
|
|
|
|
1253
|
|
|
|
|
|
|
Title : _print_GenBank_FTHelper |
1254
|
|
|
|
|
|
|
Usage : |
1255
|
|
|
|
|
|
|
Function: |
1256
|
|
|
|
|
|
|
Example : |
1257
|
|
|
|
|
|
|
Returns : |
1258
|
|
|
|
|
|
|
Args : |
1259
|
|
|
|
|
|
|
|
1260
|
|
|
|
|
|
|
=cut |
1261
|
|
|
|
|
|
|
|
1262
|
|
|
|
|
|
|
sub _print_GenBank_FTHelper { |
1263
|
430
|
|
|
430
|
|
619
|
my ( $self, $fth ) = @_; |
1264
|
|
|
|
|
|
|
|
1265
|
430
|
50
|
33
|
|
|
1519
|
if ( not ref $fth or not $fth->isa('Bio::SeqIO::FTHelper') ) { |
1266
|
0
|
|
|
|
|
0
|
$fth->warn( |
1267
|
|
|
|
|
|
|
"$fth is not a FTHelper class. Attempting to print but there could be issues" |
1268
|
|
|
|
|
|
|
); |
1269
|
|
|
|
|
|
|
} |
1270
|
|
|
|
|
|
|
|
1271
|
430
|
50
|
|
|
|
753
|
my $spacer = ( length $fth->key >= 15 ) ? ' ' : ''; |
1272
|
430
|
|
|
|
|
775
|
$self->_write_line_GenBank_regex( |
1273
|
|
|
|
|
|
|
sprintf( " %-16s%s", $fth->key, $spacer ), |
1274
|
|
|
|
|
|
|
" " x 21, $fth->loc, "\,\|\$", 80 ); |
1275
|
|
|
|
|
|
|
|
1276
|
430
|
|
|
|
|
656
|
foreach my $tag ( sort keys %{ $fth->field } ) { |
|
430
|
|
|
|
|
875
|
|
1277
|
|
|
|
|
|
|
# Account for hash structure in Annotation::DBLink, not the expected array |
1278
|
997
|
50
|
66
|
|
|
2132
|
if ( $tag eq 'db_xref' and grep /HASH/, @{ $fth->field->{$tag} }) { |
|
140
|
|
|
|
|
259
|
|
1279
|
0
|
|
|
|
|
0
|
for my $ref ( @{ $fth->field->{$tag} } ) { |
|
0
|
|
|
|
|
0
|
|
1280
|
0
|
|
|
|
|
0
|
my $db = $ref->{'database'}; |
1281
|
0
|
|
|
|
|
0
|
my $id = $ref->{'primary_id'}; |
1282
|
0
|
|
|
|
|
0
|
$self->_write_line_GenBank_regex |
1283
|
|
|
|
|
|
|
( " " x 21, " " x 21, |
1284
|
|
|
|
|
|
|
"/$tag=\"$db:$id\"", "\.\|\$", 80 ); |
1285
|
|
|
|
|
|
|
} |
1286
|
|
|
|
|
|
|
} |
1287
|
|
|
|
|
|
|
# The usual case, where all values are found in an array |
1288
|
|
|
|
|
|
|
else { |
1289
|
997
|
|
|
|
|
1072
|
foreach my $value ( @{ $fth->field->{$tag} } ) { |
|
997
|
|
|
|
|
1528
|
|
1290
|
1085
|
|
|
|
|
1497
|
$value =~ s/\"/\"\"/g; |
1291
|
1085
|
50
|
100
|
|
|
2752
|
if ( $value eq "_no_value" ) { |
|
|
100
|
|
|
|
|
|
1292
|
0
|
|
|
|
|
0
|
$self->_write_line_GenBank_regex |
1293
|
|
|
|
|
|
|
( " " x 21, " " x 21, |
1294
|
|
|
|
|
|
|
"/$tag", "\.\|\$", 80 ); |
1295
|
|
|
|
|
|
|
} |
1296
|
|
|
|
|
|
|
|
1297
|
|
|
|
|
|
|
# There are almost 3x more quoted qualifier values and they |
1298
|
|
|
|
|
|
|
# are more common too so we take quoted ones first. |
1299
|
|
|
|
|
|
|
# Long qualifiers, that will be line wrapped, are always quoted |
1300
|
|
|
|
|
|
|
elsif ( not $FTQUAL_NO_QUOTE{$tag} |
1301
|
|
|
|
|
|
|
or length("/$tag=$value") >= $FTQUAL_LINE_LENGTH |
1302
|
|
|
|
|
|
|
) { |
1303
|
976
|
100
|
|
|
|
2254
|
my ($pat) = ( $value =~ /\s/ ? '\s|$' : '.|$' ); |
1304
|
976
|
|
|
|
|
2764
|
$self->_write_line_GenBank_regex |
1305
|
|
|
|
|
|
|
( " " x 21, " " x 21, |
1306
|
|
|
|
|
|
|
"/$tag=\"$value\"", $pat, 80 ); |
1307
|
|
|
|
|
|
|
} |
1308
|
|
|
|
|
|
|
else { |
1309
|
109
|
|
|
|
|
310
|
$self->_write_line_GenBank_regex |
1310
|
|
|
|
|
|
|
( " " x 21, " " x 21, |
1311
|
|
|
|
|
|
|
"/$tag=$value", "\.\|\$", 80 ); |
1312
|
|
|
|
|
|
|
} |
1313
|
|
|
|
|
|
|
} |
1314
|
|
|
|
|
|
|
} |
1315
|
|
|
|
|
|
|
} |
1316
|
|
|
|
|
|
|
} |
1317
|
|
|
|
|
|
|
|
1318
|
|
|
|
|
|
|
=head2 _read_GenBank_References |
1319
|
|
|
|
|
|
|
|
1320
|
|
|
|
|
|
|
Title : _read_GenBank_References |
1321
|
|
|
|
|
|
|
Usage : |
1322
|
|
|
|
|
|
|
Function: Reads references from GenBank format. Internal function really |
1323
|
|
|
|
|
|
|
Returns : |
1324
|
|
|
|
|
|
|
Args : |
1325
|
|
|
|
|
|
|
|
1326
|
|
|
|
|
|
|
=cut |
1327
|
|
|
|
|
|
|
|
1328
|
|
|
|
|
|
|
sub _read_GenBank_References { |
1329
|
100
|
|
|
100
|
|
271
|
my ($self, $buffer) = @_; |
1330
|
100
|
|
|
|
|
191
|
my (@refs); |
1331
|
|
|
|
|
|
|
my $ref; |
1332
|
|
|
|
|
|
|
|
1333
|
|
|
|
|
|
|
# assume things are starting with RN |
1334
|
100
|
50
|
|
|
|
499
|
if ( $$buffer !~ /^REFERENCE/ ) { |
1335
|
0
|
|
|
|
|
0
|
warn("Not parsing line '$$buffer' which maybe important"); |
1336
|
|
|
|
|
|
|
} |
1337
|
|
|
|
|
|
|
|
1338
|
100
|
|
|
|
|
219
|
my $line = $$buffer; |
1339
|
|
|
|
|
|
|
|
1340
|
100
|
|
|
|
|
252
|
my (@title,@loc,@authors,@consort,@com,@medline,@pubmed); |
1341
|
|
|
|
|
|
|
|
1342
|
|
|
|
|
|
|
REFLOOP: |
1343
|
100
|
|
66
|
|
|
702
|
while( defined($line) or defined($line = $self->_readline) ) { |
1344
|
1379
|
100
|
|
|
|
3185
|
if ($line =~ /^\s{2}AUTHORS\s+(.*)/o) { |
1345
|
402
|
|
|
|
|
1053
|
push @authors, $1; |
1346
|
402
|
|
|
|
|
827
|
while ( defined($line = $self->_readline) ) { |
1347
|
1040
|
100
|
|
|
|
2660
|
if ($line =~ /^\s{9,}(.*)/o) { |
1348
|
638
|
|
|
|
|
1260
|
push @authors, $1; |
1349
|
638
|
|
|
|
|
1184
|
next; |
1350
|
|
|
|
|
|
|
} |
1351
|
402
|
|
|
|
|
529
|
last; |
1352
|
|
|
|
|
|
|
} |
1353
|
402
|
|
|
|
|
1740
|
$ref->authors(join(' ', @authors)); |
1354
|
|
|
|
|
|
|
} |
1355
|
|
|
|
|
|
|
|
1356
|
1379
|
100
|
|
|
|
2211
|
if ($line =~ /^\s{2}CONSRTM\s+(.*)/o) { |
1357
|
35
|
|
|
|
|
96
|
push @consort, $1; |
1358
|
35
|
|
|
|
|
84
|
while ( defined($line = $self->_readline) ) { |
1359
|
36
|
100
|
|
|
|
137
|
if ($line =~ /^\s{9,}(.*)/o) { |
1360
|
1
|
|
|
|
|
6
|
push @consort, $1; |
1361
|
1
|
|
|
|
|
5
|
next; |
1362
|
|
|
|
|
|
|
} |
1363
|
35
|
|
|
|
|
145
|
last; |
1364
|
|
|
|
|
|
|
} |
1365
|
35
|
|
|
|
|
188
|
$ref->consortium(join(' ', @consort)); |
1366
|
|
|
|
|
|
|
} |
1367
|
|
|
|
|
|
|
|
1368
|
1379
|
100
|
|
|
|
2688
|
if ($line =~ /^\s{2}TITLE\s+(.*)/o) { |
1369
|
408
|
|
|
|
|
881
|
push @title, $1; |
1370
|
408
|
|
|
|
|
879
|
while ( defined($line = $self->_readline) ) { |
1371
|
693
|
100
|
|
|
|
1758
|
if ($line =~ /^\s{9,}(.*)/o) { |
1372
|
285
|
|
|
|
|
605
|
push @title, $1; |
1373
|
285
|
|
|
|
|
598
|
next; |
1374
|
|
|
|
|
|
|
} |
1375
|
408
|
|
|
|
|
463
|
last; |
1376
|
|
|
|
|
|
|
} |
1377
|
408
|
|
|
|
|
1525
|
$ref->title(join(' ', @title)); |
1378
|
|
|
|
|
|
|
} |
1379
|
|
|
|
|
|
|
|
1380
|
1379
|
100
|
|
|
|
2677
|
if ($line =~ /^\s{2}JOURNAL\s+(.*)/o) { |
1381
|
416
|
|
|
|
|
952
|
push @loc, $1; |
1382
|
416
|
|
|
|
|
836
|
while ( defined($line = $self->_readline) ) { |
1383
|
|
|
|
|
|
|
# we only match when there are at least 4 spaces |
1384
|
|
|
|
|
|
|
# there is probably a better way to match this |
1385
|
|
|
|
|
|
|
# as it assumes that the describing tag is short enough |
1386
|
565
|
100
|
|
|
|
1433
|
if ($line =~ /^\s{9,}(.*)/o) { |
1387
|
149
|
|
|
|
|
348
|
push @loc, $1; |
1388
|
149
|
|
|
|
|
313
|
next; |
1389
|
|
|
|
|
|
|
} |
1390
|
416
|
|
|
|
|
538
|
last; |
1391
|
|
|
|
|
|
|
} |
1392
|
416
|
|
|
|
|
1562
|
$ref->location(join(' ', @loc)); |
1393
|
416
|
|
|
|
|
671
|
redo REFLOOP; |
1394
|
|
|
|
|
|
|
} |
1395
|
|
|
|
|
|
|
|
1396
|
963
|
100
|
|
|
|
1501
|
if ($line =~ /^\s{2}REMARK\s+(.*)/o) { |
1397
|
59
|
|
|
|
|
139
|
push @com, $1; |
1398
|
59
|
|
|
|
|
132
|
while ( defined($line = $self->_readline) ) { |
1399
|
83
|
100
|
|
|
|
214
|
if ($line =~ /^\s{9,}(.*)/o) { |
1400
|
24
|
|
|
|
|
53
|
push @com, $1; |
1401
|
24
|
|
|
|
|
51
|
next; |
1402
|
|
|
|
|
|
|
} |
1403
|
59
|
|
|
|
|
73
|
last; |
1404
|
|
|
|
|
|
|
} |
1405
|
59
|
|
|
|
|
279
|
$ref->comment(join(' ', @com)); |
1406
|
59
|
|
|
|
|
93
|
redo REFLOOP; |
1407
|
|
|
|
|
|
|
} |
1408
|
|
|
|
|
|
|
|
1409
|
904
|
100
|
|
|
|
1625
|
if ( $line =~ /^\s{2}MEDLINE\s+(.*)/ ) { |
1410
|
130
|
|
|
|
|
287
|
push @medline, $1; |
1411
|
130
|
|
|
|
|
284
|
while ( defined($line = $self->_readline) ) { |
1412
|
130
|
50
|
|
|
|
354
|
if ($line =~ /^\s{9,}(.*)/) { |
1413
|
0
|
|
|
|
|
0
|
push @medline, $1; |
1414
|
0
|
|
|
|
|
0
|
next; |
1415
|
|
|
|
|
|
|
} |
1416
|
130
|
|
|
|
|
161
|
last; |
1417
|
|
|
|
|
|
|
} |
1418
|
130
|
|
|
|
|
402
|
$ref->medline(join(' ', @medline)); |
1419
|
130
|
|
|
|
|
171
|
redo REFLOOP; |
1420
|
|
|
|
|
|
|
} |
1421
|
|
|
|
|
|
|
|
1422
|
774
|
100
|
|
|
|
1536
|
if ( $line =~ /^\s{3}PUBMED\s+(.*)/ ) { |
1423
|
237
|
|
|
|
|
505
|
push @pubmed, $1; |
1424
|
237
|
|
|
|
|
476
|
while ( defined($line = $self->_readline) ) { |
1425
|
237
|
50
|
|
|
|
617
|
if ($line =~ /^\s{9,}(.*)/) { |
1426
|
0
|
|
|
|
|
0
|
push @pubmed, $1; |
1427
|
0
|
|
|
|
|
0
|
next; |
1428
|
|
|
|
|
|
|
} |
1429
|
237
|
|
|
|
|
275
|
last; |
1430
|
|
|
|
|
|
|
} |
1431
|
237
|
|
|
|
|
736
|
$ref->pubmed(join(' ', @pubmed)); |
1432
|
237
|
|
|
|
|
310
|
redo REFLOOP; |
1433
|
|
|
|
|
|
|
} |
1434
|
|
|
|
|
|
|
|
1435
|
537
|
100
|
|
|
|
1282
|
if ( $line =~ /^REFERENCE/o ) { |
1436
|
|
|
|
|
|
|
# store current reference |
1437
|
417
|
100
|
|
|
|
1162
|
$self->_add_ref_to_array(\@refs,$ref) if defined $ref; |
1438
|
|
|
|
|
|
|
|
1439
|
|
|
|
|
|
|
# reset |
1440
|
417
|
|
|
|
|
701
|
@authors = (); |
1441
|
417
|
|
|
|
|
544
|
@title = (); |
1442
|
417
|
|
|
|
|
463
|
@loc = (); |
1443
|
417
|
|
|
|
|
535
|
@com = (); |
1444
|
417
|
|
|
|
|
493
|
@pubmed = (); |
1445
|
417
|
|
|
|
|
472
|
@medline = (); |
1446
|
|
|
|
|
|
|
|
1447
|
|
|
|
|
|
|
# create the new reference object |
1448
|
417
|
|
|
|
|
1898
|
$ref = Bio::Annotation::Reference->new(-tagname => 'reference'); |
1449
|
|
|
|
|
|
|
|
1450
|
|
|
|
|
|
|
# check whether start and end base is given |
1451
|
417
|
100
|
|
|
|
1877
|
if ($line =~ /^REFERENCE\s+\d+\s+\([a-z]+ (\d+) to (\d+)\)/){ |
|
|
100
|
|
|
|
|
|
1452
|
286
|
|
|
|
|
960
|
$ref->start($1); |
1453
|
286
|
|
|
|
|
946
|
$ref->end($2); |
1454
|
|
|
|
|
|
|
} |
1455
|
|
|
|
|
|
|
elsif ($line =~ /^REFERENCE\s+\d+\s+\((.*)\)/) { |
1456
|
14
|
|
|
|
|
38
|
$ref->gb_reference($1); |
1457
|
|
|
|
|
|
|
} |
1458
|
|
|
|
|
|
|
} |
1459
|
|
|
|
|
|
|
|
1460
|
537
|
100
|
|
|
|
2474
|
last if ($line =~ /^(FEATURES)|(COMMENT)/o); |
1461
|
|
|
|
|
|
|
|
1462
|
437
|
|
|
|
|
1848
|
$line = undef; # Empty $line to trigger read of next line |
1463
|
|
|
|
|
|
|
} |
1464
|
|
|
|
|
|
|
|
1465
|
|
|
|
|
|
|
# store last reference |
1466
|
100
|
50
|
|
|
|
559
|
$self->_add_ref_to_array(\@refs, $ref) if defined $ref; |
1467
|
|
|
|
|
|
|
|
1468
|
100
|
|
|
|
|
317
|
$$buffer = $line; |
1469
|
|
|
|
|
|
|
|
1470
|
|
|
|
|
|
|
#print "\nnumber of references found: ", $#refs+1,"\n"; |
1471
|
|
|
|
|
|
|
|
1472
|
100
|
|
|
|
|
445
|
return @refs; |
1473
|
|
|
|
|
|
|
} |
1474
|
|
|
|
|
|
|
|
1475
|
|
|
|
|
|
|
=head2 _add_ref_to_array |
1476
|
|
|
|
|
|
|
|
1477
|
|
|
|
|
|
|
Title: _add_ref_to_array |
1478
|
|
|
|
|
|
|
Usage: |
1479
|
|
|
|
|
|
|
Function: Adds a Reference object to an array of Reference objects, takes |
1480
|
|
|
|
|
|
|
care of possible cleanups to be done (currently, only author and title |
1481
|
|
|
|
|
|
|
will be chopped of trailing semicolons). |
1482
|
|
|
|
|
|
|
Args: A reference to an array of Reference objects and |
1483
|
|
|
|
|
|
|
the Reference object to be added |
1484
|
|
|
|
|
|
|
Returns: nothing |
1485
|
|
|
|
|
|
|
|
1486
|
|
|
|
|
|
|
=cut |
1487
|
|
|
|
|
|
|
|
1488
|
|
|
|
|
|
|
sub _add_ref_to_array { |
1489
|
417
|
|
|
417
|
|
673
|
my ($self, $refs, $ref) = @_; |
1490
|
|
|
|
|
|
|
|
1491
|
|
|
|
|
|
|
# first, polish author and title by removing possible trailing semicolons |
1492
|
417
|
|
|
|
|
755
|
my $au = $ref->authors; |
1493
|
417
|
|
|
|
|
834
|
my $title = $ref->title; |
1494
|
417
|
100
|
|
|
|
1032
|
$au =~ s/;\s*$//g if $au; |
1495
|
417
|
100
|
|
|
|
835
|
$title =~ s/;\s*$//g if $title; |
1496
|
417
|
|
|
|
|
883
|
$ref->authors($au); |
1497
|
417
|
|
|
|
|
939
|
$ref->title($title); |
1498
|
|
|
|
|
|
|
# the rest should be clean already, so go ahead and add it |
1499
|
417
|
|
|
|
|
451
|
push @{$refs}, $ref; |
|
417
|
|
|
|
|
809
|
|
1500
|
|
|
|
|
|
|
} |
1501
|
|
|
|
|
|
|
|
1502
|
|
|
|
|
|
|
=head2 _read_GenBank_Species |
1503
|
|
|
|
|
|
|
|
1504
|
|
|
|
|
|
|
Title : _read_GenBank_Species |
1505
|
|
|
|
|
|
|
Usage : |
1506
|
|
|
|
|
|
|
Function: Reads the GenBank Organism species and classification |
1507
|
|
|
|
|
|
|
lines. Able to deal with unconvential Organism naming |
1508
|
|
|
|
|
|
|
formats, and varietas in plants |
1509
|
|
|
|
|
|
|
Example : ORGANISM unknown marine gamma proteobacterium NOR5 |
1510
|
|
|
|
|
|
|
$genus = undef |
1511
|
|
|
|
|
|
|
$species = unknown marine gamma proteobacterium NOR5 |
1512
|
|
|
|
|
|
|
|
1513
|
|
|
|
|
|
|
ORGANISM Drosophila sp. 'white tip scutellum' |
1514
|
|
|
|
|
|
|
$genus = Drosophila |
1515
|
|
|
|
|
|
|
$species = sp. 'white tip scutellum' |
1516
|
|
|
|
|
|
|
(yes, this really is a species and that is its name) |
1517
|
|
|
|
|
|
|
$subspecies = undef |
1518
|
|
|
|
|
|
|
|
1519
|
|
|
|
|
|
|
ORGANISM Ajellomyces capsulatus var. farciminosus |
1520
|
|
|
|
|
|
|
$genus = Ajellomyces |
1521
|
|
|
|
|
|
|
$species = capsulatus |
1522
|
|
|
|
|
|
|
$subspecies = var. farciminosus |
1523
|
|
|
|
|
|
|
|
1524
|
|
|
|
|
|
|
ORGANISM Hepatitis delta virus |
1525
|
|
|
|
|
|
|
$genus = undef (though this virus has a genus in its lineage, we |
1526
|
|
|
|
|
|
|
cannot know that without a database lookup) |
1527
|
|
|
|
|
|
|
$species = Hepatitis delta virus |
1528
|
|
|
|
|
|
|
|
1529
|
|
|
|
|
|
|
Returns : A Bio::Species object |
1530
|
|
|
|
|
|
|
Args : A reference to the current line buffer |
1531
|
|
|
|
|
|
|
|
1532
|
|
|
|
|
|
|
=cut |
1533
|
|
|
|
|
|
|
|
1534
|
|
|
|
|
|
|
sub _read_GenBank_Species { |
1535
|
101
|
|
|
101
|
|
264
|
my ($self, $buffer) = @_; |
1536
|
|
|
|
|
|
|
|
1537
|
101
|
|
|
|
|
448
|
my @unkn_names = ('other', 'unknown organism', 'not specified', 'not shown', |
1538
|
|
|
|
|
|
|
'Unspecified', 'Unknown', 'None', 'unclassified', |
1539
|
|
|
|
|
|
|
'unidentified organism', 'not supplied'); |
1540
|
|
|
|
|
|
|
# dictionary of synonyms for taxid 32644 |
1541
|
101
|
|
|
|
|
287
|
my @unkn_genus = ('unknown', 'unclassified', 'uncultured', 'unidentified'); |
1542
|
|
|
|
|
|
|
# all above can be part of valid species name |
1543
|
|
|
|
|
|
|
|
1544
|
101
|
|
|
|
|
215
|
my $line = $$buffer; |
1545
|
|
|
|
|
|
|
|
1546
|
101
|
|
|
|
|
235
|
my( $sub_species, $species, $genus, $sci_name, $common, |
1547
|
|
|
|
|
|
|
$class_lines, $source_flag, $abbr_name, $organelle, $sl ); |
1548
|
101
|
|
|
|
|
236
|
my %source = map { $_ => 1 } qw(SOURCE ORGANISM CLASSIFICATION); |
|
303
|
|
|
|
|
797
|
|
1549
|
|
|
|
|
|
|
|
1550
|
|
|
|
|
|
|
# upon first entering the loop, we must not read a new line -- the SOURCE |
1551
|
|
|
|
|
|
|
# line is already in the buffer (HL 05/10/2000) |
1552
|
101
|
|
|
|
|
211
|
my ($ann, $tag, $data); |
1553
|
101
|
|
66
|
|
|
385
|
while (defined($line) or defined($line = $self->_readline)) { |
1554
|
|
|
|
|
|
|
# de-HTMLify (links that may be encountered here don't contain |
1555
|
|
|
|
|
|
|
# escaped '>', so a simple-minded approach suffices) |
1556
|
517
|
|
|
|
|
881
|
$line =~ s{<[^>]+>}{}g; |
1557
|
517
|
100
|
|
|
|
2055
|
if ($line =~ m{^(?:\s{0,2})(\w+)\s+(.+)?$}ox) { |
1558
|
303
|
|
50
|
|
|
1263
|
($tag, $data) = ($1, $2 || ''); |
1559
|
303
|
100
|
66
|
|
|
1224
|
last if ($tag and not exists $source{$tag}); |
1560
|
|
|
|
|
|
|
} |
1561
|
|
|
|
|
|
|
else { |
1562
|
214
|
50
|
|
|
|
510
|
return unless $tag; |
1563
|
214
|
|
|
|
|
816
|
($data = $line) =~ s{^\s+}{}; |
1564
|
214
|
|
|
|
|
392
|
chomp $data; |
1565
|
214
|
100
|
100
|
|
|
1245
|
$tag = 'CLASSIFICATION' if ( $tag ne 'CLASSIFICATION' |
|
|
|
100
|
|
|
|
|
1566
|
|
|
|
|
|
|
and $tag eq 'ORGANISM' |
1567
|
|
|
|
|
|
|
# Don't match "str." or "var." (fix NC_021815), |
1568
|
|
|
|
|
|
|
# and don't match ".1" (fix NC_021902) |
1569
|
|
|
|
|
|
|
and $line =~ m{(?
|
1570
|
|
|
|
|
|
|
} |
1571
|
416
|
100
|
|
|
|
1414
|
(exists $ann->{$tag}) ? ($ann->{$tag} .= ' '.$data) : ($ann->{$tag} .= $data); |
1572
|
416
|
|
|
|
|
2451
|
$line = undef; |
1573
|
|
|
|
|
|
|
} |
1574
|
|
|
|
|
|
|
|
1575
|
101
|
|
|
|
|
388
|
($sl, $class_lines, $sci_name) = ($ann->{SOURCE}, $ann->{CLASSIFICATION}, $ann->{ORGANISM}); |
1576
|
|
|
|
|
|
|
|
1577
|
101
|
|
|
|
|
211
|
$$buffer = $line; |
1578
|
|
|
|
|
|
|
|
1579
|
101
|
50
|
|
|
|
281
|
$sci_name or return; |
1580
|
|
|
|
|
|
|
|
1581
|
|
|
|
|
|
|
# parse out organelle, common name, abbreviated name if present; |
1582
|
|
|
|
|
|
|
# this should catch everything, but falls back to |
1583
|
|
|
|
|
|
|
# entire SOURCE line just in case |
1584
|
101
|
50
|
|
|
|
1126
|
if ($sl =~ m{^(mitochondrion|chloroplast|plastid)? |
1585
|
|
|
|
|
|
|
\s*(.*?) |
1586
|
|
|
|
|
|
|
\s*(?: \( (.*?) \) )?\.? |
1587
|
|
|
|
|
|
|
$ |
1588
|
|
|
|
|
|
|
}xms |
1589
|
|
|
|
|
|
|
) { |
1590
|
101
|
|
|
|
|
407
|
($organelle, $abbr_name, $common) = ($1, $2, $3); # optional |
1591
|
|
|
|
|
|
|
} |
1592
|
|
|
|
|
|
|
else { |
1593
|
0
|
|
|
|
|
0
|
$abbr_name = $sl; # nothing caught; this is a backup! |
1594
|
|
|
|
|
|
|
} |
1595
|
|
|
|
|
|
|
|
1596
|
|
|
|
|
|
|
# Convert data in classification lines into classification array. |
1597
|
|
|
|
|
|
|
# only split on ';' or '.' so that classification that is 2 or more words will |
1598
|
|
|
|
|
|
|
# still get matched, use map() to remove trailing/leading/intervening spaces |
1599
|
101
|
|
|
|
|
945
|
my @class = map { $_ =~ s/^\s+//; |
|
955
|
|
|
|
|
1816
|
|
1600
|
955
|
|
|
|
|
1398
|
$_ =~ s/\s+$//; |
1601
|
955
|
|
|
|
|
1134
|
$_ =~ s/\s{2,}/ /g; |
1602
|
955
|
|
|
|
|
1404
|
$_; } |
1603
|
|
|
|
|
|
|
split /(?
|
1604
|
|
|
|
|
|
|
|
1605
|
|
|
|
|
|
|
# do we have a genus? |
1606
|
101
|
50
|
|
|
|
575
|
my $possible_genus = quotemeta($class[-1]) |
1607
|
|
|
|
|
|
|
. ($class[-2] ? "|" . quotemeta($class[-2]) : ''); |
1608
|
101
|
100
|
|
|
|
4231
|
if ($sci_name =~ /^($possible_genus)/) { |
1609
|
88
|
|
|
|
|
272
|
$genus = $1; |
1610
|
88
|
|
|
|
|
1131
|
($species) = $sci_name =~ /^$genus\s+(.+)/; |
1611
|
|
|
|
|
|
|
} |
1612
|
|
|
|
|
|
|
else { |
1613
|
13
|
|
|
|
|
43
|
$species = $sci_name; |
1614
|
|
|
|
|
|
|
} |
1615
|
|
|
|
|
|
|
|
1616
|
|
|
|
|
|
|
# is this organism of rank species or is it lower? |
1617
|
|
|
|
|
|
|
# (we don't catch everything lower than species, but it doesn't matter - |
1618
|
|
|
|
|
|
|
# this is just so we abide by previous behaviour whilst not calling a |
1619
|
|
|
|
|
|
|
# species a subspecies) |
1620
|
101
|
100
|
66
|
|
|
1425
|
if ($species and $species =~ /(.+)\s+((?:subsp\.|var\.).+)/) { |
1621
|
5
|
|
|
|
|
19
|
($species, $sub_species) = ($1, $2); |
1622
|
|
|
|
|
|
|
} |
1623
|
|
|
|
|
|
|
|
1624
|
|
|
|
|
|
|
# Don't make a species object if it's empty or "Unknown" or "None" |
1625
|
|
|
|
|
|
|
# return unless $genus and $genus !~ /^(Unknown|None)$/oi; |
1626
|
|
|
|
|
|
|
# Don't make a species object if it belongs to taxid 32644 |
1627
|
|
|
|
|
|
|
# my $unkn = grep { $_ =~ /^\Q$sl\E$/; } @unkn_names; |
1628
|
101
|
|
|
|
|
253
|
my $unkn = grep { $_ eq $sl } @unkn_names; |
|
1010
|
|
|
|
|
1424
|
|
1629
|
101
|
50
|
33
|
|
|
564
|
return unless (defined $species or defined $genus) and $unkn == 0; |
|
|
|
33
|
|
|
|
|
1630
|
|
|
|
|
|
|
|
1631
|
|
|
|
|
|
|
# Bio::Species array needs array in Species -> Kingdom direction |
1632
|
101
|
|
|
|
|
242
|
push @class, $sci_name; |
1633
|
101
|
|
|
|
|
197
|
@class = reverse @class; |
1634
|
|
|
|
|
|
|
|
1635
|
101
|
|
|
|
|
775
|
my $make = Bio::Species->new; |
1636
|
101
|
|
|
|
|
426
|
$make->scientific_name($sci_name); |
1637
|
101
|
50
|
|
|
|
717
|
$make->classification(@class) if @class > 0; |
1638
|
101
|
100
|
|
|
|
469
|
$make->common_name( $common ) if $common; |
1639
|
101
|
50
|
|
|
|
583
|
$make->name('abbreviated', $abbr_name) if $abbr_name; |
1640
|
101
|
100
|
|
|
|
361
|
$make->organelle($organelle) if $organelle; |
1641
|
|
|
|
|
|
|
#$make->sub_species( $sub_species ) if $sub_species; |
1642
|
101
|
|
|
|
|
1311
|
return $make; |
1643
|
|
|
|
|
|
|
} |
1644
|
|
|
|
|
|
|
|
1645
|
|
|
|
|
|
|
=head2 _read_FTHelper_GenBank |
1646
|
|
|
|
|
|
|
|
1647
|
|
|
|
|
|
|
Title : _read_FTHelper_GenBank |
1648
|
|
|
|
|
|
|
Usage : _read_FTHelper_GenBank($buffer) |
1649
|
|
|
|
|
|
|
Function: reads the next FT key line |
1650
|
|
|
|
|
|
|
Example : |
1651
|
|
|
|
|
|
|
Returns : Bio::SeqIO::FTHelper object |
1652
|
|
|
|
|
|
|
Args : filehandle and reference to a scalar |
1653
|
|
|
|
|
|
|
|
1654
|
|
|
|
|
|
|
=cut |
1655
|
|
|
|
|
|
|
|
1656
|
|
|
|
|
|
|
sub _read_FTHelper_GenBank { |
1657
|
8280
|
|
|
8280
|
|
12021
|
my ($self, $buffer) = @_; |
1658
|
|
|
|
|
|
|
|
1659
|
8280
|
|
|
|
|
10144
|
my ($key, # The key of the feature |
1660
|
|
|
|
|
|
|
$loc # The location line from the feature |
1661
|
|
|
|
|
|
|
); |
1662
|
8280
|
|
|
|
|
9524
|
my @qual = (); # An array of lines making up the qualifiers |
1663
|
|
|
|
|
|
|
|
1664
|
8280
|
50
|
|
|
|
52960
|
if ($$buffer =~ /^\s{5}(\S+)\s+(.+?)\s*$/o) { |
1665
|
8280
|
|
|
|
|
19631
|
$key = $1; |
1666
|
8280
|
|
|
|
|
12436
|
$loc = $2; |
1667
|
|
|
|
|
|
|
# Read all the lines up to the next feature |
1668
|
8280
|
|
|
|
|
20540
|
while ( defined(my $line = $self->_readline) ) { |
1669
|
88592
|
100
|
|
|
|
396003
|
if ($line =~ /^(\s+)(.+?)\s*$/o) { |
1670
|
|
|
|
|
|
|
# Lines inside features are preceded by 21 spaces |
1671
|
|
|
|
|
|
|
# A new feature is preceded by 5 spaces |
1672
|
88489
|
100
|
|
|
|
145149
|
if (length($1) > 6) { |
1673
|
|
|
|
|
|
|
# Add to qualifiers if we're in the qualifiers, or if it's |
1674
|
|
|
|
|
|
|
# the first qualifier |
1675
|
80312
|
100
|
100
|
|
|
159431
|
if (@qual or (index($2,'/') == 0)) { |
1676
|
68121
|
|
|
|
|
164611
|
push @qual, $2; |
1677
|
|
|
|
|
|
|
} |
1678
|
|
|
|
|
|
|
# We're still in the location line, so append to location |
1679
|
|
|
|
|
|
|
else { |
1680
|
12191
|
|
|
|
|
32051
|
$loc .= $2; |
1681
|
|
|
|
|
|
|
} |
1682
|
|
|
|
|
|
|
} |
1683
|
|
|
|
|
|
|
else { |
1684
|
|
|
|
|
|
|
# We've reached the start of the next feature |
1685
|
|
|
|
|
|
|
# Put the first line of the next feature into the buffer |
1686
|
8177
|
|
|
|
|
10835
|
$$buffer = $line; |
1687
|
8177
|
|
|
|
|
12839
|
last; |
1688
|
|
|
|
|
|
|
} |
1689
|
|
|
|
|
|
|
} |
1690
|
|
|
|
|
|
|
else { |
1691
|
|
|
|
|
|
|
# We're at the end of the feature table |
1692
|
|
|
|
|
|
|
# Put the first line of the next feature into the buffer |
1693
|
103
|
|
|
|
|
189
|
$$buffer = $line; |
1694
|
103
|
|
|
|
|
181
|
last; |
1695
|
|
|
|
|
|
|
} |
1696
|
|
|
|
|
|
|
} |
1697
|
|
|
|
|
|
|
} |
1698
|
|
|
|
|
|
|
else { |
1699
|
|
|
|
|
|
|
# No feature key |
1700
|
0
|
|
|
|
|
0
|
$self->debug("no feature key!\n"); |
1701
|
|
|
|
|
|
|
# change suggested by JDiggans to avoid infinite loop- |
1702
|
|
|
|
|
|
|
# see bugreport 1062. |
1703
|
|
|
|
|
|
|
# reset buffer to prevent infinite loop |
1704
|
0
|
|
|
|
|
0
|
$$buffer = $self->_readline; |
1705
|
0
|
|
|
|
|
0
|
return; |
1706
|
|
|
|
|
|
|
} |
1707
|
|
|
|
|
|
|
|
1708
|
|
|
|
|
|
|
# Make the new FTHelper object |
1709
|
8280
|
|
|
|
|
21210
|
my $out = Bio::SeqIO::FTHelper->new; |
1710
|
8280
|
|
|
|
|
17808
|
$out->verbose($self->verbose); |
1711
|
8280
|
|
|
|
|
17124
|
$out->key($key); |
1712
|
8280
|
|
|
|
|
16320
|
$out->loc($loc); |
1713
|
|
|
|
|
|
|
|
1714
|
|
|
|
|
|
|
# Now parse and add any qualifiers. (@qual is kept |
1715
|
|
|
|
|
|
|
# intact to provide informative error messages.) |
1716
|
|
|
|
|
|
|
QUAL: |
1717
|
8280
|
|
|
|
|
16130
|
for (my $i = 0; $i < @qual; $i++) { |
1718
|
55807
|
|
|
|
|
67013
|
my $data = $qual[$i]; |
1719
|
55807
|
50
|
|
|
|
210304
|
my ( $qualifier, $value ) = ($data =~ m{^/([^=]+)(?:=\s*(.+))?}) |
1720
|
|
|
|
|
|
|
or $self->warn( "cannot see new qualifier in feature $key: " |
1721
|
|
|
|
|
|
|
. $data); |
1722
|
55807
|
50
|
|
|
|
87055
|
$qualifier = '' if not defined $qualifier; |
1723
|
|
|
|
|
|
|
|
1724
|
55807
|
100
|
|
|
|
64904
|
if (defined $value) { |
1725
|
|
|
|
|
|
|
# Do we have a quoted value? |
1726
|
54903
|
100
|
|
|
|
81539
|
if (substr($value, 0, 1) eq '"') { |
|
|
100
|
|
|
|
|
|
1727
|
|
|
|
|
|
|
# Keep adding to value until we find the trailing quote |
1728
|
|
|
|
|
|
|
# and the quotes are balanced |
1729
|
52083
|
|
66
|
|
|
166172
|
while ($value !~ /\"$/ or $value =~ tr/"/"/ % 2) { |
1730
|
12312
|
50
|
|
|
|
18491
|
if ($i >= $#qual) { |
1731
|
0
|
|
|
|
|
0
|
$self->warn( "Unbalanced quote in:\n" |
1732
|
|
|
|
|
|
|
. join("\n", @qual) |
1733
|
|
|
|
|
|
|
. "No further qualifiers will " |
1734
|
|
|
|
|
|
|
. "be added for this feature"); |
1735
|
0
|
|
|
|
|
0
|
last QUAL; |
1736
|
|
|
|
|
|
|
} |
1737
|
|
|
|
|
|
|
# modifying a for-loop variable inside of the loop |
1738
|
|
|
|
|
|
|
# is not the best programming style ... |
1739
|
12312
|
|
|
|
|
11478
|
$i++; |
1740
|
12312
|
|
|
|
|
13454
|
my $next = $qual[$i]; |
1741
|
|
|
|
|
|
|
|
1742
|
|
|
|
|
|
|
# add to value with a space unless the value appears |
1743
|
|
|
|
|
|
|
# to be a sequence (translation for example) |
1744
|
|
|
|
|
|
|
# if (($value.$next) =~ /[^A-Za-z\"\-]/o) { |
1745
|
|
|
|
|
|
|
# changed to explicitly look for translation tag - cjf 06/8/29 |
1746
|
12312
|
100
|
|
|
|
18027
|
if ($qualifier !~ /^translation$/i ) { |
1747
|
9870
|
|
|
|
|
14569
|
$value .= " "; |
1748
|
|
|
|
|
|
|
} |
1749
|
12312
|
|
|
|
|
41785
|
$value .= $next; |
1750
|
|
|
|
|
|
|
} |
1751
|
|
|
|
|
|
|
# Trim leading and trailing quotes |
1752
|
52083
|
|
|
|
|
257882
|
$value =~ s/^"|"$//g; |
1753
|
|
|
|
|
|
|
# Undouble internal quotes |
1754
|
52083
|
|
|
|
|
76821
|
$value =~ s/""/\"/g; |
1755
|
|
|
|
|
|
|
} |
1756
|
|
|
|
|
|
|
elsif ( $value =~ /^\(/ ) { # values quoted by ()s |
1757
|
|
|
|
|
|
|
# Keep adding to value until we find the trailing bracket |
1758
|
|
|
|
|
|
|
# and the ()s are balanced |
1759
|
27
|
|
|
|
|
42
|
my $left = ($value =~ tr/\(/\(/); # count left parens |
1760
|
27
|
|
|
|
|
32
|
my $right = ($value =~ tr/\)/\)/); # count right parens |
1761
|
|
|
|
|
|
|
|
1762
|
27
|
|
|
|
|
83
|
while( $left != $right ) { # was "$value !~ /\)$/ or $left != $right" |
1763
|
2
|
50
|
|
|
|
8
|
if ( $i >= $#qual) { |
1764
|
0
|
|
|
|
|
0
|
$self->warn( "Unbalanced parens in:\n" |
1765
|
|
|
|
|
|
|
. join("\n", @qual) |
1766
|
|
|
|
|
|
|
. "\nNo further qualifiers will " |
1767
|
|
|
|
|
|
|
. "be added for this feature"); |
1768
|
0
|
|
|
|
|
0
|
last QUAL; |
1769
|
|
|
|
|
|
|
} |
1770
|
2
|
|
|
|
|
5
|
$i++; |
1771
|
2
|
|
|
|
|
5
|
my $next = $qual[$i]; |
1772
|
2
|
|
|
|
|
5
|
$value .= $next; |
1773
|
2
|
|
|
|
|
4
|
$left += ($next =~ tr/\(/\(/); |
1774
|
2
|
|
|
|
|
5
|
$right += ($next =~ tr/\)/\)/); |
1775
|
|
|
|
|
|
|
} |
1776
|
|
|
|
|
|
|
} |
1777
|
|
|
|
|
|
|
} |
1778
|
|
|
|
|
|
|
else { |
1779
|
904
|
|
|
|
|
1159
|
$value = '_no_value'; |
1780
|
|
|
|
|
|
|
} |
1781
|
|
|
|
|
|
|
# Store the qualifier |
1782
|
55807
|
|
100
|
|
|
86996
|
$out->field->{$qualifier} ||= []; |
1783
|
55807
|
|
|
|
|
53394
|
push @{$out->field->{$qualifier}}, $value; |
|
55807
|
|
|
|
|
69021
|
|
1784
|
|
|
|
|
|
|
} |
1785
|
8280
|
|
|
|
|
18704
|
return $out; |
1786
|
|
|
|
|
|
|
} |
1787
|
|
|
|
|
|
|
|
1788
|
|
|
|
|
|
|
=head2 _write_line_GenBank |
1789
|
|
|
|
|
|
|
|
1790
|
|
|
|
|
|
|
Title : _write_line_GenBank |
1791
|
|
|
|
|
|
|
Usage : |
1792
|
|
|
|
|
|
|
Function: internal function |
1793
|
|
|
|
|
|
|
Example : |
1794
|
|
|
|
|
|
|
Returns : |
1795
|
|
|
|
|
|
|
Args : |
1796
|
|
|
|
|
|
|
|
1797
|
|
|
|
|
|
|
=cut |
1798
|
|
|
|
|
|
|
|
1799
|
|
|
|
|
|
|
sub _write_line_GenBank { |
1800
|
0
|
|
|
0
|
|
0
|
my ($self, $pre1, $pre2, $line, $length) = @_; |
1801
|
|
|
|
|
|
|
|
1802
|
0
|
0
|
|
|
|
0
|
$length or $self->throw("Miscalled write_line_GenBank without length. Programming error!"); |
1803
|
0
|
|
|
|
|
0
|
my $subl = $length - length $pre2; |
1804
|
0
|
|
|
|
|
0
|
my $linel = length $line; |
1805
|
0
|
|
|
|
|
0
|
my $i; |
1806
|
|
|
|
|
|
|
|
1807
|
0
|
|
|
|
|
0
|
my $subr = substr($line,0,$length - length $pre1); |
1808
|
|
|
|
|
|
|
|
1809
|
0
|
|
|
|
|
0
|
$self->_print("$pre1$subr\n"); |
1810
|
0
|
|
|
|
|
0
|
for($i = ($length - length $pre1); $i < $linel; $i += $subl) { |
1811
|
0
|
|
|
|
|
0
|
$subr = substr($line, $i, $subl); |
1812
|
0
|
|
|
|
|
0
|
$self->_print("$pre2$subr\n"); |
1813
|
|
|
|
|
|
|
} |
1814
|
|
|
|
|
|
|
} |
1815
|
|
|
|
|
|
|
|
1816
|
|
|
|
|
|
|
=head2 _write_line_GenBank_regex |
1817
|
|
|
|
|
|
|
|
1818
|
|
|
|
|
|
|
Title : _write_line_GenBank_regex |
1819
|
|
|
|
|
|
|
Usage : |
1820
|
|
|
|
|
|
|
Function: internal function for writing lines of specified |
1821
|
|
|
|
|
|
|
length, with different first and the next line |
1822
|
|
|
|
|
|
|
left hand headers and split at specific points in the |
1823
|
|
|
|
|
|
|
text |
1824
|
|
|
|
|
|
|
Example : |
1825
|
|
|
|
|
|
|
Returns : nothing |
1826
|
|
|
|
|
|
|
Args : file handle, |
1827
|
|
|
|
|
|
|
first header, |
1828
|
|
|
|
|
|
|
second header, |
1829
|
|
|
|
|
|
|
text-line, |
1830
|
|
|
|
|
|
|
regex for line breaks, |
1831
|
|
|
|
|
|
|
total line length |
1832
|
|
|
|
|
|
|
|
1833
|
|
|
|
|
|
|
=cut |
1834
|
|
|
|
|
|
|
|
1835
|
|
|
|
|
|
|
sub _write_line_GenBank_regex { |
1836
|
1765
|
|
|
1765
|
|
3018
|
my ($self, $pre1, $pre2, $line, $regex, $length) = @_; |
1837
|
|
|
|
|
|
|
|
1838
|
|
|
|
|
|
|
#print STDOUT "Going to print with $line!\n"; |
1839
|
|
|
|
|
|
|
|
1840
|
1765
|
50
|
|
|
|
2734
|
$length or $self->throw("Miscalled write_line_GenBank without length. Programming error!"); |
1841
|
|
|
|
|
|
|
|
1842
|
1765
|
|
|
|
|
2147
|
my $subl = $length - (length $pre1) - 2; |
1843
|
1765
|
|
|
|
|
1929
|
my @lines = (); |
1844
|
|
|
|
|
|
|
|
1845
|
|
|
|
|
|
|
CHUNK: |
1846
|
1765
|
|
|
|
|
2402
|
while ($line) { |
1847
|
2129
|
|
|
|
|
3230
|
foreach my $pat ($regex, '[,;\.\/-]\s|'.$regex, '[,;\.\/-]|'.$regex) { |
1848
|
2129
|
50
|
|
|
|
23520
|
if ($line =~ m/^(.{0,$subl})($pat)(.*)/ ) { |
1849
|
2129
|
|
|
|
|
5665
|
my $l = $1 . $2; |
1850
|
2129
|
|
|
|
|
3154
|
$line = substr($line, length $l); |
1851
|
|
|
|
|
|
|
# be strict about not padding spaces according to |
1852
|
|
|
|
|
|
|
# genbank format |
1853
|
2129
|
|
|
|
|
4141
|
$l =~ s/\s+$//; |
1854
|
2129
|
50
|
|
|
|
3214
|
next CHUNK if ($l eq ''); |
1855
|
2129
|
|
|
|
|
2571
|
push @lines, $l; |
1856
|
2129
|
|
|
|
|
4942
|
next CHUNK; |
1857
|
|
|
|
|
|
|
} |
1858
|
|
|
|
|
|
|
} |
1859
|
|
|
|
|
|
|
# if we get here none of the patterns matched $subl or less chars |
1860
|
0
|
|
|
|
|
0
|
$self->warn( "trouble dissecting \"$line\"\n into chunks " |
1861
|
|
|
|
|
|
|
. "of $subl chars or less - this tag won't print right"); |
1862
|
|
|
|
|
|
|
# insert a space char to prevent infinite loops |
1863
|
0
|
|
|
|
|
0
|
$line = substr($line, 0, $subl) . " " . substr($line, $subl); |
1864
|
|
|
|
|
|
|
} |
1865
|
1765
|
|
|
|
|
2184
|
my $s = shift @lines; |
1866
|
1765
|
100
|
|
|
|
6341
|
$self->_print("$pre1$s\n") if $s; |
1867
|
1765
|
|
|
|
|
4788
|
foreach my $s ( @lines ) { |
1868
|
369
|
|
|
|
|
727
|
$self->_print("$pre2$s\n"); |
1869
|
|
|
|
|
|
|
} |
1870
|
|
|
|
|
|
|
} |
1871
|
|
|
|
|
|
|
|
1872
|
|
|
|
|
|
|
=head2 _post_sort |
1873
|
|
|
|
|
|
|
|
1874
|
|
|
|
|
|
|
Title : _post_sort |
1875
|
|
|
|
|
|
|
Usage : $obj->_post_sort($newval) |
1876
|
|
|
|
|
|
|
Function: |
1877
|
|
|
|
|
|
|
Returns : value of _post_sort |
1878
|
|
|
|
|
|
|
Args : newvalue (optional) |
1879
|
|
|
|
|
|
|
|
1880
|
|
|
|
|
|
|
=cut |
1881
|
|
|
|
|
|
|
|
1882
|
|
|
|
|
|
|
sub _post_sort { |
1883
|
25
|
|
|
25
|
|
57
|
my ($obj,$value) = @_; |
1884
|
25
|
50
|
|
|
|
110
|
if ( defined $value) { |
1885
|
0
|
|
|
|
|
0
|
$obj->{'_post_sort'} = $value; |
1886
|
|
|
|
|
|
|
} |
1887
|
25
|
|
|
|
|
94
|
return $obj->{'_post_sort'}; |
1888
|
|
|
|
|
|
|
} |
1889
|
|
|
|
|
|
|
|
1890
|
|
|
|
|
|
|
=head2 _show_dna |
1891
|
|
|
|
|
|
|
|
1892
|
|
|
|
|
|
|
Title : _show_dna |
1893
|
|
|
|
|
|
|
Usage : $obj->_show_dna($newval) |
1894
|
|
|
|
|
|
|
Function: |
1895
|
|
|
|
|
|
|
Returns : value of _show_dna |
1896
|
|
|
|
|
|
|
Args : newvalue (optional) |
1897
|
|
|
|
|
|
|
|
1898
|
|
|
|
|
|
|
=cut |
1899
|
|
|
|
|
|
|
|
1900
|
|
|
|
|
|
|
sub _show_dna { |
1901
|
141
|
|
|
141
|
|
312
|
my ($obj,$value) = @_; |
1902
|
141
|
100
|
|
|
|
362
|
if ( defined $value) { |
1903
|
116
|
|
|
|
|
293
|
$obj->{'_show_dna'} = $value; |
1904
|
|
|
|
|
|
|
} |
1905
|
141
|
|
|
|
|
278
|
return $obj->{'_show_dna'}; |
1906
|
|
|
|
|
|
|
} |
1907
|
|
|
|
|
|
|
|
1908
|
|
|
|
|
|
|
=head2 _id_generation_func |
1909
|
|
|
|
|
|
|
|
1910
|
|
|
|
|
|
|
Title : _id_generation_func |
1911
|
|
|
|
|
|
|
Usage : $obj->_id_generation_func($newval) |
1912
|
|
|
|
|
|
|
Function: |
1913
|
|
|
|
|
|
|
Returns : value of _id_generation_func |
1914
|
|
|
|
|
|
|
Args : newvalue (optional) |
1915
|
|
|
|
|
|
|
|
1916
|
|
|
|
|
|
|
=cut |
1917
|
|
|
|
|
|
|
|
1918
|
|
|
|
|
|
|
sub _id_generation_func { |
1919
|
25
|
|
|
25
|
|
58
|
my ($obj,$value) = @_; |
1920
|
25
|
50
|
|
|
|
78
|
if ( defined $value ) { |
1921
|
0
|
|
|
|
|
0
|
$obj->{'_id_generation_func'} = $value; |
1922
|
|
|
|
|
|
|
} |
1923
|
25
|
|
|
|
|
109
|
return $obj->{'_id_generation_func'}; |
1924
|
|
|
|
|
|
|
} |
1925
|
|
|
|
|
|
|
|
1926
|
|
|
|
|
|
|
=head2 _ac_generation_func |
1927
|
|
|
|
|
|
|
|
1928
|
|
|
|
|
|
|
Title : _ac_generation_func |
1929
|
|
|
|
|
|
|
Usage : $obj->_ac_generation_func($newval) |
1930
|
|
|
|
|
|
|
Function: |
1931
|
|
|
|
|
|
|
Returns : value of _ac_generation_func |
1932
|
|
|
|
|
|
|
Args : newvalue (optional) |
1933
|
|
|
|
|
|
|
|
1934
|
|
|
|
|
|
|
=cut |
1935
|
|
|
|
|
|
|
|
1936
|
|
|
|
|
|
|
sub _ac_generation_func { |
1937
|
25
|
|
|
25
|
|
70
|
my ($obj,$value) = @_; |
1938
|
25
|
50
|
|
|
|
83
|
if ( defined $value ) { |
1939
|
0
|
|
|
|
|
0
|
$obj->{'_ac_generation_func'} = $value; |
1940
|
|
|
|
|
|
|
} |
1941
|
25
|
|
|
|
|
81
|
return $obj->{'_ac_generation_func'}; |
1942
|
|
|
|
|
|
|
} |
1943
|
|
|
|
|
|
|
|
1944
|
|
|
|
|
|
|
=head2 _sv_generation_func |
1945
|
|
|
|
|
|
|
|
1946
|
|
|
|
|
|
|
Title : _sv_generation_func |
1947
|
|
|
|
|
|
|
Usage : $obj->_sv_generation_func($newval) |
1948
|
|
|
|
|
|
|
Function: |
1949
|
|
|
|
|
|
|
Returns : value of _sv_generation_func |
1950
|
|
|
|
|
|
|
Args : newvalue (optional) |
1951
|
|
|
|
|
|
|
|
1952
|
|
|
|
|
|
|
=cut |
1953
|
|
|
|
|
|
|
|
1954
|
|
|
|
|
|
|
sub _sv_generation_func { |
1955
|
25
|
|
|
25
|
|
66
|
my ($obj,$value) = @_; |
1956
|
25
|
50
|
|
|
|
82
|
if ( defined $value ) { |
1957
|
0
|
|
|
|
|
0
|
$obj->{'_sv_generation_func'} = $value; |
1958
|
|
|
|
|
|
|
} |
1959
|
25
|
|
|
|
|
216
|
return $obj->{'_sv_generation_func'}; |
1960
|
|
|
|
|
|
|
} |
1961
|
|
|
|
|
|
|
|
1962
|
|
|
|
|
|
|
=head2 _kw_generation_func |
1963
|
|
|
|
|
|
|
|
1964
|
|
|
|
|
|
|
Title : _kw_generation_func |
1965
|
|
|
|
|
|
|
Usage : $obj->_kw_generation_func($newval) |
1966
|
|
|
|
|
|
|
Function: |
1967
|
|
|
|
|
|
|
Returns : value of _kw_generation_func |
1968
|
|
|
|
|
|
|
Args : newvalue (optional) |
1969
|
|
|
|
|
|
|
|
1970
|
|
|
|
|
|
|
=cut |
1971
|
|
|
|
|
|
|
|
1972
|
|
|
|
|
|
|
sub _kw_generation_func { |
1973
|
25
|
|
|
25
|
|
59
|
my ($obj,$value) = @_; |
1974
|
25
|
50
|
|
|
|
72
|
if ( defined $value ) { |
1975
|
0
|
|
|
|
|
0
|
$obj->{'_kw_generation_func'} = $value; |
1976
|
|
|
|
|
|
|
} |
1977
|
25
|
|
|
|
|
250
|
return $obj->{'_kw_generation_func'}; |
1978
|
|
|
|
|
|
|
} |
1979
|
|
|
|
|
|
|
|
1980
|
|
|
|
|
|
|
1; |