| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# BioPerl module: Bio::SeqIO::agave |
|
2
|
|
|
|
|
|
|
# |
|
3
|
|
|
|
|
|
|
# AGAVE: Architecture for Genomic Annotation, Visualization and Exchange. |
|
4
|
|
|
|
|
|
|
# |
|
5
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
|
6
|
|
|
|
|
|
|
# |
|
7
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
|
8
|
|
|
|
|
|
|
# |
|
9
|
|
|
|
|
|
|
# The original version of the module can be found here: |
|
10
|
|
|
|
|
|
|
# http://www.agavexml.org/ |
|
11
|
|
|
|
|
|
|
# |
|
12
|
|
|
|
|
|
|
# ### TODO: live link for this anymore? |
|
13
|
|
|
|
|
|
|
# The DTD for AGAVE XML was once located here (dead link): |
|
14
|
|
|
|
|
|
|
# http://www.lifecde.com/products/agave/schema/v2_3/agave.dtd |
|
15
|
|
|
|
|
|
|
# |
|
16
|
|
|
|
|
|
|
# |
|
17
|
|
|
|
|
|
|
=head1 NAME |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
Bio::SeqIO::agave - AGAVE sequence output stream. |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
It is probably best not to use this object directly, but |
|
24
|
|
|
|
|
|
|
rather go through the SeqIO handler system. Go: |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
$in = Bio::SeqIO->new('-file' => "$file_in", |
|
27
|
|
|
|
|
|
|
'-format' => 'EMBL'); |
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
$out = Bio::SeqIO->new('-file' => ">$file_out", |
|
30
|
|
|
|
|
|
|
'-format' => 'AGAVE'); |
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
while (my $seq = $in->next_seq){ |
|
33
|
|
|
|
|
|
|
$out->write_seq($seq); |
|
34
|
|
|
|
|
|
|
} |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
This object can transform Bio::Seq objects to agave xml file and |
|
39
|
|
|
|
|
|
|
vice-versa. I (Simon) coded up this module because I needed a parser |
|
40
|
|
|
|
|
|
|
to extract data from AGAVE xml to be utitlized by the GenQuire genome |
|
41
|
|
|
|
|
|
|
annotation system (See http://www.bioinformatics.org/Genquire). |
|
42
|
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
***NOTE*** At the moment, not all of the tags are implemented. In |
|
44
|
|
|
|
|
|
|
general, I followed the output format for the XEMBL project |
|
45
|
|
|
|
|
|
|
http://www.ebi.ac.uk/xembl/ |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
=cut |
|
48
|
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
=head1 FEEDBACK |
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
=head2 Mailing Lists |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
|
54
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to one |
|
55
|
|
|
|
|
|
|
of the Bioperl mailing lists. Your participation is much appreciated. |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
|
58
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
|
59
|
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
=head2 Support |
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
I |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
|
67
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
|
68
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
|
69
|
|
|
|
|
|
|
with code and data examples if at all possible. |
|
70
|
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=head2 Reporting Bugs |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
|
74
|
|
|
|
|
|
|
the bugs and their resolution. |
|
75
|
|
|
|
|
|
|
Bug reports can be submitted via the web: |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=head1 AUTHOR - Simon K. Chan |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
Email: |
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
=head1 APPENDIX |
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
The rest of the documentation details each of the object |
|
86
|
|
|
|
|
|
|
methods. Internal methods are usually preceded with a _ |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
=cut |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
# =================== |
|
91
|
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
# Let the code begin... |
|
94
|
|
|
|
|
|
|
package Bio::SeqIO::agave; |
|
95
|
1
|
|
|
1
|
|
646
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
30
|
|
|
96
|
|
|
|
|
|
|
|
|
97
|
1
|
|
|
1
|
|
245
|
use IO::File; |
|
|
1
|
|
|
|
|
678
|
|
|
|
1
|
|
|
|
|
89
|
|
|
98
|
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
|
|
100
|
1
|
|
|
1
|
|
328
|
use Bio::SeqFeature::Generic; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
26
|
|
|
101
|
1
|
|
|
1
|
|
6
|
use Bio::Seq; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
15
|
|
|
102
|
1
|
|
|
1
|
|
4
|
use Bio::PrimarySeq; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
16
|
|
|
103
|
1
|
|
|
1
|
|
4
|
use Bio::Seq::SeqFactory; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
15
|
|
|
104
|
1
|
|
|
1
|
|
224
|
use Bio::Annotation::Reference; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
26
|
|
|
105
|
1
|
|
|
1
|
|
239
|
use Bio::Species; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
25
|
|
|
106
|
|
|
|
|
|
|
|
|
107
|
1
|
|
|
1
|
|
6
|
use XML::Writer; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
18
|
|
|
108
|
|
|
|
|
|
|
|
|
109
|
1
|
|
|
1
|
|
4
|
use Data::Dumper; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
45
|
|
|
110
|
|
|
|
|
|
|
|
|
111
|
1
|
|
|
1
|
|
5
|
use base qw(Bio::SeqIO); |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
297
|
|
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
# ================================================================================== |
|
114
|
|
|
|
|
|
|
sub _initialize { |
|
115
|
|
|
|
|
|
|
|
|
116
|
0
|
|
|
0
|
|
|
my ($self,@args) = @_; |
|
117
|
0
|
|
|
|
|
|
$self->SUPER::_initialize(@args); # Run the constructor of the parent class. |
|
118
|
|
|
|
|
|
|
|
|
119
|
0
|
|
|
|
|
|
my %tmp = @args ; |
|
120
|
0
|
|
|
|
|
|
$self->{'file'} = $tmp{'-file'}; |
|
121
|
|
|
|
|
|
|
|
|
122
|
0
|
0
|
|
|
|
|
if ($self->{'file'} !~ /^>/) { |
|
123
|
0
|
|
|
|
|
|
$self->_process; |
|
124
|
|
|
|
|
|
|
# Parse the thing, but only if it is the input file (ie not |
|
125
|
|
|
|
|
|
|
# outputing agave file, but reading it). |
|
126
|
0
|
|
|
|
|
|
$self->{'parsed'} = 1; |
|
127
|
|
|
|
|
|
|
# Set the flag to let the code know that the agave xml file |
|
128
|
|
|
|
|
|
|
# has been parsed. |
|
129
|
|
|
|
|
|
|
} |
|
130
|
0
|
|
|
|
|
|
$self->{'seqs_stored'} = 0; |
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
} |
|
133
|
|
|
|
|
|
|
# ================================================================================== |
|
134
|
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
=head2 _process |
|
136
|
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
Title : _process |
|
138
|
|
|
|
|
|
|
Usage : $self->_process |
|
139
|
|
|
|
|
|
|
Function : Parses the agave xml file. |
|
140
|
|
|
|
|
|
|
Args : None. |
|
141
|
|
|
|
|
|
|
Returns : Nothing. |
|
142
|
|
|
|
|
|
|
Note : Method(s) that call(s) this method : _initialize |
|
143
|
|
|
|
|
|
|
Method(s) that this method calls : _process_sciobj |
|
144
|
|
|
|
|
|
|
FIRST/START sub. |
|
145
|
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
=cut |
|
147
|
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
sub _process { |
|
149
|
0
|
|
|
0
|
|
|
my ($self) = @_; |
|
150
|
|
|
|
|
|
|
|
|
151
|
0
|
|
|
|
|
|
while (1) { |
|
152
|
|
|
|
|
|
|
|
|
153
|
0
|
|
|
|
|
|
my $line = $self->_readline; |
|
154
|
0
|
0
|
|
|
|
|
next unless $line; |
|
155
|
0
|
0
|
|
|
|
|
next if $line =~ /^\s*$/; |
|
156
|
|
|
|
|
|
|
|
|
157
|
0
|
0
|
|
|
|
|
if ($line =~ /<\?xml version/o) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
# do nothing |
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
} elsif ($line =~ /\/) { |
|
162
|
|
|
|
|
|
|
|
|
163
|
0
|
0
|
0
|
|
|
|
$self->throw("Error: This xml file is not in AGAVE format! DOCTYPE: $1 , SYSTEM: $2\n\n") |
|
164
|
|
|
|
|
|
|
if $1 ne 'sciobj' || $2 ne 'sciobj.dtd'; |
|
165
|
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
} elsif ($line =~ //) { |
|
167
|
|
|
|
|
|
|
|
|
168
|
0
|
|
|
|
|
|
push @{$self->{'sciobj'}}, $self->_process_sciobj($1); |
|
|
0
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
} elsif ($line =~ /<\/sciobj>/) { |
|
171
|
|
|
|
|
|
|
|
|
172
|
0
|
|
|
|
|
|
last; # It is finished. |
|
173
|
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
} else { |
|
175
|
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
# throw an error message. The above conditions should |
|
177
|
|
|
|
|
|
|
# take care all of the possible options...? |
|
178
|
|
|
|
|
|
|
# $self->throw("Error: Do not recognize this AGAVE xml |
|
179
|
|
|
|
|
|
|
# line: $line\n\n"); |
|
180
|
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
} |
|
182
|
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
} # close while loop |
|
185
|
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
|
|
187
|
0
|
|
|
|
|
|
return; |
|
188
|
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
} |
|
190
|
|
|
|
|
|
|
# ================================================================================== |
|
191
|
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
=head2 _process_sciobj |
|
193
|
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
Title : _process_sciobj |
|
195
|
|
|
|
|
|
|
Usage : $self->_process_sciobj |
|
196
|
|
|
|
|
|
|
Function : Parses the data between the tags. |
|
197
|
|
|
|
|
|
|
Args : The string that holds the attributes for . |
|
198
|
|
|
|
|
|
|
Returns : Data structure holding the values parsed between |
|
199
|
|
|
|
|
|
|
the tags. |
|
200
|
|
|
|
|
|
|
Note : Method(s) that call(s) this method : _process |
|
201
|
|
|
|
|
|
|
Method(s) that this method calls : |
|
202
|
|
|
|
|
|
|
_helper_store_attribute_list , _process_contig |
|
203
|
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
=cut |
|
205
|
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
sub _process_sciobj { |
|
207
|
|
|
|
|
|
|
|
|
208
|
0
|
|
|
0
|
|
|
my ($self, $attribute_line) = @_; |
|
209
|
0
|
|
|
|
|
|
my $sciobj; |
|
210
|
0
|
|
|
|
|
|
$self->_helper_store_attribute_list($attribute_line, \$sciobj); |
|
211
|
|
|
|
|
|
|
|
|
212
|
0
|
|
|
|
|
|
my $line = $self->_readline; |
|
213
|
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
# Zero or more |
|
215
|
0
|
|
|
|
|
|
while ($line =~ //) { |
|
216
|
0
|
|
|
|
|
|
my $contig = $self->_process_contig(\$line, $1); |
|
217
|
0
|
|
|
|
|
|
push @{$sciobj->{'contig'}}, $contig; |
|
|
0
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
# print "line in _process_sciobj: $line\n"; |
|
219
|
|
|
|
|
|
|
# $line changes value within the subs called in this sub (_process_contig). |
|
220
|
|
|
|
|
|
|
} |
|
221
|
|
|
|
|
|
|
|
|
222
|
0
|
|
|
|
|
|
return $sciobj; |
|
223
|
|
|
|
|
|
|
} |
|
224
|
|
|
|
|
|
|
# ================================================================================== |
|
225
|
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
=head2 _process_contig |
|
227
|
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
Title : _process_contig |
|
229
|
|
|
|
|
|
|
Usage : $self->_process_contig |
|
230
|
|
|
|
|
|
|
Function : Parses the data between the tags. |
|
231
|
|
|
|
|
|
|
Args : 2 scalars: |
|
232
|
|
|
|
|
|
|
- reference to a scalar holding the line to be parsed. |
|
233
|
|
|
|
|
|
|
- scalar holding the attributes for the tag |
|
234
|
|
|
|
|
|
|
to be parsed. |
|
235
|
|
|
|
|
|
|
Returns : Data structure holding the values parsed between |
|
236
|
|
|
|
|
|
|
the tags. |
|
237
|
|
|
|
|
|
|
Note : Method(s) that call(s) this method : _process_sciobj |
|
238
|
|
|
|
|
|
|
Method(s) that this method calls : |
|
239
|
|
|
|
|
|
|
_helper_store_attribute_list, _one_tag , _process_fragment_order |
|
240
|
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
=cut |
|
242
|
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
sub _process_contig { |
|
244
|
|
|
|
|
|
|
|
|
245
|
0
|
|
|
0
|
|
|
my ($self, $line, $attribute_line) = @_; |
|
246
|
|
|
|
|
|
|
|
|
247
|
0
|
|
|
|
|
|
my $contig; |
|
248
|
0
|
|
|
|
|
|
$self->_helper_store_attribute_list($attribute_line, \$contig); |
|
249
|
0
|
|
|
|
|
|
$$line = $self->_readline; |
|
250
|
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
# One : |
|
252
|
0
|
|
|
|
|
|
$self->_one_tag($line, \$contig, 'db_id'); |
|
253
|
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
# Zero or more |
|
256
|
0
|
|
|
|
|
|
$self->_process_fragment_order($line, \$contig); |
|
257
|
|
|
|
|
|
|
|
|
258
|
0
|
|
|
|
|
|
return $contig; |
|
259
|
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
} |
|
261
|
|
|
|
|
|
|
# ================================================================================== |
|
262
|
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
=head2 _process_fragment_order |
|
264
|
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
Title : _process_fragment_order |
|
266
|
|
|
|
|
|
|
Usage : $self->_process_fragment_order |
|
267
|
|
|
|
|
|
|
Function : Parses the data between the tags. |
|
268
|
|
|
|
|
|
|
Args : 2 scalars: |
|
269
|
|
|
|
|
|
|
- reference to a scalar holding the value of the line to be parsed. |
|
270
|
|
|
|
|
|
|
- reference to a data structure to store the data. |
|
271
|
|
|
|
|
|
|
Returns : Nothing. |
|
272
|
|
|
|
|
|
|
Note : Method(s) that call(s) this method : _process_contig |
|
273
|
|
|
|
|
|
|
Method(s) that this method calls : |
|
274
|
|
|
|
|
|
|
_helper_store_attribute_list , _process_fragment_orientation |
|
275
|
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
=cut |
|
277
|
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
sub _process_fragment_order { |
|
279
|
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
|
|
281
|
0
|
|
|
0
|
|
|
my ($self, $line, $data_structure) = @_; |
|
282
|
|
|
|
|
|
|
# Because I'm passing a reference to a data structure, I don't need to return it |
|
283
|
|
|
|
|
|
|
# after values have been added. |
|
284
|
|
|
|
|
|
|
|
|
285
|
0
|
|
|
|
|
|
while ($$line =~ //) { |
|
286
|
|
|
|
|
|
|
|
|
287
|
0
|
|
|
|
|
|
my $fragment_order; |
|
288
|
0
|
|
|
|
|
|
$self->_helper_store_attribute_list($1, \$fragment_order); |
|
289
|
|
|
|
|
|
|
# Store the attribute(s) for into the |
|
290
|
|
|
|
|
|
|
# $fragment_order data structure. |
|
291
|
0
|
|
|
|
|
|
$$line = $self->_readline; |
|
292
|
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
# One or more |
|
294
|
0
|
|
|
|
|
|
$self->_process_fragment_orientation($line, \$fragment_order); |
|
295
|
|
|
|
|
|
|
# Don't forget: $line is a reference to a scalar. |
|
296
|
|
|
|
|
|
|
|
|
297
|
0
|
|
|
|
|
|
push @{$$data_structure->{'fragment_order'}}, $fragment_order; |
|
|
0
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
# Store the data between |
|
299
|
|
|
|
|
|
|
# in $$data_structure. |
|
300
|
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
} |
|
302
|
|
|
|
|
|
|
|
|
303
|
0
|
|
|
|
|
|
return; |
|
304
|
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
} |
|
306
|
|
|
|
|
|
|
# ================================================================================== |
|
307
|
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
=head2 _process_fragment_orientation |
|
309
|
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
Title : _process_fragment_orientation |
|
311
|
|
|
|
|
|
|
Usage : $self->_process_fragment_orientation |
|
312
|
|
|
|
|
|
|
Function : Parses the data between the and |
|
313
|
|
|
|
|
|
|
tags. |
|
314
|
|
|
|
|
|
|
Args : 2 scalars: |
|
315
|
|
|
|
|
|
|
- reference to a scalar holding the value of the line to be parsed. |
|
316
|
|
|
|
|
|
|
- reference to a data structure to store the data. |
|
317
|
|
|
|
|
|
|
Returns : Nothing. |
|
318
|
|
|
|
|
|
|
Note : Method(s) that call(s) this method : _process_fragment_order |
|
319
|
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
Method(s) that this method calls : _helper_store_attribute_list , |
|
321
|
|
|
|
|
|
|
_process_bio_sequence |
|
322
|
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
=cut |
|
324
|
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
sub _process_fragment_orientation { |
|
326
|
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
|
|
328
|
0
|
|
|
0
|
|
|
my ($self, $line, $data_structure) = @_; |
|
329
|
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
# counter to determine the number of iterations within this while loop. |
|
331
|
0
|
|
|
|
|
|
my $count = 0; |
|
332
|
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
# One or more |
|
334
|
0
|
|
|
|
|
|
while ($$line =~ //) { |
|
335
|
|
|
|
|
|
|
|
|
336
|
0
|
|
|
|
|
|
my $fragment_orientation; |
|
337
|
0
|
|
|
|
|
|
$self->_helper_store_attribute_list($1, \$fragment_orientation); |
|
338
|
0
|
|
|
|
|
|
$$line = $self->_readline; |
|
339
|
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
# One |
|
341
|
0
|
|
|
|
|
|
$$line =~ //; |
|
342
|
|
|
|
|
|
|
# Process the data between |
|
343
|
0
|
|
|
|
|
|
my $bio_sequence = $self->_process_bio_sequence($line, $1); |
|
344
|
0
|
|
|
|
|
|
$fragment_orientation->{'bio_sequence'} = $bio_sequence; |
|
345
|
|
|
|
|
|
|
|
|
346
|
0
|
|
|
|
|
|
push @{$$data_structure->{'fragment_orientation'}}, $fragment_orientation; |
|
|
0
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
|
|
348
|
0
|
|
|
|
|
|
++$count; |
|
349
|
|
|
|
|
|
|
} |
|
350
|
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
|
|
352
|
0
|
0
|
|
|
|
|
$self->throw("Error: Missing tag. Got this: $$line\n\n") |
|
353
|
|
|
|
|
|
|
if $count == 0; |
|
354
|
|
|
|
|
|
|
|
|
355
|
0
|
|
|
|
|
|
return; |
|
356
|
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
} |
|
358
|
|
|
|
|
|
|
# ================================================================================== |
|
359
|
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
=head2 _process_bio_sequence |
|
361
|
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
Title : _process_bio_sequence |
|
363
|
|
|
|
|
|
|
Usage : $self->_process_bio_sequence |
|
364
|
|
|
|
|
|
|
Function : Parses the data between the tags. |
|
365
|
|
|
|
|
|
|
Args : 2 scalars: |
|
366
|
|
|
|
|
|
|
- reference to a scalar holding the value of the line to be parsed. |
|
367
|
|
|
|
|
|
|
- scalar holding the value of the attributes for |
|
368
|
|
|
|
|
|
|
Returns : data structure holding the values between |
|
369
|
|
|
|
|
|
|
Note : Method(s) that call(s) this method : _process_fragment_orientation |
|
370
|
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
Method(s) that this method calls : _helper_store_attribute_list , |
|
372
|
|
|
|
|
|
|
_one_tag , _question_mark_tag , _star_tag , _process_alt_ids , |
|
373
|
|
|
|
|
|
|
_process_xrefs , _process_sequence_map |
|
374
|
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
=cut |
|
376
|
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
sub _process_bio_sequence { |
|
378
|
|
|
|
|
|
|
|
|
379
|
0
|
|
|
0
|
|
|
my ($self, $line, $attribute_line) = @_; |
|
380
|
|
|
|
|
|
|
|
|
381
|
0
|
|
|
|
|
|
my $bio_sequence; |
|
382
|
|
|
|
|
|
|
|
|
383
|
0
|
|
|
|
|
|
$self->_helper_store_attribute_list($attribute_line, \$bio_sequence); |
|
384
|
0
|
|
|
|
|
|
$$line = $self->_readline; |
|
385
|
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
# One . |
|
388
|
0
|
|
|
|
|
|
$self->_one_tag($line, \$bio_sequence, 'db_id'); |
|
389
|
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
# Zero or one . |
|
392
|
0
|
|
|
|
|
|
$self->_question_mark_tag($line, \$bio_sequence, 'note'); |
|
393
|
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
# Zero or more |
|
396
|
0
|
|
|
|
|
|
$self->_question_mark_tag($line, \$bio_sequence, 'description'); |
|
397
|
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
# Zero or more |
|
400
|
0
|
|
|
|
|
|
$self->_star_tag($line, \$bio_sequence, 'keyword'); |
|
401
|
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
# Zero or one |
|
404
|
0
|
|
|
|
|
|
$self->_question_mark_tag($line, \$bio_sequence, 'sequence'); |
|
405
|
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
# Zero or one |
|
408
|
|
|
|
|
|
|
# NOT IMPLEMENTED!!!! |
|
409
|
|
|
|
|
|
|
#if ($line =~ //){ # NOT DONE YET! |
|
410
|
|
|
|
|
|
|
# my $alt_ids; |
|
411
|
|
|
|
|
|
|
# $bio_sequence->{'alt_ids'} = $self->_process_alt_ids(\$alt_ids); |
|
412
|
|
|
|
|
|
|
#} |
|
413
|
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
# Zero or one |
|
416
|
0
|
0
|
|
|
|
|
if ($$line =~ //) { |
|
417
|
0
|
|
|
|
|
|
my $xrefs = $self->_process_xrefs($line, \$bio_sequence); |
|
418
|
0
|
|
0
|
|
|
|
$bio_sequence->{'xrefs'} = $xrefs || 'null'; |
|
419
|
|
|
|
|
|
|
} |
|
420
|
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
# Zero or more |
|
423
|
0
|
0
|
|
|
|
|
if ($$line =~ //) { |
|
424
|
0
|
|
|
|
|
|
my $sequence_map = $self->_process_sequence_map($line); |
|
425
|
0
|
|
|
|
|
|
push @{$bio_sequence->{'sequence_map'}}, $sequence_map; |
|
|
0
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
} |
|
427
|
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
# print Data::Dumper->Dump([$bio_sequence]); exit; |
|
429
|
|
|
|
|
|
|
|
|
430
|
0
|
|
|
|
|
|
return $bio_sequence; |
|
431
|
|
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
} |
|
433
|
|
|
|
|
|
|
# ================================================================================== |
|
434
|
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
=head2 _process_xrefs |
|
436
|
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
Title : _process_xrefs |
|
438
|
|
|
|
|
|
|
Usage : $self->_process_xrefs |
|
439
|
|
|
|
|
|
|
Function : Parse the data between the tags. |
|
440
|
|
|
|
|
|
|
Args : reference to a scalar holding the value of the line to be parsed. |
|
441
|
|
|
|
|
|
|
Return : Nothing. |
|
442
|
|
|
|
|
|
|
Note : Method(s) that call(s) this method: _process_bio_sequence |
|
443
|
|
|
|
|
|
|
Method(s) that this method calls: _one_tag , _process_xref |
|
444
|
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
=cut |
|
446
|
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
sub _process_xrefs { |
|
448
|
|
|
|
|
|
|
|
|
449
|
0
|
|
|
0
|
|
|
my ($self, $line) = @_; |
|
450
|
|
|
|
|
|
|
|
|
451
|
0
|
|
|
|
|
|
my $xrefs; |
|
452
|
|
|
|
|
|
|
|
|
453
|
0
|
|
|
|
|
|
$$line = $self->_readline; |
|
454
|
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
# One or more or within . Check if |
|
456
|
|
|
|
|
|
|
# to see if there's at least one. |
|
457
|
0
|
0
|
|
|
|
|
if ($$line =~ //) { |
|
458
|
|
|
|
|
|
|
|
|
459
|
0
|
|
|
|
|
|
while ($$line =~ /<(db_id|xref)\s?(.*?)\s?>/) { |
|
460
|
|
|
|
|
|
|
|
|
461
|
0
|
0
|
|
|
|
|
if ($1 eq "db_id") { |
|
|
|
0
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
|
|
463
|
0
|
|
|
|
|
|
my $db_id; |
|
464
|
0
|
|
|
|
|
|
$self->_one_tag($line, \$db_id, 'db_id'); |
|
465
|
0
|
|
|
|
|
|
push @{$xrefs->{'db_id'}}, $db_id; |
|
|
0
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
} elsif ($1 eq "xref") { |
|
468
|
|
|
|
|
|
|
|
|
469
|
0
|
|
|
|
|
|
my $xref; |
|
470
|
0
|
|
|
|
|
|
$self->_process_xref($line, \$xref); |
|
471
|
0
|
|
|
|
|
|
push @{$xrefs->{'xref'}}, $xref; |
|
|
0
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
} else { |
|
474
|
|
|
|
|
|
|
|
|
475
|
0
|
|
|
|
|
|
$self->throw("Error: Tag type should be one of db_id or xref! Got this: $$line\n\n"); |
|
476
|
|
|
|
|
|
|
} |
|
477
|
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
} # close while loop |
|
480
|
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
|
|
482
|
0
|
0
|
|
|
|
|
if ($$line =~ /<\/xrefs>/) { |
|
483
|
0
|
|
|
|
|
|
$$line = $self->_readline; # get the next line to be _processed by the next sub. |
|
484
|
0
|
|
|
|
|
|
return $xrefs; |
|
485
|
|
|
|
|
|
|
} else { |
|
486
|
0
|
|
|
|
|
|
$self->throw("Error: Missing tag. Got this: $$line\n\n"); |
|
487
|
|
|
|
|
|
|
} |
|
488
|
|
|
|
|
|
|
|
|
489
|
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
} else { |
|
492
|
|
|
|
|
|
|
|
|
493
|
0
|
|
|
|
|
|
$self->throw("Error: Missing or tag. Got this: $$line\n\n"); |
|
494
|
|
|
|
|
|
|
} |
|
495
|
|
|
|
|
|
|
|
|
496
|
0
|
|
|
|
|
|
return; |
|
497
|
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
} |
|
499
|
|
|
|
|
|
|
# ================================================================================== |
|
500
|
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
=head2 _process_xref |
|
502
|
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
Title : _process_xref |
|
504
|
|
|
|
|
|
|
Usage : $self->_process_xref |
|
505
|
|
|
|
|
|
|
Function : Parses the data between the tags. |
|
506
|
|
|
|
|
|
|
Args : 2 scalars: |
|
507
|
|
|
|
|
|
|
- reference to a scalar holding the value of the line to be parsed. |
|
508
|
|
|
|
|
|
|
- reference to a data structure to store the data. |
|
509
|
|
|
|
|
|
|
Returns : Nothing. |
|
510
|
|
|
|
|
|
|
Note : Method(s) that call(s) this method : _process_xrefs (note the 's' in 'xrefs') |
|
511
|
|
|
|
|
|
|
Method(s) that this method calls : _helper_store_attribute_list , _star_tag |
|
512
|
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
=cut |
|
514
|
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
sub _process_xref { |
|
516
|
|
|
|
|
|
|
|
|
517
|
0
|
|
|
0
|
|
|
my ($self, $line, $xref) = @_; |
|
518
|
|
|
|
|
|
|
|
|
519
|
0
|
|
|
|
|
|
$$line = $self->_readline; |
|
520
|
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
# One |
|
522
|
0
|
0
|
|
|
|
|
if ($$line =~ //) { |
|
523
|
0
|
|
|
|
|
|
$self->_helper_store_attribute_list($1, $xref); |
|
524
|
|
|
|
|
|
|
} else { |
|
525
|
0
|
|
|
|
|
|
$self->throw("Error: Missing tag. Got this: $$line\n\n"); |
|
526
|
|
|
|
|
|
|
} |
|
527
|
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
# Zero or more |
|
530
|
0
|
|
|
|
|
|
$self->_star_tag($line, $xref, 'xref_propery'); |
|
531
|
|
|
|
|
|
|
|
|
532
|
0
|
|
|
|
|
|
return; |
|
533
|
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
} |
|
535
|
|
|
|
|
|
|
# ================================================================================== |
|
536
|
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
=head2 _process_sequence_map |
|
538
|
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
Title : _process_sequence_map |
|
540
|
|
|
|
|
|
|
Usage : $self->_process_sequence_map |
|
541
|
|
|
|
|
|
|
Function : Parses the data between the tags. |
|
542
|
|
|
|
|
|
|
Args : Reference to scalar holding the line to be parsed. |
|
543
|
|
|
|
|
|
|
Returns : Data structure that holds the values that were parsed. |
|
544
|
|
|
|
|
|
|
Note : Method(s) that call(s) this method : _process_bio_sequence |
|
545
|
|
|
|
|
|
|
Method(s) that this method calls : _helper_store_attribute_list , |
|
546
|
|
|
|
|
|
|
_question_mark_tag , _process_annotations |
|
547
|
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
=cut |
|
549
|
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
sub _process_sequence_map { |
|
551
|
|
|
|
|
|
|
|
|
552
|
0
|
|
|
0
|
|
|
my ($self, $line) = @_; |
|
553
|
|
|
|
|
|
|
|
|
554
|
0
|
|
|
|
|
|
my $sequence_map; |
|
555
|
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
# Zero or more |
|
557
|
0
|
|
|
|
|
|
while ($$line =~ //) { |
|
558
|
|
|
|
|
|
|
|
|
559
|
0
|
0
|
|
|
|
|
$self->_helper_store_attribute_list($1, \$sequence_map) if defined $1; |
|
560
|
0
|
|
|
|
|
|
$$line = $self->_readline; |
|
561
|
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
# Zero or one |
|
563
|
0
|
|
|
|
|
|
$self->_question_mark_tag($line, \$sequence_map, 'note'); |
|
564
|
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
# NOT IMPLEMENTED!!! |
|
566
|
|
|
|
|
|
|
#if ($$line =~ //){ |
|
567
|
|
|
|
|
|
|
# # $self->_process_computations(); |
|
568
|
|
|
|
|
|
|
#} |
|
569
|
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
# Zero or one |
|
572
|
0
|
0
|
|
|
|
|
if ($$line =~ //) { |
|
573
|
0
|
|
|
|
|
|
my $annotations = $self->_process_annotations($line); |
|
574
|
0
|
|
|
|
|
|
$sequence_map->{'annotations'} = $annotations; |
|
575
|
|
|
|
|
|
|
} |
|
576
|
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
|
|
578
|
|
|
|
|
|
|
} # closes the while loop |
|
579
|
|
|
|
|
|
|
|
|
580
|
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
# Match closing tag: |
|
582
|
0
|
0
|
|
|
|
|
if ($$line =~ /<\/sequence_map>/) { |
|
583
|
0
|
|
|
|
|
|
return $sequence_map; |
|
584
|
|
|
|
|
|
|
} else { |
|
585
|
0
|
|
|
|
|
|
$self->throw("Error: Missing tag. Got this: $$line\n\n"); |
|
586
|
|
|
|
|
|
|
} |
|
587
|
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
} |
|
590
|
|
|
|
|
|
|
# ================================================================================== |
|
591
|
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
=head2 _process_annotations |
|
593
|
|
|
|
|
|
|
|
|
594
|
|
|
|
|
|
|
Title : _process_annotations |
|
595
|
|
|
|
|
|
|
Usage : $self->_process_annotations |
|
596
|
|
|
|
|
|
|
Function : Parse the data between the tags. |
|
597
|
|
|
|
|
|
|
Args : Reference to scalar holding the line to be parsed. |
|
598
|
|
|
|
|
|
|
Returns : Data structure that holds the values that were parsed. |
|
599
|
|
|
|
|
|
|
Note : Method(s) that call(s) this method : _process_sequence_map |
|
600
|
|
|
|
|
|
|
Method(s) that this method calls : _process_seq_feature |
|
601
|
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
=cut |
|
603
|
|
|
|
|
|
|
|
|
604
|
|
|
|
|
|
|
sub _process_annotations { |
|
605
|
|
|
|
|
|
|
|
|
606
|
0
|
|
|
0
|
|
|
my ($self, $line) = @_; |
|
607
|
|
|
|
|
|
|
# ( seq_feature | gene | comp_result )+ |
|
608
|
|
|
|
|
|
|
|
|
609
|
0
|
|
|
|
|
|
my $annotations; |
|
610
|
|
|
|
|
|
|
|
|
611
|
0
|
|
|
|
|
|
$$line = $self->_readline; |
|
612
|
|
|
|
|
|
|
|
|
613
|
0
|
|
|
|
|
|
my $count = 0; # counter to keep track of number of iterations in the loop. |
|
614
|
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
# One or more of these: |
|
616
|
0
|
|
|
|
|
|
while ($$line =~ /<(seq_feature|gene|comp_result)\s?(.*?)\s?>/) { |
|
617
|
|
|
|
|
|
|
|
|
618
|
0
|
0
|
|
|
|
|
if ($$line =~ //) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
619
|
|
|
|
|
|
|
|
|
620
|
0
|
|
|
|
|
|
my $seq_feature = $self->_process_seq_feature($line, $1); |
|
621
|
0
|
|
|
|
|
|
push @{$annotations->{'seq_feature'}}, $seq_feature; |
|
|
0
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
} elsif ($$line =~ //) { |
|
624
|
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
# gene |
|
626
|
|
|
|
|
|
|
|
|
627
|
|
|
|
|
|
|
} elsif ($$line =~ //) { |
|
628
|
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
# comp_result |
|
630
|
|
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
} |
|
632
|
|
|
|
|
|
|
|
|
633
|
0
|
|
|
|
|
|
++$count; |
|
634
|
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
} # closes the while loop. |
|
636
|
|
|
|
|
|
|
|
|
637
|
0
|
0
|
|
|
|
|
$self->throw("Error: Missing tag. Got: $$line\n\n") if $count == 0; |
|
638
|
|
|
|
|
|
|
|
|
639
|
|
|
|
|
|
|
# Match closing tag: |
|
640
|
0
|
0
|
|
|
|
|
if ($$line =~ /<\/annotations/) { |
|
641
|
|
|
|
|
|
|
|
|
642
|
0
|
|
|
|
|
|
$$line = $self->_readline; # get the next line to be _processed by the next sub. |
|
643
|
0
|
|
|
|
|
|
return $annotations; |
|
644
|
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
} else { |
|
646
|
0
|
|
|
|
|
|
$self->throw("Error: Missing tag. Got this: $$line\n\n"); |
|
647
|
|
|
|
|
|
|
} |
|
648
|
|
|
|
|
|
|
|
|
649
|
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
} |
|
651
|
|
|
|
|
|
|
# ================================================================================== |
|
652
|
|
|
|
|
|
|
|
|
653
|
|
|
|
|
|
|
=head2 _process_seq_feature |
|
654
|
|
|
|
|
|
|
|
|
655
|
|
|
|
|
|
|
Title : _process_seq_feature |
|
656
|
|
|
|
|
|
|
Usage : $self->_process_seq_feature |
|
657
|
|
|
|
|
|
|
Function : Parses the data between the tag. |
|
658
|
|
|
|
|
|
|
Args : 2 scalars: |
|
659
|
|
|
|
|
|
|
- Reference to scalar holding the line to be parsed. |
|
660
|
|
|
|
|
|
|
- Scalar holding the attributes for . |
|
661
|
|
|
|
|
|
|
Returns : Data structure holding the values parsed. |
|
662
|
|
|
|
|
|
|
Note : Method(s) that call(s) this method: _process_annotations |
|
663
|
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
Method(s) that this method calls: _helper_store_attribute_list , |
|
665
|
|
|
|
|
|
|
_process_classification , _question_mark_tag , _one_tag , |
|
666
|
|
|
|
|
|
|
_process_evidence , _process_qualifier , _process_seq_feature , |
|
667
|
|
|
|
|
|
|
_process_related_annot |
|
668
|
|
|
|
|
|
|
|
|
669
|
|
|
|
|
|
|
=cut |
|
670
|
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
sub _process_seq_feature { |
|
672
|
|
|
|
|
|
|
|
|
673
|
0
|
|
|
0
|
|
|
my ($self, $line, $attribute_line) = @_; |
|
674
|
|
|
|
|
|
|
|
|
675
|
0
|
|
|
|
|
|
my $seq_feature; |
|
676
|
0
|
|
|
|
|
|
$self->_helper_store_attribute_list($attribute_line, \$seq_feature); |
|
677
|
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
|
|
679
|
0
|
|
|
|
|
|
$$line = $self->_readline; |
|
680
|
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
# Zero or more |
|
683
|
0
|
|
|
|
|
|
$self->_process_classification($line, \$seq_feature); |
|
684
|
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
|
|
686
|
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
# Zero or one |
|
688
|
0
|
|
|
|
|
|
$self->_question_mark_tag($line, \$seq_feature, 'note'); |
|
689
|
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
|
|
691
|
|
|
|
|
|
|
|
|
692
|
|
|
|
|
|
|
# One |
|
693
|
0
|
|
|
|
|
|
$self->_one_tag($line, \$seq_feature, 'seq_location'); |
|
694
|
|
|
|
|
|
|
|
|
695
|
|
|
|
|
|
|
|
|
696
|
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
# Zero or one |
|
698
|
0
|
|
|
|
|
|
$self->_question_mark_tag($line, \$seq_feature, 'xrefs'); |
|
699
|
|
|
|
|
|
|
|
|
700
|
|
|
|
|
|
|
|
|
701
|
|
|
|
|
|
|
|
|
702
|
|
|
|
|
|
|
# Zero or one |
|
703
|
0
|
|
|
|
|
|
$self->_process_evidence($line, \$seq_feature); |
|
704
|
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
|
|
707
|
|
|
|
|
|
|
# Zero or more |
|
708
|
0
|
|
|
|
|
|
$self->_process_qualifier($line, \$seq_feature); |
|
709
|
|
|
|
|
|
|
|
|
710
|
|
|
|
|
|
|
|
|
711
|
|
|
|
|
|
|
|
|
712
|
|
|
|
|
|
|
# Zero or more . A tag within a tag? Oh, well. Whatever... |
|
713
|
0
|
|
|
|
|
|
while ($$line =~ //) { |
|
714
|
0
|
|
|
|
|
|
$self->_process_seq_feature($line, $1); |
|
715
|
0
|
|
|
|
|
|
$$line = $self->_readline; |
|
716
|
|
|
|
|
|
|
} |
|
717
|
|
|
|
|
|
|
|
|
718
|
|
|
|
|
|
|
|
|
719
|
|
|
|
|
|
|
# Zero or more |
|
720
|
0
|
|
|
|
|
|
while ($$line =~ //) { |
|
721
|
0
|
|
|
|
|
|
$self->_process_related_annot($line, $1); |
|
722
|
0
|
|
|
|
|
|
$$line = $self->_readline; |
|
723
|
|
|
|
|
|
|
} |
|
724
|
|
|
|
|
|
|
|
|
725
|
|
|
|
|
|
|
|
|
726
|
|
|
|
|
|
|
# Match the closing tag: |
|
727
|
0
|
0
|
|
|
|
|
if ($$line =~ /<\/seq_feature>/) { |
|
728
|
|
|
|
|
|
|
|
|
729
|
0
|
|
|
|
|
|
$$line = $self->_readline; # for the next sub... |
|
730
|
0
|
|
|
|
|
|
return $seq_feature; |
|
731
|
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
} else { |
|
733
|
|
|
|
|
|
|
|
|
734
|
0
|
|
|
|
|
|
$self->throw("Error. Missing tag. Got this: $$line\n"); |
|
735
|
|
|
|
|
|
|
|
|
736
|
|
|
|
|
|
|
} |
|
737
|
|
|
|
|
|
|
|
|
738
|
|
|
|
|
|
|
} |
|
739
|
|
|
|
|
|
|
# ================================================================================== |
|
740
|
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
=head2 _process_qualifier |
|
742
|
|
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
Title : _process_qualifier |
|
744
|
|
|
|
|
|
|
Usage : $self->_process_qualifier |
|
745
|
|
|
|
|
|
|
Function : Parse the data between the tags. |
|
746
|
|
|
|
|
|
|
Args : 2 scalars: |
|
747
|
|
|
|
|
|
|
- reference to a scalar holding the value of the line to be parsed. |
|
748
|
|
|
|
|
|
|
- reference to a data structure to store the data. |
|
749
|
|
|
|
|
|
|
Returns : Nothing. |
|
750
|
|
|
|
|
|
|
Note : Method(s) that call(s) this method : _process_seq_feature |
|
751
|
|
|
|
|
|
|
Method(s) that this method calls : _star_tag |
|
752
|
|
|
|
|
|
|
|
|
753
|
|
|
|
|
|
|
=cut |
|
754
|
|
|
|
|
|
|
|
|
755
|
|
|
|
|
|
|
sub _process_qualifier { |
|
756
|
|
|
|
|
|
|
|
|
757
|
0
|
|
|
0
|
|
|
my ($self, $line, $data_structure) = @_; |
|
758
|
|
|
|
|
|
|
|
|
759
|
0
|
|
|
|
|
|
my $qualifier; |
|
760
|
0
|
|
|
|
|
|
$self->_star_tag($line, \$qualifier, 'qualifier'); |
|
761
|
0
|
|
|
|
|
|
push @{$$data_structure->{'qualifier'}},$qualifier; |
|
|
0
|
|
|
|
|
|
|
|
762
|
|
|
|
|
|
|
|
|
763
|
|
|
|
|
|
|
|
|
764
|
0
|
|
|
|
|
|
return; |
|
765
|
|
|
|
|
|
|
# No need to return the data structure since its reference was what was modified. |
|
766
|
|
|
|
|
|
|
|
|
767
|
|
|
|
|
|
|
} |
|
768
|
|
|
|
|
|
|
# ================================================================================== |
|
769
|
|
|
|
|
|
|
|
|
770
|
|
|
|
|
|
|
=head2 _process_classification |
|
771
|
|
|
|
|
|
|
|
|
772
|
|
|
|
|
|
|
Title : _process_classification |
|
773
|
|
|
|
|
|
|
Usage : $self->_process_classification |
|
774
|
|
|
|
|
|
|
Function: Parse the data between the tags. |
|
775
|
|
|
|
|
|
|
Args : 2 scalars: |
|
776
|
|
|
|
|
|
|
- reference to a scalar holding the value of the line to be parsed. |
|
777
|
|
|
|
|
|
|
- reference to a data structure to store the data. |
|
778
|
|
|
|
|
|
|
Returns : Nothing. |
|
779
|
|
|
|
|
|
|
Note : Method(s) that call(s) this method: _process_seq_feature |
|
780
|
|
|
|
|
|
|
|
|
781
|
|
|
|
|
|
|
Method(s) that this method calls: _helper_store_attribute_list , |
|
782
|
|
|
|
|
|
|
_question_mark_tag , _star_tag, _process_evidence |
|
783
|
|
|
|
|
|
|
|
|
784
|
|
|
|
|
|
|
=cut |
|
785
|
|
|
|
|
|
|
|
|
786
|
|
|
|
|
|
|
sub _process_classification { # NOT IN USE. |
|
787
|
|
|
|
|
|
|
|
|
788
|
0
|
|
|
0
|
|
|
my ($self, $line, $data_structure) = @_; |
|
789
|
|
|
|
|
|
|
|
|
790
|
0
|
|
|
|
|
|
my $classification = $$data_structure->{'classification'}; |
|
791
|
|
|
|
|
|
|
|
|
792
|
0
|
|
|
|
|
|
while ($$line =~ //) { |
|
793
|
|
|
|
|
|
|
|
|
794
|
0
|
|
|
|
|
|
$self->_helper_store_attribute_list($1, \$classification); |
|
795
|
|
|
|
|
|
|
|
|
796
|
|
|
|
|
|
|
# Zero or one |
|
797
|
0
|
|
|
|
|
|
$self->_question_mark_tag($line, \$classification, 'description'); |
|
798
|
|
|
|
|
|
|
|
|
799
|
|
|
|
|
|
|
# Zero or more |
|
800
|
0
|
|
|
|
|
|
$self->_star_tag($line, \$classification, 'id_alias'); |
|
801
|
|
|
|
|
|
|
|
|
802
|
|
|
|
|
|
|
# Zero or one |
|
803
|
0
|
|
|
|
|
|
$self->_process_evidence($line, \$classification); |
|
804
|
|
|
|
|
|
|
} |
|
805
|
|
|
|
|
|
|
|
|
806
|
|
|
|
|
|
|
|
|
807
|
|
|
|
|
|
|
} |
|
808
|
|
|
|
|
|
|
# ================================================================================== |
|
809
|
|
|
|
|
|
|
|
|
810
|
|
|
|
|
|
|
sub _process_evidence { # NOT done. |
|
811
|
|
|
|
|
|
|
|
|
812
|
0
|
|
|
0
|
|
|
my ($self, $line, $data_structure) = @_; |
|
813
|
|
|
|
|
|
|
|
|
814
|
0
|
0
|
|
|
|
|
if ($$line =~ //) { |
|
815
|
|
|
|
|
|
|
|
|
816
|
0
|
|
|
|
|
|
$$line = $self->_readline; |
|
817
|
|
|
|
|
|
|
|
|
818
|
|
|
|
|
|
|
# One or more OR One or more |
|
819
|
0
|
|
|
|
|
|
while ($$line =~ /<(element_id|comp_result)\s?(.*?)\s?>/) { |
|
820
|
0
|
0
|
|
|
|
|
if ($$line =~ //) { |
|
|
|
0
|
|
|
|
|
|
|
821
|
0
|
|
|
|
|
|
my $element_id; |
|
822
|
0
|
|
|
|
|
|
$self->_plus_tag($line, \$element_id, 'element_id'); |
|
823
|
0
|
|
|
|
|
|
push @{$$data_structure->{'element_id'}}, $element_id; |
|
|
0
|
|
|
|
|
|
|
|
824
|
|
|
|
|
|
|
} elsif ($$line =~ //) { |
|
825
|
0
|
|
|
|
|
|
my $comp_result; |
|
826
|
0
|
|
|
|
|
|
$self->_process_comp_result($line, \$comp_result, $1); |
|
827
|
0
|
|
|
|
|
|
push @{$$data_structure->{'comp_result'}}, $comp_result; |
|
|
0
|
|
|
|
|
|
|
|
828
|
|
|
|
|
|
|
} |
|
829
|
0
|
|
|
|
|
|
$$line = $self->_readline; |
|
830
|
|
|
|
|
|
|
} |
|
831
|
|
|
|
|
|
|
|
|
832
|
|
|
|
|
|
|
} |
|
833
|
|
|
|
|
|
|
|
|
834
|
|
|
|
|
|
|
|
|
835
|
|
|
|
|
|
|
} |
|
836
|
|
|
|
|
|
|
# ================================================================================== |
|
837
|
|
|
|
|
|
|
|
|
838
|
|
|
|
|
|
|
sub _process_comp_result { # NOT IN USE. |
|
839
|
|
|
|
|
|
|
|
|
840
|
|
|
|
|
|
|
|
|
841
|
0
|
|
|
0
|
|
|
my ($self, $line, $comp_result, $attribute_line) = @_; |
|
842
|
|
|
|
|
|
|
|
|
843
|
0
|
|
|
|
|
|
$self->_helper_store_attribute_list($attribute_line, $comp_result); |
|
844
|
0
|
|
|
|
|
|
$$line = $self->_readline; |
|
845
|
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
# Zero or one |
|
847
|
0
|
|
|
|
|
|
$self->_question_mark_tag($line, $comp_result, 'note'); |
|
848
|
|
|
|
|
|
|
|
|
849
|
|
|
|
|
|
|
# Zero or one |
|
850
|
0
|
|
|
|
|
|
$self->_question_mark_tag($line, $comp_result, 'match_desc'); |
|
851
|
|
|
|
|
|
|
|
|
852
|
|
|
|
|
|
|
# Zero or one |
|
853
|
0
|
|
|
|
|
|
$self->_question_mark_tag($line, $comp_result, 'match_align'); |
|
854
|
|
|
|
|
|
|
|
|
855
|
|
|
|
|
|
|
# Zero or one |
|
856
|
0
|
|
|
|
|
|
$self->_process_query_region($line, $comp_result); |
|
857
|
|
|
|
|
|
|
|
|
858
|
|
|
|
|
|
|
# Zero or one |
|
859
|
0
|
|
|
|
|
|
$self->_process_match_region($line, $comp_result); |
|
860
|
|
|
|
|
|
|
|
|
861
|
|
|
|
|
|
|
# Zero or more |
|
862
|
0
|
|
|
|
|
|
$self->_star_tag($line, $comp_result, 'result_property'); |
|
863
|
|
|
|
|
|
|
|
|
864
|
|
|
|
|
|
|
# Zero or more |
|
865
|
0
|
|
|
|
|
|
$self->_process_result_group($line, $comp_result); |
|
866
|
|
|
|
|
|
|
|
|
867
|
|
|
|
|
|
|
# Zero or more |
|
868
|
0
|
|
|
|
|
|
$self->_process_related_annot($line, $comp_result); |
|
869
|
|
|
|
|
|
|
|
|
870
|
|
|
|
|
|
|
} |
|
871
|
|
|
|
|
|
|
# ================================================================================== |
|
872
|
|
|
|
|
|
|
|
|
873
|
|
|
|
|
|
|
sub _process_related_annot { # NOT IN USE. |
|
874
|
|
|
|
|
|
|
|
|
875
|
0
|
|
|
0
|
|
|
my ($self, $line, $data_structure) = @_; |
|
876
|
|
|
|
|
|
|
|
|
877
|
0
|
|
|
|
|
|
while ($$line =~ //) { |
|
878
|
|
|
|
|
|
|
|
|
879
|
0
|
|
|
|
|
|
my $related_annot; |
|
880
|
|
|
|
|
|
|
# Zero or one |
|
881
|
0
|
|
|
|
|
|
$self->_helper_store_attribute_list($1, \$related_annot); |
|
882
|
0
|
|
|
|
|
|
$$line = $self->_readline; |
|
883
|
|
|
|
|
|
|
|
|
884
|
|
|
|
|
|
|
# One or more |
|
885
|
0
|
|
|
|
|
|
my $element_id_count = 0; |
|
886
|
0
|
|
|
|
|
|
while ($$line =~ //) { |
|
887
|
0
|
|
|
|
|
|
my $element_id; |
|
888
|
0
|
|
|
|
|
|
$self->_helper_store_attribute_list($1, \$element_id); |
|
889
|
0
|
|
|
|
|
|
push @{$related_annot->{'element_id'}}, $element_id; |
|
|
0
|
|
|
|
|
|
|
|
890
|
0
|
|
|
|
|
|
$$line = $self->_readline; |
|
891
|
0
|
|
|
|
|
|
++$element_id_count; |
|
892
|
|
|
|
|
|
|
} |
|
893
|
|
|
|
|
|
|
|
|
894
|
0
|
0
|
|
|
|
|
if ($element_id_count == 0) { |
|
895
|
0
|
|
|
|
|
|
$self->throw("Error. Missing tag. Got: $$line"); |
|
896
|
|
|
|
|
|
|
} |
|
897
|
|
|
|
|
|
|
|
|
898
|
|
|
|
|
|
|
# Zero or more |
|
899
|
0
|
|
|
|
|
|
$self->_star_tag($line, \$related_annot, 'sci_property'); |
|
900
|
|
|
|
|
|
|
# while ($$line =~ //){ |
|
901
|
|
|
|
|
|
|
# |
|
902
|
|
|
|
|
|
|
# } |
|
903
|
|
|
|
|
|
|
|
|
904
|
0
|
|
|
|
|
|
push @{$data_structure->{'related_annot'}}, $related_annot; |
|
|
0
|
|
|
|
|
|
|
|
905
|
|
|
|
|
|
|
|
|
906
|
0
|
0
|
|
|
|
|
unless ($$line =~ /<\/related_annot>/){ |
|
907
|
0
|
|
|
|
|
|
$self->throw("Error. Missing . Got: $$line\n"); |
|
908
|
|
|
|
|
|
|
} |
|
909
|
|
|
|
|
|
|
|
|
910
|
|
|
|
|
|
|
} |
|
911
|
|
|
|
|
|
|
|
|
912
|
|
|
|
|
|
|
|
|
913
|
|
|
|
|
|
|
} |
|
914
|
|
|
|
|
|
|
# ================================================================================== |
|
915
|
|
|
|
|
|
|
|
|
916
|
|
|
|
|
|
|
sub _process_result_group { # NOT IN USE. |
|
917
|
|
|
|
|
|
|
|
|
918
|
0
|
|
|
0
|
|
|
my ($self, $line, $data_structure) = @_; |
|
919
|
|
|
|
|
|
|
|
|
920
|
0
|
|
|
|
|
|
while ($$line =~ //) { |
|
921
|
0
|
|
|
|
|
|
my $result_group = $$data_structure->{'result_group'}; |
|
922
|
0
|
|
|
|
|
|
$self->_helper_store_attribute_list($1, \$result_group); |
|
923
|
|
|
|
|
|
|
|
|
924
|
0
|
|
|
|
|
|
my $count = 0; |
|
925
|
0
|
|
|
|
|
|
$$line = $self->_readline; |
|
926
|
0
|
|
|
|
|
|
while ($$line =~ //) { |
|
927
|
|
|
|
|
|
|
# one or more |
|
928
|
0
|
|
|
|
|
|
$self->_process_comp_result(\$line, \$result_group, $1); |
|
929
|
0
|
|
|
|
|
|
$$line = $self->_readline; |
|
930
|
0
|
|
|
|
|
|
++$count; |
|
931
|
|
|
|
|
|
|
} |
|
932
|
|
|
|
|
|
|
|
|
933
|
0
|
0
|
|
|
|
|
$self->throw("Error. No tag! Got this: $$line") |
|
934
|
|
|
|
|
|
|
if $count == 0; |
|
935
|
|
|
|
|
|
|
|
|
936
|
|
|
|
|
|
|
# in the last iteration in the inner while loop, $line will |
|
937
|
|
|
|
|
|
|
# have a value of the closing tag of 'result_group' |
|
938
|
0
|
0
|
|
|
|
|
if ($line =~ /<\/result_group>/) { |
|
939
|
0
|
|
|
|
|
|
$$line = $self->_readline; |
|
940
|
|
|
|
|
|
|
} else { |
|
941
|
0
|
|
|
|
|
|
$self->throw("Error. No ! Got this: $$line"); |
|
942
|
|
|
|
|
|
|
} |
|
943
|
|
|
|
|
|
|
|
|
944
|
|
|
|
|
|
|
|
|
945
|
|
|
|
|
|
|
} |
|
946
|
|
|
|
|
|
|
|
|
947
|
|
|
|
|
|
|
|
|
948
|
|
|
|
|
|
|
} |
|
949
|
|
|
|
|
|
|
# ================================================================================== |
|
950
|
|
|
|
|
|
|
|
|
951
|
|
|
|
|
|
|
sub _process_match_region { # NOT IN USE. |
|
952
|
|
|
|
|
|
|
|
|
953
|
0
|
|
|
0
|
|
|
my ($self, $line, $data_structure) = @_; |
|
954
|
|
|
|
|
|
|
|
|
955
|
0
|
|
|
|
|
|
my $match_region = $data_structure->{'match_region'}; |
|
956
|
|
|
|
|
|
|
|
|
957
|
0
|
0
|
|
|
|
|
if ($$line =~ /(.*?)>/) { |
|
958
|
|
|
|
|
|
|
|
|
959
|
0
|
|
|
|
|
|
$self->_helper_store_attribute_line($1, \$match_region); |
|
960
|
0
|
|
|
|
|
|
$$line = $self->_readline; |
|
961
|
|
|
|
|
|
|
|
|
962
|
|
|
|
|
|
|
# Zero or one db_id | element_id | bio_sequence |
|
963
|
0
|
0
|
|
|
|
|
if ($$line =~ /(.*?)<\/db_id>/) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
964
|
0
|
|
|
|
|
|
$self->_question_mark_tag($line, \$match_region, 'db_id'); |
|
965
|
|
|
|
|
|
|
} elsif ($$line =~ //) { # empty... |
|
966
|
0
|
|
|
|
|
|
$self->_question_mark_tag($line, \$match_region, 'element_id'); |
|
967
|
|
|
|
|
|
|
} elsif ($$line =~ //) { |
|
968
|
0
|
|
|
|
|
|
$match_region->{'bio_sequence'} = $self->_process_bio_sequence($line, $1); |
|
969
|
|
|
|
|
|
|
} |
|
970
|
|
|
|
|
|
|
|
|
971
|
0
|
|
|
|
|
|
$$line = $self->_readline; |
|
972
|
0
|
0
|
|
|
|
|
if ($$line =~ /<\/match_region>/o) { |
|
973
|
0
|
|
|
|
|
|
$$line = $self->_readline; # get the next line to be _processed by the next sub |
|
974
|
0
|
|
|
|
|
|
return; |
|
975
|
|
|
|
|
|
|
} else { |
|
976
|
0
|
|
|
|
|
|
$self->throw("No closing tag ! Got this: $$line\n"); |
|
977
|
|
|
|
|
|
|
} |
|
978
|
|
|
|
|
|
|
|
|
979
|
|
|
|
|
|
|
} |
|
980
|
|
|
|
|
|
|
} |
|
981
|
|
|
|
|
|
|
# ================================================================================== |
|
982
|
|
|
|
|
|
|
|
|
983
|
|
|
|
|
|
|
sub _process_query_region { # NOT IN USE. |
|
984
|
|
|
|
|
|
|
|
|
985
|
0
|
|
|
0
|
|
|
my ($self, $line, $data_structure) = @_; |
|
986
|
|
|
|
|
|
|
|
|
987
|
0
|
|
|
|
|
|
my $query_region = $data_structure->{'query_region'}; |
|
988
|
0
|
0
|
|
|
|
|
if ($$line =~ //) { |
|
989
|
0
|
|
|
|
|
|
$self->_helper_store_attribute_list($1, \$query_region); |
|
990
|
0
|
|
|
|
|
|
$$line = $self->_readline; |
|
991
|
|
|
|
|
|
|
|
|
992
|
|
|
|
|
|
|
# Zero or one |
|
993
|
0
|
|
|
|
|
|
$self->_question_mark_tag($line, \$query_region, 'db_id'); |
|
994
|
|
|
|
|
|
|
|
|
995
|
0
|
0
|
|
|
|
|
if ($$line =~ /<\/query_region>/) { |
|
996
|
0
|
|
|
|
|
|
$$line = $self->_readline; # get the next line to _process. |
|
997
|
0
|
|
|
|
|
|
return; |
|
998
|
|
|
|
|
|
|
} else { |
|
999
|
0
|
|
|
|
|
|
$self->throw("No closing tag . Got this: $$line\n"); |
|
1000
|
|
|
|
|
|
|
} |
|
1001
|
|
|
|
|
|
|
|
|
1002
|
|
|
|
|
|
|
} |
|
1003
|
|
|
|
|
|
|
|
|
1004
|
|
|
|
|
|
|
|
|
1005
|
|
|
|
|
|
|
} |
|
1006
|
|
|
|
|
|
|
# ================================================================================== |
|
1007
|
|
|
|
|
|
|
|
|
1008
|
|
|
|
|
|
|
=head2 _tag_processing_helper |
|
1009
|
|
|
|
|
|
|
|
|
1010
|
|
|
|
|
|
|
Title : _tag_processing_helper |
|
1011
|
|
|
|
|
|
|
Usage : $self->_tag_processing_helper |
|
1012
|
|
|
|
|
|
|
Function : Stores the tag value within the data structure. |
|
1013
|
|
|
|
|
|
|
Also calls _helper_store_attribute_list to store the |
|
1014
|
|
|
|
|
|
|
attributes and their values in the data structure. |
|
1015
|
|
|
|
|
|
|
Args : 5 scalars: |
|
1016
|
|
|
|
|
|
|
- Scalar holding the value of the attributes |
|
1017
|
|
|
|
|
|
|
- Reference to a data structure to store the data for <$tag_name> |
|
1018
|
|
|
|
|
|
|
- Scalar holding the tag name. |
|
1019
|
|
|
|
|
|
|
- Scalar holding the value of the tag. |
|
1020
|
|
|
|
|
|
|
- Scalar holding the value of either 'star', 'plus', |
|
1021
|
|
|
|
|
|
|
or 'question mark' which specifies what type of method |
|
1022
|
|
|
|
|
|
|
called this method. |
|
1023
|
|
|
|
|
|
|
Returns : Nothing. |
|
1024
|
|
|
|
|
|
|
Note : Method(s) that call(s) this method: |
|
1025
|
|
|
|
|
|
|
Method(s) that this method calls: _helper_store_attribute_list |
|
1026
|
|
|
|
|
|
|
|
|
1027
|
|
|
|
|
|
|
=cut |
|
1028
|
|
|
|
|
|
|
|
|
1029
|
|
|
|
|
|
|
sub _tag_processing_helper { |
|
1030
|
|
|
|
|
|
|
|
|
1031
|
0
|
|
|
0
|
|
|
my ($self, $attribute_list, $data_structure, $tag_name, $tag_value, $caller) = @_; |
|
1032
|
|
|
|
|
|
|
|
|
1033
|
|
|
|
|
|
|
# Add the attributes to the $$data_structure if they exist. |
|
1034
|
|
|
|
|
|
|
# print "tag_name: $tag_name , attribute_list: $attribute_list\n"; |
|
1035
|
0
|
0
|
|
|
|
|
if (defined $attribute_list) { |
|
1036
|
0
|
|
|
|
|
|
$self->_helper_store_attribute_list($attribute_list, $data_structure); |
|
1037
|
|
|
|
|
|
|
} |
|
1038
|
|
|
|
|
|
|
|
|
1039
|
|
|
|
|
|
|
|
|
1040
|
0
|
0
|
0
|
|
|
|
if ($caller eq 'star' || $caller eq 'plus') { |
|
1041
|
0
|
|
|
|
|
|
push @{$$data_structure->{$tag_name}}, $tag_value; |
|
|
0
|
|
|
|
|
|
|
|
1042
|
|
|
|
|
|
|
# There's either zero or more tags (*) or one or more (+) |
|
1043
|
|
|
|
|
|
|
} else { |
|
1044
|
0
|
|
0
|
|
|
|
$$data_structure->{$tag_name} = $tag_value || 'null'; |
|
1045
|
|
|
|
|
|
|
# There's zero or one tag (?) |
|
1046
|
|
|
|
|
|
|
} |
|
1047
|
|
|
|
|
|
|
|
|
1048
|
0
|
|
|
|
|
|
return; |
|
1049
|
|
|
|
|
|
|
|
|
1050
|
|
|
|
|
|
|
} |
|
1051
|
|
|
|
|
|
|
# ================================================================================== |
|
1052
|
|
|
|
|
|
|
|
|
1053
|
|
|
|
|
|
|
=head2 _one_tag |
|
1054
|
|
|
|
|
|
|
|
|
1055
|
|
|
|
|
|
|
Title : _one_tag |
|
1056
|
|
|
|
|
|
|
Usage : $self->_one_tag |
|
1057
|
|
|
|
|
|
|
Function : A method to store data from tags that occurs just once. |
|
1058
|
|
|
|
|
|
|
Args : 2 scalars: |
|
1059
|
|
|
|
|
|
|
- reference to a scalar holding the value of the line to be parsed. |
|
1060
|
|
|
|
|
|
|
- reference to a data structure to store the data for <$tag_name> |
|
1061
|
|
|
|
|
|
|
Returns : Nothing. |
|
1062
|
|
|
|
|
|
|
Note : Method(s) that call(s) this method : many |
|
1063
|
|
|
|
|
|
|
Method(s) that this method calls : _tag_processing_helper |
|
1064
|
|
|
|
|
|
|
|
|
1065
|
|
|
|
|
|
|
=cut |
|
1066
|
|
|
|
|
|
|
|
|
1067
|
|
|
|
|
|
|
sub _one_tag { |
|
1068
|
|
|
|
|
|
|
|
|
1069
|
0
|
|
|
0
|
|
|
my ($self, $line, $data_structure, $tag_name) = @_; |
|
1070
|
|
|
|
|
|
|
|
|
1071
|
0
|
0
|
|
|
|
|
$self->throw("Error: Missing <$tag_name>$tag_name>. Got: $$line\n\n") |
|
1072
|
|
|
|
|
|
|
if $$line !~ /\<$tag_name/; |
|
1073
|
|
|
|
|
|
|
# check to see if $$line is in correct format. |
|
1074
|
|
|
|
|
|
|
|
|
1075
|
0
|
0
|
|
|
|
|
if ($$line =~ /<$tag_name\s?(.*?)\s?\/?>(.*?)<\/$tag_name>/) { |
|
|
|
0
|
|
|
|
|
|
|
1076
|
|
|
|
|
|
|
|
|
1077
|
0
|
|
|
|
|
|
$self->_tag_processing_helper($1, $data_structure, $tag_name, $2, 'one'); |
|
1078
|
|
|
|
|
|
|
# $1 = attributes $data_structure = to hold the parsed values |
|
1079
|
|
|
|
|
|
|
# # $tag_name = name of the tag $2 = tag value 'one' = lets |
|
1080
|
|
|
|
|
|
|
# _tag_processing_helper know that it was called from the |
|
1081
|
|
|
|
|
|
|
# _one_tag method. |
|
1082
|
|
|
|
|
|
|
|
|
1083
|
|
|
|
|
|
|
} elsif ($$line =~ /<$tag_name\s?(.*?)\s?\/?>/) { |
|
1084
|
|
|
|
|
|
|
|
|
1085
|
0
|
|
|
|
|
|
$self->_tag_processing_helper($1, $data_structure, $tag_name, '', 'one'); |
|
1086
|
|
|
|
|
|
|
|
|
1087
|
|
|
|
|
|
|
} else { |
|
1088
|
0
|
|
|
|
|
|
$self->throw("Error: Cannot parse this line: $$line\n\n"); |
|
1089
|
|
|
|
|
|
|
} |
|
1090
|
|
|
|
|
|
|
|
|
1091
|
0
|
|
|
|
|
|
$$line = $self->_readline; # get the next line. |
|
1092
|
|
|
|
|
|
|
|
|
1093
|
0
|
|
|
|
|
|
return; |
|
1094
|
|
|
|
|
|
|
|
|
1095
|
|
|
|
|
|
|
} |
|
1096
|
|
|
|
|
|
|
# ================================================================================== |
|
1097
|
|
|
|
|
|
|
|
|
1098
|
|
|
|
|
|
|
=head2 _question_mark_tag |
|
1099
|
|
|
|
|
|
|
|
|
1100
|
|
|
|
|
|
|
Title : _question_mark_tag |
|
1101
|
|
|
|
|
|
|
Usage : $self->_question_mark_tag |
|
1102
|
|
|
|
|
|
|
Function : Parses values from tags that occurs zero or one time. ie: tag_name? |
|
1103
|
|
|
|
|
|
|
Args : 3 scalars: |
|
1104
|
|
|
|
|
|
|
- reference to a scalar holding the value of the line to be parsed. |
|
1105
|
|
|
|
|
|
|
- reference to a data structure to store the data for <$tag_name> |
|
1106
|
|
|
|
|
|
|
- scalar holding the name of the tag. |
|
1107
|
|
|
|
|
|
|
Returns : Nothing. |
|
1108
|
|
|
|
|
|
|
Note : Method(s) that call(s) this method : many. |
|
1109
|
|
|
|
|
|
|
Method(s) that this method calls : _tag_processing_helper |
|
1110
|
|
|
|
|
|
|
|
|
1111
|
|
|
|
|
|
|
|
|
1112
|
|
|
|
|
|
|
=cut |
|
1113
|
|
|
|
|
|
|
|
|
1114
|
|
|
|
|
|
|
sub _question_mark_tag { |
|
1115
|
|
|
|
|
|
|
|
|
1116
|
0
|
|
|
0
|
|
|
my ($self, $line, $data_structure, $tag_name) = @_; |
|
1117
|
|
|
|
|
|
|
|
|
1118
|
0
|
0
|
|
|
|
|
if ($$line =~ /<$tag_name\s?(.*?)\s?>(.*?)<\/$tag_name>/) { |
|
1119
|
0
|
|
|
|
|
|
$self->_tag_processing_helper($1, $data_structure, $tag_name, $2, 'question mark'); |
|
1120
|
0
|
|
|
|
|
|
$$line = $self->_readline; |
|
1121
|
|
|
|
|
|
|
} |
|
1122
|
|
|
|
|
|
|
|
|
1123
|
0
|
|
|
|
|
|
return; |
|
1124
|
|
|
|
|
|
|
|
|
1125
|
|
|
|
|
|
|
} |
|
1126
|
|
|
|
|
|
|
# ================================================================================== |
|
1127
|
|
|
|
|
|
|
|
|
1128
|
|
|
|
|
|
|
=head2 _star_tag |
|
1129
|
|
|
|
|
|
|
|
|
1130
|
|
|
|
|
|
|
Title : _star_tag |
|
1131
|
|
|
|
|
|
|
Usage : $self->_star_tag |
|
1132
|
|
|
|
|
|
|
Function : Parses values from tags that occur zero or more times. ie: tag_name* |
|
1133
|
|
|
|
|
|
|
Args : 3 scalars: |
|
1134
|
|
|
|
|
|
|
- reference to a scalar holding the value of the line to be parsed. |
|
1135
|
|
|
|
|
|
|
- reference to a data structure to store the data for <$tag_name> |
|
1136
|
|
|
|
|
|
|
- scalar holding the name of the tag. |
|
1137
|
|
|
|
|
|
|
Returns : Nothing. |
|
1138
|
|
|
|
|
|
|
Note : Method(s) that call(s) this method : many. |
|
1139
|
|
|
|
|
|
|
Method(s) that this method calls : _tag_processing_helper |
|
1140
|
|
|
|
|
|
|
|
|
1141
|
|
|
|
|
|
|
=cut |
|
1142
|
|
|
|
|
|
|
|
|
1143
|
|
|
|
|
|
|
sub _star_tag { |
|
1144
|
|
|
|
|
|
|
|
|
1145
|
0
|
|
|
0
|
|
|
my ($self, $line, $data_structure, $tag_name) = @_; |
|
1146
|
|
|
|
|
|
|
|
|
1147
|
|
|
|
|
|
|
#print "tag_name: $tag_name\n"; |
|
1148
|
0
|
|
|
|
|
|
while ($$line =~ /<$tag_name\s?(.*?)\s?>(.*?)<\/$tag_name>/) { |
|
1149
|
0
|
|
|
|
|
|
$self->_tag_processing_helper |
|
1150
|
|
|
|
|
|
|
($1, $data_structure, $tag_name, $2, 'star'); |
|
1151
|
|
|
|
|
|
|
# The tag and attribute values are stored within |
|
1152
|
|
|
|
|
|
|
# $$data_structure within the _tag_processing_helper method. |
|
1153
|
0
|
|
|
|
|
|
$$line = $self->_readline; |
|
1154
|
|
|
|
|
|
|
} |
|
1155
|
|
|
|
|
|
|
#if ($tag_name eq 'qualifier'){ |
|
1156
|
|
|
|
|
|
|
# print "this one:\n"; |
|
1157
|
|
|
|
|
|
|
# print Data::Dumper->Dump([$data_structure]); exit; |
|
1158
|
|
|
|
|
|
|
#} |
|
1159
|
|
|
|
|
|
|
|
|
1160
|
0
|
|
|
|
|
|
return; |
|
1161
|
|
|
|
|
|
|
|
|
1162
|
|
|
|
|
|
|
} |
|
1163
|
|
|
|
|
|
|
# ================================================================================== |
|
1164
|
|
|
|
|
|
|
|
|
1165
|
|
|
|
|
|
|
=head2 _plus_tag |
|
1166
|
|
|
|
|
|
|
|
|
1167
|
|
|
|
|
|
|
Title : _plus_tag |
|
1168
|
|
|
|
|
|
|
Usage : $self->_plus_tag |
|
1169
|
|
|
|
|
|
|
Function : Handles 'plus' tags (tags that occur one or more times). tag_name+ |
|
1170
|
|
|
|
|
|
|
Args : 3 scalars: |
|
1171
|
|
|
|
|
|
|
- reference to a scalar holding the value of the line to be parsed. |
|
1172
|
|
|
|
|
|
|
- reference to a data structure to store the data for <$tag_name> |
|
1173
|
|
|
|
|
|
|
- scalar holding the name of the tag. |
|
1174
|
|
|
|
|
|
|
Returns : Nothing. |
|
1175
|
|
|
|
|
|
|
Note : Method(s) that call(s) this method : many. |
|
1176
|
|
|
|
|
|
|
Method(s) that this method calls : _star_tag |
|
1177
|
|
|
|
|
|
|
|
|
1178
|
|
|
|
|
|
|
=cut |
|
1179
|
|
|
|
|
|
|
|
|
1180
|
|
|
|
|
|
|
sub _plus_tag { |
|
1181
|
|
|
|
|
|
|
|
|
1182
|
0
|
|
|
0
|
|
|
my ($self, $line, $data_structure, $tag_name) = @_; |
|
1183
|
|
|
|
|
|
|
|
|
1184
|
0
|
0
|
|
|
|
|
if ($$line =~ /<$tag_name\s?(.*?)\s?>(.*?)<\/$tag_name>/) { |
|
1185
|
|
|
|
|
|
|
|
|
1186
|
|
|
|
|
|
|
# Store value of the first occurence of $tag_name. |
|
1187
|
|
|
|
|
|
|
# All subsequent values, if any, will be stored in the method _star_tag. |
|
1188
|
0
|
|
|
|
|
|
$self->_tag_processing_helper($1, $data_structure, $tag_name, $2, 'plus'); |
|
1189
|
|
|
|
|
|
|
|
|
1190
|
|
|
|
|
|
|
|
|
1191
|
|
|
|
|
|
|
# If the flow gets within this block, we've already determined |
|
1192
|
|
|
|
|
|
|
# that there's at least one of <$tag_name> Are there more? To |
|
1193
|
|
|
|
|
|
|
# answer this, we could just treat the tag as a * tag now |
|
1194
|
|
|
|
|
|
|
# (zero or more). We've already determined that it's NOT |
|
1195
|
|
|
|
|
|
|
# zero, so how many more? Thus, call _star_tag. |
|
1196
|
0
|
|
|
|
|
|
$$line = $self->_readline; |
|
1197
|
0
|
|
|
|
|
|
$self->_star_tag($line, $data_structure, $tag_name); |
|
1198
|
|
|
|
|
|
|
|
|
1199
|
|
|
|
|
|
|
|
|
1200
|
|
|
|
|
|
|
} else { |
|
1201
|
0
|
|
|
|
|
|
$self->throw("Error: Missing <$tag_name>$tag_name>. Got: $$line\n\n"); |
|
1202
|
|
|
|
|
|
|
} |
|
1203
|
|
|
|
|
|
|
|
|
1204
|
0
|
|
|
|
|
|
return; |
|
1205
|
|
|
|
|
|
|
|
|
1206
|
|
|
|
|
|
|
} |
|
1207
|
|
|
|
|
|
|
# ================================================================================== |
|
1208
|
|
|
|
|
|
|
|
|
1209
|
|
|
|
|
|
|
=head2 _helper_store_attribute_list |
|
1210
|
|
|
|
|
|
|
|
|
1211
|
|
|
|
|
|
|
Title : _helper_store_attribute_list |
|
1212
|
|
|
|
|
|
|
Usage : $self->_helper_store_attribute_list |
|
1213
|
|
|
|
|
|
|
Function : A helper method used to store the attributes from |
|
1214
|
|
|
|
|
|
|
the tags into the data structure. |
|
1215
|
|
|
|
|
|
|
Args : 2 scalars: |
|
1216
|
|
|
|
|
|
|
- scalar holding the attribute values to be parsed. |
|
1217
|
|
|
|
|
|
|
- reference to a data structure to store the data between the 2 tags. |
|
1218
|
|
|
|
|
|
|
Returns : Nothing. |
|
1219
|
|
|
|
|
|
|
Note : Method(s) that call(s) this method : Many. |
|
1220
|
|
|
|
|
|
|
Method(s) that this method call(s) : None. |
|
1221
|
|
|
|
|
|
|
|
|
1222
|
|
|
|
|
|
|
=cut |
|
1223
|
|
|
|
|
|
|
|
|
1224
|
|
|
|
|
|
|
sub _helper_store_attribute_list { |
|
1225
|
|
|
|
|
|
|
|
|
1226
|
0
|
|
|
0
|
|
|
my ($self, $attribute_line, $data_structure) = @_; |
|
1227
|
|
|
|
|
|
|
|
|
1228
|
0
|
|
|
|
|
|
my %attribs = ($attribute_line =~ /(\w+)\s*=\s*"([^"]*)"/g); |
|
1229
|
|
|
|
|
|
|
|
|
1230
|
0
|
|
|
|
|
|
my $attribute_list; |
|
1231
|
0
|
|
|
|
|
|
for my $key (keys %attribs) { |
|
1232
|
|
|
|
|
|
|
# print "\tkey: $key , value: $attribs{$key}\n"; |
|
1233
|
|
|
|
|
|
|
###$$data_structure->{$key} = $attribs{$key}; # <- The ORIGINAL. |
|
1234
|
0
|
|
|
|
|
|
push @{$$data_structure->{$key}}, $attribs{$key}; |
|
|
0
|
|
|
|
|
|
|
|
1235
|
|
|
|
|
|
|
# Now, store them in an array because there may be > 1 tag, thus |
|
1236
|
|
|
|
|
|
|
# > 1 attribute of the same name. |
|
1237
|
|
|
|
|
|
|
# Doing this has made it necessary to change the _store_seqs method. |
|
1238
|
|
|
|
|
|
|
# ie: Change $bio_sequence->{'molecule_type'}; |
|
1239
|
|
|
|
|
|
|
# to |
|
1240
|
|
|
|
|
|
|
# $bio_sequence->{'molecule_type'}->[0]; |
|
1241
|
|
|
|
|
|
|
} |
|
1242
|
|
|
|
|
|
|
|
|
1243
|
0
|
|
|
|
|
|
return; |
|
1244
|
|
|
|
|
|
|
|
|
1245
|
|
|
|
|
|
|
} |
|
1246
|
|
|
|
|
|
|
# ================================================================================== |
|
1247
|
|
|
|
|
|
|
|
|
1248
|
|
|
|
|
|
|
=head2 _store_seqs |
|
1249
|
|
|
|
|
|
|
|
|
1250
|
|
|
|
|
|
|
Title : _store_seqs |
|
1251
|
|
|
|
|
|
|
Usage : $self->_store_seqs |
|
1252
|
|
|
|
|
|
|
Function : This method is called once in the life time of the script. |
|
1253
|
|
|
|
|
|
|
It stores the data parsed from the agave xml file into |
|
1254
|
|
|
|
|
|
|
the Bio::Seq object. |
|
1255
|
|
|
|
|
|
|
Args : None. |
|
1256
|
|
|
|
|
|
|
Returns : Nothing. |
|
1257
|
|
|
|
|
|
|
Note : Method(s) that call(s) this method : next_seq |
|
1258
|
|
|
|
|
|
|
Method(s) that this method calls : None. |
|
1259
|
|
|
|
|
|
|
|
|
1260
|
|
|
|
|
|
|
=cut |
|
1261
|
|
|
|
|
|
|
|
|
1262
|
|
|
|
|
|
|
sub _store_seqs { |
|
1263
|
|
|
|
|
|
|
|
|
1264
|
0
|
|
|
0
|
|
|
my ($self) = @_; |
|
1265
|
|
|
|
|
|
|
|
|
1266
|
|
|
|
|
|
|
|
|
1267
|
0
|
|
|
|
|
|
for my $sciobj (@{$self->{'sciobj'}}) { |
|
|
0
|
|
|
|
|
|
|
|
1268
|
|
|
|
|
|
|
|
|
1269
|
|
|
|
|
|
|
### $sciobj = $self->{'sciobj'}; # The root node. |
|
1270
|
|
|
|
|
|
|
|
|
1271
|
|
|
|
|
|
|
|
|
1272
|
0
|
|
|
|
|
|
for my $contig (@{$sciobj->{'contig'}}) { # Each contig has a fragment order. |
|
|
0
|
|
|
|
|
|
|
|
1273
|
|
|
|
|
|
|
|
|
1274
|
0
|
|
|
|
|
|
for my $fragment_order (@{$contig->{'fragment_order'}}) { # Each fragment order has a fragment_orientation. |
|
|
0
|
|
|
|
|
|
|
|
1275
|
|
|
|
|
|
|
|
|
1276
|
0
|
|
|
|
|
|
for my $fragment_orientation (@{$fragment_order->{'fragment_orientation'}}) { |
|
|
0
|
|
|
|
|
|
|
|
1277
|
|
|
|
|
|
|
# Each fragment_orientation contain 1 bio sequence. |
|
1278
|
|
|
|
|
|
|
|
|
1279
|
0
|
|
|
|
|
|
my $bio_sequence = $fragment_orientation->{'bio_sequence'}; # contains all the |
|
1280
|
|
|
|
|
|
|
# interesting stuff: |
|
1281
|
|
|
|
|
|
|
|
|
1282
|
0
|
|
|
|
|
|
my $sequence = $bio_sequence->{'sequence'}; |
|
1283
|
0
|
|
|
|
|
|
my $accession_number = $bio_sequence->{'sequence_id'}->[0]; # also use for primary_id |
|
1284
|
0
|
|
|
|
|
|
my $organism = $bio_sequence->{'organism'}; |
|
1285
|
0
|
|
|
|
|
|
my $description = $bio_sequence->{'description'}; |
|
1286
|
0
|
|
|
|
|
|
my $molecule_type = $bio_sequence->{'molecule_type'}->[0]; |
|
1287
|
|
|
|
|
|
|
|
|
1288
|
0
|
|
|
|
|
|
my $primary_seq = Bio::PrimarySeq->new( |
|
1289
|
|
|
|
|
|
|
-id => $accession_number, |
|
1290
|
|
|
|
|
|
|
-alphabet => $molecule_type, |
|
1291
|
|
|
|
|
|
|
-seq => $sequence, |
|
1292
|
|
|
|
|
|
|
-desc => $description, |
|
1293
|
|
|
|
|
|
|
); |
|
1294
|
|
|
|
|
|
|
|
|
1295
|
0
|
|
|
|
|
|
my $seq = Bio::Seq->new ( |
|
1296
|
|
|
|
|
|
|
-display_id => $accession_number, |
|
1297
|
|
|
|
|
|
|
-accession_number => $accession_number, |
|
1298
|
|
|
|
|
|
|
-primary_seq => $primary_seq, |
|
1299
|
|
|
|
|
|
|
-seq => $sequence, |
|
1300
|
|
|
|
|
|
|
-description => $description, |
|
1301
|
|
|
|
|
|
|
); |
|
1302
|
|
|
|
|
|
|
|
|
1303
|
0
|
|
|
|
|
|
my $organism_name = $bio_sequence->{organism_name}->[0]; |
|
1304
|
0
|
0
|
|
|
|
|
if (defined $organism_name) { |
|
1305
|
|
|
|
|
|
|
|
|
1306
|
0
|
|
|
|
|
|
my @classification = split(' ', $organism_name); |
|
1307
|
0
|
|
|
|
|
|
my $species = Bio::Species->new(); |
|
1308
|
0
|
|
|
|
|
|
$species->classification(@classification); |
|
1309
|
0
|
|
|
|
|
|
$seq->species($species); |
|
1310
|
|
|
|
|
|
|
} |
|
1311
|
|
|
|
|
|
|
# Pull out the keywords: $keywords is an array ref. |
|
1312
|
|
|
|
|
|
|
|
|
1313
|
0
|
|
|
|
|
|
my $keywords = $bio_sequence->{keyword}; |
|
1314
|
0
|
|
|
|
|
|
my %key_to_value; |
|
1315
|
|
|
|
|
|
|
|
|
1316
|
0
|
|
|
|
|
|
for my $keywords (@$keywords) { |
|
1317
|
|
|
|
|
|
|
# print "keywords: $keywords\n"; |
|
1318
|
0
|
|
|
|
|
|
my @words = split(':', $keywords); |
|
1319
|
0
|
|
|
|
|
|
for (my $i = 0; $i < scalar @words - 1; $i++) { |
|
1320
|
0
|
0
|
|
|
|
|
if ($i % 2 == 0) { |
|
1321
|
0
|
|
|
|
|
|
my $j = $i; $j++; |
|
|
0
|
|
|
|
|
|
|
|
1322
|
|
|
|
|
|
|
# print "$words[$i] , $words[$j]\n"; |
|
1323
|
0
|
|
|
|
|
|
$key_to_value{$words[$i]} = $words[$j]; |
|
1324
|
|
|
|
|
|
|
} |
|
1325
|
|
|
|
|
|
|
} |
|
1326
|
|
|
|
|
|
|
# print Data::Dumper->Dump([%key_to_value]); |
|
1327
|
|
|
|
|
|
|
my $reference = Bio::Annotation::Reference-> |
|
1328
|
|
|
|
|
|
|
new(-authors => $key_to_value{authors}, |
|
1329
|
|
|
|
|
|
|
-title => $key_to_value{title}, |
|
1330
|
|
|
|
|
|
|
-database => $key_to_value{database}, |
|
1331
|
|
|
|
|
|
|
-pubmed => $key_to_value{pubmed}, |
|
1332
|
0
|
|
|
|
|
|
); |
|
1333
|
0
|
|
|
|
|
|
$seq->annotation->add_Annotation('reference', $reference); |
|
1334
|
|
|
|
|
|
|
|
|
1335
|
|
|
|
|
|
|
} # close for my $keywords |
|
1336
|
|
|
|
|
|
|
|
|
1337
|
|
|
|
|
|
|
|
|
1338
|
|
|
|
|
|
|
# print Data::Dumper->Dump([$bio_sequence]); print "here\n"; exit; |
|
1339
|
0
|
0
|
|
|
|
|
if (defined $bio_sequence->{'sequence_map'}) { |
|
1340
|
|
|
|
|
|
|
|
|
1341
|
0
|
|
|
|
|
|
for my $sequence_map (@{$bio_sequence->{'sequence_map'}}) { |
|
|
0
|
|
|
|
|
|
|
|
1342
|
|
|
|
|
|
|
|
|
1343
|
|
|
|
|
|
|
# print Data::Dumper->Dump([$sequence_map]); print "here\n"; exit; |
|
1344
|
|
|
|
|
|
|
|
|
1345
|
0
|
|
|
|
|
|
my $label = $sequence_map->{label}; |
|
1346
|
|
|
|
|
|
|
|
|
1347
|
0
|
0
|
0
|
|
|
|
if (defined $sequence_map->{annotations} && |
|
1348
|
|
|
|
|
|
|
ref($sequence_map->{annotations}) eq 'HASH') { |
|
1349
|
|
|
|
|
|
|
|
|
1350
|
|
|
|
|
|
|
# Get the sequence features (ie genes, exons, etc) from this $sequence_map |
|
1351
|
0
|
|
|
|
|
|
for my $seq_feature (@{$sequence_map->{'annotations'}->{'seq_feature'}}) { |
|
|
0
|
|
|
|
|
|
|
|
1352
|
|
|
|
|
|
|
|
|
1353
|
|
|
|
|
|
|
# print Data::Dumper->Dump([$seq_feature]); exit; |
|
1354
|
0
|
|
|
|
|
|
my $seq_location = $seq_feature->{'seq_location'}; |
|
1355
|
0
|
|
|
|
|
|
my $start_coord = $seq_feature->{'least_start'}->[0]; |
|
1356
|
0
|
|
|
|
|
|
my $feature_type = $seq_feature->{'feature_type'}->[0]; |
|
1357
|
0
|
|
|
|
|
|
my $end_coord = $seq_feature->{'greatest_end'}->[0]; |
|
1358
|
0
|
|
|
|
|
|
my $is_on_complement = $seq_feature->{'is_on_complement'}->[0]; |
|
1359
|
|
|
|
|
|
|
|
|
1360
|
|
|
|
|
|
|
# Specify the coordinates and the tag for this seq feature. |
|
1361
|
|
|
|
|
|
|
# print "Primary Tag for this SeqFeature: $feature_type\n"; |
|
1362
|
0
|
|
|
|
|
|
my $feat = Bio::SeqFeature::Generic-> |
|
1363
|
|
|
|
|
|
|
new( |
|
1364
|
|
|
|
|
|
|
-start => $start_coord, |
|
1365
|
|
|
|
|
|
|
-end => $end_coord, |
|
1366
|
|
|
|
|
|
|
-primary_tag => $feature_type, |
|
1367
|
|
|
|
|
|
|
); |
|
1368
|
|
|
|
|
|
|
|
|
1369
|
|
|
|
|
|
|
|
|
1370
|
0
|
0
|
0
|
|
|
|
if (defined $seq_feature->{'qualifier'} && |
|
1371
|
|
|
|
|
|
|
ref($seq_feature->{'qualifier'}) eq 'ARRAY') { |
|
1372
|
|
|
|
|
|
|
|
|
1373
|
0
|
|
|
|
|
|
for my $feature (@{$seq_feature->{'qualifier'}}) { |
|
|
0
|
|
|
|
|
|
|
|
1374
|
|
|
|
|
|
|
|
|
1375
|
0
|
|
|
|
|
|
my $value = $feature->{'qualifier'}; |
|
1376
|
0
|
|
|
|
|
|
my $feature_type = $feature->{'qualifier_type'}; |
|
1377
|
|
|
|
|
|
|
|
|
1378
|
0
|
|
|
|
|
|
for (my $i = 0; |
|
1379
|
0
|
|
|
|
|
|
$i < scalar @{$value}; |
|
1380
|
|
|
|
|
|
|
$i++) { |
|
1381
|
0
|
|
|
|
|
|
$feat->add_tag_value( |
|
1382
|
|
|
|
|
|
|
$feature_type->[$i] => $value->[$i] |
|
1383
|
|
|
|
|
|
|
); |
|
1384
|
|
|
|
|
|
|
} # close the for loop |
|
1385
|
|
|
|
|
|
|
|
|
1386
|
|
|
|
|
|
|
} |
|
1387
|
|
|
|
|
|
|
|
|
1388
|
|
|
|
|
|
|
} # close if (defined $seq_feature->... |
|
1389
|
|
|
|
|
|
|
|
|
1390
|
|
|
|
|
|
|
|
|
1391
|
0
|
|
|
|
|
|
$seq->add_SeqFeature($feat); |
|
1392
|
|
|
|
|
|
|
|
|
1393
|
|
|
|
|
|
|
|
|
1394
|
|
|
|
|
|
|
} # close for my $seq_feature (@{$sequence_map->... |
|
1395
|
|
|
|
|
|
|
|
|
1396
|
|
|
|
|
|
|
|
|
1397
|
|
|
|
|
|
|
} # close if (defined $sequence_map->{annotations} && |
|
1398
|
|
|
|
|
|
|
|
|
1399
|
|
|
|
|
|
|
|
|
1400
|
|
|
|
|
|
|
} # close for my $sequence_map (@{$bio_sequence->{'sequence_map'}}){ |
|
1401
|
|
|
|
|
|
|
|
|
1402
|
|
|
|
|
|
|
} # close if (defined $bio_sequence->{'sequence_map'}){ |
|
1403
|
|
|
|
|
|
|
|
|
1404
|
|
|
|
|
|
|
|
|
1405
|
|
|
|
|
|
|
# This is where the Bio::Seq objects are stored: |
|
1406
|
0
|
|
|
|
|
|
push @{$self->{'sequence_objects'}}, $seq; |
|
|
0
|
|
|
|
|
|
|
|
1407
|
|
|
|
|
|
|
|
|
1408
|
|
|
|
|
|
|
|
|
1409
|
|
|
|
|
|
|
} # close for my $fragment_orientation |
|
1410
|
|
|
|
|
|
|
|
|
1411
|
|
|
|
|
|
|
|
|
1412
|
|
|
|
|
|
|
} # close for my $fragment_order |
|
1413
|
|
|
|
|
|
|
|
|
1414
|
|
|
|
|
|
|
|
|
1415
|
|
|
|
|
|
|
} # close for my $contig |
|
1416
|
|
|
|
|
|
|
|
|
1417
|
|
|
|
|
|
|
|
|
1418
|
|
|
|
|
|
|
} # close for my $sciobj |
|
1419
|
|
|
|
|
|
|
|
|
1420
|
|
|
|
|
|
|
# Flag is set so that we know that the sequence objects are now stored in $self. |
|
1421
|
0
|
|
|
|
|
|
$self->{'seqs_stored'} = 1; |
|
1422
|
|
|
|
|
|
|
|
|
1423
|
0
|
|
|
|
|
|
return; |
|
1424
|
|
|
|
|
|
|
|
|
1425
|
|
|
|
|
|
|
} |
|
1426
|
|
|
|
|
|
|
# ================================================================================== |
|
1427
|
|
|
|
|
|
|
|
|
1428
|
|
|
|
|
|
|
=head2 next_seq |
|
1429
|
|
|
|
|
|
|
|
|
1430
|
|
|
|
|
|
|
Title : next_seq |
|
1431
|
|
|
|
|
|
|
Usage : $seq = $stream->next_seq() |
|
1432
|
|
|
|
|
|
|
Function : Returns the next sequence in the stream. |
|
1433
|
|
|
|
|
|
|
Args : None. |
|
1434
|
|
|
|
|
|
|
Returns : Bio::Seq object |
|
1435
|
|
|
|
|
|
|
|
|
1436
|
|
|
|
|
|
|
Method is called from the script. Method(s) that this method calls: |
|
1437
|
|
|
|
|
|
|
_store_seqs (only once throughout the life time of script execution). |
|
1438
|
|
|
|
|
|
|
|
|
1439
|
|
|
|
|
|
|
|
|
1440
|
|
|
|
|
|
|
=cut |
|
1441
|
|
|
|
|
|
|
|
|
1442
|
|
|
|
|
|
|
sub next_seq { |
|
1443
|
|
|
|
|
|
|
|
|
1444
|
0
|
|
|
0
|
1
|
|
my ($self) = @_; |
|
1445
|
|
|
|
|
|
|
|
|
1446
|
|
|
|
|
|
|
# convert agave to genbank/fasta/embl whatever. |
|
1447
|
|
|
|
|
|
|
|
|
1448
|
0
|
0
|
|
|
|
|
$self->_store_seqs if $self->{'seqs_stored'} == 0; |
|
1449
|
|
|
|
|
|
|
|
|
1450
|
|
|
|
|
|
|
$self->throw("Error: No Bio::Seq objects stored yet!\n\n") |
|
1451
|
0
|
0
|
|
|
|
|
if !defined $self->{'sequence_objects'}; # This should never occur... |
|
1452
|
|
|
|
|
|
|
|
|
1453
|
0
|
0
|
|
|
|
|
if (scalar @{$self->{'sequence_objects'}} > 0) { |
|
|
0
|
|
|
|
|
|
|
|
1454
|
0
|
|
|
|
|
|
return shift @{$self->{'sequence_objects'}}; |
|
|
0
|
|
|
|
|
|
|
|
1455
|
|
|
|
|
|
|
} else { |
|
1456
|
|
|
|
|
|
|
# All done. Nothing more to parse. |
|
1457
|
|
|
|
|
|
|
# print "returning nothing!\n"; |
|
1458
|
0
|
|
|
|
|
|
return; |
|
1459
|
|
|
|
|
|
|
} |
|
1460
|
|
|
|
|
|
|
|
|
1461
|
|
|
|
|
|
|
|
|
1462
|
|
|
|
|
|
|
} |
|
1463
|
|
|
|
|
|
|
# ================================================================================== |
|
1464
|
|
|
|
|
|
|
|
|
1465
|
|
|
|
|
|
|
=head2 next_primary_seq |
|
1466
|
|
|
|
|
|
|
|
|
1467
|
|
|
|
|
|
|
Title : next_primary_seq |
|
1468
|
|
|
|
|
|
|
Usage : $seq = $stream->next_primary_seq() |
|
1469
|
|
|
|
|
|
|
Function: returns the next primary sequence (ie no seq_features) in the stream |
|
1470
|
|
|
|
|
|
|
Returns : Bio::PrimarySeq object |
|
1471
|
|
|
|
|
|
|
Args : NONE |
|
1472
|
|
|
|
|
|
|
|
|
1473
|
|
|
|
|
|
|
=cut |
|
1474
|
|
|
|
|
|
|
|
|
1475
|
|
|
|
|
|
|
sub next_primary_seq { |
|
1476
|
0
|
|
|
0
|
1
|
|
my $self=shift; |
|
1477
|
0
|
|
|
|
|
|
return 0; |
|
1478
|
|
|
|
|
|
|
} |
|
1479
|
|
|
|
|
|
|
# ================================================================================== |
|
1480
|
|
|
|
|
|
|
|
|
1481
|
|
|
|
|
|
|
=head2 write_seq |
|
1482
|
|
|
|
|
|
|
|
|
1483
|
|
|
|
|
|
|
Title : write_seq |
|
1484
|
|
|
|
|
|
|
Usage : Not Yet Implemented! $stream->write_seq(@seq) |
|
1485
|
|
|
|
|
|
|
Function: writes the $seq object into the stream |
|
1486
|
|
|
|
|
|
|
Returns : 1 for success and 0 for error |
|
1487
|
|
|
|
|
|
|
Args : Bio::Seq object |
|
1488
|
|
|
|
|
|
|
|
|
1489
|
|
|
|
|
|
|
=cut |
|
1490
|
|
|
|
|
|
|
|
|
1491
|
|
|
|
|
|
|
sub write_seq { |
|
1492
|
|
|
|
|
|
|
|
|
1493
|
|
|
|
|
|
|
# Convert the Bio::Seq object(s) to AGAVE xml file. |
|
1494
|
|
|
|
|
|
|
|
|
1495
|
0
|
|
|
0
|
1
|
|
my ($self,@seqs) = @_; |
|
1496
|
|
|
|
|
|
|
|
|
1497
|
0
|
|
|
|
|
|
foreach my $seq ( @seqs ) { |
|
1498
|
0
|
|
|
|
|
|
$self->_write_each_record( $seq ); # where most of the work actually takes place. |
|
1499
|
|
|
|
|
|
|
} |
|
1500
|
|
|
|
|
|
|
|
|
1501
|
0
|
|
|
|
|
|
return; |
|
1502
|
|
|
|
|
|
|
|
|
1503
|
|
|
|
|
|
|
} |
|
1504
|
|
|
|
|
|
|
# ================================================================================== |
|
1505
|
|
|
|
|
|
|
|
|
1506
|
|
|
|
|
|
|
=head2 _write_each_record |
|
1507
|
|
|
|
|
|
|
|
|
1508
|
|
|
|
|
|
|
Title : _write_each_record |
|
1509
|
|
|
|
|
|
|
Usage : $agave->_write_each_record( $seqI ) |
|
1510
|
|
|
|
|
|
|
Function: change data into agave format |
|
1511
|
|
|
|
|
|
|
Returns : NONE |
|
1512
|
|
|
|
|
|
|
Args : Bio::SeqI object |
|
1513
|
|
|
|
|
|
|
|
|
1514
|
|
|
|
|
|
|
=cut |
|
1515
|
|
|
|
|
|
|
|
|
1516
|
|
|
|
|
|
|
sub _write_each_record { |
|
1517
|
0
|
|
|
0
|
|
|
my ($self,$seq) = @_; |
|
1518
|
|
|
|
|
|
|
|
|
1519
|
|
|
|
|
|
|
# $self->{'file'} =~ s/>//g; |
|
1520
|
0
|
|
|
|
|
|
my $output = IO::File->new(">" . $self->{'file'}); |
|
1521
|
0
|
|
|
|
|
|
my $writer = XML::Writer->new(OUTPUT => $output, |
|
1522
|
|
|
|
|
|
|
NAMESPACES => 0, |
|
1523
|
|
|
|
|
|
|
DATA_MODE => 1, |
|
1524
|
|
|
|
|
|
|
DATA_INDENT => 2 ) ; |
|
1525
|
|
|
|
|
|
|
|
|
1526
|
0
|
|
|
|
|
|
$writer->xmlDecl("UTF-8"); |
|
1527
|
0
|
|
|
|
|
|
$writer->doctype("sciobj", '', "sciobj.dtd"); |
|
1528
|
0
|
|
|
|
|
|
$writer ->startTag('sciobj', |
|
1529
|
|
|
|
|
|
|
'version', '2', |
|
1530
|
|
|
|
|
|
|
'release', '2'); |
|
1531
|
|
|
|
|
|
|
|
|
1532
|
0
|
|
|
|
|
|
$writer->startTag('contig', 'length', $seq->length); |
|
1533
|
0
|
|
|
|
|
|
my $annotation = $seq ->annotation; |
|
1534
|
|
|
|
|
|
|
# print "annotation: $annotation\n"; exit; Bio::Annotation::Collection=HASH(0x8112e6c) |
|
1535
|
0
|
0
|
|
|
|
|
if ( $annotation->get_Annotations('dblink') ) { |
|
1536
|
|
|
|
|
|
|
# used to be $annotation->each_DBLink, but Bio::Annotation::Collection::each_DBLink |
|
1537
|
|
|
|
|
|
|
# is now replaced with get_Annotations('dblink') |
|
1538
|
0
|
|
|
|
|
|
my $dblink = $annotation->get_Annotations('dblink')->[0] ; |
|
1539
|
|
|
|
|
|
|
|
|
1540
|
0
|
|
|
|
|
|
$writer ->startTag('db_id', |
|
1541
|
|
|
|
|
|
|
'id', $dblink->primary_id , |
|
1542
|
|
|
|
|
|
|
'db_code', $dblink->database ); |
|
1543
|
|
|
|
|
|
|
} else { |
|
1544
|
0
|
|
|
|
|
|
$writer ->startTag('db_id', |
|
1545
|
|
|
|
|
|
|
'id', $seq->display_id , |
|
1546
|
|
|
|
|
|
|
'db_code', 'default' ); |
|
1547
|
|
|
|
|
|
|
} |
|
1548
|
0
|
|
|
|
|
|
$writer ->endTag('db_id') ; |
|
1549
|
|
|
|
|
|
|
|
|
1550
|
|
|
|
|
|
|
|
|
1551
|
0
|
|
|
|
|
|
$writer->startTag('fragment_order'); |
|
1552
|
0
|
|
|
|
|
|
$writer->startTag('fragment_orientation'); |
|
1553
|
|
|
|
|
|
|
|
|
1554
|
|
|
|
|
|
|
##start bio_sequence |
|
1555
|
|
|
|
|
|
|
####my $organism = $seq->species->genus . " " . $seq->species->species; |
|
1556
|
0
|
|
|
|
|
|
$writer ->startTag('bio_sequence', |
|
1557
|
|
|
|
|
|
|
'sequence_id', $seq->display_id, |
|
1558
|
|
|
|
|
|
|
'seq_length', $seq->length, |
|
1559
|
|
|
|
|
|
|
# 'molecule_type', $seq->moltype, # deprecated |
|
1560
|
|
|
|
|
|
|
'molecule_type', $self->alphabet, |
|
1561
|
|
|
|
|
|
|
#'organism_name', $organism |
|
1562
|
|
|
|
|
|
|
); |
|
1563
|
|
|
|
|
|
|
|
|
1564
|
|
|
|
|
|
|
# my $desc = $seq->{primary_seq}->{desc}; |
|
1565
|
|
|
|
|
|
|
# print "desc: $desc\n"; exit; |
|
1566
|
|
|
|
|
|
|
# print Data::Dumper->Dump([$seq]); exit; |
|
1567
|
|
|
|
|
|
|
##start db_id under bio_sequence |
|
1568
|
0
|
|
|
|
|
|
$annotation = $seq ->annotation; |
|
1569
|
|
|
|
|
|
|
# print "annotation: $annotation\n"; exit; Bio::Annotation::Collection=HASH(0x8112e6c) |
|
1570
|
0
|
0
|
|
|
|
|
if ( $annotation->get_Annotations('dblink') ) { |
|
1571
|
|
|
|
|
|
|
# used to be $annotation->each_DBLink, but Bio::Annotation::Collection::each_DBLink |
|
1572
|
|
|
|
|
|
|
# is now replaced with get_Annotations('dblink') |
|
1573
|
0
|
|
|
|
|
|
my $dblink = $annotation->get_Annotations('dblink')->[0] ; |
|
1574
|
|
|
|
|
|
|
|
|
1575
|
0
|
|
|
|
|
|
$writer ->startTag('db_id', |
|
1576
|
|
|
|
|
|
|
'id', $dblink->primary_id , |
|
1577
|
|
|
|
|
|
|
'db_code', $dblink->database ); |
|
1578
|
|
|
|
|
|
|
} else { |
|
1579
|
0
|
|
|
|
|
|
$writer ->startTag('db_id', |
|
1580
|
|
|
|
|
|
|
'id', $seq->display_id , |
|
1581
|
|
|
|
|
|
|
'db_code', 'default' ); |
|
1582
|
|
|
|
|
|
|
} |
|
1583
|
0
|
|
|
|
|
|
$writer ->endTag('db_id') ; |
|
1584
|
|
|
|
|
|
|
|
|
1585
|
|
|
|
|
|
|
##start note |
|
1586
|
0
|
|
|
|
|
|
my $note = "" ; |
|
1587
|
0
|
|
|
|
|
|
foreach my $comment ( $annotation->get_Annotations('comment') ) { |
|
1588
|
|
|
|
|
|
|
# used to be $annotations->each_Comment(), but that's now been replaced |
|
1589
|
|
|
|
|
|
|
# with get_Annotations() |
|
1590
|
|
|
|
|
|
|
# $comment is a Bio::Annotation::Comment object |
|
1591
|
0
|
|
|
|
|
|
$note .= $comment->text() . "\n"; |
|
1592
|
|
|
|
|
|
|
} |
|
1593
|
|
|
|
|
|
|
|
|
1594
|
0
|
|
|
|
|
|
$writer ->startTag('note'); |
|
1595
|
0
|
|
|
|
|
|
$writer ->characters( $note ) ; |
|
1596
|
0
|
|
|
|
|
|
$writer ->endTag('note'); |
|
1597
|
|
|
|
|
|
|
|
|
1598
|
|
|
|
|
|
|
##start description |
|
1599
|
0
|
|
|
|
|
|
$writer ->startTag('description'); |
|
1600
|
|
|
|
|
|
|
|
|
1601
|
|
|
|
|
|
|
# $writer ->characters( $annotation->get_Annotations('description') ) ; |
|
1602
|
|
|
|
|
|
|
# used to be $annotations->each_description(), but that's now been |
|
1603
|
|
|
|
|
|
|
# replaced with get_Annotations. |
|
1604
|
|
|
|
|
|
|
# Simon added this: this is the primary_seq's desc (the DEFINITION tag in a genbank file) |
|
1605
|
0
|
|
|
|
|
|
$writer->characters($seq->{primary_seq}->{desc}); |
|
1606
|
0
|
|
|
|
|
|
$writer ->endTag('description'); |
|
1607
|
|
|
|
|
|
|
|
|
1608
|
|
|
|
|
|
|
##start keywords |
|
1609
|
0
|
|
|
|
|
|
foreach my $genename ( $annotation->get_Annotations('gene_name') ) { |
|
1610
|
|
|
|
|
|
|
# used to be $annotations->each_gene_name, but that's now been |
|
1611
|
|
|
|
|
|
|
# replaced with get_Annotations() |
|
1612
|
0
|
|
|
|
|
|
$writer ->startTag('keyword'); |
|
1613
|
0
|
|
|
|
|
|
$writer ->characters( $genename ) ; |
|
1614
|
0
|
|
|
|
|
|
$writer ->endTag('keyword'); |
|
1615
|
|
|
|
|
|
|
} |
|
1616
|
|
|
|
|
|
|
|
|
1617
|
|
|
|
|
|
|
|
|
1618
|
0
|
|
|
|
|
|
foreach my $ref ( $annotation->get_Annotations('reference') ) { |
|
1619
|
|
|
|
|
|
|
# used to be $annotation->each_Reference, but |
|
1620
|
|
|
|
|
|
|
# that's now been replaced with get_Annotations('reference'); |
|
1621
|
|
|
|
|
|
|
# link is a Bio::Annotation::Reference object |
|
1622
|
0
|
|
|
|
|
|
$writer ->startTag('keyword'); |
|
1623
|
|
|
|
|
|
|
# print Data::Dumper->Dump([$ref]); exit; |
|
1624
|
0
|
|
0
|
|
|
|
my $medline = $ref->medline || 'null'; |
|
1625
|
0
|
|
0
|
|
|
|
my $pubmed = $ref->pubmed || 'null'; |
|
1626
|
0
|
|
0
|
|
|
|
my $database = $ref->database || 'null'; |
|
1627
|
0
|
|
0
|
|
|
|
my $authors = $ref->authors || 'null'; |
|
1628
|
0
|
|
0
|
|
|
|
my $title = $ref->title || 'null'; |
|
1629
|
|
|
|
|
|
|
|
|
1630
|
|
|
|
|
|
|
|
|
1631
|
0
|
|
|
|
|
|
$writer ->characters( 'medline:' . "$medline" . ':' . 'pubmed:' . |
|
1632
|
|
|
|
|
|
|
"$pubmed" . ':' . 'database:' . "$database" . |
|
1633
|
|
|
|
|
|
|
':' .'authors:' . "$authors" . ':' . 'title:' . "$title" ) ; |
|
1634
|
0
|
|
|
|
|
|
$writer ->endTag('keyword'); |
|
1635
|
|
|
|
|
|
|
} |
|
1636
|
|
|
|
|
|
|
|
|
1637
|
|
|
|
|
|
|
## start sequence |
|
1638
|
0
|
|
|
|
|
|
$writer ->startTag('sequence'); |
|
1639
|
0
|
|
|
|
|
|
$writer ->characters( $seq->seq ) ; |
|
1640
|
0
|
|
|
|
|
|
$writer ->endTag('sequence'); |
|
1641
|
|
|
|
|
|
|
|
|
1642
|
|
|
|
|
|
|
## start xrefs |
|
1643
|
0
|
|
|
|
|
|
$writer ->startTag('xrefs'); |
|
1644
|
0
|
|
|
|
|
|
foreach my $link ( $annotation->get_Annotations('dblink') ) { |
|
1645
|
|
|
|
|
|
|
# link is a Bio::Annotation::DBLink object |
|
1646
|
0
|
|
|
|
|
|
$writer ->startTag('db_id', |
|
1647
|
|
|
|
|
|
|
'db_code', $link->database, |
|
1648
|
|
|
|
|
|
|
'id', $link->primary_id); |
|
1649
|
0
|
|
|
|
|
|
$writer ->characters( $link->comment ) ; |
|
1650
|
0
|
|
|
|
|
|
$writer ->endTag('db_id'); |
|
1651
|
|
|
|
|
|
|
} |
|
1652
|
0
|
|
|
|
|
|
$writer ->endTag('xrefs') ; |
|
1653
|
|
|
|
|
|
|
|
|
1654
|
|
|
|
|
|
|
##start sequence map |
|
1655
|
|
|
|
|
|
|
##we can not use : my @feats = $seq->all_SeqFeatures; |
|
1656
|
|
|
|
|
|
|
##rather, we use top_SeqFeatures() to keep the tree structure |
|
1657
|
0
|
|
|
|
|
|
my @feats = $seq->top_SeqFeatures ; |
|
1658
|
|
|
|
|
|
|
|
|
1659
|
0
|
|
|
|
|
|
my $features; |
|
1660
|
|
|
|
|
|
|
|
|
1661
|
|
|
|
|
|
|
##now we need cluster top level seqfeature by algorithm |
|
1662
|
|
|
|
|
|
|
my $maps; |
|
1663
|
0
|
|
|
|
|
|
foreach my $feature (@feats) { |
|
1664
|
0
|
|
|
|
|
|
my $map_type = $feature ->source_tag; |
|
1665
|
0
|
|
|
|
|
|
push (@{$maps->{ $map_type }}, $feature); |
|
|
0
|
|
|
|
|
|
|
|
1666
|
|
|
|
|
|
|
} |
|
1667
|
|
|
|
|
|
|
|
|
1668
|
|
|
|
|
|
|
##now we enter each sequence_map |
|
1669
|
0
|
|
|
|
|
|
foreach my $map_type (keys %$maps ) { |
|
1670
|
0
|
|
|
|
|
|
$writer->startTag('sequence_map', |
|
1671
|
|
|
|
|
|
|
'label', $map_type ); |
|
1672
|
0
|
|
|
|
|
|
$writer->startTag('annotations'); |
|
1673
|
|
|
|
|
|
|
# the original author accidently entered 'annotation' instead of 'annotations' |
|
1674
|
|
|
|
|
|
|
|
|
1675
|
0
|
|
|
|
|
|
foreach my $feature ( @{$maps->{ $map_type }} ) { |
|
|
0
|
|
|
|
|
|
|
|
1676
|
0
|
|
|
|
|
|
$self->_write_seqfeature( $feature, $writer ) ; |
|
1677
|
|
|
|
|
|
|
} |
|
1678
|
|
|
|
|
|
|
|
|
1679
|
0
|
|
|
|
|
|
$writer->endTag('annotations'); |
|
1680
|
0
|
|
|
|
|
|
$writer->endTag('sequence_map'); |
|
1681
|
|
|
|
|
|
|
} |
|
1682
|
|
|
|
|
|
|
|
|
1683
|
0
|
|
|
|
|
|
$writer->endTag('bio_sequence'); |
|
1684
|
0
|
|
|
|
|
|
$writer->endTag('fragment_orientation'); |
|
1685
|
0
|
|
|
|
|
|
$writer->endTag('fragment_order'); |
|
1686
|
0
|
|
|
|
|
|
$writer->endTag('contig'); |
|
1687
|
0
|
|
|
|
|
|
$writer->endTag('sciobj'); |
|
1688
|
|
|
|
|
|
|
|
|
1689
|
|
|
|
|
|
|
} |
|
1690
|
|
|
|
|
|
|
# ================================================================================== |
|
1691
|
|
|
|
|
|
|
|
|
1692
|
|
|
|
|
|
|
=head2 _write_seqfeature |
|
1693
|
|
|
|
|
|
|
|
|
1694
|
|
|
|
|
|
|
Usage : $agave->_write_each_record( $seqfeature, $write ) |
|
1695
|
|
|
|
|
|
|
Function: change seeqfeature data into agave format |
|
1696
|
|
|
|
|
|
|
Returns : NONE |
|
1697
|
|
|
|
|
|
|
Args : Bio::SeqFeature object and XML::writer object |
|
1698
|
|
|
|
|
|
|
|
|
1699
|
|
|
|
|
|
|
=cut |
|
1700
|
|
|
|
|
|
|
|
|
1701
|
|
|
|
|
|
|
sub _write_seqfeature{ |
|
1702
|
|
|
|
|
|
|
|
|
1703
|
0
|
|
|
0
|
|
|
my ($self,$seqf, $writer) = @_; |
|
1704
|
|
|
|
|
|
|
|
|
1705
|
|
|
|
|
|
|
##now enter seq feature |
|
1706
|
0
|
|
|
|
|
|
$writer ->startTag('seq_feature', |
|
1707
|
|
|
|
|
|
|
'feature_type', $seqf->primary_tag() ); |
|
1708
|
|
|
|
|
|
|
|
|
1709
|
0
|
|
|
|
|
|
my $strand = $seqf->strand(); |
|
1710
|
0
|
0
|
|
|
|
|
$strand = 0 if !defined $strand; |
|
1711
|
|
|
|
|
|
|
# $strand == 1 ? 'false' : 'true'; |
|
1712
|
0
|
|
|
|
|
|
my $is_on_complement; |
|
1713
|
0
|
0
|
|
|
|
|
if ($strand == 1) { |
|
1714
|
0
|
|
|
|
|
|
$is_on_complement = 'true'; |
|
1715
|
|
|
|
|
|
|
} else { |
|
1716
|
0
|
|
|
|
|
|
$is_on_complement = 'false'; |
|
1717
|
|
|
|
|
|
|
} |
|
1718
|
|
|
|
|
|
|
|
|
1719
|
|
|
|
|
|
|
# die Data::Dumper->Dump([$seqf]) if !defined $strand; |
|
1720
|
0
|
|
|
|
|
|
$writer ->startTag('seq_location', |
|
1721
|
|
|
|
|
|
|
'lease_start', $seqf->start(), |
|
1722
|
|
|
|
|
|
|
'greatest_end', $seqf->end(), |
|
1723
|
|
|
|
|
|
|
# 'is_on_complement', $seqf->strand() == 1 ? 'false' : 'true') ; |
|
1724
|
|
|
|
|
|
|
'is_on_complement' , $is_on_complement); |
|
1725
|
|
|
|
|
|
|
# is_on_complement: is the feature found on the complementary |
|
1726
|
|
|
|
|
|
|
# strand (true) or not (false)? |
|
1727
|
0
|
|
|
|
|
|
$writer ->endTag('seq_location'); |
|
1728
|
|
|
|
|
|
|
|
|
1729
|
|
|
|
|
|
|
##enter qualifier |
|
1730
|
0
|
|
|
|
|
|
foreach my $tag ( $seqf->all_tags() ) { |
|
1731
|
0
|
|
|
|
|
|
$writer ->startTag('qualifier', |
|
1732
|
|
|
|
|
|
|
'qualifier_type', $tag); |
|
1733
|
0
|
|
|
|
|
|
$writer ->characters( $seqf->each_tag_value($tag) ) ; |
|
1734
|
0
|
|
|
|
|
|
$writer ->endTag('qualifier'); |
|
1735
|
|
|
|
|
|
|
} |
|
1736
|
|
|
|
|
|
|
|
|
1737
|
|
|
|
|
|
|
##now recursively travel the seqFeature |
|
1738
|
0
|
|
|
|
|
|
foreach my $subfeat ( $seqf->sub_SeqFeature ) { |
|
1739
|
0
|
|
|
|
|
|
$self->_write_seqfeature( $subfeat, $writer ) ; |
|
1740
|
|
|
|
|
|
|
} |
|
1741
|
|
|
|
|
|
|
|
|
1742
|
0
|
|
|
|
|
|
$writer->endTag('seq_feature'); |
|
1743
|
|
|
|
|
|
|
|
|
1744
|
0
|
|
|
|
|
|
return; |
|
1745
|
|
|
|
|
|
|
|
|
1746
|
|
|
|
|
|
|
} |
|
1747
|
|
|
|
|
|
|
# ================================================================================== |
|
1748
|
|
|
|
|
|
|
|
|
1749
|
|
|
|
|
|
|
=head2 _filehandle |
|
1750
|
|
|
|
|
|
|
|
|
1751
|
|
|
|
|
|
|
Title : _filehandle |
|
1752
|
|
|
|
|
|
|
Usage : $obj->_filehandle($newval) |
|
1753
|
|
|
|
|
|
|
Function: |
|
1754
|
|
|
|
|
|
|
Example : |
|
1755
|
|
|
|
|
|
|
Returns : value of _filehandle |
|
1756
|
|
|
|
|
|
|
Args : newvalue (optional) |
|
1757
|
|
|
|
|
|
|
|
|
1758
|
|
|
|
|
|
|
=cut |
|
1759
|
|
|
|
|
|
|
|
|
1760
|
|
|
|
|
|
|
sub _filehandle{ |
|
1761
|
|
|
|
|
|
|
|
|
1762
|
0
|
|
|
0
|
|
|
my ($obj,$value) = @_; |
|
1763
|
0
|
0
|
|
|
|
|
if ( defined $value) { |
|
1764
|
0
|
|
|
|
|
|
$obj->{'_filehandle'} = $value; |
|
1765
|
|
|
|
|
|
|
} |
|
1766
|
0
|
|
|
|
|
|
return $obj->{'_filehandle'}; |
|
1767
|
|
|
|
|
|
|
|
|
1768
|
|
|
|
|
|
|
} |
|
1769
|
|
|
|
|
|
|
# ================================================================================== |
|
1770
|
|
|
|
|
|
|
|
|
1771
|
|
|
|
|
|
|
=head2 throw |
|
1772
|
|
|
|
|
|
|
|
|
1773
|
|
|
|
|
|
|
Title : throw |
|
1774
|
|
|
|
|
|
|
Usage : $self->throw; |
|
1775
|
|
|
|
|
|
|
Function : Throw's error message. Calls SeqIO's throw method. |
|
1776
|
|
|
|
|
|
|
Args : Array of string(s), holding error message(s). |
|
1777
|
|
|
|
|
|
|
Returns : Nothing. |
|
1778
|
|
|
|
|
|
|
Note : Method(s) that call(s) this method: many. |
|
1779
|
|
|
|
|
|
|
Method(s) that this method calls: Bio::SeqIO's throw method. |
|
1780
|
|
|
|
|
|
|
|
|
1781
|
|
|
|
|
|
|
=cut |
|
1782
|
|
|
|
|
|
|
|
|
1783
|
|
|
|
|
|
|
sub throw { |
|
1784
|
|
|
|
|
|
|
|
|
1785
|
0
|
|
|
0
|
1
|
|
my ($self, @s) = @_; |
|
1786
|
0
|
|
|
|
|
|
my $string = "[$.]" . join('', @s); |
|
1787
|
0
|
|
|
|
|
|
$self->SUPER::throw($string); |
|
1788
|
0
|
|
|
|
|
|
return; |
|
1789
|
|
|
|
|
|
|
|
|
1790
|
|
|
|
|
|
|
} |
|
1791
|
|
|
|
|
|
|
|
|
1792
|
|
|
|
|
|
|
1; |