| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# Let the code begin... |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
package Bio::AlignIO::Handler::GenericAlignHandler; |
|
4
|
|
|
|
|
|
|
|
|
5
|
3
|
|
|
3
|
|
13
|
use strict; |
|
|
3
|
|
|
|
|
3
|
|
|
|
3
|
|
|
|
|
73
|
|
|
6
|
3
|
|
|
3
|
|
10
|
use warnings; |
|
|
3
|
|
|
|
|
3
|
|
|
|
3
|
|
|
|
|
73
|
|
|
7
|
|
|
|
|
|
|
|
|
8
|
3
|
|
|
3
|
|
277
|
use Bio::Annotation::Collection; |
|
|
3
|
|
|
|
|
3
|
|
|
|
3
|
|
|
|
|
63
|
|
|
9
|
3
|
|
|
3
|
|
517
|
use Bio::Annotation::Comment; |
|
|
3
|
|
|
|
|
5
|
|
|
|
3
|
|
|
|
|
56
|
|
|
10
|
3
|
|
|
3
|
|
13
|
use Bio::Annotation::SimpleValue; |
|
|
3
|
|
|
|
|
4
|
|
|
|
3
|
|
|
|
|
115
|
|
|
11
|
3
|
|
|
3
|
|
883
|
use Bio::Annotation::Target; |
|
|
3
|
|
|
|
|
5
|
|
|
|
3
|
|
|
|
|
69
|
|
|
12
|
3
|
|
|
3
|
|
12
|
use Bio::Annotation::DBLink; |
|
|
3
|
|
|
|
|
3
|
|
|
|
3
|
|
|
|
|
43
|
|
|
13
|
3
|
|
|
3
|
|
598
|
use Bio::Annotation::Reference; |
|
|
3
|
|
|
|
|
5
|
|
|
|
3
|
|
|
|
|
60
|
|
|
14
|
3
|
|
|
3
|
|
549
|
use Bio::SimpleAlign; |
|
|
3
|
|
|
|
|
5
|
|
|
|
3
|
|
|
|
|
62
|
|
|
15
|
3
|
|
|
3
|
|
13
|
use Data::Dumper; |
|
|
3
|
|
|
|
|
3
|
|
|
|
3
|
|
|
|
|
187
|
|
|
16
|
|
|
|
|
|
|
|
|
17
|
3
|
|
|
3
|
|
13
|
use base qw(Bio::Root::Root Bio::HandlerBaseI); |
|
|
3
|
|
|
|
|
4
|
|
|
|
3
|
|
|
|
|
1001
|
|
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
# only stockholm is defined for now... |
|
20
|
|
|
|
|
|
|
my %HANDLERS = ( |
|
21
|
|
|
|
|
|
|
# stockholm has sequence and alignment specific annotation; this |
|
22
|
|
|
|
|
|
|
'stockholm' => { |
|
23
|
|
|
|
|
|
|
'CONSENSUS_META' => \&_generic_consensus_meta, |
|
24
|
|
|
|
|
|
|
'SEQUENCE' => \&_generic_metaseq, |
|
25
|
|
|
|
|
|
|
'NAMED_META' => \&_generic_metaseq, |
|
26
|
|
|
|
|
|
|
'ACCESSION' => \&_generic_store, |
|
27
|
|
|
|
|
|
|
'ALPHABET' => \&_generic_store, |
|
28
|
|
|
|
|
|
|
'ID' => \&_generic_store, |
|
29
|
|
|
|
|
|
|
'DESCRIPTION' => \&_generic_store, |
|
30
|
|
|
|
|
|
|
'REFERENCE' => \&_generic_reference, |
|
31
|
|
|
|
|
|
|
'DBLINK' => \&_stockholm_target, |
|
32
|
|
|
|
|
|
|
'DATABASE_COMMENT' => \&_generic_comment, |
|
33
|
|
|
|
|
|
|
'ALIGNMENT_COMMENT' => \&_generic_comment, |
|
34
|
|
|
|
|
|
|
'_DEFAULT_' => \&_generic_simplevalue |
|
35
|
|
|
|
|
|
|
}, |
|
36
|
|
|
|
|
|
|
); |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
sub new { |
|
39
|
7
|
|
|
7
|
1
|
15
|
my ($class, @args) = @_; |
|
40
|
7
|
|
|
|
|
29
|
my $self = $class->SUPER::new(@args); |
|
41
|
7
|
|
|
|
|
28
|
my ($format, $verbose) = $self->_rearrange([qw(FORMAT VERBOSE)], @args); |
|
42
|
7
|
50
|
|
|
|
20
|
$self->throw("Must define alignment record format") if !$format; |
|
43
|
7
|
50
|
|
|
|
18
|
$verbose && $self->verbose($verbose); |
|
44
|
7
|
|
|
|
|
21
|
$self->format($format); |
|
45
|
7
|
|
|
|
|
18
|
$self->handler_methods(); |
|
46
|
|
|
|
|
|
|
# if we intend at a later point we can add a Builder |
|
47
|
|
|
|
|
|
|
#$builder && $self->alignbuilder($builder); |
|
48
|
7
|
|
|
|
|
23
|
return $self; |
|
49
|
|
|
|
|
|
|
} |
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
sub handler_methods { |
|
52
|
7
|
|
|
7
|
1
|
8
|
my $self = shift; |
|
53
|
7
|
50
|
|
|
|
18
|
if (!($self->{'handlers'})) { |
|
54
|
|
|
|
|
|
|
$self->throw("No handlers defined for alignment format ",$self->format) |
|
55
|
7
|
50
|
|
|
|
16
|
unless exists $HANDLERS{$self->format}; |
|
56
|
7
|
|
|
|
|
12
|
$self->{'handlers'} = $HANDLERS{$self->format}; |
|
57
|
|
|
|
|
|
|
} |
|
58
|
7
|
|
|
|
|
10
|
return ($self->{'handlers'}); |
|
59
|
|
|
|
|
|
|
} |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
sub data_handler { |
|
62
|
562
|
|
|
562
|
1
|
501
|
my ($self, $data) = @_; |
|
63
|
562
|
|
33
|
|
|
889
|
my $nm = $data->{NAME} || $self->throw("No name tag defined!"); |
|
64
|
|
|
|
|
|
|
# this should handle data on the fly w/o caching; any caching should be |
|
65
|
|
|
|
|
|
|
# done in the driver! |
|
66
|
|
|
|
|
|
|
my $method = (exists $self->{'handlers'}->{$nm}) ? ($self->{'handlers'}->{$nm}) : |
|
67
|
562
|
50
|
|
|
|
982
|
(exists $self->{'handlers'}->{'_DEFAULT_'}) ? ($self->{'handlers'}->{'_DEFAULT_'}) : |
|
|
|
100
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
undef; |
|
69
|
562
|
50
|
|
|
|
736
|
if (!$method) { |
|
70
|
0
|
|
|
|
|
0
|
$self->debug("No handler defined for $nm\n"); |
|
71
|
0
|
|
|
|
|
0
|
return; |
|
72
|
|
|
|
|
|
|
}; |
|
73
|
562
|
|
|
|
|
761
|
$self->$method($data); |
|
74
|
|
|
|
|
|
|
} |
|
75
|
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
sub reset_parameters { |
|
77
|
9
|
|
|
9
|
1
|
10
|
my $self = shift; |
|
78
|
9
|
|
|
|
|
14
|
$self->{'_params'} = undef; |
|
79
|
9
|
|
|
|
|
36
|
$self->{'_nse_cache'} = undef; |
|
80
|
9
|
|
|
|
|
13
|
$self->{'_features'} = undef; |
|
81
|
|
|
|
|
|
|
} |
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
sub format { |
|
84
|
30
|
|
|
30
|
1
|
33
|
my $self = shift; |
|
85
|
30
|
100
|
|
|
|
48
|
if (@_) { |
|
86
|
7
|
|
|
|
|
14
|
my $format = lc shift; |
|
87
|
7
|
50
|
|
|
|
17
|
$self->throw("Format $format not supported") unless exists $HANDLERS{$format}; |
|
88
|
7
|
|
|
|
|
13
|
$self->{'_alignformat'} = $format; |
|
89
|
|
|
|
|
|
|
}; |
|
90
|
30
|
|
|
|
|
96
|
return $self->{'_alignformat'}; |
|
91
|
|
|
|
|
|
|
} |
|
92
|
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
sub get_params { |
|
94
|
192
|
|
|
192
|
1
|
207
|
my ($self, @ids) = @_; |
|
95
|
192
|
|
|
|
|
121
|
my $data; |
|
96
|
192
|
100
|
|
|
|
264
|
if (scalar(@ids)) { |
|
97
|
183
|
|
|
|
|
188
|
for my $id (@ids) { |
|
98
|
201
|
50
|
|
|
|
367
|
if (!index($id, '-')==0) { |
|
99
|
0
|
|
|
|
|
0
|
$id = '-'.$id ; |
|
100
|
|
|
|
|
|
|
} |
|
101
|
201
|
100
|
|
|
|
555
|
$data->{$id} = $self->{'_params'}->{$id} if (exists $self->{'_params'}->{$id}); |
|
102
|
|
|
|
|
|
|
} |
|
103
|
183
|
|
100
|
|
|
292
|
$data ||= {}; |
|
104
|
|
|
|
|
|
|
} else { |
|
105
|
9
|
|
|
|
|
13
|
$data = $self->{'_params'}; |
|
106
|
|
|
|
|
|
|
} |
|
107
|
192
|
|
|
|
|
393
|
return $data; |
|
108
|
|
|
|
|
|
|
} |
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
sub set_params { |
|
111
|
0
|
|
|
0
|
1
|
0
|
shift->throw('Not implemented yet!'); |
|
112
|
|
|
|
|
|
|
} |
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
sub build_alignment { |
|
115
|
9
|
|
|
9
|
1
|
13
|
my $self = shift; |
|
116
|
9
|
|
|
|
|
10
|
my %init; |
|
117
|
9
|
|
|
|
|
23
|
$self->process_seqs; |
|
118
|
9
|
|
|
|
|
23
|
my $param = $self->get_params; |
|
119
|
9
|
50
|
|
|
|
35
|
if (defined $param->{-seqs}) { |
|
120
|
9
|
|
|
|
|
44
|
return Bio::SimpleAlign->new(%$param, -source => $self->format); |
|
121
|
|
|
|
|
|
|
} |
|
122
|
0
|
|
|
|
|
0
|
return; |
|
123
|
|
|
|
|
|
|
} |
|
124
|
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
sub annotation_collection { |
|
126
|
134
|
|
|
134
|
1
|
126
|
my ($self, $coll) = @_; |
|
127
|
134
|
50
|
|
|
|
331
|
if ($coll) { |
|
|
|
100
|
|
|
|
|
|
|
128
|
0
|
0
|
0
|
|
|
0
|
$self->throw("Must have Bio::AnnotationCollectionI ". |
|
129
|
|
|
|
|
|
|
"when explicitly setting annotation_collection()") |
|
130
|
|
|
|
|
|
|
unless (ref($coll) && $coll->isa('Bio::AnnotationCollectionI')); |
|
131
|
0
|
|
|
|
|
0
|
$self->{'_params'}->{'-annotation'} = $coll; |
|
132
|
|
|
|
|
|
|
} elsif (!exists($self->{'_params'}->{'-annotation'})) { |
|
133
|
9
|
|
|
|
|
43
|
$self->{'_params'}->{'-annotation'} = Bio::Annotation::Collection->new() |
|
134
|
|
|
|
|
|
|
} |
|
135
|
134
|
|
|
|
|
331
|
return $self->{'_params'}->{'-annotation'}; |
|
136
|
|
|
|
|
|
|
} |
|
137
|
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
sub seq_annotation_collection { |
|
139
|
0
|
|
|
0
|
1
|
0
|
my ($self, $coll) = @_; |
|
140
|
0
|
0
|
|
|
|
0
|
if ($coll) { |
|
|
|
0
|
|
|
|
|
|
|
141
|
0
|
0
|
0
|
|
|
0
|
$self->throw("Must have Bio::AnnotationCollectionI ". |
|
142
|
|
|
|
|
|
|
"when explicitly setting seq_annotation_collection()") |
|
143
|
|
|
|
|
|
|
unless (ref($coll) && $coll->isa('Bio::AnnotationCollectionI')); |
|
144
|
0
|
|
|
|
|
0
|
$self->{'_params'}->{'-seq_annotation'} = $coll; |
|
145
|
|
|
|
|
|
|
} elsif (!exists($self->{'_params'}->{'-seq_annotation'})) { |
|
146
|
0
|
|
|
|
|
0
|
$self->{'_params'}->{'-seq_annotation'} = Bio::Annotation::Collection->new() |
|
147
|
|
|
|
|
|
|
} |
|
148
|
0
|
|
|
|
|
0
|
return $self->{'_params'}->{'-seq_annotation'}; |
|
149
|
|
|
|
|
|
|
} |
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
sub process_seqs { |
|
152
|
9
|
|
|
9
|
1
|
13
|
my $self = shift; |
|
153
|
|
|
|
|
|
|
|
|
154
|
9
|
|
|
|
|
20
|
my $data = $self->get_params(qw(-seqs -seq_class -consensus_meta)); |
|
155
|
9
|
|
50
|
|
|
27
|
my $class = $data->{-seq_class} || 'Bio::LocatableSeq'; |
|
156
|
|
|
|
|
|
|
# cache classes loaded already |
|
157
|
9
|
100
|
|
|
|
25
|
if (!exists($self->{'_loaded_modules'}->{$class})) { |
|
158
|
5
|
|
|
|
|
29
|
$self->_load_module($class); |
|
159
|
5
|
|
|
|
|
15
|
$self->{'_loaded_modules'}->{$class}++; |
|
160
|
|
|
|
|
|
|
} |
|
161
|
|
|
|
|
|
|
# process any meta sequence data |
|
162
|
9
|
100
|
66
|
|
|
63
|
if ( $data->{-consensus_meta} && !UNIVERSAL::isa($data->{-consensus_meta},'Bio::Seq::Meta')) { |
|
163
|
8
|
|
|
|
|
14
|
my $ref = $data->{-consensus_meta}; |
|
164
|
8
|
50
|
|
|
|
22
|
if (!exists($self->{'_loaded_modules'}->{'Bio::Seq::Meta'})) { |
|
165
|
0
|
|
|
|
|
0
|
$self->_load_module('Bio::Seq::Meta'); |
|
166
|
0
|
|
|
|
|
0
|
$self->{'_loaded_modules'}->{'Bio::Seq::Meta'}++; |
|
167
|
|
|
|
|
|
|
} |
|
168
|
8
|
|
|
|
|
41
|
my $ms = Bio::Seq::Meta->new(); |
|
169
|
8
|
|
|
|
|
11
|
for my $tag (sort keys %{$ref}) { |
|
|
8
|
|
|
|
|
34
|
|
|
170
|
10
|
|
|
|
|
23
|
$ms->named_meta($tag, $ref->{$tag}); |
|
171
|
|
|
|
|
|
|
} |
|
172
|
8
|
|
|
|
|
14
|
$self->{'_params'}->{'-consensus_meta'} = $ms; |
|
173
|
|
|
|
|
|
|
} |
|
174
|
|
|
|
|
|
|
# this should always be an array ref! |
|
175
|
9
|
|
|
|
|
10
|
for my $seq (@{$data->{-seqs}}) { |
|
|
9
|
|
|
|
|
25
|
|
|
176
|
168
|
50
|
|
|
|
386
|
next if (UNIVERSAL::isa($seq,'Bio::LocatableI')); |
|
177
|
|
|
|
|
|
|
# process anything else |
|
178
|
168
|
50
|
|
|
|
415
|
$self->_from_nse($seq) if $seq->{NSE}; |
|
179
|
168
|
50
|
|
|
|
317
|
if (UNIVERSAL::isa($seq,'HASH')) { |
|
180
|
168
|
|
|
|
|
127
|
my %param; |
|
181
|
168
|
|
|
|
|
373
|
for my $p (keys %$seq) { |
|
182
|
1503
|
50
|
|
|
|
2657
|
$param{'-'.lc $p} = $seq->{$p} if exists $seq->{$p}; |
|
183
|
|
|
|
|
|
|
} |
|
184
|
168
|
|
|
|
|
550
|
my $ls = $class->new(%param); |
|
185
|
|
|
|
|
|
|
# a little switcheroo to attach the sequence |
|
186
|
|
|
|
|
|
|
# (though using it to get seq() doesn't work correctly yet!) |
|
187
|
168
|
100
|
33
|
|
|
716
|
if (defined $seq->{NSE} && |
|
|
|
|
100
|
|
|
|
|
|
188
|
|
|
|
|
|
|
exists $self->{'_features'} && |
|
189
|
|
|
|
|
|
|
exists $self->{'_features'}->{ $seq->{NSE} }) { |
|
190
|
1
|
|
|
|
|
2
|
for my $feat (@{ $self->{'_features'}->{ $seq->{NSE} } }) { |
|
|
1
|
|
|
|
|
3
|
|
|
191
|
6
|
|
|
|
|
3
|
push @{ $self->{'_params'}->{'-features'} }, $feat; |
|
|
6
|
|
|
|
|
9
|
|
|
192
|
6
|
|
|
|
|
10
|
$feat->attach_seq($ls); |
|
193
|
|
|
|
|
|
|
} |
|
194
|
|
|
|
|
|
|
} |
|
195
|
168
|
|
|
|
|
615
|
$seq = $ls; |
|
196
|
|
|
|
|
|
|
} |
|
197
|
|
|
|
|
|
|
} |
|
198
|
|
|
|
|
|
|
} |
|
199
|
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
####################### SEQUENCE HANDLERS ####################### |
|
201
|
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
# any sequence data for a Bio::Seq::Meta |
|
203
|
|
|
|
|
|
|
sub _generic_metaseq { |
|
204
|
319
|
|
|
319
|
|
225
|
my ($self, $data) = @_; |
|
205
|
319
|
50
|
|
|
|
372
|
return unless $data; |
|
206
|
319
|
50
|
|
|
|
395
|
$self->throw("No alignment position passed") if !exists($data->{BLOCK_LINE}); |
|
207
|
319
|
50
|
|
|
|
428
|
$self->throw("Alignment position must be an index greater than 0") if $data->{BLOCK_LINE} < 1; |
|
208
|
319
|
|
|
|
|
279
|
$self->{'_params'}->{'-seq_class'} = 'Bio::Seq::Meta'; |
|
209
|
319
|
|
|
|
|
267
|
my $index = $data->{BLOCK_LINE} - 1; |
|
210
|
319
|
100
|
|
|
|
481
|
if (my $nse = $self->{'_params'}->{'-seqs'}->[$index]->{NSE}) { |
|
211
|
151
|
50
|
|
|
|
225
|
$self->throw("NSE in passed data doesn't match stored data in same position: $nse") unless $nse eq $data->{NSE}; |
|
212
|
|
|
|
|
|
|
} else { |
|
213
|
168
|
|
|
|
|
255
|
$self->{'_params'}->{'-seqs'}->[$index]->{NSE} = $data->{NSE}; |
|
214
|
|
|
|
|
|
|
} |
|
215
|
319
|
100
|
|
|
|
363
|
if ($data->{NAME} eq 'SEQUENCE') { |
|
|
|
50
|
|
|
|
|
|
|
216
|
317
|
|
|
|
|
696
|
$self->{'_params'}->{'-seqs'}->[$index]->{SEQ} .= $data->{DATA}; |
|
217
|
|
|
|
|
|
|
} elsif ($data->{NAME} eq 'NAMED_META') { |
|
218
|
2
|
|
|
|
|
8
|
$self->{'_params'}->{'-seqs'}->[$index]->{NAMED_META}->{$data->{META_TAG}} .= $data->{DATA}; |
|
219
|
|
|
|
|
|
|
} |
|
220
|
|
|
|
|
|
|
} |
|
221
|
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
sub _generic_consensus_meta { |
|
223
|
18
|
|
|
18
|
|
15
|
my ($self, $data) = @_; |
|
224
|
18
|
50
|
|
|
|
29
|
return unless $data; |
|
225
|
18
|
50
|
|
|
|
32
|
if ($data->{NAME} eq 'CONSENSUS_META') { |
|
226
|
18
|
|
|
|
|
53
|
$self->{'_params'}->{'-consensus_meta'}->{$data->{META_TAG}} .= $data->{DATA}; |
|
227
|
|
|
|
|
|
|
} |
|
228
|
|
|
|
|
|
|
} |
|
229
|
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
# any sequence data for a Bio::LocatableSeq |
|
231
|
|
|
|
|
|
|
sub _generic_locatableseq { |
|
232
|
0
|
|
|
0
|
|
0
|
my ($self, $data) = @_; |
|
233
|
0
|
0
|
|
|
|
0
|
return unless $data; |
|
234
|
0
|
0
|
|
|
|
0
|
$self->throw("No alignment position passed") if !exists($data->{BLOCK_LINE}); |
|
235
|
0
|
0
|
|
|
|
0
|
$self->throw("Alignment position must be an index greater than 0") if $data->{BLOCK_LINE} < 1; |
|
236
|
0
|
|
|
|
|
0
|
my $index = $data->{BLOCK_LINE} - 1; |
|
237
|
0
|
0
|
|
|
|
0
|
if (my $nse = $self->{'_params'}->{'-seqs'}->[$index]->{NSE}) { |
|
238
|
0
|
0
|
|
|
|
0
|
$self->throw("NSE in passed data doesn't match stored data in same position: $nse") if $nse ne $data->{NSE}; |
|
239
|
|
|
|
|
|
|
} else { |
|
240
|
0
|
|
|
|
|
0
|
$self->{'_params'}->{'-seqs'}->[$index]->{NSE} = $data->{NSE}; |
|
241
|
|
|
|
|
|
|
} |
|
242
|
0
|
0
|
|
|
|
0
|
if ($data->{NAME} eq 'SEQUENCE') { |
|
243
|
0
|
|
|
|
|
0
|
$self->{'_params'}->{'-seqs'}->[$index]->{SEQ} .= $data->{DATA}; |
|
244
|
|
|
|
|
|
|
} |
|
245
|
|
|
|
|
|
|
} |
|
246
|
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
####################### RAW DATA HANDLERS ####################### |
|
248
|
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
# store by data name (ACCESSION, ID, etc), which can be mapped to the |
|
250
|
|
|
|
|
|
|
# appropriate alignment or sequence parameter |
|
251
|
|
|
|
|
|
|
sub _generic_store { |
|
252
|
85
|
|
|
85
|
|
84
|
my ($self, $data) = @_; |
|
253
|
85
|
50
|
|
|
|
109
|
return unless $data; |
|
254
|
85
|
100
|
|
|
|
113
|
if ($data->{ALIGNMENT}) { |
|
255
|
32
|
|
|
|
|
113
|
$self->{'_params'}->{'-'.lc $data->{NAME}} = $data->{DATA}; |
|
256
|
|
|
|
|
|
|
} else { |
|
257
|
|
|
|
|
|
|
$self->{'_params'}->{'-seq_'.lc $data->{NAME}}->{$data->{NSE}} = $data->{DATA} |
|
258
|
53
|
|
|
|
|
205
|
} |
|
259
|
|
|
|
|
|
|
} |
|
260
|
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
sub _generic_reference { |
|
262
|
22
|
|
|
22
|
|
22
|
my ($self, $data) = @_; |
|
263
|
|
|
|
|
|
|
my $ref = Bio::Annotation::Reference->new(-title => $data->{TITLE}, |
|
264
|
|
|
|
|
|
|
-authors => $data->{AUTHORS}, |
|
265
|
|
|
|
|
|
|
-pubmed => $data->{PUBMED}, |
|
266
|
|
|
|
|
|
|
-location => $data->{JOURNAL}, |
|
267
|
22
|
|
|
|
|
146
|
-tagname => lc $data->{NAME}); |
|
268
|
22
|
|
|
|
|
58
|
$self->annotation_collection->add_Annotation($ref); |
|
269
|
|
|
|
|
|
|
} |
|
270
|
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
sub _generic_simplevalue { |
|
272
|
85
|
|
|
85
|
|
83
|
my ($self, $data) = @_; |
|
273
|
|
|
|
|
|
|
my $sv = Bio::Annotation::SimpleValue->new(-value => $data->{DATA}, |
|
274
|
85
|
|
|
|
|
337
|
-tagname => lc $data->{NAME}); |
|
275
|
85
|
|
|
|
|
152
|
$self->annotation_collection->add_Annotation($sv); |
|
276
|
|
|
|
|
|
|
} |
|
277
|
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
sub _generic_comment { |
|
279
|
6
|
|
|
6
|
|
287
|
my ($self, $data) = @_; |
|
280
|
|
|
|
|
|
|
my $comment = Bio::Annotation::Comment->new(-type => lc $data->{NAME}, |
|
281
|
|
|
|
|
|
|
-text => $data->{DATA}, |
|
282
|
6
|
|
|
|
|
48
|
-tagname => lc $data->{NAME}); |
|
283
|
6
|
|
|
|
|
15
|
$self->annotation_collection->add_Annotation($comment); |
|
284
|
|
|
|
|
|
|
} |
|
285
|
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
# Some DBLinks in Stockholm format are unique, so a unique handler for them |
|
287
|
|
|
|
|
|
|
sub _stockholm_target { |
|
288
|
27
|
|
|
27
|
|
27
|
my ($self, $data) = @_; |
|
289
|
|
|
|
|
|
|
# process database info |
|
290
|
27
|
|
|
|
|
54
|
$self->_from_stk_dblink($data); |
|
291
|
27
|
|
|
|
|
25
|
my $comment; |
|
292
|
|
|
|
|
|
|
# Bio::Annotation::Target is now a DBLink, but has additional (RangeI) |
|
293
|
|
|
|
|
|
|
# capabilities (for PDB data) |
|
294
|
|
|
|
|
|
|
my $dblink = Bio::Annotation::Target->new( |
|
295
|
|
|
|
|
|
|
-database => $data->{DBLINK_DB}, |
|
296
|
|
|
|
|
|
|
-primary_id => $data->{DBLINK_ACC}, |
|
297
|
|
|
|
|
|
|
-optional_id => $data->{DBLINK_OPT}, |
|
298
|
|
|
|
|
|
|
-start => $data->{DBLINK_START}, |
|
299
|
|
|
|
|
|
|
-end => $data->{DBLINK_END}, |
|
300
|
|
|
|
|
|
|
-strand => $data->{DBLINK_STRAND}, |
|
301
|
27
|
|
|
|
|
207
|
-comment => $comment, |
|
302
|
|
|
|
|
|
|
-tagname => 'dblink', |
|
303
|
|
|
|
|
|
|
); |
|
304
|
27
|
100
|
|
|
|
79
|
if ($data->{ALIGNMENT}) { |
|
305
|
|
|
|
|
|
|
# Alignment-specific DBLinks |
|
306
|
21
|
|
|
|
|
49
|
$self->annotation_collection->add_Annotation($dblink); |
|
307
|
|
|
|
|
|
|
} else { |
|
308
|
|
|
|
|
|
|
# Sequence-specific DBLinks |
|
309
|
|
|
|
|
|
|
# These should come with identifying information of some sort |
|
310
|
|
|
|
|
|
|
# (ID/START/END/STRAND). Make into a SeqFeature (SimpleAlign is |
|
311
|
|
|
|
|
|
|
# FeatureHolderI) spanning the length acc. to the NSE. Add the DBLink as |
|
312
|
|
|
|
|
|
|
# Annotation specific to that SeqFeature, store in an internal hash by |
|
313
|
|
|
|
|
|
|
# NSE so we can tie the LocatableSeq to the proper Features |
|
314
|
6
|
50
|
|
|
|
15
|
$self->_from_nse($data) if $data->{NSE}; |
|
315
|
|
|
|
|
|
|
$self->throw("Must supply an sequence DISPLAY_ID or NSE for sequence-related |
|
316
|
6
|
0
|
33
|
|
|
11
|
DBLinks") unless $data->{ACCESSION_NUMBER} || $data->{DISPLAY_ID}; |
|
317
|
|
|
|
|
|
|
my $sf = Bio::SeqFeature::Generic->new(-seq_id => $data->{DISPLAY_ID}, |
|
318
|
|
|
|
|
|
|
-accession_number => $data->{ACCESSION_NUMBER}, |
|
319
|
|
|
|
|
|
|
-start => $data->{START}, |
|
320
|
|
|
|
|
|
|
-end => $data->{END}, |
|
321
|
|
|
|
|
|
|
-strand => $data->{STRAND} |
|
322
|
6
|
|
|
|
|
30
|
); |
|
323
|
6
|
|
|
|
|
11
|
$sf->annotation->add_Annotation($dblink); |
|
324
|
|
|
|
|
|
|
# index by NSE |
|
325
|
6
|
|
|
|
|
5
|
push @{ $self->{'_features'}->{ $data->{NSE} } }, $sf; |
|
|
6
|
|
|
|
|
18
|
|
|
326
|
|
|
|
|
|
|
#$self->seq_annotation_collection->add_Annotation($dblink); |
|
327
|
|
|
|
|
|
|
} |
|
328
|
|
|
|
|
|
|
} |
|
329
|
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
####################### HELPER METHODS ####################### |
|
331
|
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
# returns ACCESSION VERSION START END STRAND ALPHABET |
|
333
|
|
|
|
|
|
|
# cached for multiple lookups, should reset in between uses |
|
334
|
|
|
|
|
|
|
sub _from_nse { |
|
335
|
174
|
|
|
174
|
|
146
|
my ($self, $data) = @_; |
|
336
|
174
|
50
|
|
|
|
289
|
return unless my $nse = $data->{NSE}; |
|
337
|
174
|
|
100
|
|
|
266
|
$data->{ALPHABET} = $self->get_params('-alphabet')->{'-alphabet'} || 'protein'; |
|
338
|
|
|
|
|
|
|
# grab any accessions if present, switch out with ACCESSION from NSE |
|
339
|
|
|
|
|
|
|
# (move that to primary_id) |
|
340
|
174
|
|
|
|
|
178
|
my $new_acc; |
|
341
|
174
|
100
|
|
|
|
256
|
if (exists $self->{'_params'}->{'-seq_accession'}) { |
|
342
|
59
|
|
|
|
|
82
|
$new_acc = $self->{'_params'}->{'-seq_accession'}->{$data->{NSE}}; |
|
343
|
|
|
|
|
|
|
} |
|
344
|
174
|
100
|
|
|
|
894
|
if ($nse =~ m{(\S+?)(?:\.(\d+))?/(\d+)-(\d+)}xmso) { |
|
345
|
172
|
100
|
66
|
|
|
600
|
my $strand = $data->{ALPHABET} eq 'dna' || $data->{ALPHABET} eq 'rna' ? 1 : undef; |
|
346
|
172
|
|
|
|
|
346
|
my ($start, $end) = ($3, $4); |
|
347
|
172
|
100
|
|
|
|
363
|
if ($start > $end) { |
|
348
|
44
|
|
|
|
|
77
|
($start, $end, $strand) = ($end, $start, -1); |
|
349
|
|
|
|
|
|
|
} |
|
350
|
172
|
|
66
|
|
|
375
|
$data->{ACCESSION_NUMBER} = $new_acc || $1; |
|
351
|
172
|
|
|
|
|
206
|
$data->{DISPLAY_ID} = $1; |
|
352
|
172
|
|
|
|
|
225
|
$data->{VERSION} = $2; |
|
353
|
172
|
|
|
|
|
157
|
$data->{START} = $start; |
|
354
|
172
|
|
|
|
|
273
|
$data->{END} = $end; |
|
355
|
172
|
|
|
|
|
237
|
$data->{STRAND} = $strand; |
|
356
|
|
|
|
|
|
|
} else { |
|
357
|
|
|
|
|
|
|
# we can parse for version here if needed |
|
358
|
2
|
|
|
|
|
236
|
$data->{DISPLAY_ID} = $data->{NSE}; |
|
359
|
|
|
|
|
|
|
} |
|
360
|
|
|
|
|
|
|
} |
|
361
|
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
# this will probably be split up into subhandlers based on Record/DB |
|
363
|
|
|
|
|
|
|
sub _from_stk_dblink { |
|
364
|
27
|
|
|
27
|
|
26
|
my ($self, $data) = @_; |
|
365
|
27
|
50
|
|
|
|
57
|
return unless my $raw = $data->{DATA}; |
|
366
|
27
|
|
|
|
|
162
|
my @rawdata = split(m{\s*;\s*}, $raw); |
|
367
|
27
|
|
|
|
|
29
|
my %dblink_data; |
|
368
|
27
|
100
|
|
|
|
61
|
if ($rawdata[0] eq 'PDB') { |
|
|
|
100
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
# fix for older Stockholm PDB range format |
|
370
|
12
|
50
|
33
|
|
|
35
|
if (scalar(@rawdata) == 3 && $rawdata[2] =~ m{-}) { |
|
371
|
0
|
|
|
|
|
0
|
@rawdata[2,3] = split('-',$rawdata[2],2); |
|
372
|
|
|
|
|
|
|
} |
|
373
|
12
|
50
|
|
|
|
24
|
$self->throw("Not standard PDB form: ".$data->{DATA}) if scalar(@rawdata) != 4; |
|
374
|
12
|
|
|
|
|
40
|
my ($main, $chain) = split(m{\s+}, $rawdata[1]); |
|
375
|
12
|
|
50
|
|
|
70
|
%dblink_data = ( |
|
376
|
|
|
|
|
|
|
DBLINK_DB => $rawdata[0], |
|
377
|
|
|
|
|
|
|
DBLINK_ACC => $main, |
|
378
|
|
|
|
|
|
|
DBLINK_OPT => $chain || '', |
|
379
|
|
|
|
|
|
|
DBLINK_START => $rawdata[2], |
|
380
|
|
|
|
|
|
|
DBLINK_END => $rawdata[3] |
|
381
|
|
|
|
|
|
|
); |
|
382
|
|
|
|
|
|
|
} elsif ($rawdata[0] eq 'SCOP') { |
|
383
|
3
|
50
|
|
|
|
9
|
$self->throw("Not standard SCOP form: ".$data->{DATA}) if scalar(@rawdata) != 3; |
|
384
|
3
|
|
|
|
|
15
|
%dblink_data = ( |
|
385
|
|
|
|
|
|
|
DBLINK_DB => $rawdata[0], |
|
386
|
|
|
|
|
|
|
DBLINK_ACC => $rawdata[1], |
|
387
|
|
|
|
|
|
|
DBLINK_OPT => $rawdata[2], |
|
388
|
|
|
|
|
|
|
); |
|
389
|
|
|
|
|
|
|
} else { |
|
390
|
12
|
50
|
|
|
|
24
|
$self->warn("Some data missed: ".$data->{DATA}) if scalar(@rawdata) > 2; |
|
391
|
12
|
|
|
|
|
33
|
%dblink_data = ( |
|
392
|
|
|
|
|
|
|
DBLINK_DB => $rawdata[0], |
|
393
|
|
|
|
|
|
|
DBLINK_ACC => $rawdata[1], |
|
394
|
|
|
|
|
|
|
); |
|
395
|
|
|
|
|
|
|
} |
|
396
|
27
|
|
|
|
|
78
|
while (my ($k, $v) = each %dblink_data) { |
|
397
|
93
|
50
|
|
|
|
338
|
$data->{$k} = $v if $v; |
|
398
|
|
|
|
|
|
|
} |
|
399
|
|
|
|
|
|
|
} |
|
400
|
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
1; |
|
402
|
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
__END__ |