line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# Let the code begin... |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
package Bio::AlignIO::Handler::GenericAlignHandler; |
4
|
|
|
|
|
|
|
|
5
|
3
|
|
|
3
|
|
12
|
use strict; |
|
3
|
|
|
|
|
4
|
|
|
3
|
|
|
|
|
74
|
|
6
|
3
|
|
|
3
|
|
9
|
use warnings; |
|
3
|
|
|
|
|
3
|
|
|
3
|
|
|
|
|
73
|
|
7
|
|
|
|
|
|
|
|
8
|
3
|
|
|
3
|
|
418
|
use Bio::Annotation::Collection; |
|
3
|
|
|
|
|
3
|
|
|
3
|
|
|
|
|
70
|
|
9
|
3
|
|
|
3
|
|
604
|
use Bio::Annotation::Comment; |
|
3
|
|
|
|
|
4
|
|
|
3
|
|
|
|
|
63
|
|
10
|
3
|
|
|
3
|
|
12
|
use Bio::Annotation::SimpleValue; |
|
3
|
|
|
|
|
4
|
|
|
3
|
|
|
|
|
46
|
|
11
|
3
|
|
|
3
|
|
873
|
use Bio::Annotation::Target; |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
67
|
|
12
|
3
|
|
|
3
|
|
12
|
use Bio::Annotation::DBLink; |
|
3
|
|
|
|
|
3
|
|
|
3
|
|
|
|
|
41
|
|
13
|
3
|
|
|
3
|
|
572
|
use Bio::Annotation::Reference; |
|
3
|
|
|
|
|
4
|
|
|
3
|
|
|
|
|
60
|
|
14
|
3
|
|
|
3
|
|
503
|
use Bio::SimpleAlign; |
|
3
|
|
|
|
|
4
|
|
|
3
|
|
|
|
|
73
|
|
15
|
3
|
|
|
3
|
|
11
|
use Data::Dumper; |
|
3
|
|
|
|
|
2
|
|
|
3
|
|
|
|
|
156
|
|
16
|
|
|
|
|
|
|
|
17
|
3
|
|
|
3
|
|
10
|
use base qw(Bio::Root::Root Bio::HandlerBaseI); |
|
3
|
|
|
|
|
3
|
|
|
3
|
|
|
|
|
988
|
|
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
# only stockholm is defined for now... |
20
|
|
|
|
|
|
|
my %HANDLERS = ( |
21
|
|
|
|
|
|
|
# stockholm has sequence and alignment specific annotation; this |
22
|
|
|
|
|
|
|
'stockholm' => { |
23
|
|
|
|
|
|
|
'CONSENSUS_META' => \&_generic_consensus_meta, |
24
|
|
|
|
|
|
|
'SEQUENCE' => \&_generic_metaseq, |
25
|
|
|
|
|
|
|
'NAMED_META' => \&_generic_metaseq, |
26
|
|
|
|
|
|
|
'ACCESSION' => \&_generic_store, |
27
|
|
|
|
|
|
|
'ALPHABET' => \&_generic_store, |
28
|
|
|
|
|
|
|
'ID' => \&_generic_store, |
29
|
|
|
|
|
|
|
'DESCRIPTION' => \&_generic_store, |
30
|
|
|
|
|
|
|
'REFERENCE' => \&_generic_reference, |
31
|
|
|
|
|
|
|
'DBLINK' => \&_stockholm_target, |
32
|
|
|
|
|
|
|
'DATABASE_COMMENT' => \&_generic_comment, |
33
|
|
|
|
|
|
|
'ALIGNMENT_COMMENT' => \&_generic_comment, |
34
|
|
|
|
|
|
|
'_DEFAULT_' => \&_generic_simplevalue |
35
|
|
|
|
|
|
|
}, |
36
|
|
|
|
|
|
|
); |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
sub new { |
39
|
7
|
|
|
7
|
1
|
19
|
my ($class, @args) = @_; |
40
|
7
|
|
|
|
|
34
|
my $self = $class->SUPER::new(@args); |
41
|
7
|
|
|
|
|
34
|
my ($format, $verbose) = $self->_rearrange([qw(FORMAT VERBOSE)], @args); |
42
|
7
|
50
|
|
|
|
21
|
$self->throw("Must define alignment record format") if !$format; |
43
|
7
|
50
|
|
|
|
16
|
$verbose && $self->verbose($verbose); |
44
|
7
|
|
|
|
|
18
|
$self->format($format); |
45
|
7
|
|
|
|
|
16
|
$self->handler_methods(); |
46
|
|
|
|
|
|
|
# if we intend at a later point we can add a Builder |
47
|
|
|
|
|
|
|
#$builder && $self->alignbuilder($builder); |
48
|
7
|
|
|
|
|
22
|
return $self; |
49
|
|
|
|
|
|
|
} |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
sub handler_methods { |
52
|
7
|
|
|
7
|
1
|
8
|
my $self = shift; |
53
|
7
|
50
|
|
|
|
18
|
if (!($self->{'handlers'})) { |
54
|
|
|
|
|
|
|
$self->throw("No handlers defined for alignment format ",$self->format) |
55
|
7
|
50
|
|
|
|
14
|
unless exists $HANDLERS{$self->format}; |
56
|
7
|
|
|
|
|
13
|
$self->{'handlers'} = $HANDLERS{$self->format}; |
57
|
|
|
|
|
|
|
} |
58
|
7
|
|
|
|
|
12
|
return ($self->{'handlers'}); |
59
|
|
|
|
|
|
|
} |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
sub data_handler { |
62
|
562
|
|
|
562
|
1
|
496
|
my ($self, $data) = @_; |
63
|
562
|
|
33
|
|
|
955
|
my $nm = $data->{NAME} || $self->throw("No name tag defined!"); |
64
|
|
|
|
|
|
|
# this should handle data on the fly w/o caching; any caching should be |
65
|
|
|
|
|
|
|
# done in the driver! |
66
|
|
|
|
|
|
|
my $method = (exists $self->{'handlers'}->{$nm}) ? ($self->{'handlers'}->{$nm}) : |
67
|
562
|
50
|
|
|
|
973
|
(exists $self->{'handlers'}->{'_DEFAULT_'}) ? ($self->{'handlers'}->{'_DEFAULT_'}) : |
|
|
100
|
|
|
|
|
|
68
|
|
|
|
|
|
|
undef; |
69
|
562
|
50
|
|
|
|
731
|
if (!$method) { |
70
|
0
|
|
|
|
|
0
|
$self->debug("No handler defined for $nm\n"); |
71
|
0
|
|
|
|
|
0
|
return; |
72
|
|
|
|
|
|
|
}; |
73
|
562
|
|
|
|
|
712
|
$self->$method($data); |
74
|
|
|
|
|
|
|
} |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
sub reset_parameters { |
77
|
9
|
|
|
9
|
1
|
9
|
my $self = shift; |
78
|
9
|
|
|
|
|
12
|
$self->{'_params'} = undef; |
79
|
9
|
|
|
|
|
37
|
$self->{'_nse_cache'} = undef; |
80
|
9
|
|
|
|
|
16
|
$self->{'_features'} = undef; |
81
|
|
|
|
|
|
|
} |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
sub format { |
84
|
30
|
|
|
30
|
1
|
31
|
my $self = shift; |
85
|
30
|
100
|
|
|
|
48
|
if (@_) { |
86
|
7
|
|
|
|
|
12
|
my $format = lc shift; |
87
|
7
|
50
|
|
|
|
18
|
$self->throw("Format $format not supported") unless exists $HANDLERS{$format}; |
88
|
7
|
|
|
|
|
15
|
$self->{'_alignformat'} = $format; |
89
|
|
|
|
|
|
|
}; |
90
|
30
|
|
|
|
|
98
|
return $self->{'_alignformat'}; |
91
|
|
|
|
|
|
|
} |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
sub get_params { |
94
|
192
|
|
|
192
|
1
|
244
|
my ($self, @ids) = @_; |
95
|
192
|
|
|
|
|
146
|
my $data; |
96
|
192
|
100
|
|
|
|
280
|
if (scalar(@ids)) { |
97
|
183
|
|
|
|
|
239
|
for my $id (@ids) { |
98
|
201
|
50
|
|
|
|
402
|
if (!index($id, '-')==0) { |
99
|
0
|
|
|
|
|
0
|
$id = '-'.$id ; |
100
|
|
|
|
|
|
|
} |
101
|
201
|
100
|
|
|
|
617
|
$data->{$id} = $self->{'_params'}->{$id} if (exists $self->{'_params'}->{$id}); |
102
|
|
|
|
|
|
|
} |
103
|
183
|
|
100
|
|
|
310
|
$data ||= {}; |
104
|
|
|
|
|
|
|
} else { |
105
|
9
|
|
|
|
|
14
|
$data = $self->{'_params'}; |
106
|
|
|
|
|
|
|
} |
107
|
192
|
|
|
|
|
457
|
return $data; |
108
|
|
|
|
|
|
|
} |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
sub set_params { |
111
|
0
|
|
|
0
|
1
|
0
|
shift->throw('Not implemented yet!'); |
112
|
|
|
|
|
|
|
} |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
sub build_alignment { |
115
|
9
|
|
|
9
|
1
|
12
|
my $self = shift; |
116
|
9
|
|
|
|
|
10
|
my %init; |
117
|
9
|
|
|
|
|
19
|
$self->process_seqs; |
118
|
9
|
|
|
|
|
22
|
my $param = $self->get_params; |
119
|
9
|
50
|
|
|
|
27
|
if (defined $param->{-seqs}) { |
120
|
9
|
|
|
|
|
40
|
return Bio::SimpleAlign->new(%$param, -source => $self->format); |
121
|
|
|
|
|
|
|
} |
122
|
0
|
|
|
|
|
0
|
return; |
123
|
|
|
|
|
|
|
} |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
sub annotation_collection { |
126
|
134
|
|
|
134
|
1
|
124
|
my ($self, $coll) = @_; |
127
|
134
|
50
|
|
|
|
301
|
if ($coll) { |
|
|
100
|
|
|
|
|
|
128
|
0
|
0
|
0
|
|
|
0
|
$self->throw("Must have Bio::AnnotationCollectionI ". |
129
|
|
|
|
|
|
|
"when explicitly setting annotation_collection()") |
130
|
|
|
|
|
|
|
unless (ref($coll) && $coll->isa('Bio::AnnotationCollectionI')); |
131
|
0
|
|
|
|
|
0
|
$self->{'_params'}->{'-annotation'} = $coll; |
132
|
|
|
|
|
|
|
} elsif (!exists($self->{'_params'}->{'-annotation'})) { |
133
|
9
|
|
|
|
|
44
|
$self->{'_params'}->{'-annotation'} = Bio::Annotation::Collection->new() |
134
|
|
|
|
|
|
|
} |
135
|
134
|
|
|
|
|
312
|
return $self->{'_params'}->{'-annotation'}; |
136
|
|
|
|
|
|
|
} |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
sub seq_annotation_collection { |
139
|
0
|
|
|
0
|
1
|
0
|
my ($self, $coll) = @_; |
140
|
0
|
0
|
|
|
|
0
|
if ($coll) { |
|
|
0
|
|
|
|
|
|
141
|
0
|
0
|
0
|
|
|
0
|
$self->throw("Must have Bio::AnnotationCollectionI ". |
142
|
|
|
|
|
|
|
"when explicitly setting seq_annotation_collection()") |
143
|
|
|
|
|
|
|
unless (ref($coll) && $coll->isa('Bio::AnnotationCollectionI')); |
144
|
0
|
|
|
|
|
0
|
$self->{'_params'}->{'-seq_annotation'} = $coll; |
145
|
|
|
|
|
|
|
} elsif (!exists($self->{'_params'}->{'-seq_annotation'})) { |
146
|
0
|
|
|
|
|
0
|
$self->{'_params'}->{'-seq_annotation'} = Bio::Annotation::Collection->new() |
147
|
|
|
|
|
|
|
} |
148
|
0
|
|
|
|
|
0
|
return $self->{'_params'}->{'-seq_annotation'}; |
149
|
|
|
|
|
|
|
} |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
sub process_seqs { |
152
|
9
|
|
|
9
|
1
|
13
|
my $self = shift; |
153
|
|
|
|
|
|
|
|
154
|
9
|
|
|
|
|
21
|
my $data = $self->get_params(qw(-seqs -seq_class -consensus_meta)); |
155
|
9
|
|
50
|
|
|
31
|
my $class = $data->{-seq_class} || 'Bio::LocatableSeq'; |
156
|
|
|
|
|
|
|
# cache classes loaded already |
157
|
9
|
100
|
|
|
|
26
|
if (!exists($self->{'_loaded_modules'}->{$class})) { |
158
|
5
|
|
|
|
|
25
|
$self->_load_module($class); |
159
|
5
|
|
|
|
|
10
|
$self->{'_loaded_modules'}->{$class}++; |
160
|
|
|
|
|
|
|
} |
161
|
|
|
|
|
|
|
# process any meta sequence data |
162
|
9
|
100
|
66
|
|
|
64
|
if ( $data->{-consensus_meta} && !UNIVERSAL::isa($data->{-consensus_meta},'Bio::Seq::Meta')) { |
163
|
8
|
|
|
|
|
13
|
my $ref = $data->{-consensus_meta}; |
164
|
8
|
50
|
|
|
|
20
|
if (!exists($self->{'_loaded_modules'}->{'Bio::Seq::Meta'})) { |
165
|
0
|
|
|
|
|
0
|
$self->_load_module('Bio::Seq::Meta'); |
166
|
0
|
|
|
|
|
0
|
$self->{'_loaded_modules'}->{'Bio::Seq::Meta'}++; |
167
|
|
|
|
|
|
|
} |
168
|
8
|
|
|
|
|
36
|
my $ms = Bio::Seq::Meta->new(); |
169
|
8
|
|
|
|
|
11
|
for my $tag (sort keys %{$ref}) { |
|
8
|
|
|
|
|
32
|
|
170
|
10
|
|
|
|
|
27
|
$ms->named_meta($tag, $ref->{$tag}); |
171
|
|
|
|
|
|
|
} |
172
|
8
|
|
|
|
|
16
|
$self->{'_params'}->{'-consensus_meta'} = $ms; |
173
|
|
|
|
|
|
|
} |
174
|
|
|
|
|
|
|
# this should always be an array ref! |
175
|
9
|
|
|
|
|
10
|
for my $seq (@{$data->{-seqs}}) { |
|
9
|
|
|
|
|
23
|
|
176
|
168
|
50
|
|
|
|
393
|
next if (UNIVERSAL::isa($seq,'Bio::LocatableI')); |
177
|
|
|
|
|
|
|
# process anything else |
178
|
168
|
50
|
|
|
|
466
|
$self->_from_nse($seq) if $seq->{NSE}; |
179
|
168
|
50
|
|
|
|
384
|
if (UNIVERSAL::isa($seq,'HASH')) { |
180
|
168
|
|
|
|
|
147
|
my %param; |
181
|
168
|
|
|
|
|
446
|
for my $p (keys %$seq) { |
182
|
1503
|
50
|
|
|
|
3226
|
$param{'-'.lc $p} = $seq->{$p} if exists $seq->{$p}; |
183
|
|
|
|
|
|
|
} |
184
|
168
|
|
|
|
|
652
|
my $ls = $class->new(%param); |
185
|
|
|
|
|
|
|
# a little switcheroo to attach the sequence |
186
|
|
|
|
|
|
|
# (though using it to get seq() doesn't work correctly yet!) |
187
|
168
|
100
|
33
|
|
|
856
|
if (defined $seq->{NSE} && |
|
|
|
100
|
|
|
|
|
188
|
|
|
|
|
|
|
exists $self->{'_features'} && |
189
|
|
|
|
|
|
|
exists $self->{'_features'}->{ $seq->{NSE} }) { |
190
|
1
|
|
|
|
|
2
|
for my $feat (@{ $self->{'_features'}->{ $seq->{NSE} } }) { |
|
1
|
|
|
|
|
2
|
|
191
|
6
|
|
|
|
|
5
|
push @{ $self->{'_params'}->{'-features'} }, $feat; |
|
6
|
|
|
|
|
8
|
|
192
|
6
|
|
|
|
|
34
|
$feat->attach_seq($ls); |
193
|
|
|
|
|
|
|
} |
194
|
|
|
|
|
|
|
} |
195
|
168
|
|
|
|
|
747
|
$seq = $ls; |
196
|
|
|
|
|
|
|
} |
197
|
|
|
|
|
|
|
} |
198
|
|
|
|
|
|
|
} |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
####################### SEQUENCE HANDLERS ####################### |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
# any sequence data for a Bio::Seq::Meta |
203
|
|
|
|
|
|
|
sub _generic_metaseq { |
204
|
319
|
|
|
319
|
|
279
|
my ($self, $data) = @_; |
205
|
319
|
50
|
|
|
|
428
|
return unless $data; |
206
|
319
|
50
|
|
|
|
436
|
$self->throw("No alignment position passed") if !exists($data->{BLOCK_LINE}); |
207
|
319
|
50
|
|
|
|
526
|
$self->throw("Alignment position must be an index greater than 0") if $data->{BLOCK_LINE} < 1; |
208
|
319
|
|
|
|
|
366
|
$self->{'_params'}->{'-seq_class'} = 'Bio::Seq::Meta'; |
209
|
319
|
|
|
|
|
300
|
my $index = $data->{BLOCK_LINE} - 1; |
210
|
319
|
100
|
|
|
|
621
|
if (my $nse = $self->{'_params'}->{'-seqs'}->[$index]->{NSE}) { |
211
|
151
|
50
|
|
|
|
279
|
$self->throw("NSE in passed data doesn't match stored data in same position: $nse") unless $nse eq $data->{NSE}; |
212
|
|
|
|
|
|
|
} else { |
213
|
168
|
|
|
|
|
350
|
$self->{'_params'}->{'-seqs'}->[$index]->{NSE} = $data->{NSE}; |
214
|
|
|
|
|
|
|
} |
215
|
319
|
100
|
|
|
|
430
|
if ($data->{NAME} eq 'SEQUENCE') { |
|
|
50
|
|
|
|
|
|
216
|
317
|
|
|
|
|
1224
|
$self->{'_params'}->{'-seqs'}->[$index]->{SEQ} .= $data->{DATA}; |
217
|
|
|
|
|
|
|
} elsif ($data->{NAME} eq 'NAMED_META') { |
218
|
2
|
|
|
|
|
8
|
$self->{'_params'}->{'-seqs'}->[$index]->{NAMED_META}->{$data->{META_TAG}} .= $data->{DATA}; |
219
|
|
|
|
|
|
|
} |
220
|
|
|
|
|
|
|
} |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
sub _generic_consensus_meta { |
223
|
18
|
|
|
18
|
|
21
|
my ($self, $data) = @_; |
224
|
18
|
50
|
|
|
|
26
|
return unless $data; |
225
|
18
|
50
|
|
|
|
34
|
if ($data->{NAME} eq 'CONSENSUS_META') { |
226
|
18
|
|
|
|
|
68
|
$self->{'_params'}->{'-consensus_meta'}->{$data->{META_TAG}} .= $data->{DATA}; |
227
|
|
|
|
|
|
|
} |
228
|
|
|
|
|
|
|
} |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
# any sequence data for a Bio::LocatableSeq |
231
|
|
|
|
|
|
|
sub _generic_locatableseq { |
232
|
0
|
|
|
0
|
|
0
|
my ($self, $data) = @_; |
233
|
0
|
0
|
|
|
|
0
|
return unless $data; |
234
|
0
|
0
|
|
|
|
0
|
$self->throw("No alignment position passed") if !exists($data->{BLOCK_LINE}); |
235
|
0
|
0
|
|
|
|
0
|
$self->throw("Alignment position must be an index greater than 0") if $data->{BLOCK_LINE} < 1; |
236
|
0
|
|
|
|
|
0
|
my $index = $data->{BLOCK_LINE} - 1; |
237
|
0
|
0
|
|
|
|
0
|
if (my $nse = $self->{'_params'}->{'-seqs'}->[$index]->{NSE}) { |
238
|
0
|
0
|
|
|
|
0
|
$self->throw("NSE in passed data doesn't match stored data in same position: $nse") if $nse ne $data->{NSE}; |
239
|
|
|
|
|
|
|
} else { |
240
|
0
|
|
|
|
|
0
|
$self->{'_params'}->{'-seqs'}->[$index]->{NSE} = $data->{NSE}; |
241
|
|
|
|
|
|
|
} |
242
|
0
|
0
|
|
|
|
0
|
if ($data->{NAME} eq 'SEQUENCE') { |
243
|
0
|
|
|
|
|
0
|
$self->{'_params'}->{'-seqs'}->[$index]->{SEQ} .= $data->{DATA}; |
244
|
|
|
|
|
|
|
} |
245
|
|
|
|
|
|
|
} |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
####################### RAW DATA HANDLERS ####################### |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
# store by data name (ACCESSION, ID, etc), which can be mapped to the |
250
|
|
|
|
|
|
|
# appropriate alignment or sequence parameter |
251
|
|
|
|
|
|
|
sub _generic_store { |
252
|
85
|
|
|
85
|
|
62
|
my ($self, $data) = @_; |
253
|
85
|
50
|
|
|
|
114
|
return unless $data; |
254
|
85
|
100
|
|
|
|
105
|
if ($data->{ALIGNMENT}) { |
255
|
32
|
|
|
|
|
120
|
$self->{'_params'}->{'-'.lc $data->{NAME}} = $data->{DATA}; |
256
|
|
|
|
|
|
|
} else { |
257
|
|
|
|
|
|
|
$self->{'_params'}->{'-seq_'.lc $data->{NAME}}->{$data->{NSE}} = $data->{DATA} |
258
|
53
|
|
|
|
|
172
|
} |
259
|
|
|
|
|
|
|
} |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
sub _generic_reference { |
262
|
22
|
|
|
22
|
|
28
|
my ($self, $data) = @_; |
263
|
|
|
|
|
|
|
my $ref = Bio::Annotation::Reference->new(-title => $data->{TITLE}, |
264
|
|
|
|
|
|
|
-authors => $data->{AUTHORS}, |
265
|
|
|
|
|
|
|
-pubmed => $data->{PUBMED}, |
266
|
|
|
|
|
|
|
-location => $data->{JOURNAL}, |
267
|
22
|
|
|
|
|
124
|
-tagname => lc $data->{NAME}); |
268
|
22
|
|
|
|
|
44
|
$self->annotation_collection->add_Annotation($ref); |
269
|
|
|
|
|
|
|
} |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
sub _generic_simplevalue { |
272
|
85
|
|
|
85
|
|
66
|
my ($self, $data) = @_; |
273
|
|
|
|
|
|
|
my $sv = Bio::Annotation::SimpleValue->new(-value => $data->{DATA}, |
274
|
85
|
|
|
|
|
327
|
-tagname => lc $data->{NAME}); |
275
|
85
|
|
|
|
|
142
|
$self->annotation_collection->add_Annotation($sv); |
276
|
|
|
|
|
|
|
} |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
sub _generic_comment { |
279
|
6
|
|
|
6
|
|
8
|
my ($self, $data) = @_; |
280
|
|
|
|
|
|
|
my $comment = Bio::Annotation::Comment->new(-type => lc $data->{NAME}, |
281
|
|
|
|
|
|
|
-text => $data->{DATA}, |
282
|
6
|
|
|
|
|
52
|
-tagname => lc $data->{NAME}); |
283
|
6
|
|
|
|
|
14
|
$self->annotation_collection->add_Annotation($comment); |
284
|
|
|
|
|
|
|
} |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
# Some DBLinks in Stockholm format are unique, so a unique handler for them |
287
|
|
|
|
|
|
|
sub _stockholm_target { |
288
|
27
|
|
|
27
|
|
24
|
my ($self, $data) = @_; |
289
|
|
|
|
|
|
|
# process database info |
290
|
27
|
|
|
|
|
38
|
$self->_from_stk_dblink($data); |
291
|
27
|
|
|
|
|
21
|
my $comment; |
292
|
|
|
|
|
|
|
# Bio::Annotation::Target is now a DBLink, but has additional (RangeI) |
293
|
|
|
|
|
|
|
# capabilities (for PDB data) |
294
|
|
|
|
|
|
|
my $dblink = Bio::Annotation::Target->new( |
295
|
|
|
|
|
|
|
-database => $data->{DBLINK_DB}, |
296
|
|
|
|
|
|
|
-primary_id => $data->{DBLINK_ACC}, |
297
|
|
|
|
|
|
|
-optional_id => $data->{DBLINK_OPT}, |
298
|
|
|
|
|
|
|
-start => $data->{DBLINK_START}, |
299
|
|
|
|
|
|
|
-end => $data->{DBLINK_END}, |
300
|
|
|
|
|
|
|
-strand => $data->{DBLINK_STRAND}, |
301
|
27
|
|
|
|
|
179
|
-comment => $comment, |
302
|
|
|
|
|
|
|
-tagname => 'dblink', |
303
|
|
|
|
|
|
|
); |
304
|
27
|
100
|
|
|
|
80
|
if ($data->{ALIGNMENT}) { |
305
|
|
|
|
|
|
|
# Alignment-specific DBLinks |
306
|
21
|
|
|
|
|
38
|
$self->annotation_collection->add_Annotation($dblink); |
307
|
|
|
|
|
|
|
} else { |
308
|
|
|
|
|
|
|
# Sequence-specific DBLinks |
309
|
|
|
|
|
|
|
# These should come with identifying information of some sort |
310
|
|
|
|
|
|
|
# (ID/START/END/STRAND). Make into a SeqFeature (SimpleAlign is |
311
|
|
|
|
|
|
|
# FeatureHolderI) spanning the length acc. to the NSE. Add the DBLink as |
312
|
|
|
|
|
|
|
# Annotation specific to that SeqFeature, store in an internal hash by |
313
|
|
|
|
|
|
|
# NSE so we can tie the LocatableSeq to the proper Features |
314
|
6
|
50
|
|
|
|
18
|
$self->_from_nse($data) if $data->{NSE}; |
315
|
|
|
|
|
|
|
$self->throw("Must supply an sequence DISPLAY_ID or NSE for sequence-related |
316
|
6
|
0
|
33
|
|
|
10
|
DBLinks") unless $data->{ACCESSION_NUMBER} || $data->{DISPLAY_ID}; |
317
|
|
|
|
|
|
|
my $sf = Bio::SeqFeature::Generic->new(-seq_id => $data->{DISPLAY_ID}, |
318
|
|
|
|
|
|
|
-accession_number => $data->{ACCESSION_NUMBER}, |
319
|
|
|
|
|
|
|
-start => $data->{START}, |
320
|
|
|
|
|
|
|
-end => $data->{END}, |
321
|
|
|
|
|
|
|
-strand => $data->{STRAND} |
322
|
6
|
|
|
|
|
36
|
); |
323
|
6
|
|
|
|
|
14
|
$sf->annotation->add_Annotation($dblink); |
324
|
|
|
|
|
|
|
# index by NSE |
325
|
6
|
|
|
|
|
7
|
push @{ $self->{'_features'}->{ $data->{NSE} } }, $sf; |
|
6
|
|
|
|
|
18
|
|
326
|
|
|
|
|
|
|
#$self->seq_annotation_collection->add_Annotation($dblink); |
327
|
|
|
|
|
|
|
} |
328
|
|
|
|
|
|
|
} |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
####################### HELPER METHODS ####################### |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
# returns ACCESSION VERSION START END STRAND ALPHABET |
333
|
|
|
|
|
|
|
# cached for multiple lookups, should reset in between uses |
334
|
|
|
|
|
|
|
sub _from_nse { |
335
|
174
|
|
|
174
|
|
163
|
my ($self, $data) = @_; |
336
|
174
|
50
|
|
|
|
326
|
return unless my $nse = $data->{NSE}; |
337
|
174
|
|
100
|
|
|
280
|
$data->{ALPHABET} = $self->get_params('-alphabet')->{'-alphabet'} || 'protein'; |
338
|
|
|
|
|
|
|
# grab any accessions if present, switch out with ACCESSION from NSE |
339
|
|
|
|
|
|
|
# (move that to primary_id) |
340
|
174
|
|
|
|
|
201
|
my $new_acc; |
341
|
174
|
100
|
|
|
|
280
|
if (exists $self->{'_params'}->{'-seq_accession'}) { |
342
|
59
|
|
|
|
|
78
|
$new_acc = $self->{'_params'}->{'-seq_accession'}->{$data->{NSE}}; |
343
|
|
|
|
|
|
|
} |
344
|
174
|
100
|
|
|
|
1083
|
if ($nse =~ m{(\S+?)(?:\.(\d+))?/(\d+)-(\d+)}xmso) { |
345
|
172
|
100
|
66
|
|
|
671
|
my $strand = $data->{ALPHABET} eq 'dna' || $data->{ALPHABET} eq 'rna' ? 1 : undef; |
346
|
172
|
|
|
|
|
416
|
my ($start, $end) = ($3, $4); |
347
|
172
|
100
|
|
|
|
351
|
if ($start > $end) { |
348
|
44
|
|
|
|
|
102
|
($start, $end, $strand) = ($end, $start, -1); |
349
|
|
|
|
|
|
|
} |
350
|
172
|
|
66
|
|
|
451
|
$data->{ACCESSION_NUMBER} = $new_acc || $1; |
351
|
172
|
|
|
|
|
224
|
$data->{DISPLAY_ID} = $1; |
352
|
172
|
|
|
|
|
215
|
$data->{VERSION} = $2; |
353
|
172
|
|
|
|
|
178
|
$data->{START} = $start; |
354
|
172
|
|
|
|
|
326
|
$data->{END} = $end; |
355
|
172
|
|
|
|
|
279
|
$data->{STRAND} = $strand; |
356
|
|
|
|
|
|
|
} else { |
357
|
|
|
|
|
|
|
# we can parse for version here if needed |
358
|
2
|
|
|
|
|
242
|
$data->{DISPLAY_ID} = $data->{NSE}; |
359
|
|
|
|
|
|
|
} |
360
|
|
|
|
|
|
|
} |
361
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
# this will probably be split up into subhandlers based on Record/DB |
363
|
|
|
|
|
|
|
sub _from_stk_dblink { |
364
|
27
|
|
|
27
|
|
36
|
my ($self, $data) = @_; |
365
|
27
|
50
|
|
|
|
57
|
return unless my $raw = $data->{DATA}; |
366
|
27
|
|
|
|
|
126
|
my @rawdata = split(m{\s*;\s*}, $raw); |
367
|
27
|
|
|
|
|
25
|
my %dblink_data; |
368
|
27
|
100
|
|
|
|
57
|
if ($rawdata[0] eq 'PDB') { |
|
|
100
|
|
|
|
|
|
369
|
|
|
|
|
|
|
# fix for older Stockholm PDB range format |
370
|
12
|
50
|
33
|
|
|
22
|
if (scalar(@rawdata) == 3 && $rawdata[2] =~ m{-}) { |
371
|
0
|
|
|
|
|
0
|
@rawdata[2,3] = split('-',$rawdata[2],2); |
372
|
|
|
|
|
|
|
} |
373
|
12
|
50
|
|
|
|
22
|
$self->throw("Not standard PDB form: ".$data->{DATA}) if scalar(@rawdata) != 4; |
374
|
12
|
|
|
|
|
25
|
my ($main, $chain) = split(m{\s+}, $rawdata[1]); |
375
|
12
|
|
50
|
|
|
56
|
%dblink_data = ( |
376
|
|
|
|
|
|
|
DBLINK_DB => $rawdata[0], |
377
|
|
|
|
|
|
|
DBLINK_ACC => $main, |
378
|
|
|
|
|
|
|
DBLINK_OPT => $chain || '', |
379
|
|
|
|
|
|
|
DBLINK_START => $rawdata[2], |
380
|
|
|
|
|
|
|
DBLINK_END => $rawdata[3] |
381
|
|
|
|
|
|
|
); |
382
|
|
|
|
|
|
|
} elsif ($rawdata[0] eq 'SCOP') { |
383
|
3
|
50
|
|
|
|
8
|
$self->throw("Not standard SCOP form: ".$data->{DATA}) if scalar(@rawdata) != 3; |
384
|
3
|
|
|
|
|
12
|
%dblink_data = ( |
385
|
|
|
|
|
|
|
DBLINK_DB => $rawdata[0], |
386
|
|
|
|
|
|
|
DBLINK_ACC => $rawdata[1], |
387
|
|
|
|
|
|
|
DBLINK_OPT => $rawdata[2], |
388
|
|
|
|
|
|
|
); |
389
|
|
|
|
|
|
|
} else { |
390
|
12
|
50
|
|
|
|
21
|
$self->warn("Some data missed: ".$data->{DATA}) if scalar(@rawdata) > 2; |
391
|
12
|
|
|
|
|
36
|
%dblink_data = ( |
392
|
|
|
|
|
|
|
DBLINK_DB => $rawdata[0], |
393
|
|
|
|
|
|
|
DBLINK_ACC => $rawdata[1], |
394
|
|
|
|
|
|
|
); |
395
|
|
|
|
|
|
|
} |
396
|
27
|
|
|
|
|
74
|
while (my ($k, $v) = each %dblink_data) { |
397
|
93
|
50
|
|
|
|
310
|
$data->{$k} = $v if $v; |
398
|
|
|
|
|
|
|
} |
399
|
|
|
|
|
|
|
} |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
1; |
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
__END__ |