line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# |
2
|
|
|
|
|
|
|
# BioPerl module for Bio::Seq::Meta::Array |
3
|
|
|
|
|
|
|
# |
4
|
|
|
|
|
|
|
# Please direct questions and support issues to |
5
|
|
|
|
|
|
|
# |
6
|
|
|
|
|
|
|
# Cared for by Heikki Lehvaslaiho |
7
|
|
|
|
|
|
|
# |
8
|
|
|
|
|
|
|
# Copyright Heikki Lehvaslaiho |
9
|
|
|
|
|
|
|
# |
10
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Bio::Seq::Meta::Array - array-based generic implementation of a |
17
|
|
|
|
|
|
|
sequence class with residue-based meta information |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=head1 SYNOPSIS |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
use Bio::LocatableSeq; |
22
|
|
|
|
|
|
|
use Bio::Seq::Meta::Array; |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
my $seq = Bio::Seq::Meta::Array->new(-id=>'test', |
25
|
|
|
|
|
|
|
-seq=>'ACTGCTAGCT', |
26
|
|
|
|
|
|
|
-start=>2434, |
27
|
|
|
|
|
|
|
-start=>2443, |
28
|
|
|
|
|
|
|
-strand=>1, |
29
|
|
|
|
|
|
|
-verbose=>1, # to see warnings |
30
|
|
|
|
|
|
|
); |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
# to test this is a meta seq object |
33
|
|
|
|
|
|
|
$seq->isa("Bio::Seq::Meta::Array") |
34
|
|
|
|
|
|
|
|| $seq->throw("$seq is not a Bio::Seq::Meta::Array"); |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
$seq->meta('1 2 3 4 5 6 7 8 9 10'); |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
# or you could create the Meta object directly |
39
|
|
|
|
|
|
|
$seq = Bio::Seq::Meta::Array->new(-id=>'test', |
40
|
|
|
|
|
|
|
-seq=>'ACTGCTAGCT', |
41
|
|
|
|
|
|
|
-start=>2434, |
42
|
|
|
|
|
|
|
-start=>2443, |
43
|
|
|
|
|
|
|
-strand=>1, |
44
|
|
|
|
|
|
|
-meta=>'1 2 3 4 5 6 7 8 9 10', |
45
|
|
|
|
|
|
|
-verbose=>1, # to see warnings |
46
|
|
|
|
|
|
|
); |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
# accessors |
50
|
|
|
|
|
|
|
$arrayref = $seq->meta(); |
51
|
|
|
|
|
|
|
$string = $seq->meta_text(); |
52
|
|
|
|
|
|
|
$substring = $seq->submeta_text(2,5); |
53
|
|
|
|
|
|
|
$unique_key = $seq->accession_number(); |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
=head1 DESCRIPTION |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
This class implements generic methods for sequences with residue-based |
58
|
|
|
|
|
|
|
meta information. Meta sequences with meta data are Bio::LocatableSeq |
59
|
|
|
|
|
|
|
objects with additional methods to store that meta information. See |
60
|
|
|
|
|
|
|
L and L. |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
The meta information in this class can be a string of variable length |
63
|
|
|
|
|
|
|
and can be a complex structure. Blank values are undef or zero. |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
Application specific implementations should inherit from this class to |
66
|
|
|
|
|
|
|
override and add to these methods. |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
This class can be used for storing sequence quality values but |
69
|
|
|
|
|
|
|
Bio::Seq::Quality has named methods that make it easier. |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=head1 SEE ALSO |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
L, |
74
|
|
|
|
|
|
|
L, |
75
|
|
|
|
|
|
|
L, |
76
|
|
|
|
|
|
|
L |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
=head1 NOTE |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
This Bio::Seq::MetaI implementation inherits from Bio::LocatableSeq, which |
81
|
|
|
|
|
|
|
itself inherits from Bio::PrimarySeq. It is not a Bio::SeqI, so bless-ing |
82
|
|
|
|
|
|
|
objects of this class into a Bio::SeqI or vice versa and will not work as |
83
|
|
|
|
|
|
|
expected (see bug 2262). This may be addressed in a future refactor of |
84
|
|
|
|
|
|
|
Bio::LocatableSeq. |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=head1 FEEDBACK |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
=head2 Mailing Lists |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
91
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to one |
92
|
|
|
|
|
|
|
of the Bioperl mailing lists. Your participation is much appreciated. |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
95
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
=head2 Support |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
I |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
104
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
105
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
106
|
|
|
|
|
|
|
with code and data examples if at all possible. |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
=head2 Reporting Bugs |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
111
|
|
|
|
|
|
|
the bugs and their resolution. Bug reports can be submitted via the |
112
|
|
|
|
|
|
|
web: |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
=head1 AUTHOR - Heikki Lehvaslaiho |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
Email heikki-at-bioperl-dot-org |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=head1 CONTRIBUTORS |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
Chad Matsalla, bioinformatics@dieselwurks.com |
123
|
|
|
|
|
|
|
Aaron Mackey, amackey@virginia.edu |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
=head1 APPENDIX |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
The rest of the documentation details each of the object methods. |
128
|
|
|
|
|
|
|
Internal methods are usually preceded with a _ |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=cut |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
# Let the code begin... |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
package Bio::Seq::Meta::Array; |
137
|
9
|
|
|
9
|
|
840
|
use strict; |
|
9
|
|
|
|
|
9
|
|
|
9
|
|
|
|
|
236
|
|
138
|
|
|
|
|
|
|
|
139
|
9
|
|
|
9
|
|
29
|
use base qw(Bio::LocatableSeq Bio::Seq Bio::Seq::MetaI); |
|
9
|
|
|
|
|
12
|
|
|
9
|
|
|
|
|
3361
|
|
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
our $DEFAULT_NAME = 'DEFAULT'; |
142
|
|
|
|
|
|
|
our $GAP = '-'; |
143
|
|
|
|
|
|
|
our $META_GAP = 0; |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
=head2 new |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
Title : new |
148
|
|
|
|
|
|
|
Usage : $metaseq = Bio::Seq::Meta::Array->new |
149
|
|
|
|
|
|
|
( -meta => 'aaaaaaaabbbbbbbb', |
150
|
|
|
|
|
|
|
-seq => 'TKLMILVSHIVILSRM' |
151
|
|
|
|
|
|
|
-id => 'human_id', |
152
|
|
|
|
|
|
|
-accession_number => 'S000012', |
153
|
|
|
|
|
|
|
); |
154
|
|
|
|
|
|
|
Function: Constructor for Bio::Seq::Meta::Array class, meta data being in a |
155
|
|
|
|
|
|
|
string. Note that you can provide an empty quality string. |
156
|
|
|
|
|
|
|
Returns : a new Bio::Seq::Meta::Array object |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
=cut |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
sub new { |
161
|
866
|
|
|
866
|
1
|
2229
|
my ($class, %args) = @_; |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
# run-time modification of @ISA is extremely evil (you should't pick your |
164
|
|
|
|
|
|
|
# interface on the fly); this has no obvious effect on any tests so |
165
|
|
|
|
|
|
|
# commenting out - cjfields 2011-4-6 |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
#defined inheritance according to stated baseclass, |
168
|
|
|
|
|
|
|
#if undefined then will be PrimarySeq |
169
|
|
|
|
|
|
|
#if (defined($args{'-baseclass'})) { |
170
|
|
|
|
|
|
|
# @ISA = ($args{'-baseclass'},"Bio::Seq::MetaI"); |
171
|
|
|
|
|
|
|
# } |
172
|
|
|
|
|
|
|
#else { |
173
|
|
|
|
|
|
|
# @ISA = qw( Bio::LocatableSeq Bio::Seq Bio::Seq::MetaI ); |
174
|
|
|
|
|
|
|
# } |
175
|
|
|
|
|
|
|
|
176
|
866
|
|
|
|
|
2715
|
my $self = $class->SUPER::new(%args); |
177
|
|
|
|
|
|
|
|
178
|
866
|
|
|
|
|
2545
|
my($meta, $forceflush) = |
179
|
|
|
|
|
|
|
$self->_rearrange([qw(META |
180
|
|
|
|
|
|
|
FORCE_FLUSH |
181
|
|
|
|
|
|
|
)], |
182
|
|
|
|
|
|
|
%args); |
183
|
|
|
|
|
|
|
|
184
|
866
|
|
|
|
|
2316
|
$self->{'_meta'}->{$DEFAULT_NAME} = []; |
185
|
|
|
|
|
|
|
|
186
|
866
|
100
|
|
|
|
1246
|
$meta && $self->meta($meta); |
187
|
866
|
100
|
|
|
|
1021
|
$forceflush && $self->force_flush($forceflush); |
188
|
|
|
|
|
|
|
|
189
|
866
|
|
|
|
|
1880
|
return $self; |
190
|
|
|
|
|
|
|
} |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
=head2 meta |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
Title : meta |
196
|
|
|
|
|
|
|
Usage : $meta_values = $obj->meta($values_string); |
197
|
|
|
|
|
|
|
Function: |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
Get and set method for the meta data starting from residue |
200
|
|
|
|
|
|
|
position one. Since it is dependent on the length of the |
201
|
|
|
|
|
|
|
sequence, it needs to be manipulated after the sequence. |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
The length of the returned value always matches the length |
204
|
|
|
|
|
|
|
of the sequence. |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
Returns : reference to an array of meta data |
207
|
|
|
|
|
|
|
Args : new value, string or array ref, optional |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
=cut |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
sub meta { |
212
|
37
|
|
|
37
|
1
|
101
|
shift->named_meta($DEFAULT_NAME, shift); |
213
|
|
|
|
|
|
|
} |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
=head2 meta_text |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
Title : meta_text |
218
|
|
|
|
|
|
|
Usage : $meta_values = $obj->meta_text($values_arrayref); |
219
|
|
|
|
|
|
|
Function: Variant of meta() guarantied to return a string |
220
|
|
|
|
|
|
|
representation of meta data. For details, see L. |
221
|
|
|
|
|
|
|
Returns : a string |
222
|
|
|
|
|
|
|
Args : new value, string or array ref, optional |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
=cut |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
sub meta_text { |
227
|
20
|
100
|
|
20
|
1
|
24
|
return join ' ', map {0 unless $_} @{shift->meta(shift)}; |
|
143
|
|
|
|
|
318
|
|
|
20
|
|
|
|
|
37
|
|
228
|
|
|
|
|
|
|
} |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
=head2 named_meta |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
Title : named_meta() |
233
|
|
|
|
|
|
|
Usage : $meta_values = $obj->named_meta($name, $values_arrayref); |
234
|
|
|
|
|
|
|
Function: A more general version of meta(). Each meta data set needs |
235
|
|
|
|
|
|
|
to be named. See also L. |
236
|
|
|
|
|
|
|
Returns : reference to an array of meta data |
237
|
|
|
|
|
|
|
Args : scalar, name of the meta data set |
238
|
|
|
|
|
|
|
new value, string or array ref, optional |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
=cut |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
sub named_meta { |
243
|
1564
|
|
|
1564
|
1
|
1519
|
my ($self, $name, $value) = @_; |
244
|
|
|
|
|
|
|
|
245
|
1564
|
|
33
|
|
|
2008
|
$name ||= $DEFAULT_NAME; |
246
|
|
|
|
|
|
|
|
247
|
1564
|
100
|
|
|
|
1951
|
if (defined $value) { |
248
|
851
|
|
|
|
|
684
|
my ($arrayref); |
249
|
|
|
|
|
|
|
|
250
|
851
|
100
|
|
|
|
1232
|
if (ref $value eq 'ARRAY' ) { # array ref |
|
|
50
|
|
|
|
|
|
251
|
721
|
|
|
|
|
642
|
$arrayref = $value; |
252
|
|
|
|
|
|
|
} |
253
|
|
|
|
|
|
|
elsif (not ref($value)) { # scalar |
254
|
130
|
|
|
|
|
32026
|
$arrayref = [split /\s+/, $value]; |
255
|
|
|
|
|
|
|
} else { |
256
|
0
|
|
|
|
|
0
|
$self->throw("I need a scalar or array ref, not [". ref($value). "]"); |
257
|
|
|
|
|
|
|
} |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
# test for length |
260
|
851
|
|
|
|
|
4933
|
my $diff = $self->length - @{$arrayref}; |
|
851
|
|
|
|
|
1056
|
|
261
|
851
|
100
|
|
|
|
1266
|
if ($diff > 0) { |
262
|
5
|
|
|
|
|
12
|
foreach (1..$diff) { push @{$arrayref}, 0;} |
|
22
|
|
|
|
|
13
|
|
|
22
|
|
|
|
|
27
|
|
263
|
|
|
|
|
|
|
} |
264
|
|
|
|
|
|
|
|
265
|
851
|
|
|
|
|
1066
|
$self->{'_meta'}->{$name} = $arrayref; |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
#$self->_test_gap_positions($name) if $self->verbose > 0; |
268
|
|
|
|
|
|
|
} |
269
|
|
|
|
|
|
|
|
270
|
1564
|
100
|
|
|
|
2085
|
$self->_do_flush if $self->force_flush; |
271
|
|
|
|
|
|
|
|
272
|
1564
|
|
33
|
|
|
3999
|
return $self->{'_meta'}->{$name} || (" " x $self->length); |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
} |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
=head2 _test_gap_positions |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
Title : _test_gap_positions |
279
|
|
|
|
|
|
|
Usage : $meta_values = $obj->_test_gap_positions($name); |
280
|
|
|
|
|
|
|
Function: Internal test for correct position of gap characters. |
281
|
|
|
|
|
|
|
Gap being only '-' this time. |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
This method is called from named_meta() when setting meta |
284
|
|
|
|
|
|
|
data but only if verbose is positive as this can be an |
285
|
|
|
|
|
|
|
expensive process on very long sequences. Set verbose(1) to |
286
|
|
|
|
|
|
|
see warnings when gaps do not align in sequence and meta |
287
|
|
|
|
|
|
|
data and turn them into errors by setting verbose(2). |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
Returns : true on success, prints warnings |
290
|
|
|
|
|
|
|
Args : none |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
=cut |
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
sub _test_gap_positions { |
295
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
296
|
0
|
|
|
|
|
0
|
my $name = shift; |
297
|
0
|
|
|
|
|
0
|
my $success = 1; |
298
|
|
|
|
|
|
|
|
299
|
0
|
0
|
|
|
|
0
|
$self->seq || return $success; |
300
|
0
|
|
|
|
|
0
|
my $len = CORE::length($self->seq); |
301
|
0
|
|
|
|
|
0
|
for (my $i=0; $i < $len; $i++) { |
302
|
0
|
|
|
|
|
0
|
my $s = substr $self->{seq}, $i, 1; |
303
|
0
|
|
|
|
|
0
|
my $m = substr $self->{_meta}->{$name}, $i, 1; |
304
|
0
|
0
|
0
|
|
|
0
|
$self->warn("Gap mismatch in column [". ($i+1). "] of [$name] meta data in seq [". $self->id. "]") |
|
|
|
0
|
|
|
|
|
305
|
|
|
|
|
|
|
and $success = 0 |
306
|
|
|
|
|
|
|
if ($m eq '-') && $s ne $m; |
307
|
|
|
|
|
|
|
} |
308
|
0
|
|
|
|
|
0
|
return $success; |
309
|
|
|
|
|
|
|
} |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
=head2 named_meta_text |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
Title : named_meta_text() |
314
|
|
|
|
|
|
|
Usage : $meta_values = $obj->named_meta_text($name, $values_arrayref); |
315
|
|
|
|
|
|
|
Function: Variant of named_meta() guarantied to return a textual |
316
|
|
|
|
|
|
|
representation of the named meta data. |
317
|
|
|
|
|
|
|
For details, see L. |
318
|
|
|
|
|
|
|
Returns : a string |
319
|
|
|
|
|
|
|
Args : scalar, name of the meta data set |
320
|
|
|
|
|
|
|
new value, string or array ref, optional |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
=cut |
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
sub named_meta_text { |
325
|
4
|
|
|
4
|
1
|
5
|
return join ' ', @{shift->named_meta(@_)}; |
|
4
|
|
|
|
|
9
|
|
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
} |
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
=head2 submeta |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
Title : submeta |
332
|
|
|
|
|
|
|
Usage : $subset_of_meta_values = $obj->submeta(10, 20, $value_string); |
333
|
|
|
|
|
|
|
$subset_of_meta_values = $obj->submeta(10, undef, $value_string); |
334
|
|
|
|
|
|
|
Function: |
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
Get and set method for meta data for subsequences. |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
Numbering starts from 1 and the number is inclusive, ie 1-2 |
339
|
|
|
|
|
|
|
are the first two residue of the sequence. Start cannot be |
340
|
|
|
|
|
|
|
larger than end but can be equal. |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
If the second argument is missing the returned values |
343
|
|
|
|
|
|
|
should extend to the end of the sequence. |
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
The return value may be a string or an array reference, |
346
|
|
|
|
|
|
|
depending on the implentation. If in doubt, use |
347
|
|
|
|
|
|
|
submeta_text() which is a variant guarantied to return a |
348
|
|
|
|
|
|
|
string. See L. |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
Returns : A reference to an array or a string |
351
|
|
|
|
|
|
|
Args : integer, start position |
352
|
|
|
|
|
|
|
integer, end position, optional when a third argument present |
353
|
|
|
|
|
|
|
new value, string or array ref, optional |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
=cut |
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
sub submeta { |
358
|
1034
|
|
|
1034
|
1
|
1292
|
shift->named_submeta($DEFAULT_NAME, @_); |
359
|
|
|
|
|
|
|
} |
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
=head2 submeta_text |
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
Title : submeta_text |
364
|
|
|
|
|
|
|
Usage : $meta_values = $obj->submeta_text(20, $value_string); |
365
|
|
|
|
|
|
|
Function: Variant of submeta() guarantied to return a textual |
366
|
|
|
|
|
|
|
representation of meta data. For details, see L. |
367
|
|
|
|
|
|
|
Returns : a string |
368
|
|
|
|
|
|
|
Args : new value, string or array ref, optional |
369
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
=cut |
372
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
sub submeta_text { |
374
|
6
|
|
|
6
|
1
|
7
|
return join ' ', @{shift->named_submeta($DEFAULT_NAME, @_)}; |
|
6
|
|
|
|
|
15
|
|
375
|
|
|
|
|
|
|
} |
376
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
=head2 named_submeta |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
Title : named_submeta |
380
|
|
|
|
|
|
|
Usage : $subset_of_meta_values = $obj->named_submeta($name, 10, 20, $value_string); |
381
|
|
|
|
|
|
|
$subset_of_meta_values = $obj->named_submeta($name, 10); |
382
|
|
|
|
|
|
|
Function: Variant of submeta() guarantied to return a textual |
383
|
|
|
|
|
|
|
representation of meta data. For details, see L. |
384
|
|
|
|
|
|
|
Returns : A reference to an array or a string |
385
|
|
|
|
|
|
|
Args : scalar, name of the meta data set |
386
|
|
|
|
|
|
|
integer, start position |
387
|
|
|
|
|
|
|
integer, end position, optional when a third argument present (can be undef) |
388
|
|
|
|
|
|
|
new value, string or array ref, optional |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
=cut |
391
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
sub named_submeta { |
394
|
2144
|
|
|
2144
|
1
|
1883
|
my ($self, $name, $start, $end, $value) = @_; |
395
|
|
|
|
|
|
|
|
396
|
2144
|
|
33
|
|
|
2558
|
$name ||= $DEFAULT_NAME; |
397
|
2144
|
|
100
|
|
|
2307
|
$start ||=1; |
398
|
2144
|
50
|
33
|
|
|
8433
|
$start =~ /^[+]?\d+$/ and $start > 0 or |
399
|
|
|
|
|
|
|
$self->throw("Need at least a positive integer start value"); |
400
|
2144
|
|
|
|
|
1753
|
$start--; |
401
|
2144
|
|
|
|
|
1274
|
my $meta_len = scalar(@{$self->{_meta}->{$name}}); |
|
2144
|
|
|
|
|
2440
|
|
402
|
2144
|
100
|
|
|
|
2359
|
if (defined $value) { |
403
|
12
|
|
|
|
|
11
|
my $arrayref; |
404
|
|
|
|
|
|
|
|
405
|
12
|
50
|
|
|
|
32
|
if (ref $value eq 'ARRAY' ) { # array ref |
|
|
50
|
|
|
|
|
|
406
|
0
|
|
|
|
|
0
|
$arrayref = $value; |
407
|
|
|
|
|
|
|
} |
408
|
|
|
|
|
|
|
elsif (not ref($value)) { # scalar |
409
|
12
|
|
|
|
|
45
|
$arrayref = [split /\s+/, $value]; |
410
|
|
|
|
|
|
|
} else { |
411
|
0
|
|
|
|
|
0
|
$self->throw("I need a space separated scalar or array ref, not [". ref($value). "]"); |
412
|
|
|
|
|
|
|
} |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
$self->warn("You are setting meta values beyond the length of the sequence\n". |
415
|
|
|
|
|
|
|
"[$start > ". length($self->seq)."] in sequence ". $self->id) |
416
|
12
|
50
|
|
|
|
15
|
if $start + scalar @{$arrayref} -1 > $self->length; |
|
12
|
|
|
|
|
31
|
|
417
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
|
419
|
12
|
100
|
|
|
|
21
|
$end or $end = @{$arrayref} + $start; |
|
2
|
|
|
|
|
2
|
|
420
|
12
|
|
|
|
|
8
|
$end--; |
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
# test for length; pad if needed |
423
|
12
|
|
|
|
|
13
|
my $diff = $end - $start - scalar @{$arrayref}; |
|
12
|
|
|
|
|
34
|
|
424
|
12
|
50
|
|
|
|
19
|
if ($diff > 0) { |
425
|
0
|
|
|
|
|
0
|
foreach (1..$diff) { push @{$arrayref}, $META_GAP} |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
426
|
|
|
|
|
|
|
} |
427
|
|
|
|
|
|
|
|
428
|
12
|
|
|
|
|
8
|
@{$self->{_meta}->{$name}}[$start..$end] = @{$arrayref}; |
|
12
|
|
|
|
|
33
|
|
|
12
|
|
|
|
|
17
|
|
429
|
|
|
|
|
|
|
|
430
|
12
|
100
|
|
|
|
21
|
$self->_do_flush if $self->force_flush; |
431
|
|
|
|
|
|
|
|
432
|
12
|
|
|
|
|
55
|
return $arrayref; |
433
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
} else { |
435
|
|
|
|
|
|
|
# don't set by seq length; use meta array length instead; bug 2478 |
436
|
2132
|
|
66
|
|
|
2448
|
$end ||= $meta_len; |
437
|
2132
|
50
|
|
|
|
2457
|
if ($end > $meta_len) { |
438
|
0
|
|
|
|
|
0
|
$self->warn("End is longer than meta sequence $name length; resetting to $meta_len"); |
439
|
0
|
|
|
|
|
0
|
$end = $meta_len; |
440
|
|
|
|
|
|
|
} |
441
|
|
|
|
|
|
|
# warn but don't reset (push use of trunc() instead) |
442
|
2132
|
50
|
|
|
|
2742
|
$self->warn("End is longer than sequence length; use trunc() \n". |
443
|
|
|
|
|
|
|
"if you want a fully truncated object") if $end > $self->length; |
444
|
2132
|
|
|
|
|
1535
|
$end--; |
445
|
2132
|
|
|
|
|
2004
|
return [@{$self->{_meta}->{$name}}[$start..$end]]; |
|
2132
|
|
|
|
|
7932
|
|
446
|
|
|
|
|
|
|
} |
447
|
|
|
|
|
|
|
} |
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
=head2 named_submeta_text |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
Title : named_submeta_text |
453
|
|
|
|
|
|
|
Usage : $meta_values = $obj->named_submeta_text($name, 20, $value_string); |
454
|
|
|
|
|
|
|
Function: Variant of submeta() guarantied to return a textual |
455
|
|
|
|
|
|
|
representation of meta data. For details, see L. |
456
|
|
|
|
|
|
|
Returns : a string |
457
|
|
|
|
|
|
|
Args : scalar, name of the meta data |
458
|
|
|
|
|
|
|
Args : integer, start position, optional |
459
|
|
|
|
|
|
|
integer, end position, optional |
460
|
|
|
|
|
|
|
new value, string or array ref, optional |
461
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
=cut |
463
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
sub named_submeta_text { |
465
|
5
|
|
|
5
|
1
|
7
|
return join ' ', @{shift->named_submeta(@_)}; |
|
5
|
|
|
|
|
9
|
|
466
|
|
|
|
|
|
|
} |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
=head2 meta_names |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
Title : meta_names |
471
|
|
|
|
|
|
|
Usage : @meta_names = $obj->meta_names() |
472
|
|
|
|
|
|
|
Function: Retrives an array of meta data set names. The default |
473
|
|
|
|
|
|
|
(unnamed) set name is guarantied to be the first name if it |
474
|
|
|
|
|
|
|
contains any data. |
475
|
|
|
|
|
|
|
Returns : an array of names |
476
|
|
|
|
|
|
|
Args : none |
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
=cut |
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
sub meta_names { |
481
|
36
|
|
|
36
|
1
|
35
|
my ($self) = @_; |
482
|
|
|
|
|
|
|
|
483
|
36
|
|
|
|
|
35
|
my @r; |
484
|
36
|
|
|
|
|
29
|
foreach ( sort keys %{$self->{'_meta'}} ) { |
|
36
|
|
|
|
|
116
|
|
485
|
55
|
100
|
|
|
|
111
|
push (@r, $_) unless $_ eq $DEFAULT_NAME; |
486
|
|
|
|
|
|
|
} |
487
|
36
|
50
|
|
|
|
95
|
unshift @r, $DEFAULT_NAME if $self->{'_meta'}->{$DEFAULT_NAME}; |
488
|
36
|
|
|
|
|
64
|
return @r; |
489
|
|
|
|
|
|
|
} |
490
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
=head2 meta_length |
493
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
Title : meta_length() |
495
|
|
|
|
|
|
|
Usage : $meta_len = $obj->meta_length(); |
496
|
|
|
|
|
|
|
Function: return the number of elements in the meta set |
497
|
|
|
|
|
|
|
Returns : integer |
498
|
|
|
|
|
|
|
Args : - |
499
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
=cut |
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
sub meta_length { |
503
|
4
|
|
|
4
|
1
|
7
|
my ($self) = @_; |
504
|
4
|
|
|
|
|
7
|
return $self->named_meta_length($DEFAULT_NAME); |
505
|
|
|
|
|
|
|
} |
506
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
=head2 named_meta_length |
509
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
Title : named_meta_length() |
511
|
|
|
|
|
|
|
Usage : $meeta_len = $obj->named_meta_length($name); |
512
|
|
|
|
|
|
|
Function: return the number of elements in the named meta set |
513
|
|
|
|
|
|
|
Returns : integer |
514
|
|
|
|
|
|
|
Args : - |
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
=cut |
517
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
sub named_meta_length { |
519
|
104
|
|
|
104
|
1
|
84
|
my ($self, $name) = @_; |
520
|
104
|
|
33
|
|
|
132
|
$name ||= $DEFAULT_NAME; |
521
|
104
|
|
|
|
|
60
|
return scalar @{$self->{'_meta'}->{$name}}; |
|
104
|
|
|
|
|
267
|
|
522
|
|
|
|
|
|
|
} |
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
=head2 force_flush |
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
Title : force_flush() |
529
|
|
|
|
|
|
|
Usage : $force_flush = $obj->force_flush(1); |
530
|
|
|
|
|
|
|
Function: Automatically pad with empty values or truncate meta values |
531
|
|
|
|
|
|
|
to sequence length. Not done by default. |
532
|
|
|
|
|
|
|
Returns : boolean 1 or 0 |
533
|
|
|
|
|
|
|
Args : optional boolean value |
534
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
Note that if you turn this forced padding off, the previously padded |
536
|
|
|
|
|
|
|
values are not changed. |
537
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
=cut |
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
sub force_flush { |
541
|
1597
|
|
|
1597
|
1
|
1454
|
my ($self, $value) = @_; |
542
|
|
|
|
|
|
|
|
543
|
1597
|
100
|
|
|
|
2512
|
if (defined $value) { |
544
|
7
|
100
|
|
|
|
11
|
if ($value) { |
545
|
5
|
|
|
|
|
9
|
$self->{force_flush} = 1; |
546
|
5
|
|
|
|
|
15
|
$self->_do_flush; |
547
|
|
|
|
|
|
|
} else { |
548
|
2
|
|
|
|
|
4
|
$self->{force_flush} = 0; |
549
|
|
|
|
|
|
|
} |
550
|
|
|
|
|
|
|
} |
551
|
|
|
|
|
|
|
|
552
|
1597
|
|
|
|
|
2594
|
return $self->{force_flush}; |
553
|
|
|
|
|
|
|
} |
554
|
|
|
|
|
|
|
|
555
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
=head2 _do_flush |
557
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
Title : _do_flush |
559
|
|
|
|
|
|
|
Usage : |
560
|
|
|
|
|
|
|
Function: internal method to do the force that meta values are same |
561
|
|
|
|
|
|
|
length as sequence . Called from L |
562
|
|
|
|
|
|
|
Returns : |
563
|
|
|
|
|
|
|
Args : |
564
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
=cut |
566
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
sub _do_flush { |
569
|
26
|
|
|
26
|
|
24
|
my ($self) = @_; |
570
|
|
|
|
|
|
|
|
571
|
26
|
|
|
|
|
33
|
foreach my $name ($self->meta_names) { |
572
|
|
|
|
|
|
|
#print "seq: ", $self->length , " ", $name, ": ", $self->named_meta_length($name), "======\n"; |
573
|
|
|
|
|
|
|
|
574
|
|
|
|
|
|
|
# elongnation |
575
|
36
|
100
|
|
|
|
64
|
if ($self->length > $self->named_meta_length($name)) { |
|
|
100
|
|
|
|
|
|
576
|
10
|
|
|
|
|
14
|
my $diff = $self->length - $self->named_meta_length($name); |
577
|
10
|
|
|
|
|
17
|
foreach (1..$diff) { push @{$self->{'_meta'}->{$name}}, $META_GAP} |
|
64
|
|
|
|
|
35
|
|
|
64
|
|
|
|
|
77
|
|
578
|
|
|
|
|
|
|
} |
579
|
|
|
|
|
|
|
# truncation |
580
|
|
|
|
|
|
|
elsif ( $self->length < $self->named_meta_length($name) ) { |
581
|
4
|
|
|
|
|
9
|
$self->{_meta}->{$name} = [@{$self->{_meta}->{$name}}[0..($self->length-1)]] |
|
4
|
|
|
|
|
17
|
|
582
|
|
|
|
|
|
|
} |
583
|
|
|
|
|
|
|
} |
584
|
|
|
|
|
|
|
} |
585
|
|
|
|
|
|
|
|
586
|
|
|
|
|
|
|
|
587
|
|
|
|
|
|
|
=head2 is_flush |
588
|
|
|
|
|
|
|
|
589
|
|
|
|
|
|
|
Title : is_flush |
590
|
|
|
|
|
|
|
Usage : $is_flush = $obj->is_flush() |
591
|
|
|
|
|
|
|
or $is_flush = $obj->is_flush($my_meta_name) |
592
|
|
|
|
|
|
|
Function: Boolean to tell if all meta values are in |
593
|
|
|
|
|
|
|
flush with the sequence length. |
594
|
|
|
|
|
|
|
Returns true if force_flush() is set |
595
|
|
|
|
|
|
|
Set verbosity to a positive value to see failed meta sets |
596
|
|
|
|
|
|
|
Returns : boolean 1 or 0 |
597
|
|
|
|
|
|
|
Args : optional name of the meta set |
598
|
|
|
|
|
|
|
|
599
|
|
|
|
|
|
|
=cut |
600
|
|
|
|
|
|
|
|
601
|
|
|
|
|
|
|
sub is_flush { |
602
|
|
|
|
|
|
|
|
603
|
14
|
|
|
14
|
1
|
20
|
my ($self, $name) = shift; |
604
|
|
|
|
|
|
|
|
605
|
14
|
100
|
|
|
|
23
|
return 1 if $self->force_flush; |
606
|
|
|
|
|
|
|
|
607
|
9
|
|
|
|
|
10
|
my $sticky = ''; |
608
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
|
610
|
9
|
50
|
|
|
|
18
|
if ($name) { |
611
|
0
|
0
|
|
|
|
0
|
$sticky .= "$name " if $self->length != $self->named_meta_length($name); |
612
|
|
|
|
|
|
|
} else { |
613
|
9
|
|
|
|
|
30
|
foreach my $m ($self->meta_names) { |
614
|
16
|
50
|
66
|
|
|
28
|
$sticky .= "$m " if ($self->named_meta_length($m) > 0) && ($self->length != $self->named_meta_length($m)) ; |
615
|
|
|
|
|
|
|
} |
616
|
|
|
|
|
|
|
} |
617
|
|
|
|
|
|
|
|
618
|
9
|
50
|
|
|
|
16
|
if ($sticky) { |
619
|
0
|
0
|
|
|
|
0
|
print "These meta set are not flush: $sticky\n" if $self->verbose; |
620
|
0
|
|
|
|
|
0
|
return 0; |
621
|
|
|
|
|
|
|
} |
622
|
|
|
|
|
|
|
|
623
|
9
|
|
|
|
|
20
|
return 1; |
624
|
|
|
|
|
|
|
} |
625
|
|
|
|
|
|
|
|
626
|
|
|
|
|
|
|
|
627
|
|
|
|
|
|
|
=head1 Bio::PrimarySeqI methods |
628
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
=head2 revcom |
630
|
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
Title : revcom |
632
|
|
|
|
|
|
|
Usage : $newseq = $seq->revcom(); |
633
|
|
|
|
|
|
|
Function: Produces a new Bio::Seq::MetaI implementing object where |
634
|
|
|
|
|
|
|
the order of residues and their meta information is reversed. |
635
|
|
|
|
|
|
|
Returns : A new (fresh) Bio::Seq::Meta object |
636
|
|
|
|
|
|
|
Args : none |
637
|
|
|
|
|
|
|
Throws : if the object returns false on is_flush() |
638
|
|
|
|
|
|
|
|
639
|
|
|
|
|
|
|
Note: The method does nothing to meta values, it reorders them, only. |
640
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
=cut |
642
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
sub revcom { |
644
|
9
|
|
|
9
|
1
|
18
|
my $self = shift; |
645
|
|
|
|
|
|
|
|
646
|
9
|
50
|
|
|
|
25
|
$self->throw("Can not get a reverse complement. The object is not flush.") |
647
|
|
|
|
|
|
|
unless $self->is_flush; |
648
|
|
|
|
|
|
|
|
649
|
9
|
|
|
|
|
27
|
my $new = $self->SUPER::revcom; |
650
|
9
|
|
|
|
|
18
|
my $end = $self->length - 1; |
651
|
|
|
|
|
|
|
map { |
652
|
16
|
|
|
|
|
20
|
$new->{_meta}->{$_} = [ reverse @{$self->{_meta}->{$_}}[0..$end]] |
|
16
|
|
|
|
|
83
|
|
653
|
9
|
|
|
|
|
11
|
} keys %{$self->{_meta}}; |
|
9
|
|
|
|
|
22
|
|
654
|
|
|
|
|
|
|
|
655
|
9
|
|
|
|
|
23
|
return $new; |
656
|
|
|
|
|
|
|
} |
657
|
|
|
|
|
|
|
|
658
|
|
|
|
|
|
|
=head2 trunc |
659
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
Title : trunc |
661
|
|
|
|
|
|
|
Usage : $subseq = $seq->trunc(10,100); |
662
|
|
|
|
|
|
|
Function: Provides a truncation of a sequence together with meta data |
663
|
|
|
|
|
|
|
Returns : a fresh Bio::Seq::Meta implementing object |
664
|
|
|
|
|
|
|
Args : Two integers denoting first and last residue of the sub-sequence. |
665
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
=cut |
667
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
sub trunc { |
669
|
54
|
|
|
54
|
1
|
5115
|
my ($self, $start, $end) = @_; |
670
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
# test arguments |
672
|
54
|
50
|
33
|
|
|
341
|
$start =~ /^[+]?\d+$/ and $start > 0 or |
673
|
|
|
|
|
|
|
$self->throw("Need at least a positive integer start value as start; got [$start]"); |
674
|
54
|
50
|
33
|
|
|
237
|
$end =~ /^[+]?\d+$/ and $end > 0 or |
675
|
|
|
|
|
|
|
$self->throw("Need at least a positive integer start value as end; got [$end]"); |
676
|
54
|
50
|
|
|
|
87
|
$end >= $start or |
677
|
|
|
|
|
|
|
$self->throw("End position has to be larger or equal to start; got [$start..$end]"); |
678
|
54
|
50
|
|
|
|
110
|
$end <= $self->length or |
679
|
|
|
|
|
|
|
$self->throw("End position can not be larger than sequence length; got [$end]"); |
680
|
|
|
|
|
|
|
|
681
|
54
|
|
|
|
|
131
|
my $new = $self->SUPER::trunc($start, $end); |
682
|
54
|
|
|
|
|
59
|
$start--; |
683
|
54
|
|
|
|
|
50
|
$end--; |
684
|
|
|
|
|
|
|
map { |
685
|
107
|
|
|
|
|
130
|
$new->{_meta}->{$_} = [@{$self->{_meta}->{$_}}[$start..$end]] |
|
107
|
|
|
|
|
437
|
|
686
|
54
|
|
|
|
|
34
|
} keys %{$self->{_meta}}; |
|
54
|
|
|
|
|
113
|
|
687
|
54
|
|
|
|
|
179
|
return $new; |
688
|
|
|
|
|
|
|
} |
689
|
|
|
|
|
|
|
|
690
|
|
|
|
|
|
|
1; |