line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::MUST::Core::IdList; |
2
|
|
|
|
|
|
|
# ABSTRACT: Id list for selecting specific sequences |
3
|
|
|
|
|
|
|
$Bio::MUST::Core::IdList::VERSION = '0.212650'; |
4
|
17
|
|
|
17
|
|
11408
|
use Moose; |
|
17
|
|
|
|
|
44
|
|
|
17
|
|
|
|
|
135
|
|
5
|
17
|
|
|
17
|
|
89257
|
use namespace::autoclean; |
|
17
|
|
|
|
|
62
|
|
|
17
|
|
|
|
|
204
|
|
6
|
|
|
|
|
|
|
|
7
|
17
|
|
|
17
|
|
1803
|
use autodie; |
|
17
|
|
|
|
|
76
|
|
|
17
|
|
|
|
|
140
|
|
8
|
17
|
|
|
17
|
|
94518
|
use feature qw(say); |
|
17
|
|
|
|
|
53
|
|
|
17
|
|
|
|
|
1497
|
|
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
# use Smart::Comments; |
11
|
|
|
|
|
|
|
|
12
|
17
|
|
|
17
|
|
134
|
use Carp; |
|
17
|
|
|
|
|
38
|
|
|
17
|
|
|
|
|
1439
|
|
13
|
|
|
|
|
|
|
|
14
|
17
|
|
|
17
|
|
121
|
use Bio::MUST::Core::Types; |
|
17
|
|
|
|
|
38
|
|
|
17
|
|
|
|
|
532
|
|
15
|
17
|
|
|
17
|
|
107
|
use Bio::MUST::Core::Constants qw(:files); |
|
17
|
|
|
|
|
49
|
|
|
17
|
|
|
|
|
3697
|
|
16
|
17
|
|
|
17
|
|
131
|
use aliased 'Bio::MUST::Core::SeqId'; |
|
17
|
|
|
|
|
59
|
|
|
17
|
|
|
|
|
126
|
|
17
|
17
|
|
|
17
|
|
4002
|
use aliased 'Bio::MUST::Core::Ali'; |
|
17
|
|
|
|
|
43
|
|
|
17
|
|
|
|
|
64
|
|
18
|
|
|
|
|
|
|
with 'Bio::MUST::Core::Roles::Commentable', |
19
|
|
|
|
|
|
|
'Bio::MUST::Core::Roles::Listable'; |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
# public array |
23
|
|
|
|
|
|
|
has 'ids' => ( |
24
|
|
|
|
|
|
|
traits => ['Array'], |
25
|
|
|
|
|
|
|
is => 'ro', |
26
|
|
|
|
|
|
|
isa => 'Bio::MUST::Core::Types::full_ids', |
27
|
|
|
|
|
|
|
default => sub { [] }, |
28
|
|
|
|
|
|
|
coerce => 1, |
29
|
|
|
|
|
|
|
writer => '_set_ids', |
30
|
|
|
|
|
|
|
handles => { |
31
|
|
|
|
|
|
|
count_ids => 'count', |
32
|
|
|
|
|
|
|
all_ids => 'elements', |
33
|
|
|
|
|
|
|
add_id => 'push', |
34
|
|
|
|
|
|
|
}, |
35
|
|
|
|
|
|
|
); |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
# private hash for faster querying |
39
|
|
|
|
|
|
|
has '_index_for' => ( |
40
|
|
|
|
|
|
|
traits => ['Hash'], |
41
|
|
|
|
|
|
|
is => 'ro', |
42
|
|
|
|
|
|
|
isa => 'HashRef[Str]', |
43
|
|
|
|
|
|
|
init_arg => undef, |
44
|
|
|
|
|
|
|
lazy => 1, |
45
|
|
|
|
|
|
|
builder => '_build_index_for', |
46
|
|
|
|
|
|
|
handles => { |
47
|
|
|
|
|
|
|
count_indices => 'count', |
48
|
|
|
|
|
|
|
is_listed => 'defined', |
49
|
|
|
|
|
|
|
index_for => 'get', |
50
|
|
|
|
|
|
|
set_index => 'set', |
51
|
|
|
|
|
|
|
}, |
52
|
|
|
|
|
|
|
); |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
## no critic (ProhibitUnusedPrivateSubroutines) |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
# Note: we don't store SeqId objects in the list but dynamically build them |
58
|
|
|
|
|
|
|
# to benefit from SeqId methods (e.g., auto-removal of first '_'). This is |
59
|
|
|
|
|
|
|
# the most flexible approach without costing too much in CPU-time. |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
sub _build_index_for { |
62
|
15
|
|
|
15
|
|
44
|
my $self = shift; |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
# build private hash from internal array |
65
|
15
|
|
|
|
|
27
|
my $i = 0; |
66
|
15
|
|
|
|
|
270
|
return { map { $_->full_id => $i++ } $self->all_seq_ids }; |
|
325
|
|
|
|
|
8574
|
|
67
|
|
|
|
|
|
|
} |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
## use critic |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
after 'add_id' => sub { |
72
|
|
|
|
|
|
|
my $self = shift; |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
# check if there are indeed ids not yet in private hash |
75
|
|
|
|
|
|
|
# Note: this might not be the case when adding ids in an empty list |
76
|
|
|
|
|
|
|
my $n = $self->count_ids; |
77
|
|
|
|
|
|
|
my $i = $self->count_indices; |
78
|
|
|
|
|
|
|
return if $n == $i; |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
# update private hash from internal array |
81
|
|
|
|
|
|
|
$self->set_index( |
82
|
|
|
|
|
|
|
map { $_->full_id => $i++ } ($self->all_seq_ids)[$i..$n-1] |
83
|
|
|
|
|
|
|
); |
84
|
|
|
|
|
|
|
return; |
85
|
|
|
|
|
|
|
}; |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
sub all_seq_ids { |
89
|
30
|
|
|
30
|
1
|
62
|
my $self = shift; |
90
|
30
|
|
|
|
|
1098
|
return map { SeqId->new( full_id => $_ ) } $self->all_ids; |
|
420
|
|
|
|
|
11469
|
|
91
|
|
|
|
|
|
|
} |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
sub negative_list { |
96
|
5
|
|
|
5
|
1
|
13
|
my $self = shift; |
97
|
5
|
|
|
|
|
9
|
my $listable = shift; |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
# filter out seq ids that are in the original list |
100
|
5
|
|
|
|
|
19
|
my @ids = map { $_->full_id } $listable->all_seq_ids; |
|
50
|
|
|
|
|
1330
|
|
101
|
5
|
|
|
|
|
17
|
return $self->new( ids => [ grep { not $self->is_listed($_) } @ids ] ); |
|
50
|
|
|
|
|
1741
|
|
102
|
|
|
|
|
|
|
} |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
# IdList-based Ali factory methods |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
sub reordered_ali { ## no critic (RequireArgUnpacking) |
109
|
4
|
|
|
4
|
1
|
38
|
return shift->_ali_from_list_(1, @_); |
110
|
|
|
|
|
|
|
} |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
sub filtered_ali { ## no critic (RequireArgUnpacking) |
115
|
54
|
|
|
54
|
1
|
188
|
return shift->_ali_from_list_(0, @_); |
116
|
|
|
|
|
|
|
} |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
sub _ali_from_list_ { |
120
|
58
|
|
|
58
|
|
105
|
my $self = shift; |
121
|
58
|
|
|
|
|
90
|
my $reorder = shift; |
122
|
58
|
|
|
|
|
107
|
my $ali = shift; |
123
|
58
|
|
|
|
|
89
|
my $lookup = shift; # optional IdList indexing the Ali |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
# override passed lookup with internal lookup if available |
126
|
|
|
|
|
|
|
# Note: this allows Stash lookups to be used transparently |
127
|
58
|
100
|
|
|
|
327
|
$lookup = $ali->lookup if $ali->can('lookup'); |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
# TODO: warn for missing ids in Ali? |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
# create new Ali object (extending header comment) |
132
|
|
|
|
|
|
|
# TODO: allow custom comments |
133
|
58
|
100
|
|
|
|
2468
|
my $new_ali = Ali->new( |
134
|
|
|
|
|
|
|
comments => [ $ali->all_comments, |
135
|
|
|
|
|
|
|
'built by ' . ($reorder ? 'reordered_ali' : 'filtered_ali') |
136
|
|
|
|
|
|
|
], |
137
|
|
|
|
|
|
|
); |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
# case 1: use lookup when available |
140
|
58
|
100
|
|
|
|
149
|
if (defined $lookup) { |
141
|
|
|
|
|
|
|
### Using lookup... |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
# get slot list from lookup |
144
|
|
|
|
|
|
|
# Note: Since this list follows the list in $self it is 'reordered'. |
145
|
|
|
|
|
|
|
# We thus sort it by ascending slot if the Ali order must be kept. |
146
|
3
|
|
|
|
|
129
|
my @slots = $lookup->index_for($self->all_ids); |
147
|
3
|
100
|
|
|
|
56
|
@slots = sort { $a <=> $b } @slots unless $reorder; |
|
7
|
|
|
|
|
20
|
|
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
# populate new Ali with deep copies of Seqs in slot list |
150
|
3
|
|
|
|
|
106
|
$new_ali->add_seq( $ali->get_seq($_)->clone ) for @slots; |
151
|
|
|
|
|
|
|
} |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
# case 2: scan all seqs to find those that are listed |
154
|
|
|
|
|
|
|
else { |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
SEQ: |
157
|
55
|
|
|
|
|
2046
|
for my $seq ($ali->all_seqs) { |
158
|
4536
|
100
|
|
|
|
11360
|
next SEQ unless $self->is_listed($seq->full_id); |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
# add Seq to new Ali honoring either IdList order... |
161
|
67
|
100
|
|
|
|
180
|
if ($reorder) { |
162
|
8
|
|
|
|
|
43
|
$new_ali->set_seq( |
163
|
|
|
|
|
|
|
$self->index_for($seq->full_id), $seq->clone |
164
|
|
|
|
|
|
|
); |
165
|
8
|
|
|
|
|
31
|
next SEQ; |
166
|
|
|
|
|
|
|
} |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
# ...or original Ali order |
169
|
59
|
|
|
|
|
272
|
$new_ali->add_seq($seq->clone); |
170
|
|
|
|
|
|
|
} |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
# when reordering an Ali, ensure that new Ali does not contain |
173
|
|
|
|
|
|
|
# empty slots due to some missing ids in the original Ali |
174
|
|
|
|
|
|
|
$new_ali->_set_seqs( |
175
|
55
|
100
|
|
10
|
|
385
|
[ $new_ali->filter_seqs( sub { defined } ) ] |
|
10
|
|
|
|
|
335
|
|
176
|
|
|
|
|
|
|
) if $reorder; |
177
|
|
|
|
|
|
|
} |
178
|
|
|
|
|
|
|
|
179
|
58
|
|
|
|
|
222
|
return $new_ali; |
180
|
|
|
|
|
|
|
} |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
# I/O methods |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
sub load { |
187
|
2
|
|
|
2
|
1
|
327
|
my $class = shift; |
188
|
2
|
|
|
|
|
3
|
my $infile = shift; |
189
|
2
|
|
100
|
|
|
9
|
my $args = shift // {}; # HashRef (should not be empty...) |
190
|
|
|
|
|
|
|
|
191
|
2
|
|
100
|
|
|
8
|
my $col = $args->{column} // 0; |
192
|
2
|
|
66
|
|
|
9
|
my $sep = $args->{separator} // qr{\t}xms; |
193
|
|
|
|
|
|
|
|
194
|
2
|
|
|
|
|
11
|
open my $in, '<', $infile; |
195
|
|
|
|
|
|
|
|
196
|
2
|
|
|
|
|
2574
|
my $list = $class->new(); |
197
|
|
|
|
|
|
|
|
198
|
2
|
|
|
|
|
5
|
my @ids; |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
LINE: |
201
|
2
|
|
|
|
|
1824
|
while (my $line = <$in>) { |
202
|
12
|
|
|
|
|
27
|
chomp $line; |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
# skip empty lines and process comment lines |
205
|
12
|
100
|
66
|
|
|
81
|
next LINE if $line =~ $EMPTY_LINE |
206
|
|
|
|
|
|
|
|| $list->is_comment($line); |
207
|
|
|
|
|
|
|
|
208
|
10
|
|
|
|
|
60
|
my @fields = split $sep, $line; |
209
|
10
|
|
|
|
|
66
|
push @ids, $fields[$col]; |
210
|
|
|
|
|
|
|
} |
211
|
|
|
|
|
|
|
|
212
|
2
|
|
|
|
|
88
|
$list->_set_ids( \@ids ); |
213
|
|
|
|
|
|
|
|
214
|
2
|
|
|
|
|
39
|
return $list; |
215
|
|
|
|
|
|
|
} |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
sub load_lis { |
220
|
1
|
|
|
1
|
1
|
92
|
my $class = shift; |
221
|
1
|
|
|
|
|
2
|
my $infile = shift; |
222
|
|
|
|
|
|
|
|
223
|
1
|
|
|
|
|
4
|
open my $in, '<', $infile; |
224
|
|
|
|
|
|
|
|
225
|
1
|
|
|
|
|
318
|
my $list = $class->new(); |
226
|
|
|
|
|
|
|
|
227
|
1
|
|
|
|
|
4
|
my $count; |
228
|
|
|
|
|
|
|
my @ids; |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
LINE: |
231
|
1
|
|
|
|
|
724
|
while (my $line = <$in>) { |
232
|
8
|
|
|
|
|
18
|
chomp $line; |
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
# skip empty lines and process comment lines |
235
|
8
|
100
|
66
|
|
|
50
|
next LINE if $line =~ $EMPTY_LINE |
236
|
|
|
|
|
|
|
|| $list->is_comment($line); |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
# read id count as first lone number if not yet defined |
239
|
6
|
100
|
66
|
|
|
21
|
if (!defined $count && $line =~ $COUNT_LINE) { |
240
|
1
|
|
|
|
|
3
|
$count = $line; |
241
|
1
|
|
|
|
|
4
|
next LINE; |
242
|
|
|
|
|
|
|
} |
243
|
|
|
|
|
|
|
|
244
|
5
|
|
|
|
|
27
|
push @ids, $line; |
245
|
|
|
|
|
|
|
} |
246
|
|
|
|
|
|
|
|
247
|
1
|
|
|
|
|
41
|
$list->_set_ids( \@ids ); |
248
|
|
|
|
|
|
|
|
249
|
1
|
50
|
|
|
|
38
|
carp '[BMC] Warning: id list size does not match id count in header!' |
250
|
|
|
|
|
|
|
unless $list->count_ids == $count; |
251
|
|
|
|
|
|
|
|
252
|
1
|
|
|
|
|
19
|
return $list; |
253
|
|
|
|
|
|
|
} |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
sub store { |
258
|
2
|
|
|
2
|
1
|
9
|
my $self = shift; |
259
|
2
|
|
|
|
|
5
|
my $outfile = shift; |
260
|
|
|
|
|
|
|
|
261
|
2
|
|
|
|
|
22
|
open my $out, '>', $outfile; |
262
|
|
|
|
|
|
|
|
263
|
2
|
|
|
|
|
2956
|
print {$out} $self->header; |
|
2
|
|
|
|
|
18
|
|
264
|
2
|
|
|
|
|
10
|
say {$out} join "\n", $self->all_ids; |
|
2
|
|
|
|
|
99
|
|
265
|
|
|
|
|
|
|
|
266
|
2
|
|
|
|
|
128
|
return; |
267
|
|
|
|
|
|
|
} |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
sub store_lis { |
272
|
1
|
|
|
1
|
1
|
4
|
my $self = shift; |
273
|
1
|
|
|
|
|
3
|
my $outfile = shift; |
274
|
|
|
|
|
|
|
|
275
|
1
|
|
|
|
|
5
|
open my $out, '>', $outfile; |
276
|
|
|
|
|
|
|
|
277
|
1
|
|
|
|
|
265
|
print {$out} $self->header; |
|
1
|
|
|
|
|
5
|
|
278
|
1
|
|
|
|
|
5
|
say {$out} $self->count_ids; |
|
1
|
|
|
|
|
41
|
|
279
|
1
|
|
|
|
|
3
|
say {$out} join "\n", $self->all_ids; |
|
1
|
|
|
|
|
37
|
|
280
|
|
|
|
|
|
|
|
281
|
1
|
|
|
|
|
41
|
return; |
282
|
|
|
|
|
|
|
} |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
__PACKAGE__->meta->make_immutable; |
285
|
|
|
|
|
|
|
1; |
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
__END__ |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
=pod |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
=head1 NAME |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
Bio::MUST::Core::IdList - Id list for selecting specific sequences |
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
=head1 VERSION |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
version 0.212650 |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
=head1 SYNOPSIS |
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
# TODO |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
=head1 DESCRIPTION |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
# TODO |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
=head1 METHODS |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
=head2 all_seq_ids |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
=head2 negative_list |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
=head2 reordered_ali |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
=head2 filtered_ali |
316
|
|
|
|
|
|
|
|
317
|
|
|
|
|
|
|
=head2 load |
318
|
|
|
|
|
|
|
|
319
|
|
|
|
|
|
|
=head2 load_lis |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
=head2 store |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
=head2 store_lis |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
=head1 AUTHOR |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
Denis BAURAIN <denis.baurain@uliege.be> |
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
This software is copyright (c) 2013 by University of Liege / Unit of Eukaryotic Phylogenomics / Denis BAURAIN. |
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
334
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
=cut |