line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::MUST::Core::Roles::Listable; |
2
|
|
|
|
|
|
|
# ABSTRACT: Listable Moose role for objects with implied id lists |
3
|
|
|
|
|
|
|
$Bio::MUST::Core::Roles::Listable::VERSION = '0.212530'; |
4
|
17
|
|
|
17
|
|
12869
|
use Moose::Role; |
|
17
|
|
|
|
|
47
|
|
|
17
|
|
|
|
|
142
|
|
5
|
|
|
|
|
|
|
|
6
|
17
|
|
|
17
|
|
100109
|
use autodie; |
|
17
|
|
|
|
|
59
|
|
|
17
|
|
|
|
|
153
|
|
7
|
17
|
|
|
17
|
|
94728
|
use feature qw(say); |
|
17
|
|
|
|
|
42
|
|
|
17
|
|
|
|
|
1563
|
|
8
|
|
|
|
|
|
|
|
9
|
17
|
|
|
17
|
|
125
|
use Carp; |
|
17
|
|
|
|
|
41
|
|
|
17
|
|
|
|
|
1437
|
|
10
|
17
|
|
|
17
|
|
166
|
use Const::Fast; |
|
17
|
|
|
|
|
41
|
|
|
17
|
|
|
|
|
198
|
|
11
|
17
|
|
|
17
|
|
12427
|
use Date::Format; |
|
17
|
|
|
|
|
134295
|
|
|
17
|
|
|
|
|
1731
|
|
12
|
17
|
|
|
17
|
|
193
|
use List::AllUtils; |
|
17
|
|
|
|
|
42
|
|
|
17
|
|
|
|
|
849
|
|
13
|
17
|
|
|
17
|
|
123
|
use POSIX qw(ceil floor); |
|
17
|
|
|
|
|
39
|
|
|
17
|
|
|
|
|
179
|
|
14
|
|
|
|
|
|
|
|
15
|
17
|
|
|
17
|
|
1539
|
use Bio::MUST::Core::Types; |
|
17
|
|
|
|
|
50
|
|
|
17
|
|
|
|
|
573
|
|
16
|
17
|
|
|
17
|
|
105
|
use Bio::MUST::Core::Constants qw(:seqids); |
|
17
|
|
|
|
|
50
|
|
|
17
|
|
|
|
|
28458
|
|
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
requires 'all_seq_ids'; |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
# IdList factory methods |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
# alias for std_list emphasizing its use as a lookup |
25
|
|
|
|
|
|
|
# Alias for std_list that emphasizes use of the result as a lookup.
# Returns whatever _list_from_seq_ids yields when called with a false
# sort flag, i.e. the ids in the order reported by all_seq_ids.
sub new_lookup {
    my $self = shift;

    my $want_sorted = 0;
    return $self->_list_from_seq_ids($want_sorted);
}
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
# Standard id list: the ids in their original order.
# Returns the result of _list_from_seq_ids called with a false sort flag.
sub std_list {
    my $self = shift;

    my $want_sorted = 0;
    return $self->_list_from_seq_ids($want_sorted);
}
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# Sorted id list.
# Returns the result of _list_from_seq_ids called with a true sort flag.
sub alphabetical_list {
    my $self = shift;

    my $want_sorted = 1;
    return $self->_list_from_seq_ids($want_sorted);
}
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
# Shared worker behind new_lookup, std_list and alphabetical_list.
#
# Collects the full_id of every object returned by all_seq_ids and wraps
# the resulting strings in a new Bio::MUST::Core::IdList.
#
#   $sort : when true, the ids are reordered with Perl's default (string)
#           sort; otherwise the order of all_seq_ids is kept
sub _list_from_seq_ids {
    my ($self, $sort) = @_;

    my @ids;
    for my $seq_id ( $self->all_seq_ids ) {
        push @ids, $seq_id->full_id;
    }

    # optionally sort list
    if ($sort) {
        @ids = sort @ids;
    }

    return Bio::MUST::Core::IdList->new( ids => \@ids );
}
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
# Guard for the two methods that need the sequences themselves:
# complete_seq_list and len_mapper both call all_seqs, which the role
# only requires indirectly. When the consuming object cannot all_seqs,
# warn and return nothing instead of calling the wrapped method.
around qw(complete_seq_list len_mapper) => sub {
    my ($method, $self) = splice @_, 0, 2;

    # seqs are available (e.g., the object is an Ali): run the real method
    return $self->$method(@_) if $self->can('all_seqs');

    carp '[BMC] Warning: cannot proceed without seqs; returning undef!';
    return;
};
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
# Build an IdList holding only the ids of "complete enough" seqs.
#
#   $min_res : minimum number of non-missing chars (as reported by
#              nomiss_seq_len) a seq must have to be kept. A value strictly
#              between 0 and 1 is read as a fraction of the longest seq and
#              rounded up to an integer. Defaults to 0 (keep everything)
#              when omitted.
#
# Returns a new Bio::MUST::Core::IdList with the full_id of each kept seq,
# in the order of all_seq_ids. Lengths and ids are paired by position.
sub complete_seq_list {
    my $self    = shift;
    my $min_res = shift // 0;   # undef would only trigger warnings below

    # get (non-missing char) lengths of all seqs and record max_len
    # (max of an empty list is undef, hence the fallback to 0)
    my @lengths = map { $_->nomiss_seq_len } $self->all_seqs;
    my $max_len = List::AllUtils::max(@lengths) // 0;

    # convert fractional min_res to conservative integer (if needed)
    if ( 0 < $min_res && $min_res < 1 ) {
        $min_res = ceil( $min_res * $max_len );
    }

    # filter out seqs with less than min_res non-missing chars
    my @ids  = map  { $_->full_id } $self->all_seq_ids;
    my @kept = grep { $lengths[$_] >= $min_res } 0 .. $#ids;

    return Bio::MUST::Core::IdList->new( ids => [ @ids[@kept] ] );
}
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
# IdMapper factory methods |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
# Build an IdMapper that abbreviates each id to "<prefix><n>", where n is
# the 1-based position of the id in all_seq_ids.
#
#   $prefix : string prepended to each number (defaults to 'seq')
#
# Returns a new Bio::MUST::Core::IdMapper whose long_ids are the full_id
# values and whose abbr_ids are the numbered abbreviations.
sub std_mapper {
    my ($self, $prefix) = @_;
    $prefix //= 'seq';

    my @seq_ids  = $self->all_seq_ids;
    my $id_count = scalar @seq_ids;

    my @long_ids = map { $_->full_id  } @seq_ids;
    my @abbr_ids = map { $prefix . $_ } 1 .. $id_count;

    return Bio::MUST::Core::IdMapper->new(
        long_ids => \@long_ids,
        abbr_ids => \@abbr_ids,
    );
}
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
# Build an IdMapper that abbreviates each id to "<prefix><accession>".
#
#   $prefix : string prepended to each accession (defaults to the empty
#             string)
#
# Returns a new Bio::MUST::Core::IdMapper whose long_ids are the full_id
# values and whose abbr_ids are built from each id's accession.
#
# Note: this mapper could fail with non-GenBank Seqs
sub acc_mapper {
    my ($self, $prefix) = @_;
    $prefix //= q{};

    my @seq_ids = $self->all_seq_ids;

    my @long_ids = map { $_->full_id             } @seq_ids;
    my @abbr_ids = map { $prefix . $_->accession } @seq_ids;

    return Bio::MUST::Core::IdMapper->new(
        long_ids => \@long_ids,
        abbr_ids => \@abbr_ids,
    );
}
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
# Build an IdMapper whose long ids carry the seq length as a suffix.
#
# For the i-th entry, the long id is "<full_id>@<nomiss_seq_len>" and the
# abbreviated id is the plain full_id. Ids (from all_seq_ids) and lengths
# (from all_seqs) are paired by position.
#
# Returns a new Bio::MUST::Core::IdMapper.
sub len_mapper {
    my ($self) = @_;

    my @seq_ids = $self->all_seq_ids;
    my @lengths = map { $_->nomiss_seq_len } $self->all_seqs;

    my @long_ids;
    for my $i ( 0 .. $#seq_ids ) {
        push @long_ids, $seq_ids[$i]->full_id . '@' . $lengths[$i];
    }
    my @abbr_ids = map { $_->full_id } @seq_ids;

    return Bio::MUST::Core::IdMapper->new(
        long_ids => \@long_ids,
        abbr_ids => \@abbr_ids,
    );
}
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
# Build an IdMapper whose abbreviated ids come from abbr_with_regex.
#
# Every argument after the invocant is forwarded unchanged to each id's
# abbr_with_regex method. Going by the original in-code notes, these are
# an optional prefix followed by an optional regex (noted there with the
# defaults q{} and $DEF_ID) -- presumably the defaults are applied by
# abbr_with_regex itself; verify against that method.
#
# Returns a new Bio::MUST::Core::IdMapper pairing each full_id with its
# regex-derived abbreviation.
sub regex_mapper {
    my ($self, @abbr_args) = @_;

    my @seq_ids = $self->all_seq_ids;

    my @long_ids = map { $_->full_id                     } @seq_ids;
    my @abbr_ids = map { $_->abbr_with_regex(@abbr_args) } @seq_ids;

    return Bio::MUST::Core::IdMapper->new(
        long_ids => \@long_ids,
        abbr_ids => \@abbr_ids
    );
}
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
# Build an IdMapper from long ids to "<abbr_org>|<accession>" ids.
#
#   $mapper : mapper long_org => abbr_org; it is asked for
#             abbr_id_for(full_org) of each id
#
# Ids for which is_foreign is true are left out of both lists.
#
# Returns a new Bio::MUST::Core::IdMapper whose long_ids are the full_id
# values of the kept ids.
sub org_mapper_from_long_ids {
    my ($self, $mapper) = @_;

    my (@long_ids, @abbr_ids);

    for my $seq_id ( $self->all_seq_ids ) {
        unless ( $seq_id->is_foreign ) {
            push @long_ids, $seq_id->full_id;

            my $abbr_org = $mapper->abbr_id_for( $seq_id->full_org );
            push @abbr_ids, $abbr_org . '|' . $seq_id->accession;
        }
    }

    return Bio::MUST::Core::IdMapper->new(
        long_ids => \@long_ids,
        abbr_ids => \@abbr_ids
    );
}
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
# Build an IdMapper from "<abbr_org>|<accession>" ids back to long ids.
#
#   $mapper : mapper long_org => abbr_org; it is asked for
#             long_id_for(abbr_org) of each id
#
# Each full_id is split at its first '|' into an abbreviated org and an
# accession. The long id is "<long_id_for(abbr_org)>@<accession>" and the
# abbreviated id is the full_id as is. Ids whose org part is false (for
# instance, ids starting with '|') are left out of both lists.
#
# Returns a new Bio::MUST::Core::IdMapper.
sub org_mapper_from_abbr_ids {
    my ($self, $mapper) = @_;

    my (@long_ids, @abbr_ids);

    for my $seq_id ( $self->all_seq_ids ) {
        my $abbr_id = $seq_id->full_id;
        my ($abbr_org, $accession) = split /\|/xms, $abbr_id, 2;

        # NOTE(review): an id without any '|' yields an undef accession
        # here and is still mapped -- confirm this is intended.
        if ($abbr_org) {
            push @long_ids, $mapper->long_id_for($abbr_org) . '@' . $accession;
            push @abbr_ids, $abbr_id;
        }
    }

    return Bio::MUST::Core::IdMapper->new(
        long_ids => \@long_ids,
        abbr_ids => \@abbr_ids
    );
}
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
# Width (in chars) to which each id is padded in a .nbs file.
const my $NBS_ID_LEN => 79;

# Write the ids of the object to $outfile in .nbs layout.
#
#   $outfile : path of the file to create (an existing file is overwritten)
#
# Only a minimal header is written: a "#File ... created on ..." line and
# a "#Here is the list of the N species used:" line. The ids follow on two
# columns, each id (taken from foreign_id) being right-padded with
# underscores up to $NBS_ID_LEN chars. Ids that are already longer are
# written unpadded.
#
# Example of a complete file as produced by the original tool:
#
# #Sequences extracted from c111_78.ali of the 5 May 2009 at 11 hours 40
# #File c111_78.nbs created on Tuesday 5 May 2009 at 11 hours 40
# #184 positions remain on the 184 aligned positions
# #life.col,life.nom
# #Here is the list of the 78 species used:
# Aciduliprofundum_boonei_T469___________________________________________________ Aeropyrum_pernix_K1____________________________________________________________
# Archaeoglobus_fulgidus_DSM_4304________________________________________________ Archaeoglobus_profundus_Av18__DSM_5631_________________________________________
# ...
#
# Returns nothing. With autodie in force for this file, a failing open or
# close throws an exception.
sub store_nbs {
    my $self    = shift;
    my $outfile = shift;

    my @ids = $self->all_seq_ids;

    open my $out, '>', $outfile;

    # print minimum header
    # (no "say" for the first line: Date::Format's ctime is expected to
    # supply the trailing newline itself)
    print {$out} "#File $outfile created on " . ctime(time);
    say {$out} '#Here is the list of the ' . scalar @ids . ' species used:';

    # print padded ids on two columns:
    # even indices end with a space, odd indices end the line
    for my $i (0..$#ids) {
        my $id = $ids[$i]->foreign_id;
        my $pad_id = $id . '_' x ($NBS_ID_LEN - length $id);
        my $term = $i % 2 ? "\n" : q{ };
        print {$out} $pad_id . $term;
    }
    say {$out} q{};

    # close explicitly so that buffered write errors (e.g., disk full),
    # which only surface at close time, are reported instead of being lost
    # when the handle silently goes out of scope
    close $out;

    return;
}
218
|
|
|
|
|
|
|
|
219
|
17
|
|
|
17
|
|
182
|
no Moose::Role; |
|
17
|
|
|
|
|
46
|
|
|
17
|
|
|
|
|
196
|
|
220
|
|
|
|
|
|
|
1; |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
__END__ |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
=pod |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
=head1 NAME |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
Bio::MUST::Core::Roles::Listable - Listable Moose role for objects with implied id lists |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
=head1 VERSION |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
version 0.212530 |
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
=head1 SYNOPSIS |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
# TODO |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
=head1 DESCRIPTION |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
# TODO |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
=head1 METHODS |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
=head2 new_lookup |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
=head2 std_list |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
=head2 alphabetical_list |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
=head2 complete_seq_list |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
=head2 std_mapper |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
=head2 acc_mapper |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
=head2 len_mapper |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
=head2 regex_mapper |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
=head2 org_mapper_from_long_ids |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
=head2 org_mapper_from_abbr_ids |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
=head2 store_nbs |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
=head1 AUTHOR |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
Denis BAURAIN <denis.baurain@uliege.be> |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
This software is copyright (c) 2013 by University of Liege / Unit of Eukaryotic Phylogenomics / Denis BAURAIN. |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
275
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
=cut |