| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Bio::MUST::Core::Roles::Listable; |
|
2
|
|
|
|
|
|
|
# ABSTRACT: Listable Moose role for objects with implied id lists |
|
3
|
|
|
|
|
|
|
$Bio::MUST::Core::Roles::Listable::VERSION = '0.212670'; |
|
4
|
17
|
|
|
17
|
|
10851
|
use Moose::Role; |
|
|
17
|
|
|
|
|
43
|
|
|
|
17
|
|
|
|
|
139
|
|
|
5
|
|
|
|
|
|
|
|
|
6
|
17
|
|
|
17
|
|
99023
|
use autodie; |
|
|
17
|
|
|
|
|
54
|
|
|
|
17
|
|
|
|
|
133
|
|
|
7
|
17
|
|
|
17
|
|
92684
|
use feature qw(say); |
|
|
17
|
|
|
|
|
45
|
|
|
|
17
|
|
|
|
|
1374
|
|
|
8
|
|
|
|
|
|
|
|
|
9
|
17
|
|
|
17
|
|
128
|
use Carp; |
|
|
17
|
|
|
|
|
38
|
|
|
|
17
|
|
|
|
|
1390
|
|
|
10
|
17
|
|
|
17
|
|
121
|
use Const::Fast; |
|
|
17
|
|
|
|
|
43
|
|
|
|
17
|
|
|
|
|
180
|
|
|
11
|
17
|
|
|
17
|
|
11053
|
use Date::Format; |
|
|
17
|
|
|
|
|
126871
|
|
|
|
17
|
|
|
|
|
1527
|
|
|
12
|
17
|
|
|
17
|
|
162
|
use List::AllUtils; |
|
|
17
|
|
|
|
|
38
|
|
|
|
17
|
|
|
|
|
758
|
|
|
13
|
17
|
|
|
17
|
|
116
|
use POSIX qw(ceil floor); |
|
|
17
|
|
|
|
|
43
|
|
|
|
17
|
|
|
|
|
159
|
|
|
14
|
|
|
|
|
|
|
|
|
15
|
17
|
|
|
17
|
|
1385
|
use Bio::MUST::Core::Types; |
|
|
17
|
|
|
|
|
38
|
|
|
|
17
|
|
|
|
|
495
|
|
|
16
|
17
|
|
|
17
|
|
97
|
use Bio::MUST::Core::Constants qw(:seqids); |
|
|
17
|
|
|
|
|
40
|
|
|
|
17
|
|
|
|
|
27322
|
|
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
requires 'all_seq_ids'; |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
# IdList factory methods |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
# alias for std_list emphasizing its use as a lookup |
|
25
|
|
|
|
|
|
|
sub new_lookup { |
|
26
|
3
|
|
|
3
|
1
|
22
|
return shift->_list_from_seq_ids(0); |
|
27
|
|
|
|
|
|
|
} |
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
sub std_list { |
|
31
|
1
|
|
|
1
|
1
|
15
|
return shift->_list_from_seq_ids(0); |
|
32
|
|
|
|
|
|
|
} |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
sub alphabetical_list { |
|
36
|
1
|
|
|
1
|
1
|
10
|
return shift->_list_from_seq_ids(1); |
|
37
|
|
|
|
|
|
|
} |
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
sub _list_from_seq_ids { |
|
40
|
5
|
|
|
5
|
|
16
|
my $self = shift; |
|
41
|
5
|
|
|
|
|
10
|
my $sort = shift; |
|
42
|
|
|
|
|
|
|
|
|
43
|
5
|
|
|
|
|
25
|
my @ids = map { $_->full_id } $self->all_seq_ids; |
|
|
290
|
|
|
|
|
7338
|
|
|
44
|
5
|
100
|
|
|
|
43
|
@ids = sort @ids if $sort; # optionally sort list |
|
45
|
5
|
|
|
|
|
202
|
return Bio::MUST::Core::IdList->new( ids => \@ids ); |
|
46
|
|
|
|
|
|
|
} |
|
47
|
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
around qw(complete_seq_list len_mapper) => sub { |
|
49
|
|
|
|
|
|
|
my $method = shift; |
|
50
|
|
|
|
|
|
|
my $self = shift; |
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
# ensure that seqs are available (e.g., the object is an Ali) |
|
53
|
|
|
|
|
|
|
unless ( $self->can('all_seqs') ) { |
|
54
|
|
|
|
|
|
|
carp '[BMC] Warning: cannot proceed without seqs; returning undef!'; |
|
55
|
|
|
|
|
|
|
return; |
|
56
|
|
|
|
|
|
|
} |
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
return $self->$method(@_); |
|
59
|
|
|
|
|
|
|
}; |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
sub complete_seq_list { |
|
63
|
5
|
|
|
5
|
1
|
13
|
my $self = shift; |
|
64
|
5
|
|
|
|
|
12
|
my $min_res = shift; |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
# get (non-missing char) lengths of all seqs and record max_len |
|
67
|
5
|
|
|
|
|
217
|
my @lengths = map { $_->nomiss_seq_len } $self->all_seqs; |
|
|
50
|
|
|
|
|
225
|
|
|
68
|
5
|
|
|
|
|
39
|
my $max_len = List::AllUtils::max @lengths; |
|
69
|
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
# convert fractional min_res to conservative integer (if needed) |
|
71
|
5
|
100
|
66
|
|
|
64
|
$min_res = ceil($min_res * $max_len) |
|
72
|
|
|
|
|
|
|
if 0 < $min_res && $min_res < 1; |
|
73
|
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
# filter out seqs with less than min_res non-missing chars |
|
75
|
5
|
|
|
|
|
30
|
my @ids = map { $_->full_id } $self->all_seq_ids; |
|
|
50
|
|
|
|
|
1279
|
|
|
76
|
5
|
|
|
|
|
30
|
my @indices = grep { $lengths[$_] >= $min_res } 0..$#ids; |
|
|
50
|
|
|
|
|
98
|
|
|
77
|
|
|
|
|
|
|
|
|
78
|
5
|
|
|
|
|
224
|
return Bio::MUST::Core::IdList->new( ids => [ @ids[@indices] ] ); |
|
79
|
|
|
|
|
|
|
} |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
# IdMapper factory methods |
|
83
|
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
sub std_mapper { |
|
86
|
12
|
|
|
12
|
1
|
58
|
my $self = shift; |
|
87
|
12
|
|
100
|
|
|
75
|
my $prefix = shift // 'seq'; |
|
88
|
|
|
|
|
|
|
|
|
89
|
12
|
|
|
|
|
65
|
my @seq_ids = $self->all_seq_ids; |
|
90
|
|
|
|
|
|
|
return Bio::MUST::Core::IdMapper->new( |
|
91
|
73
|
|
|
|
|
1903
|
long_ids => [ map { $_->full_id } @seq_ids ], # list context |
|
92
|
12
|
|
|
|
|
52
|
abbr_ids => [ map { $prefix . $_ } 1..@seq_ids ], # scalar context |
|
|
73
|
|
|
|
|
613
|
|
|
93
|
|
|
|
|
|
|
); |
|
94
|
|
|
|
|
|
|
} |
|
95
|
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
sub acc_mapper { |
|
98
|
1
|
|
|
1
|
1
|
2
|
my $self = shift; |
|
99
|
1
|
|
50
|
|
|
7
|
my $prefix = shift // q{}; |
|
100
|
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
# Note: this mapper could fail with non-GenBank Seqs |
|
102
|
1
|
|
|
|
|
17
|
my @seq_ids = $self->all_seq_ids; |
|
103
|
|
|
|
|
|
|
return Bio::MUST::Core::IdMapper->new( |
|
104
|
10
|
|
|
|
|
268
|
long_ids => [ map { $_->full_id } @seq_ids ], |
|
105
|
1
|
|
|
|
|
5
|
abbr_ids => [ map { $prefix . $_->accession } @seq_ids ], |
|
|
10
|
|
|
|
|
258
|
|
|
106
|
|
|
|
|
|
|
); |
|
107
|
|
|
|
|
|
|
} |
|
108
|
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
sub len_mapper { |
|
111
|
1
|
|
|
1
|
1
|
2
|
my $self = shift; |
|
112
|
|
|
|
|
|
|
|
|
113
|
1
|
|
|
|
|
4
|
my @seq_ids = $self->all_seq_ids; |
|
114
|
1
|
|
|
|
|
49
|
my @lengths = map { $_->nomiss_seq_len } $self->all_seqs; |
|
|
10
|
|
|
|
|
22
|
|
|
115
|
|
|
|
|
|
|
return Bio::MUST::Core::IdMapper->new( |
|
116
|
10
|
|
|
|
|
265
|
long_ids => [ map { $_->full_id . '@' . shift @lengths } @seq_ids ], |
|
117
|
1
|
|
|
|
|
4
|
abbr_ids => [ map { $_->full_id } @seq_ids ], |
|
|
10
|
|
|
|
|
249
|
|
|
118
|
|
|
|
|
|
|
); |
|
119
|
|
|
|
|
|
|
} |
|
120
|
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
sub regex_mapper { ## no critic (RequireArgUnpacking) |
|
123
|
6
|
|
|
6
|
1
|
33
|
my $self = shift; |
|
124
|
|
|
|
|
|
|
# my $prefix = shift // q{}; # note the currying below |
|
125
|
|
|
|
|
|
|
# my $regex = shift // $DEF_ID; |
|
126
|
|
|
|
|
|
|
|
|
127
|
6
|
|
|
|
|
24
|
my @long_ids = map { $_->full_id } $self->all_seq_ids; |
|
|
49
|
|
|
|
|
1246
|
|
|
128
|
6
|
|
|
|
|
156
|
my @abbr_ids = map { $_->abbr_with_regex(@_) } $self->all_seq_ids; |
|
|
49
|
|
|
|
|
139
|
|
|
129
|
|
|
|
|
|
|
|
|
130
|
6
|
|
|
|
|
181
|
return Bio::MUST::Core::IdMapper->new( |
|
131
|
|
|
|
|
|
|
long_ids => \@long_ids, |
|
132
|
|
|
|
|
|
|
abbr_ids => \@abbr_ids |
|
133
|
|
|
|
|
|
|
); |
|
134
|
|
|
|
|
|
|
} |
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
sub org_mapper_from_long_ids { |
|
138
|
1
|
|
|
1
|
1
|
9
|
my $self = shift; |
|
139
|
1
|
|
|
|
|
2
|
my $mapper = shift; # mapper long_org => abbr_org |
|
140
|
|
|
|
|
|
|
|
|
141
|
1
|
|
|
|
|
3
|
my @long_ids; |
|
142
|
|
|
|
|
|
|
my @abbr_ids; |
|
143
|
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
ID: |
|
145
|
1
|
|
|
|
|
6
|
for my $seq_id ( $self->all_seq_ids ) { |
|
146
|
8
|
50
|
|
|
|
217
|
next ID if $seq_id->is_foreign; |
|
147
|
|
|
|
|
|
|
|
|
148
|
8
|
|
|
|
|
231
|
push @long_ids, $seq_id->full_id; |
|
149
|
8
|
|
|
|
|
53
|
push @abbr_ids, $mapper->abbr_id_for( $seq_id->full_org ) |
|
150
|
|
|
|
|
|
|
. '|' . $seq_id->accession; |
|
151
|
|
|
|
|
|
|
} |
|
152
|
|
|
|
|
|
|
|
|
153
|
1
|
|
|
|
|
29
|
return Bio::MUST::Core::IdMapper->new( |
|
154
|
|
|
|
|
|
|
long_ids => \@long_ids, |
|
155
|
|
|
|
|
|
|
abbr_ids => \@abbr_ids |
|
156
|
|
|
|
|
|
|
); |
|
157
|
|
|
|
|
|
|
} |
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
sub org_mapper_from_abbr_ids { |
|
161
|
1
|
|
|
1
|
1
|
11
|
my $self = shift; |
|
162
|
1
|
|
|
|
|
3
|
my $mapper = shift; # mapper long_org => abbr_org |
|
163
|
|
|
|
|
|
|
|
|
164
|
1
|
|
|
|
|
3
|
my @long_ids; |
|
165
|
|
|
|
|
|
|
my @abbr_ids; |
|
166
|
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
ID: |
|
168
|
1
|
|
|
|
|
5
|
for my $seq_id ( $self->all_seq_ids ) { |
|
169
|
8
|
|
|
|
|
214
|
my $abbr_id = $seq_id->full_id; |
|
170
|
8
|
|
|
|
|
37
|
my ($abbr_org, $accession) = split /\|/xms, $abbr_id, 2; |
|
171
|
8
|
50
|
|
|
|
22
|
next ID unless $abbr_org; |
|
172
|
|
|
|
|
|
|
|
|
173
|
8
|
|
|
|
|
282
|
push @long_ids, $mapper->long_id_for($abbr_org) . '@' . $accession; |
|
174
|
8
|
|
|
|
|
23
|
push @abbr_ids, $abbr_id; |
|
175
|
|
|
|
|
|
|
} |
|
176
|
|
|
|
|
|
|
|
|
177
|
1
|
|
|
|
|
28
|
return Bio::MUST::Core::IdMapper->new( |
|
178
|
|
|
|
|
|
|
long_ids => \@long_ids, |
|
179
|
|
|
|
|
|
|
abbr_ids => \@abbr_ids |
|
180
|
|
|
|
|
|
|
); |
|
181
|
|
|
|
|
|
|
} |
|
182
|
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
const my $NBS_ID_LEN => 79; |
|
185
|
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
sub store_nbs { |
|
187
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
|
188
|
0
|
|
|
|
|
|
my $outfile = shift; |
|
189
|
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
# #Sequences extracted from c111_78.ali of the 5 May 2009 at 11 hours 40 |
|
191
|
|
|
|
|
|
|
# #File c111_78.nbs created on Tuesday 5 May 2009 at 11 hours 40 |
|
192
|
|
|
|
|
|
|
# #184 positions remain on the 184 aligned positions |
|
193
|
|
|
|
|
|
|
# #life.col,life.nom |
|
194
|
|
|
|
|
|
|
# #Here is the list of the 78 species used: |
|
195
|
|
|
|
|
|
|
# Aciduliprofundum_boonei_T469___________________________________________________ Aeropyrum_pernix_K1____________________________________________________________ |
|
196
|
|
|
|
|
|
|
# Archaeoglobus_fulgidus_DSM_4304________________________________________________ Archaeoglobus_profundus_Av18__DSM_5631_________________________________________ |
|
197
|
|
|
|
|
|
|
# ... |
|
198
|
|
|
|
|
|
|
|
|
199
|
0
|
|
|
|
|
|
my @ids = $self->all_seq_ids; |
|
200
|
|
|
|
|
|
|
|
|
201
|
0
|
|
|
|
|
|
open my $out, '>', $outfile; |
|
202
|
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
# print minimum header |
|
204
|
0
|
|
|
|
|
|
print {$out} "#File $outfile created on " . ctime(time); |
|
|
0
|
|
|
|
|
|
|
|
205
|
0
|
|
|
|
|
|
say {$out} '#Here is the list of the ' . scalar @ids . ' species used:'; |
|
|
0
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
# print padded ids on two columns |
|
208
|
0
|
|
|
|
|
|
for my $i (0..$#ids) { |
|
209
|
0
|
|
|
|
|
|
my $id = $ids[$i]->foreign_id; |
|
210
|
0
|
|
|
|
|
|
my $pad_id = $id . '_' x ($NBS_ID_LEN - length $id); |
|
211
|
0
|
0
|
|
|
|
|
my $term = $i % 2 ? "\n" : q{ }; |
|
212
|
0
|
|
|
|
|
|
print {$out} $pad_id . $term; |
|
|
0
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
} |
|
214
|
0
|
|
|
|
|
|
say {$out} q{}; |
|
|
0
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
|
|
216
|
0
|
|
|
|
|
|
return; |
|
217
|
|
|
|
|
|
|
} |
|
218
|
|
|
|
|
|
|
|
|
219
|
17
|
|
|
17
|
|
161
|
no Moose::Role; |
|
|
17
|
|
|
|
|
43
|
|
|
|
17
|
|
|
|
|
162
|
|
|
220
|
|
|
|
|
|
|
1; |
|
221
|
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
__END__ |
|
223
|
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
=pod |
|
225
|
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
=head1 NAME |
|
227
|
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
Bio::MUST::Core::Roles::Listable - Listable Moose role for objects with implied id lists |
|
229
|
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
=head1 VERSION |
|
231
|
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
version 0.212670 |
|
233
|
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
235
|
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
# TODO |
|
237
|
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
239
|
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
# TODO |
|
241
|
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
=head1 METHODS |
|
243
|
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
=head2 new_lookup |
|
245
|
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
=head2 std_list |
|
247
|
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
=head2 alphabetical_list |
|
249
|
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
=head2 complete_seq_list |
|
251
|
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
=head2 std_mapper |
|
253
|
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
=head2 acc_mapper |
|
255
|
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
=head2 len_mapper |
|
257
|
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
=head2 regex_mapper |
|
259
|
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
=head2 org_mapper_from_long_ids |
|
261
|
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
=head2 org_mapper_from_abbr_ids |
|
263
|
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
=head2 store_nbs |
|
265
|
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
=head1 AUTHOR |
|
267
|
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
Denis BAURAIN <denis.baurain@uliege.be> |
|
269
|
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
|
271
|
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
This software is copyright (c) 2013 by University of Liege / Unit of Eukaryotic Phylogenomics / Denis BAURAIN. |
|
273
|
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
|
275
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
|
276
|
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
=cut |