| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Bio::MUST::Core::Ali::Temporary; |
|
2
|
|
|
|
|
|
|
# ABSTRACT: Thin wrapper for a temporary mapped Ali written on disk |
|
3
|
|
|
|
|
|
|
$Bio::MUST::Core::Ali::Temporary::VERSION = '0.212530'; |
|
4
|
17
|
|
|
17
|
|
134
|
use Moose; |
|
|
17
|
|
|
|
|
45
|
|
|
|
17
|
|
|
|
|
142
|
|
|
5
|
17
|
|
|
17
|
|
121198
|
use namespace::autoclean; |
|
|
17
|
|
|
|
|
44
|
|
|
|
17
|
|
|
|
|
172
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
17
|
|
|
17
|
|
1789
|
use autodie; |
|
|
17
|
|
|
|
|
42
|
|
|
|
17
|
|
|
|
|
174
|
|
|
8
|
17
|
|
|
17
|
|
94144
|
use feature qw(say); |
|
|
17
|
|
|
|
|
59
|
|
|
|
17
|
|
|
|
|
1644
|
|
|
9
|
|
|
|
|
|
|
|
|
10
|
17
|
|
|
17
|
|
148
|
use Path::Class qw(file); |
|
|
17
|
|
|
|
|
40
|
|
|
|
17
|
|
|
|
|
1320
|
|
|
11
|
|
|
|
|
|
|
|
|
12
|
17
|
|
|
17
|
|
134
|
use Bio::MUST::Core::Types; |
|
|
17
|
|
|
|
|
37
|
|
|
|
17
|
|
|
|
|
7453
|
|
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
# Note: tried to implement it as a subclass of Bio::MUST::Core::Ali but this |
|
15
|
|
|
|
|
|
|
# led to issues: (1) coercions became a nightmare and (2) the temp_fasta was |
|
16
|
|
|
|
|
|
|
# written as soon as the Ali was created and thus was empty |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
# TODO: allow specifying the directory for the temp file (File::Temp tmpdir) |
|
19
|
|
|
|
|
|
|
# TODO: allow specifying a template for the temp file name? |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
# ATTRIBUTES |
|
22
|
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
has 'seqs' => ( |
|
25
|
|
|
|
|
|
|
is => 'ro', |
|
26
|
|
|
|
|
|
|
isa => 'Bio::MUST::Core::Ali', |
|
27
|
|
|
|
|
|
|
required => 1, |
|
28
|
|
|
|
|
|
|
coerce => 1, |
|
29
|
|
|
|
|
|
|
handles => [ |
|
30
|
|
|
|
|
|
|
qw(count_comments all_comments get_comment |
|
31
|
|
|
|
|
|
|
guessing all_seq_ids has_uniq_ids is_protein is_aligned |
|
32
|
|
|
|
|
|
|
get_seq get_seq_with_id first_seq all_seqs filter_seqs count_seqs |
|
33
|
|
|
|
|
|
|
gapmiss_regex |
|
34
|
|
|
|
|
|
|
) |
|
35
|
|
|
|
|
|
|
], # comment-related methods needed by IdList |
|
36
|
|
|
|
|
|
|
); |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
has 'args' => ( |
|
40
|
|
|
|
|
|
|
is => 'ro', |
|
41
|
|
|
|
|
|
|
isa => 'HashRef', |
|
42
|
|
|
|
|
|
|
builder => '_build_args', |
|
43
|
|
|
|
|
|
|
); |
|
44
|
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
has 'file' => ( |
|
47
|
|
|
|
|
|
|
is => 'ro', |
|
48
|
|
|
|
|
|
|
isa => 'Bio::MUST::Core::Types::File', |
|
49
|
|
|
|
|
|
|
init_arg => undef, |
|
50
|
|
|
|
|
|
|
coerce => 1, |
|
51
|
|
|
|
|
|
|
writer => '_set_file', |
|
52
|
|
|
|
|
|
|
handles => { |
|
53
|
|
|
|
|
|
|
remove => 'remove', |
|
54
|
|
|
|
|
|
|
filename => 'stringify', |
|
55
|
|
|
|
|
|
|
}, |
|
56
|
|
|
|
|
|
|
); |
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
has 'mapper' => ( |
|
60
|
|
|
|
|
|
|
is => 'ro', |
|
61
|
|
|
|
|
|
|
isa => 'Bio::MUST::Core::IdMapper', |
|
62
|
|
|
|
|
|
|
init_arg => undef, |
|
63
|
|
|
|
|
|
|
writer => '_set_mapper', |
|
64
|
|
|
|
|
|
|
handles => [ qw(all_long_ids all_abbr_ids long_id_for abbr_id_for) ], |
|
65
|
|
|
|
|
|
|
); |
|
66
|
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
with 'Bio::MUST::Core::Roles::Aliable'; |
|
68
|
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
## no critic (ProhibitUnusedPrivateSubroutines) |
|
70
|
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
sub _build_args { |
|
72
|
3
|
|
|
3
|
|
111
|
return { clean => 1, degap => 1 }; |
|
73
|
|
|
|
|
|
|
} |
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
## use critic |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
sub BUILD { |
|
78
|
5
|
|
|
5
|
0
|
13
|
my $self = shift; |
|
79
|
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
# remove persistent key (if any) from args before temp_fasta call |
|
81
|
|
|
|
|
|
|
# TODO: work out whether this is really needed |
|
82
|
5
|
|
|
|
|
10
|
my %args = %{ $self->args }; |
|
|
5
|
|
|
|
|
159
|
|
|
83
|
5
|
|
|
|
|
13
|
delete $args{persistent}; |
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
# create temporary FASTA file and setup associated IdMapper |
|
86
|
5
|
|
|
|
|
191
|
my $ali = $self->seqs; |
|
87
|
5
|
|
|
|
|
21
|
my ($filename, $mapper) = $ali->temp_fasta( \%args ); |
|
88
|
5
|
|
|
|
|
531
|
$self->_set_file($filename); |
|
89
|
5
|
|
|
|
|
193
|
$self->_set_mapper($mapper); |
|
90
|
|
|
|
|
|
|
|
|
91
|
5
|
|
|
|
|
157
|
return; |
|
92
|
|
|
|
|
|
|
} |
|
93
|
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
sub DEMOLISH { |
|
95
|
5
|
|
|
5
|
0
|
12
|
my $self = shift; |
|
96
|
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
$self->remove |
|
98
|
5
|
100
|
|
|
|
159
|
unless $self->args->{persistent}; |
|
99
|
|
|
|
|
|
|
|
|
100
|
5
|
|
|
|
|
715
|
return; |
|
101
|
|
|
|
|
|
|
} |
|
102
|
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
# ACCESSORS |
|
104
|
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
# MISC METHODS |
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
sub type { |
|
110
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
|
111
|
0
|
0
|
|
|
|
|
return $self->is_protein ? 'prot' : 'nucl'; |
|
112
|
|
|
|
|
|
|
} |
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
__PACKAGE__->meta->make_immutable; |
|
115
|
|
|
|
|
|
|
1; |
|
116
|
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
__END__ |
|
118
|
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
=pod |
|
120
|
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
=head1 NAME |
|
122
|
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
Bio::MUST::Core::Ali::Temporary - Thin wrapper for a temporary mapped Ali written on disk |
|
124
|
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
=head1 VERSION |
|
126
|
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
version 0.212530 |
|
128
|
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
130
|
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
#!/usr/bin/env perl |
|
132
|
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
use Modern::Perl '2011'; |
|
134
|
|
|
|
|
|
|
# same as: |
|
135
|
|
|
|
|
|
|
# use strict; |
|
136
|
|
|
|
|
|
|
# use warnings; |
|
137
|
|
|
|
|
|
|
# use feature qw(say); |
|
138
|
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
use Bio::MUST::Core; |
|
140
|
|
|
|
|
|
|
use aliased 'Bio::MUST::Core::Ali::Temporary'; |
|
141
|
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
# build Ali::Temporary object from existing ALI file |
|
143
|
|
|
|
|
|
|
my $temp_db = Temporary->new( seqs => 'database.ali' ); |
|
144
|
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
# get properties |
|
146
|
|
|
|
|
|
|
my $db = $temp_db->filename; |
|
147
|
|
|
|
|
|
|
my $dbtype = $temp_db->type; |
|
148
|
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
# pass it to external program |
|
150
|
|
|
|
|
|
|
system("makeblastdb -in $db -dbtype $dbtype"); |
|
151
|
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
# alternative constructor call |
|
153
|
|
|
|
|
|
|
# build Ali::Temporary object from existing Ali object |
|
154
|
|
|
|
|
|
|
use aliased 'Bio::MUST::Core::Ali'; |
|
155
|
|
|
|
|
|
|
my $ali = Ali->load('queries.ali'); |
|
156
|
|
|
|
|
|
|
my $temp_qu = Temporary->new( seqs => $ali ); |
|
157
|
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
# pass it to external program |
|
159
|
|
|
|
|
|
|
use File::Temp; |
|
160
|
|
|
|
|
|
|
my $query = $temp_qu->filename; |
|
161
|
|
|
|
|
|
|
my $out = File::Temp->new( UNLINK => 0, SUFFIX => '.blastp' ); |
|
162
|
|
|
|
|
|
|
system("blastp -query $query -db $db -out $out"); |
|
163
|
|
|
|
|
|
|
say "report: $out"; |
|
164
|
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
# later... when parsing the BLAST report |
|
166
|
|
|
|
|
|
|
# let's say $id is a BLAST hit in database.ali |
|
167
|
|
|
|
|
|
|
my $id = 'seq2'; |
|
168
|
|
|
|
|
|
|
my $long_id = $temp_db->long_id_for($id); |
|
169
|
|
|
|
|
|
|
say "hit id: $long_id"; |
|
170
|
|
|
|
|
|
|
# ... |
|
171
|
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
# more alternative constructor calls |
|
173
|
|
|
|
|
|
|
# build Ali::Temporary object from list of Seq objects |
|
174
|
|
|
|
|
|
|
my @seqs = $ali->filter_seqs( sub { $_->seq_len >= 500 } ); |
|
175
|
|
|
|
|
|
|
my $temp_ls = Temporary->new( seqs => \@seqs ); |
|
176
|
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
# build Ali::Temporary object preserving gaps in Seq objects |
|
178
|
|
|
|
|
|
|
# (and persistent associated FASTA file) |
|
179
|
|
|
|
|
|
|
my $temp_gp = Temporary->new( |
|
180
|
|
|
|
|
|
|
seqs => \@seqs, |
|
181
|
|
|
|
|
|
|
args => { degap => 0, persistent => 1 } |
|
182
|
|
|
|
|
|
|
); |
|
183
|
|
|
|
|
|
|
my $filename = $temp_gp->filename; |
|
184
|
|
|
|
|
|
|
# later... |
|
185
|
|
|
|
|
|
|
unlink $filename; |
|
186
|
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
188
|
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
This module implements a class representing a temporary FASTA file where |
|
190
|
|
|
|
|
|
|
sequence ids are automatically abbreviated (C<seq1>, C<seq2>...) for maximum |
|
191
|
|
|
|
|
|
|
compatibility with external programs. To this end, it combines an internal |
|
192
|
|
|
|
|
|
|
L<Bio::MUST::Core::Ali> object and a L<Bio::MUST::Core::IdMapper> object. |
|
193
|
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
An C<Ali::Temporary> can be built from an existing ALI (or FASTA) file or |
|
195
|
|
|
|
|
|
|
on-the-fly from a list (ArrayRef) of L<Bio::MUST::Core::Seq> objects (see the |
|
196
|
|
|
|
|
|
|
SYNOPSIS for examples). |
|
197
|
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
Its sequences can be aligned or not but by default sequences are degapped |
|
199
|
|
|
|
|
|
|
before writing the associated temporary FASTA file. If gaps are to be |
|
200
|
|
|
|
|
|
|
preserved, this behavior can be altered via the optional C<args> attribute. |
|
201
|
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
=head1 ATTRIBUTES |
|
203
|
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
=head2 seqs |
|
205
|
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
L<Bio::MUST::Core::Ali> object (required) |
|
207
|
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
This required attribute contains the L<Bio::MUST::Core::Seq> objects that are |
|
209
|
|
|
|
|
|
|
written in the associated temporary FASTA file. It can be specified either as |
|
210
|
|
|
|
|
|
|
a path to an ALI/FASTA file or as an C<Ali> object or as an ArrayRef of C<Seq> |
|
211
|
|
|
|
|
|
|
objects (see the SYNOPSIS for examples). |
|
212
|
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
For now, it provides the following methods: C<count_comments>, |
|
214
|
|
|
|
|
|
|
C<all_comments>, C<get_comment>, C<guessing>, C<all_seq_ids>, C<has_uniq_ids>, |
|
215
|
|
|
|
|
|
|
C<is_protein>, C<is_aligned>, C<get_seq>, C<get_seq_with_id>, C<first_seq>, |
|
216
|
|
|
|
|
|
|
C<all_seqs>, C<filter_seqs>, C<count_seqs> and C<gapmiss_regex> (see L<Bio::MUST::Core::Ali>). |
|
217
|
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
=head2 args |
|
219
|
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
HashRef (optional) |
|
221
|
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
When specified, this optional attribute is passed to the C<temp_fasta> method |
|
223
|
|
|
|
|
|
|
of the internal C<Ali> object. Its purpose is to allow the fine-tuning of the |
|
224
|
|
|
|
|
|
|
format of the associated temporary FASTA file. |
|
225
|
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
By default, its content is C<< clean => 1 >> and C<< degap => 1 >>, so as to |
|
227
|
|
|
|
|
|
|
generate a FASTA file of degapped sequences where ambiguous and missing states |
|
228
|
|
|
|
|
|
|
are replaced by C<X>. |
|
229
|
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
Additionally, if you want to keep your temporary files around for debugging |
|
231
|
|
|
|
|
|
|
purposes, you can pass the option C<< persistent => 1 >>. This will disable the |
|
232
|
|
|
|
|
|
|
autoremoval of the file on object destruction. |
|
233
|
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
=head2 file |
|
235
|
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
L<Path::Class::File> object (auto) |
|
237
|
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
This attribute is automatically initialized with the path of the associated |
|
239
|
|
|
|
|
|
|
temporary FASTA file. Thus, it cannot be user-specified. |
|
240
|
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
It provides the following methods: C<remove> and C<filename> (see below). |
|
242
|
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
=head2 mapper |
|
244
|
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
L<Bio::MUST::Core::IdMapper> object (auto) |
|
246
|
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
This attribute is automatically initialized with the mapper associating the |
|
248
|
|
|
|
|
|
|
long ids of the internal C<Ali> object to the abbreviated ids used in the |
|
249
|
|
|
|
|
|
|
associated temporary FASTA file. Thus, it cannot be user-specified. |
|
250
|
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
It provides the following methods: C<all_long_ids>, C<all_abbr_ids>, |
|
252
|
|
|
|
|
|
|
C<long_id_for> and C<abbr_id_for> (see L<Bio::MUST::Core::IdMapper>). |
|
253
|
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
=head1 ACCESSORS |
|
255
|
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
=head2 filename |
|
257
|
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
Returns the stringified filename of the associated temporary FASTA file. |
|
259
|
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
This method does not accept any arguments. |
|
261
|
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
=head2 type |
|
263
|
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
Returns the type of the sequences in the internal C<Ali> object using BLAST |
|
265
|
|
|
|
|
|
|
denomination (C<prot> or C<nucl>). See L<Bio::MUST::Core::Seq::is_protein> for |
|
266
|
|
|
|
|
|
|
the exact test performed. |
|
267
|
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
This method does not accept any arguments. |
|
269
|
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
=head1 MISC METHODS |
|
271
|
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
=head2 remove |
|
273
|
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
Remove (unlink) the associated temporary FASTA file. |
|
275
|
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
Since this method is in principle automatically invoked on object destruction, |
|
277
|
|
|
|
|
|
|
users should not need it. Note that C<persistent> temporary files (see object |
|
278
|
|
|
|
|
|
|
constructor) have to be removed manually, which requires getting and storing |
|
279
|
|
|
|
|
|
|
their C<filename> before object destruction. |
|
280
|
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
=head1 AUTHOR |
|
282
|
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
Denis BAURAIN <denis.baurain@uliege.be> |
|
284
|
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
|
286
|
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
This software is copyright (c) 2013 by University of Liege / Unit of Eukaryotic Phylogenomics / Denis BAURAIN. |
|
288
|
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
|
290
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
|
291
|
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
=cut |