line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::MUST::Core::Ali::Temporary; |
2
|
|
|
|
|
|
|
# ABSTRACT: Thin wrapper for a temporary mapped Ali written on disk |
3
|
|
|
|
|
|
|
$Bio::MUST::Core::Ali::Temporary::VERSION = '0.212530'; |
4
|
17
|
|
|
17
|
|
134
|
use Moose; |
|
17
|
|
|
|
|
45
|
|
|
17
|
|
|
|
|
142
|
|
5
|
17
|
|
|
17
|
|
121198
|
use namespace::autoclean; |
|
17
|
|
|
|
|
44
|
|
|
17
|
|
|
|
|
172
|
|
6
|
|
|
|
|
|
|
|
7
|
17
|
|
|
17
|
|
1789
|
use autodie; |
|
17
|
|
|
|
|
42
|
|
|
17
|
|
|
|
|
174
|
|
8
|
17
|
|
|
17
|
|
94144
|
use feature qw(say); |
|
17
|
|
|
|
|
59
|
|
|
17
|
|
|
|
|
1644
|
|
9
|
|
|
|
|
|
|
|
10
|
17
|
|
|
17
|
|
148
|
use Path::Class qw(file); |
|
17
|
|
|
|
|
40
|
|
|
17
|
|
|
|
|
1320
|
|
11
|
|
|
|
|
|
|
|
12
|
17
|
|
|
17
|
|
134
|
use Bio::MUST::Core::Types; |
|
17
|
|
|
|
|
37
|
|
|
17
|
|
|
|
|
7453
|
|
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
# Note: tried to implement it as a subclass of Bio::MUST::Core::Ali but this |
15
|
|
|
|
|
|
|
# led to issues: (1) coercions became a nightmare and (2) the temp_fasta was |
16
|
|
|
|
|
|
|
# written as soon as the Ali was created and thus was empty |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
# TODO: allow specifying the directory for the temp file (File::Temp tmpdir) |
19
|
|
|
|
|
|
|
# TODO: allow specifying a template for the temp file name? |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
# ATTRIBUTES |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
# seqs: the wrapped Bio::MUST::Core::Ali object (read-only, required).
# Coercion is enabled, so the constructor accepts whatever the Ali type
# coercions allow. The methods listed under 'handles' are delegated
# unchanged to the Ali object.
has 'seqs' => ( |
25
|
|
|
|
|
|
|
is => 'ro', |
26
|
|
|
|
|
|
|
isa => 'Bio::MUST::Core::Ali', |
27
|
|
|
|
|
|
|
required => 1, |
28
|
|
|
|
|
|
|
coerce => 1, |
29
|
|
|
|
|
|
|
handles => [ |
30
|
|
|
|
|
|
|
qw(count_comments all_comments get_comment |
31
|
|
|
|
|
|
|
guessing all_seq_ids has_uniq_ids is_protein is_aligned |
32
|
|
|
|
|
|
|
get_seq get_seq_with_id first_seq all_seqs filter_seqs count_seqs |
33
|
|
|
|
|
|
|
gapmiss_regex |
34
|
|
|
|
|
|
|
) |
35
|
|
|
|
|
|
|
], # comment-related methods needed by IdList |
36
|
|
|
|
|
|
|
); |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
# args: read-only HashRef of options; BUILD forwards a copy of it (minus
# the 'persistent' key) to temp_fasta, and DEMOLISH reads 'persistent'.
# When the caller does not supply it, _build_args provides the defaults.
# NOTE(review): a caller-supplied hash presumably replaces the defaults
# entirely (standard Moose builder semantics) rather than being merged.
has 'args' => ( |
40
|
|
|
|
|
|
|
is => 'ro', |
41
|
|
|
|
|
|
|
isa => 'HashRef', |
42
|
|
|
|
|
|
|
builder => '_build_args', |
43
|
|
|
|
|
|
|
); |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
# file: the temporary FASTA file (Bio::MUST::Core::Types::File, coerced).
# It cannot be passed to the constructor (init_arg => undef); BUILD sets
# it through the private writer _set_file. Two methods are delegated to
# the file object: remove (as remove) and stringify (exposed as filename).
has 'file' => ( |
47
|
|
|
|
|
|
|
is => 'ro', |
48
|
|
|
|
|
|
|
isa => 'Bio::MUST::Core::Types::File', |
49
|
|
|
|
|
|
|
init_arg => undef, |
50
|
|
|
|
|
|
|
coerce => 1, |
51
|
|
|
|
|
|
|
writer => '_set_file', |
52
|
|
|
|
|
|
|
handles => { |
53
|
|
|
|
|
|
|
remove => 'remove', |
54
|
|
|
|
|
|
|
filename => 'stringify', |
55
|
|
|
|
|
|
|
}, |
56
|
|
|
|
|
|
|
); |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
# mapper: the Bio::MUST::Core::IdMapper returned by temp_fasta together
# with the file name. It cannot be passed to the constructor
# (init_arg => undef); BUILD sets it through the private writer
# _set_mapper. The four id lookup/listing methods are delegated to it.
has 'mapper' => ( |
60
|
|
|
|
|
|
|
is => 'ro', |
61
|
|
|
|
|
|
|
isa => 'Bio::MUST::Core::IdMapper', |
62
|
|
|
|
|
|
|
init_arg => undef, |
63
|
|
|
|
|
|
|
writer => '_set_mapper', |
64
|
|
|
|
|
|
|
handles => [ qw(all_long_ids all_abbr_ids long_id_for abbr_id_for) ], |
65
|
|
|
|
|
|
|
); |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
# Consume the Aliable role. NOTE(review): it is applied after the
# attribute declarations, presumably because the role requires methods
# that the delegations above generate -- confirm against the role.
with 'Bio::MUST::Core::Roles::Aliable'; |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
## no critic (ProhibitUnusedPrivateSubroutines) |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
# Builder for the 'args' attribute.
# Returns a fresh HashRef with the default options: clean => 1, degap => 1.
sub _build_args { |
72
|
3
|
|
|
3
|
|
111
|
return { clean => 1, degap => 1 }; |
73
|
|
|
|
|
|
|
} |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
## use critic |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
# Moose BUILD hook, run after object construction.
# Works on a shallow copy of 'args' so that deleting the 'persistent' key
# does not alter the attribute itself (DEMOLISH still needs to read it).
# Calls temp_fasta on the wrapped Ali with the remaining options, then
# stores the returned file name and IdMapper via the private writers.
# Returns nothing.
sub BUILD { |
78
|
5
|
|
|
5
|
0
|
13
|
my $self = shift; |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
# remove persistent key (if any) from args before temp_fasta call |
81
|
|
|
|
|
|
|
# TODO: work out whether this is really needed |
82
|
5
|
|
|
|
|
10
|
my %args = %{ $self->args }; |
|
5
|
|
|
|
|
159
|
|
83
|
5
|
|
|
|
|
13
|
delete $args{persistent}; |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
# create temporary FASTA file and setup associated IdMapper |
86
|
5
|
|
|
|
|
191
|
my $ali = $self->seqs; |
87
|
5
|
|
|
|
|
21
|
my ($filename, $mapper) = $ali->temp_fasta( \%args ); |
88
|
5
|
|
|
|
|
531
|
$self->_set_file($filename); |
89
|
5
|
|
|
|
|
193
|
$self->_set_mapper($mapper); |
90
|
|
|
|
|
|
|
|
91
|
5
|
|
|
|
|
157
|
return; |
92
|
|
|
|
|
|
|
} |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
# Moose DEMOLISH hook, run on object destruction.
# Removes the temporary file through the delegated 'remove' method unless
# the 'persistent' key of 'args' is true. Returns nothing.
# NOTE(review): if BUILD died before _set_file was called, 'file' is
# unset and the delegated remove would presumably fail here -- confirm
# whether a guard on the attribute is needed.
sub DEMOLISH { |
95
|
5
|
|
|
5
|
0
|
12
|
my $self = shift; |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
$self->remove |
98
|
5
|
100
|
|
|
|
159
|
unless $self->args->{persistent}; |
99
|
|
|
|
|
|
|
|
100
|
5
|
|
|
|
|
715
|
return; |
101
|
|
|
|
|
|
|
} |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
# ACCESSORS |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
# MISC METHODS |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
# Returns the sequence type as a string: 'prot' when is_protein
# (delegated to the wrapped Ali) is true, 'nucl' otherwise.
# Takes no arguments besides the invocant.
sub type { |
110
|
0
|
|
|
0
|
1
|
|
my $self = shift; |
111
|
0
|
0
|
|
|
|
|
return $self->is_protein ? 'prot' : 'nucl'; |
112
|
|
|
|
|
|
|
} |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
# Finalize the Moose class definition, then end the module with a true value.
__PACKAGE__->meta->make_immutable; |
115
|
|
|
|
|
|
|
1; |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
__END__ |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
=pod |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
=head1 NAME |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
Bio::MUST::Core::Ali::Temporary - Thin wrapper for a temporary mapped Ali written on disk |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
=head1 VERSION |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
version 0.212530 |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
=head1 SYNOPSIS |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
#!/usr/bin/env perl |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
use Modern::Perl '2011'; |
134
|
|
|
|
|
|
|
# same as: |
135
|
|
|
|
|
|
|
# use strict; |
136
|
|
|
|
|
|
|
# use warnings; |
137
|
|
|
|
|
|
|
# use feature qw(say); |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
use Bio::MUST::Core; |
140
|
|
|
|
|
|
|
use aliased 'Bio::MUST::Core::Ali::Temporary'; |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
# build Ali::Temporary object from existing ALI file |
143
|
|
|
|
|
|
|
my $temp_db = Temporary->new( seqs => 'database.ali' ); |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
# get properties |
146
|
|
|
|
|
|
|
my $db = $temp_db->filename; |
147
|
|
|
|
|
|
|
my $dbtype = $temp_db->type; |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
# pass it to external program |
150
|
|
|
|
|
|
|
system("makeblastdb -in $db -dbtype $dbtype"); |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
# alternative constructor call |
153
|
|
|
|
|
|
|
# build Ali::Temporary object from existing Ali object |
154
|
|
|
|
|
|
|
use aliased 'Bio::MUST::Core::Ali'; |
155
|
|
|
|
|
|
|
my $ali = Ali->load('queries.ali'); |
156
|
|
|
|
|
|
|
my $temp_qu = Temporary->new( seqs => $ali ); |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
# pass it to external program |
159
|
|
|
|
|
|
|
use File::Temp; |
160
|
|
|
|
|
|
|
my $query = $temp_qu->filename; |
161
|
|
|
|
|
|
|
my $out = File::Temp->new( UNLINK => 0, SUFFIX => '.blastp' ); |
162
|
|
|
|
|
|
|
system("blastp -query $query -db $db -out $out"); |
163
|
|
|
|
|
|
|
say "report: $out"; |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
# later... when parsing the BLAST report |
166
|
|
|
|
|
|
|
# let's say $id is a BLAST hit in database.ali |
167
|
|
|
|
|
|
|
my $id = 'seq2'; |
168
|
|
|
|
|
|
|
my $long_id = $temp_db->long_id_for($id); |
169
|
|
|
|
|
|
|
say "hit id: $long_id"; |
170
|
|
|
|
|
|
|
# ... |
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
# more alternative constructor calls |
173
|
|
|
|
|
|
|
# build Ali::Temporary object from list of Seq objects |
174
|
|
|
|
|
|
|
my @seqs = $ali->filter_seqs( sub { $_->seq_len >= 500 } ); |
175
|
|
|
|
|
|
|
my $temp_ls = Temporary->new( seqs => \@seqs ); |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
# build Ali::Temporary object preserving gaps in Seq objects |
178
|
|
|
|
|
|
|
# (and persistent associated FASTA file) |
179
|
|
|
|
|
|
|
my $temp_gp = Temporary->new( |
180
|
|
|
|
|
|
|
seqs => \@seqs, |
181
|
|
|
|
|
|
|
args => { degap => 0, persistent => 1 } |
182
|
|
|
|
|
|
|
); |
183
|
|
|
|
|
|
|
my $filename = $temp_gp->filename; |
184
|
|
|
|
|
|
|
# later... |
185
|
|
|
|
|
|
|
unlink $filename; |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
=head1 DESCRIPTION |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
This module implements a class representing a temporary FASTA file where |
190
|
|
|
|
|
|
|
sequence ids are automatically abbreviated (C<seq1>, C<seq2>...) for maximum |
191
|
|
|
|
|
|
|
compatibility with external programs. To this end, it combines an internal |
192
|
|
|
|
|
|
|
L<Bio::MUST::Core::Ali> object and a L<Bio::MUST::Core::IdMapper> object. |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
An C<Ali::Temporary> can be built from an existing ALI (or FASTA) file or |
195
|
|
|
|
|
|
|
on-the-fly from a list (ArrayRef) of L<Bio::MUST::Core::Seq> objects (see the |
196
|
|
|
|
|
|
|
SYNOPSIS for examples). |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
Its sequences can be aligned or not but by default sequences are degapped |
199
|
|
|
|
|
|
|
before writing the associated temporary FASTA file. If gaps are to be |
200
|
|
|
|
|
|
|
preserved, this behavior can be altered via the optional C<args> attribute. |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
=head1 ATTRIBUTES |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
=head2 seqs |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
L<Bio::MUST::Core::Ali> object (required) |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
This required attribute contains the L<Bio::MUST::Core::Seq> objects that are |
209
|
|
|
|
|
|
|
written in the associated temporary FASTA file. It can be specified either as |
210
|
|
|
|
|
|
|
a path to an ALI/FASTA file or as an C<Ali> object or as an ArrayRef of C<Seq> |
211
|
|
|
|
|
|
|
objects (see the SYNOPSIS for examples). |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
For now, it provides the following methods: C<count_comments>, |
214
|
|
|
|
|
|
|
C<all_comments>, C<get_comment>, C<guessing>, C<all_seq_ids>, C<has_uniq_ids>, |
215
|
|
|
|
|
|
|
C<is_protein>, C<is_aligned>, C<get_seq>, C<get_seq_with_id>, C<first_seq>, |
216
|
|
|
|
|
|
|
C<all_seqs>, C<filter_seqs>, C<count_seqs> and C<gapmiss_regex> (see L<Bio::MUST::Core::Ali>). |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
=head2 args |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
HashRef (optional) |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
When specified this optional attribute is passed to the C<temp_fasta> method |
223
|
|
|
|
|
|
|
of the internal C<Ali> object. Its purpose is to allow the fine-tuning of the |
224
|
|
|
|
|
|
|
format of the associated temporary FASTA file. |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
By default, its content is C<< clean => 1 >> and C<< degap => 1 >>, so as to |
227
|
|
|
|
|
|
|
generate a FASTA file of degapped sequences where ambiguous and missing states |
228
|
|
|
|
|
|
|
are replaced by C<X>. |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
Additionally, if you want to keep your temporary files around for debugging |
231
|
|
|
|
|
|
|
purposes, you can pass the option C<< persistent => 1 >>. This will disable the |
232
|
|
|
|
|
|
|
autoremoval of the file on object destruction. |
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
=head2 file |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
L<Path::Class::File> object (auto) |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
This attribute is automatically initialized with the path of the associated |
239
|
|
|
|
|
|
|
temporary FASTA file. Thus, it cannot be user-specified. |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
It provides the following methods: C<remove> and C<filename> (see below). |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
=head2 mapper |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
L<Bio::MUST::Core::IdMapper> object (auto) |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
This attribute is automatically initialized with the mapper associating the |
248
|
|
|
|
|
|
|
long ids of the internal C<Ali> object to the abbreviated ids used in the |
249
|
|
|
|
|
|
|
associated temporary FASTA file. Thus, it cannot be user-specified. |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
It provides the following methods: C<all_long_ids>, C<all_abbr_ids>, |
252
|
|
|
|
|
|
|
C<long_id_for> and C<abbr_id_for> (see L<Bio::MUST::Core::IdMapper>). |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
=head1 ACCESSORS |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
=head2 filename |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
Returns the stringified filename of the associated temporary FASTA file. |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
This method does not accept any arguments. |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
=head2 type |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
Returns the type of the sequences in the internal C<Ali> object using BLAST |
265
|
|
|
|
|
|
|
denomination (C<prot> or C<nucl>). See L<Bio::MUST::Core::Seq/is_protein> for |
266
|
|
|
|
|
|
|
the exact test performed. |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
This method does not accept any arguments. |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
=head1 MISC METHODS |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
=head2 remove |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
Remove (unlink) the associated temporary FASTA file. |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
Since this method is in principle automatically invoked on object destruction, |
277
|
|
|
|
|
|
|
users should not need it. Note that C<persistent> temporary files (see object |
278
|
|
|
|
|
|
|
constructor) have to be removed manually, which requires getting and storing |
279
|
|
|
|
|
|
|
their C<filename> before object destruction. |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
=head1 AUTHOR |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
Denis BAURAIN <denis.baurain@uliege.be> |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
286
|
|
|
|
|
|
|
|
287
|
|
|
|
|
|
|
This software is copyright (c) 2013 by University of Liege / Unit of Eukaryotic Phylogenomics / Denis BAURAIN. |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
290
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
=cut |