File Coverage

blib/lib/Bio/MUST/Core/Ali/Temporary.pm
Criterion Covered Total %
statement 31 33 93.9
branch 2 4 50.0
condition n/a
subroutine 9 10 90.0
pod 1 3 33.3
total 43 50 86.0


line stmt bran cond sub pod time code
1             package Bio::MUST::Core::Ali::Temporary;
2             # ABSTRACT: Thin wrapper for a temporary mapped Ali written on disk
3             $Bio::MUST::Core::Ali::Temporary::VERSION = '0.212670';
4 17     17   130 use Moose;
  17         43  
  17         124  
5 17     17   121485 use namespace::autoclean;
  17         47  
  17         160  
6              
7 17     17   1581 use autodie;
  17         42  
  17         146  
8 17     17   93643 use feature qw(say);
  17         42  
  17         1656  
9              
10 17     17   137 use Path::Class qw(file);
  17         42  
  17         1097  
11              
12 17     17   123 use Bio::MUST::Core::Types;
  17         49  
  17         6960  
13              
14             # Note: tried to implement it as a subclass of Bio::MUST::Core::Ali but this
15             # led to issues: (1) coercions became a nightmare and (2) the temp_fasta was
16             # written as soon as the Ali was created and thus was empty
17              
18             # TODO: allow specifying the directory for the temp file (File::Temp tmpdir)
19             # TODO: allow specifying a template for the temp file name?
20              
21             # ATTRIBUTES
22              
23              
24             has 'seqs' => (
               # Wrapped alignment. Mandatory constructor argument, declared with
               # coercion to Bio::MUST::Core::Ali; the methods listed under
               # 'handles' are delegated to that object.
25             is => 'ro',
26             isa => 'Bio::MUST::Core::Ali',
27             required => 1,
28             coerce => 1,
29             handles => [
30             qw(count_comments all_comments get_comment
31             guessing all_seq_ids has_uniq_ids is_protein is_aligned
32             get_seq get_seq_with_id first_seq all_seqs filter_seqs count_seqs
33             gapmiss_regex
34             )
35             ], # comment-related methods needed by IdList
36             );
37              
38              
39             has 'args' => (
               # Options hash forwarded (minus 'persistent') to temp_fasta in BUILD.
               # When the caller does not pass 'args', _build_args supplies the value.
               # NOTE(review): a caller-supplied hash appears to replace the builder
               # defaults entirely rather than being merged with them -- confirm
               # that this is the intended behaviour.
40             is => 'ro',
41             isa => 'HashRef',
42             builder => '_build_args',
43             );
44              
45              
46             has 'file' => (
               # Temporary FASTA file. Cannot be passed to the constructor
               # (init_arg => undef); BUILD assigns it through the private
               # _set_file writer, with coercion to the File type.
               # Delegations: remove -> remove, filename -> stringify.
47             is => 'ro',
48             isa => 'Bio::MUST::Core::Types::File',
49             init_arg => undef,
50             coerce => 1,
51             writer => '_set_file',
52             handles => {
53             remove => 'remove',
54             filename => 'stringify',
55             },
56             );
57              
58              
59             has 'mapper' => (
               # IdMapper returned by temp_fasta. Cannot be passed to the
               # constructor (init_arg => undef); BUILD assigns it through the
               # private _set_mapper writer. The id-lookup methods listed under
               # 'handles' are delegated to it.
60             is => 'ro',
61             isa => 'Bio::MUST::Core::IdMapper',
62             init_arg => undef,
63             writer => '_set_mapper',
64             handles => [ qw(all_long_ids all_abbr_ids long_id_for abbr_id_for) ],
65             );
66              
67             with 'Bio::MUST::Core::Roles::Aliable';
68              
69             ## no critic (ProhibitUnusedPrivateSubroutines)
70              
71             sub _build_args {
               # Builder for the 'args' attribute: returns a fresh hashref with
               # both 'clean' and 'degap' enabled.
72 3     3   99 return { clean => 1, degap => 1 };
73             }
74              
75             ## use critic
76              
77             sub BUILD {
               # Construction hook: writes the wrapped Ali to a temporary FASTA
               # file through temp_fasta, then stores the returned filename and
               # IdMapper in the 'file' and 'mapper' attributes. Returns nothing.
78 5     5 0 12 my $self = shift;
79              
80             # remove persistent key (if any) from args before temp_fasta call
81             # TODO: work out whether this is really needed
               # The hash is copied first, so the delete below does not touch
               # $self->args; DEMOLISH still reads 'persistent' from the original.
82 5         8 my %args = %{ $self->args };
  5         150  
83 5         15 delete $args{persistent};
84              
85             # create temporary FASTA file and set up associated IdMapper
86 5         151 my $ali = $self->seqs;
87 5         18 my ($filename, $mapper) = $ali->temp_fasta( \%args );
88 5         507 $self->_set_file($filename);
89 5         178 $self->_set_mapper($mapper);
90              
91 5         172 return;
92             }
93              
94             sub DEMOLISH {
               # Destruction hook: removes the temporary file through the
               # delegated 'remove' unless args holds a true 'persistent' entry.
               # NOTE(review): if BUILD died before _set_file ran, 'file' would
               # presumably be unset when this delegation is invoked -- confirm
               # how that case behaves.
95 5     5 0 12 my $self = shift;
96              
97             $self->remove
98 5 100       191 unless $self->args->{persistent};
99              
100 5         661 return;
101             }
102              
103             # ACCESSORS
104              
105              
106             # MISC METHODS
107              
108              
109             sub type {
                # Returns 'prot' when the delegated is_protein is true and 'nucl'
                # otherwise. Takes no arguments besides the invocant.
110 0     0 1   my $self = shift;
111 0 0         return $self->is_protein ? 'prot' : 'nucl';
112             }
113              
114             __PACKAGE__->meta->make_immutable;
115             1;
116              
117             __END__
118              
119             =pod
120              
121             =head1 NAME
122              
123             Bio::MUST::Core::Ali::Temporary - Thin wrapper for a temporary mapped Ali written on disk
124              
125             =head1 VERSION
126              
127             version 0.212670
128              
129             =head1 SYNOPSIS
130              
131             #!/usr/bin/env perl
132              
133             use Modern::Perl '2011';
134             # same as:
135             # use strict;
136             # use warnings;
137             # use feature qw(say);
138              
139             use Bio::MUST::Core;
140             use aliased 'Bio::MUST::Core::Ali::Temporary';
141              
142             # build Ali::Temporary object from existing ALI file
143             my $temp_db = Temporary->new( seqs => 'database.ali' );
144              
145             # get properties
146             my $db = $temp_db->filename;
147             my $dbtype = $temp_db->type;
148              
149             # pass it to external program
150             system("makeblastdb -in $db -dbtype $dbtype");
151              
152             # alternative constructor call
153             # build Ali::Temporary object from existing Ali object
154             use aliased 'Bio::MUST::Core::Ali';
155             my $ali = Ali->load('queries.ali');
156             my $temp_qu = Temporary->new( seqs => $ali );
157              
158             # pass it to external program
159             use File::Temp;
160             my $query = $temp_qu->filename;
161             my $out = File::Temp->new( UNLINK => 0, SUFFIX => '.blastp' );
162             system("blastp -query $query -db $db -out $out");
163             say "report: $out";
164              
165             # later... when parsing the BLAST report
166             # let's say $id is a BLAST hit in database.ali
167             my $id = 'seq2';
168             my $long_id = $temp_db->long_id_for($id);
169             say "hit id: $long_id";
170             # ...
171              
172             # more alternative constructor calls
173             # build Ali::Temporary object from list of Seq objects
174             my @seqs = $ali->filter_seqs( sub { $_->seq_len >= 500 } );
175             my $temp_ls = Temporary->new( seqs => \@seqs );
176              
177             # build Ali::Temporary object preserving gaps in Seq objects
178             # (and persistent associated FASTA file)
179             my $temp_gp = Temporary->new(
180             seqs => \@seqs,
181             args => { degap => 0, persistent => 1 }
182             );
183             my $filename = $temp_gp->filename;
184             # later...
185             unlink $filename;
186              
187             =head1 DESCRIPTION
188              
189             This module implements a class representing a temporary FASTA file where
190             sequence ids are automatically abbreviated (C<seq1>, C<seq2>...) for maximum
191             compatibility with external programs. To this end, it combines an internal
192             L<Bio::MUST::Core::Ali> object and a L<Bio::MUST::Core::IdMapper> object.
193              
194             An C<Ali::Temporary> can be built from an existing ALI (or FASTA) file or
195             on-the-fly from a list (ArrayRef) of L<Bio::MUST::Core::Seq> objects (see the
196             SYNOPSIS for examples).
197              
198             Its sequences can be aligned or not but by default sequences are degapped
199             before writing the associated temporary FASTA file. If gaps are to be
200             preserved, this behavior can be altered via the optional C<args> attribute.
201              
202             =head1 ATTRIBUTES
203              
204             =head2 seqs
205              
206             L<Bio::MUST::Core::Ali> object (required)
207              
208             This required attribute contains the L<Bio::MUST::Core::Seq> objects that are
209             written in the associated temporary FASTA file. It can be specified either as
210             a path to an ALI/FASTA file or as an C<Ali> object or as an ArrayRef of C<Seq>
211             objects (see the SYNOPSIS for examples).
212              
213             For now, it provides the following methods: C<count_comments>,
214             C<all_comments>, C<get_comment>, C<guessing>, C<all_seq_ids>, C<has_uniq_ids>,
215             C<is_protein>, C<is_aligned>, C<get_seq>, C<get_seq_with_id>, C<first_seq>,
216             C<all_seqs>, C<filter_seqs>, C<count_seqs> and C<gapmiss_regex> (see L<Bio::MUST::Core::Ali>).
217              
218             =head2 args
219              
220             HashRef (optional)
221              
222             When specified this optional attribute is passed to the C<temp_fasta> method
223             of the internal C<Ali> object. Its purpose is to allow the fine-tuning of the
224             format of the associated temporary FASTA file.
225              
226             By default, its contents are C<< clean => 1 >> and C<< degap => 1 >>, so as to
227             generate a FASTA file of degapped sequences where ambiguous and missing states
228             are replaced by C<X>.
229              
230             Additionally, if you want to keep your temporary files around for debugging
231             purposes, you can pass the option C<< persistent => 1 >>. This will disable the
232             autoremoval of the file on object destruction.
233              
234             =head2 file
235              
236             L<Path::Class::File> object (auto)
237              
238             This attribute is automatically initialized with the path of the associated
239             temporary FASTA file. Thus, it cannot be user-specified.
240              
241             It provides the following methods: C<remove> and C<filename> (see below).
242              
243             =head2 mapper
244              
245             L<Bio::MUST::Core::IdMapper> object (auto)
246              
247             This attribute is automatically initialized with the mapper associating the
248             long ids of the internal C<Ali> object to the abbreviated ids used in the
249             associated temporary FASTA file. Thus, it cannot be user-specified.
250              
251             It provides the following methods: C<all_long_ids>, C<all_abbr_ids>,
252             C<long_id_for> and C<abbr_id_for> (see L<Bio::MUST::Core::IdMapper>).
253              
254             =head1 ACCESSORS
255              
256             =head2 filename
257              
258             Returns the stringified filename of the associated temporary FASTA file.
259              
260             This method does not accept any arguments.
261              
262             =head2 type
263              
264             Returns the type of the sequences in the internal C<Ali> object using BLAST
265             denomination (C<prot> or C<nucl>). See L<Bio::MUST::Core::Seq/is_protein> for
266             the exact test performed.
267              
268             This method does not accept any arguments.
269              
270             =head1 MISC METHODS
271              
272             =head2 remove
273              
274             Remove (unlink) the associated temporary FASTA file.
275              
276             Since this method is in principle automatically invoked on object destruction,
277             users should not need it. Note that C<persistent> temporary files (see object
278             constructor) have to be removed manually, which requires getting and storing
279             their C<filename> before object destruction.
280              
281             =head1 AUTHOR
282              
283             Denis BAURAIN <denis.baurain@uliege.be>
284              
285             =head1 COPYRIGHT AND LICENSE
286              
287             This software is copyright (c) 2013 by University of Liege / Unit of Eukaryotic Phylogenomics / Denis BAURAIN.
288              
289             This is free software; you can redistribute it and/or modify it under
290             the same terms as the Perl 5 programming language system itself.
291              
292             =cut