File Coverage

blib/lib/Bio/MUST/Core/Taxonomy/Filter.pm
Criterion Covered Total %
statement 30 60 50.0
branch 0 16 0.0
condition n/a
subroutine 10 15 66.6
pod 2 3 66.6
total 42 94 44.6


line stmt bran cond sub pod time code
1             package Bio::MUST::Core::Taxonomy::Filter;
2             # ABSTRACT: Helper class for filtering seqs according to taxonomy
3             $Bio::MUST::Core::Taxonomy::Filter::VERSION = '0.212670';
4 17     17   10438 use Moose;
  17         54  
  17         132  
5 17     17   119925 use namespace::autoclean;
  17         55  
  17         177  
6              
7 17     17   1582 use autodie;
  17         51  
  17         171  
8 17     17   91186 use feature qw(say);
  17         46  
  17         1544  
9              
10 17     17   122 use Smart::Comments;
  17         50  
  17         143  
11              
12 17     17   19483 use Carp;
  17         53  
  17         1224  
13 17     17   122 use Const::Fast;
  17         40  
  17         170  
14 17     17   1139 use List::AllUtils;
  17         43  
  17         723  
15              
16 17     17   123 use Bio::MUST::Core::Types;
  17         48  
  17         509  
17 17     17   99 use aliased 'Bio::MUST::Core::IdList';
  17         45  
  17         188  
18             with 'Bio::MUST::Core::Roles::Taxable';
19              
20              
21             has '_specs' => (
22             is => 'ro',
23             isa => 'Bio::MUST::Core::IdList',
24             required => 1,
25             coerce => 1,
26             handles => {
27             all_specs => 'all_ids',
28             },
29             );
30              
31              
32             has '_is_' . $_ => (
33             traits => ['Hash'],
34             is => 'ro',
35             isa => 'HashRef',
36             init_arg => undef,
37             writer => '_set_is_' . $_,
38             handles => {
39             'all_' . $_ => 'keys',
40             'is_' . $_ => 'defined',
41             },
42             ) for qw(wanted unwanted);
43              
44              
45             # TODO: allow specifying taxa as partial lineages to solve ambiguities
46             # TODO: allow specifying taxa as taxid and/or mustids (for strains)
47              
48             # regexes for deriving filter from specifications
49             const my $WANTED => qr{\A \+ \s* (.*) }xms;
50             const my $UNWANTED => qr{\A \- \s* (.*) }xms;
51              
52             sub BUILD {
53 0     0 0   my $self = shift;
54              
55             # parse filter specifications
56 0 0         my @wanted = map { $_ =~ $WANTED ? $1 : () } $self->all_specs;
  0            
57 0 0         my @unwanted = map { $_ =~ $UNWANTED ? $1 : () } $self->all_specs;
  0            
58              
59             # warn in case of ambiguous taxa
60 0           for my $taxon (@wanted, @unwanted) {
61 0 0         carp "[BMC] Warning: $taxon is taxonomically ambiguous in filter!"
62             if $self->tax->is_dupe($taxon);
63             }
64              
65             # build filtering hashes from wanted and unwanted arrays
66             # Note: we want no virus by default but exclude nothing
67 0 0         push @wanted, 'cellular organisms' unless @wanted;
68 0           my %is_wanted = map { $_ => 1 } @wanted;
  0            
69 0           my %is_unwanted = map { $_ => 1 } @unwanted;
  0            
70              
71             # store HashRefs for filter
72 0           $self->_set_is_wanted( \%is_wanted );
73 0           $self->_set_is_unwanted(\%is_unwanted);
74              
75 0           return;
76             }
77              
78              
79              
80             sub is_allowed {
81 0     0 1   my $self = shift;
82 0           my $seq_id = shift;
83              
84             # test whether lineage of seq_id has at least one wanted taxon
85             # ... or has at least one unwanted taxon
86             # non-matching taxa are allowed by default
87              
88 0           my @lineage = $self->tax->fetch_lineage($seq_id);
89 0 0         return undef unless @lineage; ## no critic (ProhibitExplicitReturnUndef)
90              
91 0 0   0     return 0 unless List::AllUtils::any { $self->is_wanted( $_) } @lineage;
  0            
92 0 0   0     return 0 if List::AllUtils::any { $self->is_unwanted($_) } @lineage;
  0            
93 0           return 1;
94             }
95              
96              
97              
98             sub tax_list {
99 0     0 1   my $self = shift;
100 0           my $listable = shift;
101              
102 0           my @ids;
103 0           for my $seq_id ($listable->all_seq_ids) {
104 0 0         push @ids, $seq_id->full_id if $self->is_allowed($seq_id);
105             }
106              
107 0           return IdList->new( ids => \@ids );
108             }
109              
110             __PACKAGE__->meta->make_immutable;
111             1;
112              
113             __END__
114              
115             =pod
116              
117             =head1 NAME
118              
119             Bio::MUST::Core::Taxonomy::Filter - Helper class for filtering seqs according to taxonomy
120              
121             =head1 VERSION
122              
123             version 0.212670
124              
125             =head1 SYNOPSIS
126              
127             # TODO
128              
129             =head1 DESCRIPTION
130              
131             # TODO
132              
133             =head1 METHODS
134              
135             =head2 is_allowed
136              
137             =head2 tax_list
138              
139             =head1 AUTHOR
140              
141             Denis BAURAIN <denis.baurain@uliege.be>
142              
143             =head1 COPYRIGHT AND LICENSE
144              
145             This software is copyright (c) 2013 by University of Liege / Unit of Eukaryotic Phylogenomics / Denis BAURAIN.
146              
147             This is free software; you can redistribute it and/or modify it under
148             the same terms as the Perl 5 programming language system itself.
149              
150             =cut