File Coverage

blib/lib/Bio/DNA/Incomplete.pm
Criterion Covered Total %
statement 38 38 100.0
branch 6 6 100.0
condition 1 2 50.0
subroutine 9 9 100.0
pod 4 4 100.0
total 58 59 98.3


line stmt bran cond sub pod time code
1             package Bio::DNA::Incomplete;
2             {
3             $Bio::DNA::Incomplete::VERSION = '0.004';
4             }
5 1     1   47367 use strict;
  1         3  
  1         26  
6 1     1   5 use warnings;
  1         1  
  1         21  
7              
8 1     1   5 use Carp 'croak';
  1         2  
  1         62  
9 1     1   833 use Sub::Exporter::Progressive -setup => { exports => [qw/pattern_to_regex pattern_to_regex_string match_pattern all_possibilities/], groups => { default => [qw/pattern_to_regex pattern_to_regex_string match_pattern all_possibilities/]} };
  1         1160  
  1         13  
10              
11             my %simple = map { ( $_ => $_ ) } qw/A C G T/;
12              
13             my %meaning_of = (
14             R => 'AG',
15             Y => 'CT',
16             W => 'AT',
17             S => 'CG',
18             M => 'AC',
19             K => 'GT',
20             H => 'ACT',
21             B => 'CGT',
22             V => 'ACG',
23             D => 'AGT',
24             N => 'ACGT',
25             );
26             my %pattern_for = %meaning_of;
27             $_ = "[$_]" for values %pattern_for;
28             my ($invalid) = map { qr/[^$_]/ } join '', keys %simple, keys %pattern_for;
29             my %bases_for = (%meaning_of, %simple);
30             $_ = [ split // ] for values %bases_for;
31              
32             sub pattern_to_regex_string {
33 7     7 1 65 my $pattern = uc shift;
34 7 100       74 croak 'Invalid pattern' if $pattern =~ /$invalid/;
35              
36 6         47 $pattern =~ s/([^ATCG])/$pattern_for{$1}/g;
37 6         21 return "(?i:$pattern)";
38             }
39              
40             sub pattern_to_regex {
41 6     6 1 14 my $pattern = uc shift;
42 6         15 my $string = pattern_to_regex_string($pattern);
43 6         114 return qr/$string/;
44             };
45              
46             sub match_pattern {
47 6     6 1 26 my ($pattern, @args) = @_;
48 6         14 my $regex = pattern_to_regex($pattern);
49 6         16 return grep { $_ =~ /\A $regex \z/xms } @args;
  6         131  
50             }
51              
52             sub _all_possibilities {
53 7     7   17 my ($current, @rest) = @_;
54 7 100       18 if (@rest) {
55 5         6 my @ret;
56 5         36 my $pretail = _all_possibilities(@rest);
57             # Chunks longer than 1 are always /[ACTG]+/, so always match themselves
58 5 100       15 for my $head (length $current == 1 ? @{ $bases_for{$current} } : $current) {
  4         11  
59 9         10 for my $tail (@{$pretail}) {
  9         14  
60 27         59 push @ret, $head.$tail;
61             }
62             }
63 5         59 return \@ret;
64             }
65             else {
66 2   50     14 return $bases_for{$current} || [ $current ];
67             }
68             }
69              
70             sub all_possibilities {
71 2     2 1 8 my $pattern = uc shift;
72 2         16 my @bases = $pattern =~ m/[ACTG]+|[^ACGT]/g;
73 2         5 return @{ _all_possibilities(@bases) };
  2         9  
74             }
75              
76             1;
77              
78             #ABSTRACT: Match incompletely specified bases in nucleic acid sequences
79              
80             __END__