line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::DNA::Incomplete; |
2
|
|
|
|
|
|
|
{ |
3
|
|
|
|
|
|
|
$Bio::DNA::Incomplete::VERSION = '0.004'; |
4
|
|
|
|
|
|
|
} |
5
|
1
|
|
|
1
|
|
47367
|
use strict; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
26
|
|
6
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
21
|
|
7
|
|
|
|
|
|
|
|
8
|
1
|
|
|
1
|
|
5
|
use Carp 'croak'; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
62
|
|
9
|
1
|
|
|
1
|
|
833
|
use Sub::Exporter::Progressive -setup => { exports => [qw/pattern_to_regex pattern_to_regex_string match_pattern all_possibilities/], groups => { default => [qw/pattern_to_regex pattern_to_regex_string match_pattern all_possibilities/]} }; |
|
1
|
|
|
|
|
1160
|
|
|
1
|
|
|
|
|
13
|
|
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
my %simple = map { ( $_ => $_ ) } qw/A C G T/; |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
my %meaning_of = ( |
14
|
|
|
|
|
|
|
R => 'AG', |
15
|
|
|
|
|
|
|
Y => 'CT', |
16
|
|
|
|
|
|
|
W => 'AT', |
17
|
|
|
|
|
|
|
S => 'CG', |
18
|
|
|
|
|
|
|
M => 'AC', |
19
|
|
|
|
|
|
|
K => 'GT', |
20
|
|
|
|
|
|
|
H => 'ACT', |
21
|
|
|
|
|
|
|
B => 'CGT', |
22
|
|
|
|
|
|
|
V => 'ACG', |
23
|
|
|
|
|
|
|
D => 'AGT', |
24
|
|
|
|
|
|
|
N => 'ACGT', |
25
|
|
|
|
|
|
|
); |
26
|
|
|
|
|
|
|
my %pattern_for = %meaning_of; |
27
|
|
|
|
|
|
|
$_ = "[$_]" for values %pattern_for; |
28
|
|
|
|
|
|
|
my ($invalid) = map { qr/[^$_]/ } join '', keys %simple, keys %pattern_for; |
29
|
|
|
|
|
|
|
my %bases_for = (%meaning_of, %simple); |
30
|
|
|
|
|
|
|
$_ = [ split // ] for values %bases_for; |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
sub pattern_to_regex_string { |
33
|
7
|
|
|
7
|
1
|
65
|
my $pattern = uc shift; |
34
|
7
|
100
|
|
|
|
74
|
croak 'Invalid pattern' if $pattern =~ /$invalid/; |
35
|
|
|
|
|
|
|
|
36
|
6
|
|
|
|
|
47
|
$pattern =~ s/([^ATCG])/$pattern_for{$1}/g; |
37
|
6
|
|
|
|
|
21
|
return "(?i:$pattern)"; |
38
|
|
|
|
|
|
|
} |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
sub pattern_to_regex { |
41
|
6
|
|
|
6
|
1
|
14
|
my $pattern = uc shift; |
42
|
6
|
|
|
|
|
15
|
my $string = pattern_to_regex_string($pattern); |
43
|
6
|
|
|
|
|
114
|
return qr/$string/; |
44
|
|
|
|
|
|
|
}; |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
sub match_pattern { |
47
|
6
|
|
|
6
|
1
|
26
|
my ($pattern, @args) = @_; |
48
|
6
|
|
|
|
|
14
|
my $regex = pattern_to_regex($pattern); |
49
|
6
|
|
|
|
|
16
|
return grep { $_ =~ /\A $regex \z/xms } @args; |
|
6
|
|
|
|
|
131
|
|
50
|
|
|
|
|
|
|
} |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
sub _all_possibilities { |
53
|
7
|
|
|
7
|
|
17
|
my ($current, @rest) = @_; |
54
|
7
|
100
|
|
|
|
18
|
if (@rest) { |
55
|
5
|
|
|
|
|
6
|
my @ret; |
56
|
5
|
|
|
|
|
36
|
my $pretail = _all_possibilities(@rest); |
57
|
|
|
|
|
|
|
# Chunks longer than 1 are always /[ACTG]+/, so always match themselves |
58
|
5
|
100
|
|
|
|
15
|
for my $head (length $current == 1 ? @{ $bases_for{$current} } : $current) { |
|
4
|
|
|
|
|
11
|
|
59
|
9
|
|
|
|
|
10
|
for my $tail (@{$pretail}) { |
|
9
|
|
|
|
|
14
|
|
60
|
27
|
|
|
|
|
59
|
push @ret, $head.$tail; |
61
|
|
|
|
|
|
|
} |
62
|
|
|
|
|
|
|
} |
63
|
5
|
|
|
|
|
59
|
return \@ret; |
64
|
|
|
|
|
|
|
} |
65
|
|
|
|
|
|
|
else { |
66
|
2
|
|
50
|
|
|
14
|
return $bases_for{$current} || [ $current ]; |
67
|
|
|
|
|
|
|
} |
68
|
|
|
|
|
|
|
} |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
sub all_possibilities { |
71
|
2
|
|
|
2
|
1
|
8
|
my $pattern = uc shift; |
72
|
2
|
|
|
|
|
16
|
my @bases = $pattern =~ m/[ACTG]+|[^ACGT]/g; |
73
|
2
|
|
|
|
|
5
|
return @{ _all_possibilities(@bases) }; |
|
2
|
|
|
|
|
9
|
|
74
|
|
|
|
|
|
|
} |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
1; |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
#ABSTRACT: Match incompletely specified bases in nucleic acid sequences |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
__END__ |