line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::MUST::Apps::SlaveAligner::Local; |
2
|
|
|
|
|
|
|
# ABSTRACT: Internal class for slave-aligner |
3
|
|
|
|
|
|
|
$Bio::MUST::Apps::SlaveAligner::Local::VERSION = '0.210570'; |
4
|
1
|
|
|
1
|
|
648
|
use Moose; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
7
|
|
5
|
1
|
|
|
1
|
|
7137
|
use namespace::autoclean; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
7
|
|
6
|
|
|
|
|
|
|
|
7
|
1
|
|
|
1
|
|
83
|
use autodie; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
6
|
|
8
|
1
|
|
|
1
|
|
5410
|
use feature qw(say switch); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
83
|
|
9
|
1
|
|
|
1
|
|
8
|
use experimental qw(smartmatch); # to suppress warnings about 'when' |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
9
|
|
10
|
|
|
|
|
|
|
|
11
|
1
|
|
|
1
|
|
91
|
use Smart::Comments -ENV; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
6
|
|
12
|
|
|
|
|
|
|
|
13
|
1
|
|
|
1
|
|
1311
|
use List::AllUtils qw(max); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
49
|
|
14
|
1
|
|
|
1
|
|
7
|
use Sort::Naturally; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
43
|
|
15
|
|
|
|
|
|
|
|
16
|
1
|
|
|
1
|
|
6
|
use Bio::MUST::Core; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
36
|
|
17
|
1
|
|
|
1
|
|
7
|
use aliased 'Bio::MUST::Core::Seq'; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
9
|
|
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
# _delayed_indels is a hash (site) of hashes (new_seq id) |
21
|
|
|
|
|
|
|
# storing the number of insertions caused by each new_seq at each |
22
|
|
|
|
|
|
|
# pos (as if each new_seq was added on its own to the Ali) |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
has 'ali' => ( |
25
|
|
|
|
|
|
|
is => 'ro', |
26
|
|
|
|
|
|
|
isa => 'Bio::MUST::Core::Ali', |
27
|
|
|
|
|
|
|
required => 1, |
28
|
|
|
|
|
|
|
); |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
has '_delayed_indels' => ( |
32
|
|
|
|
|
|
|
traits => ['Hash'], |
33
|
|
|
|
|
|
|
is => 'ro', |
34
|
|
|
|
|
|
|
isa => 'HashRef[HashRef[Num]]', |
35
|
|
|
|
|
|
|
init_arg => undef, |
36
|
|
|
|
|
|
|
default => sub { {} }, |
37
|
|
|
|
|
|
|
handles => { |
38
|
|
|
|
|
|
|
all_sites => 'keys', |
39
|
|
|
|
|
|
|
indels_at => 'get', |
40
|
|
|
|
|
|
|
}, |
41
|
|
|
|
|
|
|
); |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
sub align { ## no critic (RequireArgUnpacking) |
45
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
46
|
0
|
|
|
|
|
|
my %args = @_; |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
my ( |
49
|
|
|
|
|
|
|
$query, # new_seq as aligned in BLAST report |
50
|
|
|
|
|
|
|
$sbjct, # template as aligned in BLAST report |
51
|
|
|
|
|
|
|
$tmplt, # template as aligned in Ali |
52
|
|
|
|
|
|
|
$splcd, |
53
|
|
|
|
|
|
|
$start # pairwise alignment start in template |
54
|
0
|
|
|
|
|
|
) = @args{ qw(new_seq subject template cds_seq start) }; |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
# determine mode (prot/nucl) based on absence/presence of a DNA seq |
57
|
|
|
|
|
|
|
# setup multiplier according to mode |
58
|
0
|
0
|
|
|
|
|
my $mul = defined $splcd ? 3 : 1; |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
# align query left-end on template |
61
|
0
|
|
|
|
|
|
for (my $u = 0; $u < $start; $u++) { |
62
|
0
|
0
|
|
|
|
|
$start++ if $tmplt->is_gap($u); |
63
|
|
|
|
|
|
|
} |
64
|
0
|
|
|
|
|
|
my $aligned_seq = q{ } x ( ($start-1) * $mul ); |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
# TODO: check whether $start can be set to hit_start-1 before the loop |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
# setup loop |
69
|
0
|
|
|
|
|
|
my $t = $start-1; # pos within template in Ali |
70
|
0
|
|
|
|
|
|
my $q = 0; # pos within new_seq in BLAST report |
71
|
0
|
|
|
|
|
|
my $s = 0; # pos within template in BLAST report |
72
|
0
|
|
|
|
|
|
my $len = $query->seq_len; |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
# loop through new_seq positions |
75
|
0
|
|
|
|
|
|
while ($q < $len) { |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
# encode gap configuration for easy dispatching |
78
|
0
|
|
|
|
|
|
my $case |
79
|
|
|
|
|
|
|
= $tmplt->is_gap($t) |
80
|
|
|
|
|
|
|
. $query->is_gap($q) |
81
|
|
|
|
|
|
|
. $sbjct->is_gap($s) |
82
|
|
|
|
|
|
|
; |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
# examine 6 possible cases (t* q* s* and tL q* s* do not exist) |
85
|
0
|
|
|
|
|
|
given ($case) { |
86
|
0
|
|
|
|
|
|
when (/100|110/xms) { # case 1: t* qL sL |
87
|
|
|
|
|
|
|
# case 2: t* q* sL |
88
|
0
|
|
|
|
|
|
$aligned_seq .= '*' x $mul; |
89
|
0
|
|
|
|
|
|
$t++; |
90
|
|
|
|
|
|
|
} |
91
|
0
|
|
|
|
|
|
when (/101|000/xms) { # case 3: t* qL s* |
92
|
|
|
|
|
|
|
# case 4: tL qL sL |
93
|
0
|
0
|
|
|
|
|
$aligned_seq .= $mul == 1 ? $query->state_at($q) |
94
|
|
|
|
|
|
|
: $splcd->edit_seq($q * $mul, $mul); |
95
|
0
|
|
|
|
|
|
$t++; |
96
|
0
|
|
|
|
|
|
$q++; |
97
|
0
|
|
|
|
|
|
$s++; |
98
|
|
|
|
|
|
|
} |
99
|
0
|
|
|
|
|
|
when (/010/xms) { # case 5: tL q* sL |
100
|
0
|
|
|
|
|
|
$aligned_seq .= '*' x $mul; |
101
|
0
|
|
|
|
|
|
$t++; |
102
|
0
|
|
|
|
|
|
$q++; |
103
|
0
|
|
|
|
|
|
$s++; |
104
|
|
|
|
|
|
|
} |
105
|
0
|
|
|
|
|
|
when (/001/xms) { # case 6: tL qL s* |
106
|
0
|
0
|
|
|
|
|
$aligned_seq .= $mul == 1 ? $query->state_at($q) |
107
|
|
|
|
|
|
|
: $splcd->edit_seq($q * $mul, $mul); |
108
|
0
|
|
|
|
|
|
$self->_delayed_indels->{$t}{ $query->full_id } += $mul; |
109
|
0
|
|
|
|
|
|
$q++; |
110
|
0
|
|
|
|
|
|
$s++; |
111
|
|
|
|
|
|
|
} |
112
|
|
|
|
|
|
|
} |
113
|
|
|
|
|
|
|
} |
114
|
|
|
|
|
|
|
|
115
|
0
|
|
|
|
|
|
return Seq->new( |
116
|
|
|
|
|
|
|
seq_id => $query->full_id, |
117
|
|
|
|
|
|
|
seq => $aligned_seq |
118
|
|
|
|
|
|
|
); |
119
|
|
|
|
|
|
|
} |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
# TODO: ensure that new seqs are in Ali |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
sub make_indels { |
125
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
# process delayed indels (from left to right) |
128
|
0
|
|
|
|
|
|
my $offset = 0; |
129
|
0
|
|
|
|
|
|
my @sites = sort { $a <=> $b } $self->all_sites; |
|
0
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
|
131
|
0
|
|
|
|
|
|
for my $site (@sites) { |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
# get length of longest indel at site |
134
|
0
|
|
|
|
|
|
my %indel_by = %{ $self->indels_at($site) }; |
|
0
|
|
|
|
|
|
|
135
|
0
|
|
|
|
|
|
my $max_indel = max values %indel_by; |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
# insert indels at site into all seqs |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
SEQ: |
140
|
0
|
|
|
|
|
|
for my $seq ( $self->ali->all_seqs ) { |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
# reduce indel length if seq has indels at the same site |
143
|
0
|
|
0
|
|
|
|
my $indel = $max_indel - ( $indel_by{ $seq->full_id } // 0 ); |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
# skip seq if indel reduced to zero |
146
|
0
|
0
|
|
|
|
|
next SEQ if $indel == 0; |
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
# skip seq if indel farther than seq end |
149
|
|
|
|
|
|
|
# TODO: check boundary |
150
|
0
|
0
|
|
|
|
|
next SEQ if $site + $offset > $seq->seq_len - 1; |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
# splice seq to insert required number of gaps |
153
|
0
|
|
|
|
|
|
$seq->edit_seq($site + $offset, 0, '*' x $indel); |
154
|
|
|
|
|
|
|
} |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
# account for indel in subsequent computations |
157
|
0
|
|
|
|
|
|
$offset += $max_indel; |
158
|
|
|
|
|
|
|
} |
159
|
|
|
|
|
|
|
|
160
|
0
|
|
|
|
|
|
return; |
161
|
|
|
|
|
|
|
} |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
__PACKAGE__->meta->make_immutable; |
165
|
|
|
|
|
|
|
1; |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
__END__ |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
=pod |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
=head1 NAME |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
Bio::MUST::Apps::SlaveAligner::Local - Internal class for slave-aligner |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
=head1 VERSION |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
version 0.210570 |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
=head1 AUTHOR |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
Denis BAURAIN <denis.baurain@uliege.be> |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
This software is copyright (c) 2013 by University of Liege / Unit of Eukaryotic Phylogenomics / Denis BAURAIN. |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
188
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
=cut |