File Coverage

blib/lib/Bio/MUST/Apps/SlaveAligner/Local.pm
Criterion Covered Total %
statement 30 78 38.4
branch 0 12 0.0
condition 0 2 0.0
subroutine 10 12 83.3
pod 0 2 0.0
total 40 106 37.7


line stmt bran cond sub pod time code
1             package Bio::MUST::Apps::SlaveAligner::Local;
2             # ABSTRACT: Internal class for slave-aligner
3             $Bio::MUST::Apps::SlaveAligner::Local::VERSION = '0.210370';
4 1     1   815 use Moose;
  1         2  
  1         6  
5 1     1   7272 use namespace::autoclean;
  1         2  
  1         43  
6              
7 1     1   108 use autodie;
  1         2  
  1         8  
8 1     1   5381 use feature qw(say switch);
  1         2  
  1         96  
9 1     1   8 use experimental qw(smartmatch); # to suppress warnings about 'when'
  1         3  
  1         8  
10              
11 1     1   94 use Smart::Comments -ENV;
  1         2  
  1         8  
12              
13 1     1   1278 use List::AllUtils qw(max);
  1         2  
  1         54  
14 1     1   6 use Sort::Naturally;
  1         2  
  1         45  
15              
16 1     1   6 use Bio::MUST::Core;
  1         3  
  1         26  
17 1     1   5 use aliased 'Bio::MUST::Core::Seq';
  1         2  
  1         7  
18              
19              
20             # _delayed_indels is a hash (site) of hashes (new_seq id)
21             # storing the number of insertions caused by each new_seq at each
22             # pos (as if each new_seq was added on its own to the Ali)
23              
24             has 'ali' => (
25             is => 'ro',
26             isa => 'Bio::MUST::Core::Ali',
27             required => 1,
28             );
29              
30              
31             has '_delayed_indels' => (
32             traits => ['Hash'],
33             is => 'ro',
34             isa => 'HashRef[HashRef[Num]]',
35             init_arg => undef,
36             default => sub { {} },
37             handles => {
38             all_sites => 'keys',
39             indels_at => 'get',
40             },
41             );
42              
43              
44             sub align { ## no critic (RequireArgUnpacking)
45 0     0 0   my $self = shift;
46 0           my %args = @_;
47              
48             my (
49             $query, # new_seq as aligned in BLAST report
50             $sbjct, # template as aligned in BLAST report
51             $tmplt, # template as aligned in Ali
52             $splcd,
53             $start # pairwise alignment start in template
54 0           ) = @args{ qw(new_seq subject template cds_seq start) };
55              
56             # determine mode (prot/nucl) based on absence/presence of a DNA seq
57             # setup multiplier according to mode
58 0 0         my $mul = defined $splcd ? 3 : 1;
59              
60             # align query left-end on template
61 0           for (my $u = 0; $u < $start; $u++) {
62 0 0         $start++ if $tmplt->is_gap($u);
63             }
64 0           my $aligned_seq = q{ } x ( ($start-1) * $mul );
65              
66             # TODO: check whether $start can be set to hit_start-1 before the loop
67              
68             # setup loop
69 0           my $t = $start-1; # pos within template in Ali
70 0           my $q = 0; # pos within new_seq in BLAST report
71 0           my $s = 0; # pos within template in BLAST report
72 0           my $len = $query->seq_len;
73              
74             # loop through new_seq positions
75 0           while ($q < $len) {
76              
77             # encode gap configuration for easy dispatching
78 0           my $case
79             = $tmplt->is_gap($t)
80             . $query->is_gap($q)
81             . $sbjct->is_gap($s)
82             ;
83              
84             # examine 6 possible cases (t* q* s* and tL q* s* do not exist)
85 0           given ($case) {
86 0           when (/100|110/xms) { # case 1: t* qL sL
87             # case 2: t* q* sL
88 0           $aligned_seq .= '*' x $mul;
89 0           $t++;
90             }
91 0           when (/101|000/xms) { # case 3: t* qL s*
92             # case 4: tL qL sL
93 0 0         $aligned_seq .= $mul == 1 ? $query->state_at($q)
94             : $splcd->edit_seq($q * $mul, $mul);
95 0           $t++;
96 0           $q++;
97 0           $s++;
98             }
99 0           when (/010/xms) { # case 5: tL q* sL
100 0           $aligned_seq .= '*' x $mul;
101 0           $t++;
102 0           $q++;
103 0           $s++;
104             }
105 0           when (/001/xms) { # case 6: tL qL s*
106 0 0         $aligned_seq .= $mul == 1 ? $query->state_at($q)
107             : $splcd->edit_seq($q * $mul, $mul);
108 0           $self->_delayed_indels->{$t}{ $query->full_id } += $mul;
109 0           $q++;
110 0           $s++;
111             }
112             }
113             }
114              
115 0           return Seq->new(
116             seq_id => $query->full_id,
117             seq => $aligned_seq
118             );
119             }
120              
121              
122             # TODO: ensure that new seqs are in Ali
123              
124             sub make_indels {
125 0     0 0   my $self = shift;
126              
127             # process delayed indels (from left to right)
128 0           my $offset = 0;
129 0           my @sites = sort { $a <=> $b } $self->all_sites;
  0            
130              
131 0           for my $site (@sites) {
132              
133             # get length of longest indel at site
134 0           my %indel_by = %{ $self->indels_at($site) };
  0            
135 0           my $max_indel = max values %indel_by;
136              
137             # insert indels at site into all seqs
138              
139             SEQ:
140 0           for my $seq ( $self->ali->all_seqs ) {
141              
142             # reduce indel length if seq has indels at the same site
143 0   0       my $indel = $max_indel - ( $indel_by{ $seq->full_id } // 0 );
144              
145             # skip seq if indel reduced to zero
146 0 0         next SEQ if $indel == 0;
147              
148             # skip seq if indel farther than seq end
149             # TODO: check boundary
150 0 0         next SEQ if $site + $offset > $seq->seq_len - 1;
151              
152             # splice seq to insert required number of gaps
153 0           $seq->edit_seq($site + $offset, 0, '*' x $indel);
154             }
155              
156             # account for indel in subsequent computations
157 0           $offset += $max_indel;
158             }
159              
160 0           return;
161             }
162              
163              
164             __PACKAGE__->meta->make_immutable;
165             1;
166              
167             __END__
168              
169             =pod
170              
171             =head1 NAME
172              
173             Bio::MUST::Apps::SlaveAligner::Local - Internal class for slave-aligner
174              
175             =head1 VERSION
176              
177             version 0.210370
178              
179             =head1 AUTHOR
180              
181             Denis BAURAIN <denis.baurain@uliege.be>
182              
183             =head1 COPYRIGHT AND LICENSE
184              
185             This software is copyright (c) 2013 by University of Liege / Unit of Eukaryotic Phylogenomics / Denis BAURAIN.
186              
187             This is free software; you can redistribute it and/or modify it under
188             the same terms as the Perl 5 programming language system itself.
189              
190             =cut