File Coverage

blib/lib/Bio/MUST/Apps/FortyTwo/AliProcessor.pm
Criterion Covered Total %
statement 72 303 23.7
branch 0 68 0.0
condition 0 30 0.0
subroutine 24 49 48.9
pod 0 7 0.0
total 96 457 21.0


line stmt bran cond sub pod time code
1             package Bio::MUST::Apps::FortyTwo::AliProcessor;
2             # ABSTRACT: Internal class for forty-two tool
3             $Bio::MUST::Apps::FortyTwo::AliProcessor::VERSION = '0.210370';
4 1     1   809 use Moose;
  1         3  
  1         6  
5 1     1   7044 use namespace::autoclean;
  1         2  
  1         8  
6              
7 1     1   82 use autodie;
  1         2  
  1         8  
8 1     1   5524 use feature qw(say);
  1         3  
  1         109  
9              
10 1     1   8 use Smart::Comments -ENV;
  1         3  
  1         11  
11              
12 1     1   5233 use Carp;
  1         3  
  1         77  
13 1     1   6 use Const::Fast;
  1         20  
  1         9  
14 1     1   64 use File::Basename;
  1         2  
  1         62  
15 1     1   6 use List::AllUtils qw(first part partition_by pairmap);
  1         2  
  1         58  
16 1     1   5 use Path::Class qw(file);
  1         2  
  1         38  
17 1     1   5 use POSIX;
  1         2  
  1         10  
18 1     1   1897 use Sort::Naturally;
  1         2  
  1         57  
19 1     1   7 use Tie::IxHash;
  1         21  
  1         42  
20              
21 1     1   6 use Bio::MUST::Core;
  1         2  
  1         38  
22 1     1   7 use Bio::MUST::Core::Utils qw(:filenames secure_outfile);
  1         2  
  1         142  
23 1     1   8 use Bio::MUST::Drivers;
  1         2  
  1         28  
24 1     1   6 use aliased 'Bio::MUST::Core::Ali';
  1         2  
  1         8  
25 1     1   273 use aliased 'Bio::MUST::Core::Ali::Temporary';
  1         2  
  1         5  
26 1     1   210 use aliased 'Bio::MUST::Core::IdList';
  1         3  
  1         3  
27 1     1   189 use aliased 'Bio::MUST::Core::Seq';
  1         2  
  1         4  
28 1     1   221 use aliased 'Bio::MUST::Core::SeqId';
  1         2  
  1         5  
29 1     1   187 use aliased 'Bio::MUST::Apps::SlaveAligner::Local';
  1         3  
  1         3  
30 1     1   127 use aliased 'Bio::MUST::Apps::FortyTwo::OrgProcessor';
  1         2  
  1         10  
31 1     1   150 use aliased 'Bio::MUST::Apps::Debrief42::TaxReport::NewSeq';
  1         2  
  1         8  
32              
33             with 'Bio::MUST::Apps::Roles::AliProcable';
34              
35              
36             has 'run_proc' => (
37             is => 'ro',
38             isa => 'Bio::MUST::Apps::FortyTwo::RunProcessor',
39             required => 1,
40             );
41              
42              
43             has '+ali' => (
44             handles => [ qw(all_new_seqs all_but_new_seqs) ],
45             );
46              
47              
48             has 'lookup' => (
49             is => 'ro',
50             isa => 'Bio::MUST::Core::IdList',
51             init_arg => undef,
52             lazy => 1,
53             builder => '_build_lookup',
54             );
55              
56              
57             has 'blastdb' => (
58             is => 'ro',
59             isa => 'Bio::MUST::Drivers::Blast::Database::Temporary',
60             init_arg => undef,
61             lazy => 1,
62             builder => '_build_blastdb',
63             );
64              
65              
66             has 'para_blastdb' => (
67             is => 'ro',
68             isa => 'Maybe[Bio::MUST::Drivers::Blast::Database::Temporary]',
69             init_arg => undef,
70             lazy => 1,
71             builder => '_build_para_blastdb',
72             );
73              
74              
75             has 'tax_report' => (
76             traits => ['Hash'],
77             is => 'ro',
78             isa => 'HashRef[Bio::MUST::Apps::Debrief42::TaxReport::NewSeq]',
79             init_arg => undef,
80             default => sub { {} },
81             handles => {
82             tax_line_for => 'get',
83             set_tax_line => 'set',
84             all_tax_line_ids => 'keys',
85             },
86             );
87              
88              
89             has '_' . $_ . '_seqs_by_org' => (
90             traits => ['Hash'],
91             is => 'ro',
92             isa => 'HashRef[ArrayRef[Bio::MUST::Core::Seq]]',
93             init_arg => undef,
94             lazy => 1,
95             builder => '_build_' . $_ . '_seqs_by_org',
96             handles => {
97             'count_' . $_ . '_orgs' => 'count',
98             'all_' . $_ . '_orgs' => 'keys',
99             $_ . '_seqs_for' => 'get',
100             },
101             ) for qw(ali non new);
102              
103              
104             has 'query_seqs' => (
105             is => 'ro',
106             isa => 'Bio::MUST::Core::Ali::Temporary',
107             init_arg => undef,
108             lazy => 1,
109             builder => '_build_query_seqs',
110             );
111              
112              
113             has '_best_hits_by_ref_org' => (
114             traits => ['Hash'],
115             is => 'ro',
116             isa => 'HashRef[Bio::MUST::Core::IdList]',
117             init_arg => undef,
118             lazy => 1,
119             builder => '_build_best_hits_by_ref_org',
120             handles => {
121             count_ref_orgs => 'count',
122             all_ref_orgs => 'keys',
123             remove_ref_org => 'delete',
124             all_best_hits => 'values',
125             best_hits_for => 'get',
126             },
127             );
128              
129              
130             has '_seqs_for_removal' => (
131             is => 'ro',
132             isa => 'Bio::MUST::Core::IdList',
133             init_arg => undef,
134             default => sub { IdList->new() },
135             handles => {
136             marked_for_removal => 'is_listed',
137             mark_seq_for_removal => 'add_id',
138             all_marked_seqs => 'all_ids',
139             remove_marked_seqs => 'negative_list',
140             },
141             );
142              
143              
144             ## no critic (ProhibitUnusedPrivateSubroutines)
145              
146             sub _build_integrator {
147 0     0     return Local->new( ali => shift->ali );
148             }
149              
150              
151             sub _build_lookup {
152 0     0     return shift->ali->new_lookup;
153             }
154              
155              
156             sub _build_blastdb {
157 0     0     return Bio::MUST::Drivers::Blast::Database::Temporary->new(
158             seqs => [ shift->all_but_new_seqs ]
159             );
160             }
161              
162              
163             sub _build_para_blastdb {
164 0     0     my $self = shift;
165              
166 0           my $parafile = change_suffix($self->ali->filename, '.para');
167 0 0         return unless -e $parafile;
168              
169             #### [ALI] PARA file in use: $parafile
170 0           return Bio::MUST::Drivers::Blast::Database::Temporary->new(
171             seqs => $parafile
172             );
173             }
174              
175              
176             sub _build_new_seqs_by_org {
177 0     0     my $self = shift;
178 0           return $self->_split_seqs_by_org(
179             Ali->new( seqs => [ $self->all_new_seqs ] )
180             );
181             }
182              
183              
184             sub _build_ali_seqs_by_org {
185 0     0     my $self = shift;
186 0           return $self->_split_seqs_by_org(
187             Ali->new( seqs => [ $self->all_but_new_seqs ] )
188             );
189             }
190              
191              
192             sub _build_non_seqs_by_org {
193 0     0     my $self = shift;
194              
195 0           my $nonfile = change_suffix( $self->ali->filename, '.non' );
196 0 0         return {} unless -e $nonfile;
197              
198             #### [ALI] NON file in use: $nonfile
199 0           return $self->_split_seqs_by_org( Ali->load($nonfile) );
200             }
201              
202              
203             const my $DEF_ORG => ':default';
204              
205             sub _split_seqs_by_org {
206 0     0     my $self = shift;
207 0           my $seqs = shift; # actually an Ali object...
208              
209             # split Seqs into one or more ArrayRefs keyed by full_org
210             # tie probably useless here but ensuring reproducible logs
211 0           tie my %seqs_for, 'Tie::IxHash';
212 0           for my $seq ($seqs->all_seqs) {
213 0   0       my $genus = $seq->genus // q{};
214 0   0       my $org = $seq->full_org // $DEF_ORG;
215 0           push @{ $seqs_for{$org} }, $seq;
  0            
216             }
217              
218 0           return \%seqs_for;
219             }
220              
221              
222             sub _build_query_seqs {
223 0     0     my $self = shift;
224              
225             #### [ALI] Collecting queries...
226              
227 0           my $rp = $self->run_proc;
228              
229             my @seqs # filter out doubtful seqs from each query_org
230 0           = grep { not $_->is_doubtful }
231 0   0       map { @{ $self->ali_seqs_for($_) // [] } }
  0            
  0            
232             $rp->all_query_orgs
233             ;
234              
235 0 0         unless (@seqs) {
236 0           carp '[ALI] Warning: no seq for any query_org; using longest instead!';
237 0           @seqs = ( $self->get_longest_query_seq );
238             }
239              
240 0           return Temporary->new( seqs => \@seqs );
241             }
242              
243              
244             sub _build_best_hits_by_ref_org {
245 0     0     my $self = shift;
246              
247             #### [ALI] Identifying best hits for queries in reference orgs...
248              
249 0           my $rp = $self->run_proc;
250 0   0       my $args = $rp->blast_args_for('references') // {};
251 0           $args->{-outfmt} = 6;
252             $args->{-max_target_seqs} = 10
253 0 0         unless defined $args->{-max_target_seqs};
254              
255             # tie might help making 42 completely deterministic
256 0           tie my %best_hits_for, 'Tie::IxHash';
257 0           tie my %avg_score_for, 'Tie::IxHash';
258              
259             ORG:
260 0           for my $ref_org ($rp->all_ref_orgs) {
261              
262 0           my $query_seqs = $self->query_seqs;
263 0           my $blastdb = $rp->ref_blastdb_for($ref_org);
264 0           my $parser = $blastdb->blastp($query_seqs, $args);
265             #### [ALI] BLASTP: $ref_org . q{ } . $parser->filename
266              
267             # collect best hit(s) for each query against ref proteome
268 0           my $curr_query = q{};
269 0           my $seen_qry_n = 0;
270 0           my $seen_hit_n = 0;
271 0           my $best_score = 0;
272 0           my $cmul_score = 0;
273              
274             # tie might help making 42 completely deterministic
275 0           tie my %hits, 'Tie::IxHash';
276 0           while ( my $hsp = $parser->next_hit ) {
277              
278             # fetch query and bitscore for current hit
279 0           my $query = $hsp->query_id;
280 0           my $score = $hsp->bit_score;
281              
282             # reset score if next query
283 0 0         if ($query ne $curr_query) {
284 0           $seen_qry_n++;
285 0           $curr_query = $query;
286 0           $best_score = 0;
287             }
288              
289             # include hit if within score tolerance
290             # Note: for consistency with coverage_mul the new lower score
291             # becomes the new 'best_score' (increasing tolerance thus)
292 0 0         if ($score >= $best_score * $rp->ref_score_mul) {
293 0           my $hit = $hsp->hit_id;
294             ######## [DEBUG] query: $query_seqs->long_id_for($query)
295             ######## [DEBUG] hit: $hit
296             ######## [DEBUG] score: $score
297 0           $hits{$hit}++;
298 0           $seen_hit_n++;
299 0           $best_score = $score;
300 0           $cmul_score += $score;
301             }
302             }
303              
304 0 0         $parser->remove unless $rp->debug_mode;
305              
306             # skip ref_orgs without best hits
307 0 0         unless ($cmul_score) {
308 0           carp "[ALI] Note: no best hit for: $ref_org;"
309             . ' removing it from ref_orgs.';
310 0           next ORG;
311             }
312              
313             # store average score for best hits in current ref proteome
314             # Note: divisor is set to number of best hits (plus unseen queries)
315 0           my $divisor = $seen_hit_n + $query_seqs->count_seqs - $seen_qry_n;
316 0           my $avg_score = $cmul_score / $divisor;
317 0           $avg_score_for{$ref_org} = $avg_score;
318              
319             # store best hit list for current ref proteome
320 0           my $best_hits = IdList->new( ids => [ keys %hits ] );
321 0           $best_hits_for{$ref_org} = $best_hits;
322             }
323             ######## [DEBUG] avg scores: display( pairmap { "$a => $b" } %avg_score_for )
324              
325 0           my $ref_org_n = ceil( $rp->ref_org_mul * $rp->all_ref_orgs );
326 0           my @ref_orgs = keys %avg_score_for;
327 0 0         if (@ref_orgs > $ref_org_n) {
328              
329             #### [ALI] Discarding non-best ref_orgs and keeping only: $ref_org_n
330             @ref_orgs = sort {
331 0           $avg_score_for{$b} <=> $avg_score_for{$a}
  0            
332             } @ref_orgs;
333 0           @ref_orgs = @ref_orgs[ 0 .. $ref_org_n - 1 ];
334              
335 0           %best_hits_for = map { $_ => $best_hits_for{$_} } @ref_orgs;
  0            
336             }
337              
338 0           return \%best_hits_for;
339             }
340              
341             ## use critic
342              
343              
344             sub get_longest_query_seq {
345 0     0 0   my $self = shift;
346              
347 0           my $max_seq;
348 0           my $max_len = 0;
349              
350             # loop through potential queries and select longest seq
351 0           for my $org ($self->all_ali_orgs) {
352              
353             SEQ:
354 0           for my $seq ( @{ $self->ali_seqs_for($org) } ) {
  0            
355              
356             # filter out doubtful seqs from each query_org
357 0 0         next SEQ if $seq->is_doubtful;
358              
359             # find longest seq using a manual yet fast approach
360 0           my $len = $seq->nomiss_seq_len;
361 0 0         if ($len > $max_len) {
362 0           $max_seq = $seq;
363 0           $max_len = $len;
364             }
365             }
366             }
367              
368 0           return $max_seq;
369             }
370              
371              
372             sub check_brh_among_best_hits {
373 0     0 0   my $self = shift;
374              
375             #### [ALI] Checking BRH among best hits in reference orgs...
376 0 0         if ($self->count_ref_orgs < 2) {
377 0           carp '[ALI] Warning: not enough ref_orgs for checking BRH'
378             . ' among best hits!';
379 0           return;
380             }
381              
382 0           my $rp = $self->run_proc;
383 0   0       my $args = $rp->blast_args_for('references') // {};
384 0           $args->{-outfmt} = 6;
385 0           $args->{-max_target_seqs} = 1;
386              
387 0           for my $ref_org1 ($self->all_ref_orgs) {
388              
389             # build query_seqs from best hits for current ref_org
390 0           my $query_seqs = Temporary->new(
391             seqs => $rp->ref_blastdb_for($ref_org1)->blastdbcmd(
392             [ $self->best_hits_for($ref_org1)->all_ids ]
393             )->seqs # TODO: improve this through coercion
394             );
395              
396 0           tie my %count_for, 'Tie::IxHash';
397              
398             ORG2:
399 0           for my $ref_org2 ($self->all_ref_orgs) {
400 0 0         next ORG2 if $ref_org2 eq $ref_org1;
401              
402 0           my $blastdb = $rp->ref_blastdb_for($ref_org2);
403 0           my $parser = $blastdb->blastp($query_seqs, $args);
404             #### [ALI] BLASTP: "$ref_org1 vs $ref_org2 " . $parser->filename
405              
406 0           my $best_hits = $self->best_hits_for($ref_org2);
407 0           while ( my $hsp = $parser->next_query ) {
408 0           my $candidate = $query_seqs->long_id_for( $hsp->query_id );
409 0           my $in_best_hits = $best_hits->is_listed( $hsp->hit_id );
410             ######## [DEBUG]: $candidate . ( $in_best_hits && ' [=BRH=]' )
411 0 0         $count_for{$candidate}++ if $in_best_hits;
412             }
413 0 0         $parser->remove unless $rp->debug_mode;
414             }
415              
416             ######## [DEBUG] BRH counts: display( pairmap { "$a => $b" } %count_for )
417              
418             # remove ref_orgs that completely failed BRH
419             # Note: this should not happen too often because of ref_org_mul
420 0 0         unless (keys %count_for) {
421 0           carp "[ALI] Note: failed BRH for: $ref_org1;"
422             . ' removing it from ref_orgs.';
423 0           $self->remove_ref_org($ref_org1);
424             # Note: the remaining ref_orgs (as 1) will not be tested vs. this
425             # ref_org (as 2) from now on; thus early Warnings only due to this
426             # ref_org (as 2) could have been avoided
427             }
428              
429             # check BRH with all *other* ref_orgs (hence - 1)
430 0           my $other_n = $self->all_ref_orgs - 1;
431             carp '[ALI] Warning: best hits not in BRH across all ref_orgs!'
432             unless List::AllUtils::all {
433 0     0     $count_for{$_} == $other_n
434 0 0         } keys %count_for
435             ;
436             }
437              
438 0           return;
439             }
440              
441              
442             sub merge_chunks {
443 0     0 0   my $self = shift;
444              
445             # fetch all new seqs having a 42 tail
446             # those are the aligned orthologous transcripts
447 0           my $tail_regex = qr{ (:? \.H\d+\.\d+ | \.E\.bf | \.E\.lc ) \b }xms;
448 0           my @new_seqs = grep { $_->full_id =~ $tail_regex } $self->all_new_seqs;
  0            
449              
450             # leave early if no new seqs
451 0 0         return unless @new_seqs;
452              
453             # partition transcripts by full_id ignoring 42 tails
454             my %new_seqs_by_acc = partition_by {
455 0     0     ( my $full_id = $_->full_id ) =~ s{$tail_regex}{}; $full_id
  0            
456 0           } @new_seqs;
457              
458             # separate single-chunk transcripts from multi-chunk transcripts
459             my ($singles, $multiples) = part {
460 0     0     @{ $new_seqs_by_acc{$_} } > 1
  0            
461 0           } sort keys %new_seqs_by_acc;
462             ####### [ALI] multi-chunk transcripts: display( @{$multiples} )
463              
464             # simply remove 42 tails from single-chunk transcripts
465             # Note: ->[0] is required because values are singleton ARRAYREFs
466             $new_seqs_by_acc{$_}->[0]->set_seq_id( SeqId->new( full_id => $_ ) )
467 0   0       for @{ $singles // [] };
  0            
468              
469             # leave early if no multi-chunk transcripts
470 0 0 0       return unless @{ $multiples // [] };
  0            
471              
472             # get alignment width and regex
473 0           my $width = $self->ali->width;
474 0           my $gap_regex = $self->ali->gapmiss_regex;
475              
476             # merge chunks for each transcript
477 0           for my $merged_id ( @{$multiples} ) {
  0            
478              
479             # order transcript chunks following BLAST hit and HSP ranks
480             # this will ensure that best-scoring hits/HSPs come first
481             my @chunks = sort {
482 0           ncmp( $a->full_id, $b->full_id )
483 0           } @{ $new_seqs_by_acc{$merged_id} };
  0            
484              
485 0           my $merged_seq;
486 0           my $gap = q{ };
487              
488             # take each state from first chunk where it is not missing/gap
489             # insert '*' if no chunk can provide it
490             # Note: we insert a blank at first site to trigger Seq->spacify
491 0           for my $site (0..$width-1) {
492 0     0     my $chunk = first { $_->state_at($site) !~ $gap_regex } @chunks;
  0            
493 0 0         $merged_seq .= $chunk ? $chunk->state_at($site) : $gap;
494 0           $gap = '*';
495             }
496              
497             # add merged_seq
498             $self->ali->add_seq(
499 0           Seq->new(
500             seq_id => $merged_id,
501             seq => $merged_seq
502             )
503             );
504              
505             # mark chunks for removal
506 0           $self->mark_seq_for_removal( map { $_->full_id } @chunks );
  0            
507             }
508              
509 0           return;
510             }
511              
512              
513             sub mark_redundant_seqs {
514 0     0 0   my $self = shift;
515              
516             # fetch all new seqs not yet marked for removal
517             my @new_seqs = grep {
518 0           !$self->marked_for_removal( $_->full_id )
  0            
519             } $self->all_new_seqs;
520              
521             # examine each new_seq for redundancy etc
522 0           for my $seq (@new_seqs) {
523              
524 0           my $id = $seq->full_id;
525 0           my $org = $seq->full_org;
526              
527             # remove seq if included in Ali for the org...
528 0 0   0     if ( List::AllUtils::any { $seq->is_subseq_of($_) }
  0 0          
    0          
529 0   0       @{ $self->ali_seqs_for($org) // [] } ) {
530             ####### [ALI] removed for redundancy within ALI: $id
531 0           $self->mark_seq_for_removal($id);
532             }
533             # ... or included in .non file for the org...
534 0     0     elsif (List::AllUtils::any { $seq->is_subseq_of($_) }
535 0   0       @{ $self->non_seqs_for($org) // [] } ) {
536             ####### [ALI] removed for inclusion in NON file: $id
537 0           $self->mark_seq_for_removal($id);
538             }
539             # ... or included in new_seqs for the org
540             # but not seq itself and not if already marked for removal
541             # to avoid both auto and mutual removal of identical new_seqs
542 0     0     elsif (List::AllUtils::any { $seq->is_subseq_of($_) }
543 0 0         grep { $id ne $_->full_id
544             && !$self->marked_for_removal( $_->full_id ) }
545 0   0       @{ $self->new_seqs_for($org) // [] } ) {
546             ####### [ALI] removed for redundancy with other #NEW# seqs: $id
547 0           $self->mark_seq_for_removal($id);
548             } # Note: it is important that marked_seqs... is constantly updated
549             }
550              
551 0           return;
552             }
553              
554              
555             sub mark_lengthened_seqs {
556 0     0 0   my $self = shift;
557              
558             # fetch all new seqs not yet marked for removal
559             my @new_seqs = grep {
560 0           !$self->marked_for_removal( $_->full_id )
  0            
561             } $self->all_new_seqs;
562              
563             # examine each new_seq for lengthening
564 0           for my $seq (@new_seqs) {
565 0           my $org = $seq->full_org;
566              
567             # identify pre-existing seqs included in a new_seq...
568 0           my @redundants = map { $_->full_id } # opposite to above...
569 0           grep { $_->is_subseq_of($seq) }
570 0           grep { !$self->marked_for_removal( $_->full_id ) }
571 0   0       @{ $self->ali_seqs_for($org) // [] }
  0            
572             ; # Note: it is important that marked_seqs... is constantly updated
573              
574             # ... and remove them
575             # Note: not sure that more than one per new_seq would be very common
576 0 0         if (@redundants) {
577             ####### [ALI] removed for redundancy by lengthening: join ', ', @redundants
578 0           $self->mark_seq_for_removal(@redundants);
579             }
580             }
581              
582 0           return;
583             }
584              
585              
586             sub reorder_new_seqs {
587 0     0 0   my $self = shift;
588              
589 0           my $ali = $self->ali;
590              
591             # partition Ali between old and new_seqs
592 0     0     my ($old_seqs, $new_seqs) = part { $_->is_new } $ali->all_seqs;
  0            
593              
594             # reorder new_seqs naturally on family/full_org/accession
595             # Note: this should ignore the c# tag during sorting
596 0   0       $ali->_set_seqs( [ @{ $old_seqs // [] }, sort {
597 0 0         ncmp( $a->family_then_full_org, $b->family_then_full_org )
598             ||
599             ncmp( $a->accession, $b->accession )
600 0   0       } @{ $new_seqs // [] } ] );
  0            
601              
602 0           return;
603             }
604              
605              
606             sub BUILD {
607 0     0 0   my $self = shift;
608              
609 0           my $rp = $self->run_proc;
610              
611 0           my $ali = $self->ali;
612             #### [ALI] #seqs: $ali->count_seqs
613              
614 0 0         unless ( $ali->count_seqs ) {
615             #### [ALI] empty file; skipping!
616 0           return;
617             }
618              
619             # default to clearing new tags from Ali
620             $ali->clear_new_tags
621 0 0         unless $rp->ali_keep_old_new_tags eq 'on';
622              
623             # check for optional use of .non and .para
624             # Note: lazy builders do not require it but this makes a better log
625 0           $self->count_non_orgs;
626 0           $self->para_blastdb;
627              
628             #### [ALI] queries: display( $self->query_seqs->all_long_ids )
629              
630 0 0         if ($rp->ref_brh eq 'on') {
631              
632             #### [ALI] reference orgs: display( $self->all_ref_orgs )
633              
634             #### [ALI] best hits: display( map { $_->all_ids } $self->all_best_hits )
635              
636 0           $self->check_brh_among_best_hits;
637             }
638              
639 0           for my $org ($rp->all_orgs) {
640              
641             #### [ALI] Processing ORG: $org->{org}
642 0           OrgProcessor->new( ali_proc => $self, %{$org} );
  0            
643             }
644              
645             # build ALI outfile honoring out_dir and out_suffix
646 0           my $outfile = insert_suffix($ali->filename, $rp->out_suffix);
647 0 0         $outfile = file( $rp->out_dir, basename($outfile) ) if $rp->out_dir;
648              
649 0 0         unless ( $rp->run_mode eq 'metagenomic' ) {
650              
651             #### [ALI] Making delayed indels...
652 0           $self->integrator->make_indels;
653              
654             #### [ALI] Merging sequence chunks...
655 0           $self->merge_chunks;
656              
657             #### [ALI] Removing redundant (and unwanted) seqs...
658 0           $self->mark_redundant_seqs;
659              
660             # TODO: write tests for this
661 0 0         if ($rp->ali_keep_lengthened_seqs eq 'off') {
662             #### [ALI] Removing lengthened seqs...
663 0           $self->mark_lengthened_seqs;
664             }
665              
666             #### [ALI] merged/redundant/unwanted/lengthened: display( $self->all_marked_seqs )
667 0           $ali->apply_list( $self->remove_marked_seqs($ali) );
668              
669             #### [ALI] Re-ordering remaining aligned seqs...
670 0           $self->reorder_new_seqs;
671              
672             #### [ALI] Writing updated file...
673 0           $ali->store( secure_outfile($outfile) ); # note the secure...
674             }
675              
676 0 0         if ($rp->tax_reports eq 'on') {
677             # TODO: consider moving this in TaxReport-like class
678              
679             #### [ALI] Writing taxonomic report...
680 0           my $tax_report = secure_outfile( # note the secure...
681             change_suffix($outfile, '.tax-report')
682             );
683              
684 0           open my $fh, '>', $tax_report;
685 0           say {$fh} '# ' . join "\t", NewSeq->heads;
  0            
686              
687             # print tax_lines for all new_seqs
688             my @new_seqs = $rp->run_mode eq 'phylogenomic'
689 0 0         ? map { $_->full_id } $self->all_new_seqs # phylogenomic mode
  0            
690             : nsort( $self->all_tax_line_ids ) # metagenomic mode
691             ;
692 0           say {$fh} join "\n", map { $_->stringify }
  0            
693 0   0       map { $self->tax_line_for($_) // () } @new_seqs;
  0            
694             } # Note: skip preexisting new_seqs for which there are no tax_lines
695              
696             #### [ALI] Done analyzing file...
697 0           return;
698             }
699              
700              
701             __PACKAGE__->meta->make_immutable;
702             1;
703              
704             __END__
705              
706             =pod
707              
708             =head1 NAME
709              
710             Bio::MUST::Apps::FortyTwo::AliProcessor - Internal class for forty-two tool
711              
712             =head1 VERSION
713              
714             version 0.210370
715              
716             =head1 AUTHOR
717              
718             Denis BAURAIN <denis.baurain@uliege.be>
719              
720             =head1 COPYRIGHT AND LICENSE
721              
722             This software is copyright (c) 2013 by University of Liege / Unit of Eukaryotic Phylogenomics / Denis BAURAIN.
723              
724             This is free software; you can redistribute it and/or modify it under
725             the same terms as the Perl 5 programming language system itself.
726              
727             =cut