File Coverage

blib/lib/App/Fasops/Common.pm

Criterion	Covered	Total	%
statement	573	723	79.2
branch	138	206	66.9
condition	21	37	56.7
subroutine	51	54	94.4
pod	3	33	9.0
total	786	1053	74.6

line	stmt	bran	cond	sub	pod	time	code
1							package App::Fasops::Common;
2	26			26		403532	use strict;
	26					122
	26					802
3	26			26		131	use warnings;
	26					57
	26					689
4	26			26		3567	use autodie;
	26					108957
	26					234
5
6	26			26		150671	use 5.018001;
	26					420
7
8	26			26		173	use Carp qw();
	26					48
	26					543
9	26			26		13644	use File::ShareDir qw();
	26					556903
	26					734
10	26			26		3729	use IO::Zlib;
	26					471868
	26					214
11	26			26		17957	use IPC::Cmd qw();
	26					1258267
	26					838
12	26			26		236	use List::Util;
	26					55
	26					1712
13	26			26		5925	use Path::Tiny qw();
	26					76902
	26					556
14	26			26		3600	use Tie::IxHash;
	26					20183
	26					726
15	26			26		3346	use YAML::Syck qw();
	26					13433
	26					491
16
17	26			26		3910	use AlignDB::IntSpan;
	26					50261
	26					602
18	26			26		3354	use App::RL::Common;
	26					142078
	26					227553
19
20							sub mean (@) {
21	24			24	0	47	@_ = grep { defined $_ } @_;
	59					153
22	24	50				63	return 0 unless @_;
23	24	100				62	return $_[0] unless @_ > 1;
24	12					58	return List::Util::sum(@_) / scalar(@_);
25							}
26
27							sub any (&@) {
28	39914			39914	0	55957	my $f = shift;
29	39914					58283	for (@_) {
30	376683	100				502101	return 1 if $f->();
31							}
32	38277					122199	return 0;
33							}
34
35							sub all (&@) {
36	42656			42656	0	58793	my $f = shift;
37	42656					61443	for (@_) {
38	385793	100				557204	return 0 unless $f->();
39							}
40	41127					75021	return 1;
41							}
42
43							sub uniq (@) {
44	2426			2426	0	3319	my %seen = ();
45	2426					3250	my $k;
46							my $seen_undef;
47	2426	50				3450	return grep { defined $_ ? not $seen{ $k = $_ }++ : not $seen_undef++ } @_;
	13816					31957
48							}
49
50							sub firstidx (&@) {
51	12			12	0	17	my $f = shift;
52	12					19	for my $i ( 0 .. $#_ ) {
53	19					28	local *_ = \$_[$i];
54	19	100				36	return $i if $f->();
55							}
56	0					0	return -1;
57							}
58
59							sub read_replaces {
60	3			3	0	5	my $file = shift;
61
62	3					20	tie my %replace, "Tie::IxHash";
63	3					43	my @lines = Path::Tiny::path($file)->lines( { chomp => 1 } );
64	3					590	for (@lines) {
65	5					51	my @fields = split /\t/;
66	5	50				14	if ( @fields >= 1 ) {
67	5					7	my $ori = shift @fields;
68	5					25	$replace{$ori} = [@fields];
69							}
70							}
71
72	3					43	return \%replace;
73							}
74
75							sub parse_block {
76	115			115	0	706	my $block = shift;
77	115					204	my $want_full = shift;
78
79	115					2739	my @lines = grep {/\S/} split /\n/, $block;
	1098					2548
80	115	50				389	Carp::croak "Numbers of headers not equal to seqs\n" if @lines % 2;
81
82	115					773	tie my %info_of, "Tie::IxHash";
83	115					2062	while (@lines) {
84	549					7143	my $header = shift @lines;
85	549					2141	$header =~ s/^\>//;
86	549					1030	chomp $header;
87
88	549					819	my $seq = shift @lines;
89	549					762	chomp $seq;
90
91	549					1296	my $info_ref = App::RL::Common::decode_header($header);
92	549					84509	$info_ref->{seq} = $seq;
93	549	100	66			7881	if ( $want_full or !defined $info_ref->{name} ) {
94	376					853	my $ess_header = App::RL::Common::encode_header( $info_ref, 1 );
95	376					28660	$info_of{$ess_header} = $info_ref;
96							}
97							else {
98	173					1383	$info_of{ $info_ref->{name} } = $info_ref;
99							}
100							}
101
102	115					2182	return \%info_of;
103							}
104
105							sub parse_block_array {
106	10			10	0	19	my $block = shift;
107
108	10					64	my @lines = grep {/\S/} split /\n/, $block;
	78					172
109	10	50				33	Carp::croak "Numbers of headers not equal to seqs\n" if @lines % 2;
110
111	10					19	my $info_ary = [];
112	10					23	while (@lines) {
113	39					75	my $header = shift @lines;
114	39					126	$header =~ s/^\>//;
115	39					60	chomp $header;
116
117	39					51	my $seq = shift @lines;
118	39					44	chomp $seq;
119
120	39					85	my $info_ref = App::RL::Common::decode_header($header);
121	39					5561	$info_ref->{seq} = $seq;
122
123	39					415	push @{$info_ary}, $info_ref;
	39					105
124							}
125
126	10					25	return $info_ary;
127							}
128
129							sub parse_block_header {
130	9			9	0	17	my $block = shift;
131
132	9					40	my @lines = grep {/\S/} split /\n/, $block;
	72					135
133	9	50				23	Carp::croak "Numbers of headers not equal to seqs\n" if @lines % 2;
134
135	9					36	tie my %info_of, "Tie::IxHash";
136	9					113	while (@lines) {
137	36					319	my $header = shift @lines;
138	36					111	$header =~ s/^\>//;
139	36					95	chomp $header;
140
141	36					46	my $seq = shift @lines;
142	36					37	chomp $seq;
143
144	36					72	my $info_ref = App::RL::Common::decode_header($header);
145	36					4820	my $ess_header = App::RL::Common::encode_header( $info_ref, 1 );
146	36					2231	$info_ref->{seq} = $seq;
147	36					489	$info_of{$ess_header} = $info_ref;
148							}
149
150	9					111	return \%info_of;
151							}
152
153							sub parse_axt_block {
154	6			6	0	14	my $block = shift;
155	6					9	my $length_of = shift;
156
157	6					33	my @lines = grep {/\S/} split /\n/, $block;
	18					58
158	6	50				16	Carp::croak "A block of axt should contain three lines\n" if @lines != 3;
159
160	6					43	my (undef, $first_chr, $first_start, $first_end, $second_chr,
161							$second_start, $second_end, $query_strand, undef,
162							) = split /\s+/, $lines[0];
163
164	6	100				20	if ( $query_strand eq "-" ) {
165	3	100	66			15	if ( defined $length_of and ref $length_of eq "HASH" ) {
166	1	50				8	if ( exists $length_of->{$second_chr} ) {
167	1					4	$second_start = $length_of->{$second_chr} - $second_start + 1;
168	1					4	$second_end = $length_of->{$second_chr} - $second_end + 1;
169	1					3	( $second_start, $second_end ) = ( $second_end, $second_start );
170							}
171							}
172							}
173
174	6					59	my $info_refs = [
175							{ name => "target",
176							chr => $first_chr,
177							start => $first_start,
178							end => $first_end,
179							strand => "+",
180							seq => $lines[1],
181							},
182							{ name => "query",
183							chr => $second_chr,
184							start => $second_start,
185							end => $second_end,
186							strand => $query_strand,
187							seq => $lines[2],
188							},
189							];
190
191	6					21	return $info_refs;
192							}
193
194							sub parse_maf_block {
195	4			4	0	18	my $block = shift;
196
197	4					22	my @lines = grep {/\S/} split /\n/, $block;
	16					49
198	4	50				12	Carp::croak "A block of maf should contain s lines\n" unless @lines > 0;
199
200	4					26	tie my %info_of, "Tie::IxHash";
201
202	4					67	for my $sline (@lines) {
203	16					274	my ( undef, $src, $start, $size, $strand, $srcSize, $text ) = split /\s+/, $sline;
204
205	16					41	my ( $species, $chr_name ) = split /\./, $src;
206	16	50				37	$chr_name = $species if !defined $chr_name;
207
208							# adjust coordinates to be one-based inclusive
209	16					30	$start = $start + 1;
210
211	16					27	my $end = $start + $size - 1;
212
213	16	100				32	if ( $strand eq "-" ) {
214	4					12	$start = $srcSize - $start + 1;
215	4					6	$end = $srcSize - $end + 1;
216	4					16	( $start, $end ) = ( $end, $start );
217							}
218
219	16					97	$info_of{$species} = {
220							name => $species,
221							chr => $chr_name,
222							start => $start,
223							end => $end,
224							strand => $strand,
225							seq => $text,
226							};
227							}
228
229	4					63	return \%info_of;
230							}
231
232							sub revcom {
233	10			10	0	2168	my $seq = shift;
234
235	10					39	$seq =~ tr/ACGTMRWSYKVHDBNacgtmrwsykvhdbn-/TGCAKYWSRMBDHVNtgcakyswrmbdhvn-/;
236	10					19	my $seq_rc = reverse $seq;
237
238	10					28	return $seq_rc;
239							}
240
241							sub seq_length {
242	15			15	0	2125	my $seq = shift;
243
244	15					36	my $gaps = $seq =~ tr/-/-/;
245
246	15					68	return length($seq) - $gaps;
247							}
248
249							#@returns AlignDB::IntSpan
250							sub indel_intspan {
251	208			208	0	4867	my $seq = shift;
252
253	208					566	my $intspan = AlignDB::IntSpan->new();
254	208					2028	my $length = length($seq);
255
256	208					299	my $offset = 0;
257	208					282	my $start = 0;
258	208					317	my $end = 0;
259	208					407	for my $pos ( 1 .. $length ) {
260	78177					103587	my $base = substr( $seq, $pos - 1, 1 );
261	78177	100				113751	if ( $base eq '-' ) {
262	2271	100				3672	if ( $offset == 0 ) {
263	536					689	$start = $pos;
264							}
265	2271					2988	$offset++;
266							}
267							else {
268	75906	100				115090	if ( $offset != 0 ) {
269	524					687	$end = $pos - 1;
270	524					1171	$intspan->add_pair( $start, $end );
271							}
272	75906					135229	$offset = 0;
273							}
274							}
275	208	100				423	if ( $offset != 0 ) {
276	12					37	$end = $length;
277	12					39	$intspan->add_pair( $start, $end );
278							}
279
280	208					1189	return $intspan;
281							}
282
283							#@returns AlignDB::IntSpan
284							sub seq_intspan {
285	24			24	0	11074	my $seq = shift;
286
287	24					103	my $intspan = AlignDB::IntSpan->new();
288	24					297	my $length = length($seq);
289	24					99	$intspan->add_pair( 1, $length );
290
291	24					1460	$intspan->subtract( indel_intspan($seq) );
292
293	24					20487	return $intspan;
294							}
295
296							sub align_seqs {
297	0			0	0	0	my $seq_refs = shift;
298	0		0			0	my $aln_bin = shift // "mafft";
299
300							# get executable
301	0					0	my $bin;
302
303	0	0				0	if ( $aln_bin =~ /clus/i ) {
		0
		0
304	0					0	$aln_bin = 'clustalw';
305	0					0	for my $e (qw{clustalw clustal-w clustalw2}) {
306	0	0				0	if ( IPC::Cmd::can_run($e) ) {
307	0					0	$bin = $e;
308	0					0	last;
309							}
310							}
311							}
312							elsif ( $aln_bin =~ /musc/i ) {
313	0					0	$aln_bin = 'muscle';
314	0					0	for my $e (qw{muscle}) {
315	0	0				0	if ( IPC::Cmd::can_run($e) ) {
316	0					0	$bin = $e;
317	0					0	last;
318							}
319							}
320							}
321							elsif ( $aln_bin =~ /maff/i ) {
322	0					0	$aln_bin = 'mafft';
323	0					0	for my $e (qw{mafft}) {
324	0	0				0	if ( IPC::Cmd::can_run($e) ) {
325	0					0	$bin = $e;
326	0					0	last;
327							}
328							}
329							}
330
331	0	0				0	if ( !defined $bin ) {
332	0					0	Carp::confess "Could not find the executable for $aln_bin\n";
333							}
334
335							# temp in and out
336							#@type Path::Tiny
337	0					0	my $temp_in = Path::Tiny->tempfile("seq_in_XXXXXXXX");
338
339							#@type Path::Tiny
340	0					0	my $temp_out = Path::Tiny->tempfile("seq_out_XXXXXXXX");
341
342							# msa may change the order of sequences
343	0					0	my @indexes = 0 .. scalar( @{$seq_refs} - 1 );
	0					0
344							{
345	0					0	my $fh = $temp_in->openw();
	0					0
346	0					0	for my $i (@indexes) {
347	0					0	printf {$fh} ">seq_%d\n", $i;
	0					0
348	0					0	printf {$fh} "%s\n", $seq_refs->[$i];
	0					0
349							}
350	0					0	close $fh;
351							}
352
353	0					0	my @args;
354	0	0				0	if ( $aln_bin eq "clustalw" ) {
		0
		0
355	0					0	push @args, "-align -type=dna -output=fasta -outorder=input -quiet";
356	0					0	push @args, "-infile=" . $temp_in->absolute->stringify;
357	0					0	push @args, "-outfile=" . $temp_out->absolute->stringify;
358							}
359							elsif ( $aln_bin eq "muscle" ) {
360	0					0	push @args, "-quiet";
361	0					0	push @args, "-in " . $temp_in->absolute->stringify;
362	0					0	push @args, "-out " . $temp_out->absolute->stringify;
363							}
364							elsif ( $aln_bin eq "mafft" ) {
365	0					0	push @args, "--quiet";
366	0					0	push @args, "--auto";
367	0					0	push @args, $temp_in->absolute->stringify;
368	0					0	push @args, "> " . $temp_out->absolute->stringify;
369							}
370
371	0					0	my $cmd_line = join " ", ( $bin, @args );
372	0					0	my $ok = IPC::Cmd::run( command => $cmd_line );
373
374	0	0				0	if ( !$ok ) {
375	0					0	Carp::confess("$aln_bin call failed\n");
376							}
377
378	0					0	my @aligned;
379	0					0	my $seq_of = read_fasta( $temp_out->absolute->stringify );
380	0					0	for my $i (@indexes) {
381	0					0	push @aligned, $seq_of->{ "seq_" . $i };
382							}
383
384							# delete .dnd files created by clustalw
385							#printf STDERR "%s\n", $temp_in->absolute->stringify;
386	0	0				0	if ( $aln_bin eq "clustalw" ) {
387	0					0	my $dnd = $temp_in->absolute->stringify . ".dnd";
388	0					0	path($dnd)->remove;
389							}
390
391	0					0	undef $temp_in;
392	0					0	undef $temp_out;
393
394	0					0	return \@aligned;
395							}
396
397							sub align_seqs_quick {
398	0			0	0	0	my $seq_refs = shift;
399	0					0	my $aln_prog = shift;
400	0					0	my $indel_pad = shift;
401	0					0	my $indel_fill = shift;
402
403	0	0				0	if ( !defined $aln_prog ) {
404	0					0	$aln_prog = "mafft";
405							}
406	0	0				0	if ( !defined $indel_pad ) {
407	0					0	$indel_pad = 50;
408							}
409	0	0				0	if ( !defined $indel_fill ) {
410	0					0	$indel_fill = 50;
411							}
412
413	0					0	my @aligned = @{$seq_refs};
	0					0
414	0					0	my $seq_count = scalar @aligned;
415
416							# all indel regions
417	0					0	my $realign_region = AlignDB::IntSpan->new();
418	0					0	for my $seq (@aligned) {
419	0					0	my $indel_intspan = indel_intspan($seq);
420	0					0	$indel_intspan = $indel_intspan->pad($indel_pad);
421	0					0	$realign_region->merge($indel_intspan);
422							}
423
424							# join adjacent realign regions
425	0					0	$realign_region = $realign_region->fill($indel_fill);
426
427							# realign all segments in realign_region
428	0					0	for my $span ( reverse $realign_region->spans ) {
429	0					0	my $seg_start = $span->[0];
430	0					0	my $seg_end = $span->[1];
431
432	0					0	my @segments;
433	0					0	for my $i ( 0 .. $seq_count - 1 ) {
434	0					0	my $seg = substr( $aligned[$i], $seg_start - 1, $seg_end - $seg_start + 1 );
435	0					0	push @segments, $seg;
436							}
437	0					0	my $realigned_segments = align_seqs( \@segments );
438
439	0					0	for my $i ( 0 .. $seq_count - 1 ) {
440	0					0	my $seg = $realigned_segments->[$i];
441	0					0	$seg = uc $seg;
442	0					0	substr( $aligned[$i], $seg_start - 1, $seg_end - $seg_start + 1, $seg );
443							}
444							}
445
446	0					0	return \@aligned;
447							}
448
449							#----------------------------#
450							# trim pure dash regions
451							#----------------------------#
452							sub trim_pure_dash {
453	10			10	0	1774	my $seq_refs = shift;
454
455	10					17	my $seq_count = @{$seq_refs};
	10					20
456	10					21	my $seq_length = length $seq_refs->[0];
457
458	10	50				23	return unless $seq_length;
459
460	10					61	my $trim_region = AlignDB::IntSpan->new();
461
462	10					124	for my $pos ( 1 .. $seq_length ) {
463	528					886	my @bases;
464	528					753	for my $i ( 0 .. $seq_count - 1 ) {
465	2062					2473	my $base = substr( $seq_refs->[$i], $pos - 1, 1 );
466	2062					2893	push @bases, $base;
467							}
468
469	528	100		544		1114	if ( all { $_ eq '-' } @bases ) {
	544					1673
470	4					12	$trim_region->add($pos);
471							}
472							}
473
474	10					47	for my $span ( reverse $trim_region->spans ) {
475	3					53	my $seg_start = $span->[0];
476	3					4	my $seg_end = $span->[1];
477
478	3					6	for my $i ( 0 .. $seq_count - 1 ) {
479	9					20	substr( $seq_refs->[$i], $seg_start - 1, $seg_end - $seg_start + 1, "" );
480							}
481							}
482
483	10					237	return;
484							}
485
486							#----------------------------#
487							# trim outgroup only sequence
488							#----------------------------#
489							# if intersect is superset of union
490							# T G----C
491							# Q G----C
492							# O GAAAAC
493							sub trim_outgroup {
494	11			11	0	11411	my $seq_refs = shift;
495
496	11					20	my $seq_count = scalar @{$seq_refs};
	11					21
497
498	11	50				29	Carp::confess "Need three or more sequences\n" if $seq_count < 3;
499
500							# Don't expand indel set here. Last seq is outgroup
501	11					20	my @indel_intspans;
502	11					25	for my $i ( 0 .. $seq_count - 2 ) {
503	22					49	my $indel_intspan = indel_intspan( $seq_refs->[$i] );
504	22					39	push @indel_intspans, $indel_intspan;
505							}
506
507							# find trim_region
508	11					27	my $union_set = AlignDB::IntSpan::union(@indel_intspans);
509	11					2074	my $intersect_set = AlignDB::IntSpan::intersect(@indel_intspans);
510
511	11					3850	my $trim_region = AlignDB::IntSpan->new();
512	11					114	for my $span ( $union_set->runlists ) {
513	23	100				2952	if ( $intersect_set->superset($span) ) {
514	15					5351	$trim_region->add($span);
515							}
516							}
517
518							# trim all segments in trim_region
519	11					1796	for my $span ( reverse $trim_region->spans ) {
520	13					207	my $seg_start = $span->[0];
521	13					20	my $seg_end = $span->[1];
522
523	13					25	for my $i ( 0 .. $seq_count - 1 ) {
524	39					85	substr( $seq_refs->[$i], $seg_start - 1, $seg_end - $seg_start + 1, "" );
525							}
526							}
527
528	11					128	return;
529							}
530
531							#----------------------------#
532							# record complex indels and ingroup indels
533							#----------------------------#
534							# All ingroup intersect sets are parts of complex indels after trim_outgroup()
535							# intersect 4-6
536							# T GGA--C
537							# Q G----C
538							# O GGAGAC
539							# result, complex_region 2-3
540							# T GGAC
541							# Q G--C
542							# O GGAC
543							sub trim_complex_indel {
544	6			6	0	21	my $seq_refs = shift;
545
546	6					10	my $seq_count = scalar @{$seq_refs};
	6					9
547	6					16	my @ingroup_idx = ( 0 .. $seq_count - 2 );
548
549	6	50				15	Carp::confess "Need three or more sequences\n" if $seq_count < 3;
550
551							# Don't expand indel set here. Last seq is outgroup
552	6					10	my @indel_intspans;
553	6					8	for my $i (@ingroup_idx) {
554	12					21	my $indel_intspan = indel_intspan( $seq_refs->[$i] );
555	12					22	push @indel_intspans, $indel_intspan;
556							}
557	6					15	my $outgroup_indel_intspan = indel_intspan( $seq_refs->[ $seq_count - 1 ] );
558
559							# find trim_region
560	6					13	my $union_set = AlignDB::IntSpan::union(@indel_intspans);
561	6					619	my $intersect_set = AlignDB::IntSpan::intersect(@indel_intspans);
562
563	6					1287	my $complex_region = AlignDB::IntSpan->new();
564	6					59	for ( reverse $intersect_set->spans ) {
565	3					60	my $seg_start = $_->[0];
566	3					4	my $seg_end = $_->[1];
567
568							# trim sequence, including outgroup
569	3					8	for my $i ( 0 .. $seq_count - 1 ) {
570	9					20	substr( $seq_refs->[$i], $seg_start - 1, $seg_end - $seg_start + 1, "" );
571							}
572
573							# add to complex_region
574	3					7	for my $span ( $union_set->runlists ) {
575	4					203	my $sub_union_set = AlignDB::IntSpan->new($span);
576	4	100				395	if ( $sub_union_set->superset("$seg_start-$seg_end") ) {
577	3					1010	$complex_region->merge($sub_union_set);
578							}
579							}
580
581							# modify all related set
582	3					518	$union_set = $union_set->banish_span( $seg_start, $seg_end );
583	3					488	for (@ingroup_idx) {
584	6					350	$indel_intspans[$_]
585							= $indel_intspans[$_]->banish_span( $seg_start, $seg_end );
586							}
587	3					212	$outgroup_indel_intspan->banish_span( $seg_start, $seg_end );
588	3					338	$complex_region = $complex_region->banish_span( $seg_start, $seg_end );
589							}
590
591							# add ingroup-outgroup complex indels to complex_region
592	6					385	for my $i (@ingroup_idx) {
593	12					1261	my $outgroup_intersect_intspan = $outgroup_indel_intspan->intersect( $indel_intspans[$i] );
594	12					2507	for my $sub_out_set ( $outgroup_intersect_intspan->sets ) {
595	4					510	for my $sub_union_set ( $union_set->sets ) {
596
597							# union_set > intersect_set
598	5	50				921	if ( $sub_union_set->larger_than($sub_out_set) ) {
599	0					0	$complex_region->merge($sub_union_set);
600							}
601							}
602							}
603							}
604
605	6					97	return $complex_region->runlist;
606							}
607
608							# read normal fasta files
609							sub read_fasta {
610	1			1	0	565	my $infile = shift;
611
612	1					4	my ( @names, %seqs );
613
614	1					11	my $in_fh = IO::Zlib->new( $infile, "rb" );
615
616	1					2003	my $cur_name;
617	1					6	while ( my $line = $in_fh->getline ) {
618	8					1205	chomp $line;
619	8	50	33			42	if ( $line eq '' or substr( $line, 0, 1 ) eq " " ) {
		50
		100
620	0					0	next;
621							}
622							elsif ( substr( $line, 0, 1 ) eq "#" ) {
623	0					0	next;
624							}
625							elsif ( substr( $line, 0, 1 ) eq ">" ) {
626	4					12	($cur_name) = split /\s+/, $line;
627	4					16	$cur_name =~ s/^>//;
628	4					10	push @names, $cur_name;
629	4					15	$seqs{$cur_name} = '';
630							}
631							else {
632	4					16	$seqs{$cur_name} .= $line;
633							}
634							}
635
636	1					301	$in_fh->close;
637
638	1					194	tie my %seq_of, "Tie::IxHash";
639	1					30	for my $name (@names) {
640	4					62	$seq_of{$name} = $seqs{$name};
641							}
642	1					21	return \%seq_of;
643							}
644
645							sub calc_gc_ratio {
646	13			13	0	5515	my $seq_refs = shift;
647
648	13					20	my $seq_count = scalar @{$seq_refs};
	13					27
649
650	13					20	my @ratios;
651	13					47	for my $i ( 0 .. $seq_count - 1 ) {
652
653							# Count all four bases
654	17					32	my $a_count = $seq_refs->[$i] =~ tr/Aa/Aa/;
655	17					23	my $g_count = $seq_refs->[$i] =~ tr/Gg/Gg/;
656	17					30	my $c_count = $seq_refs->[$i] =~ tr/Cc/Cc/;
657	17					21	my $t_count = $seq_refs->[$i] =~ tr/Tt/Tt/;
658
659	17					27	my $four_count = $a_count + $g_count + $c_count + $t_count;
660	17					23	my $gc_count = $g_count + $c_count;
661
662	17	50				33	if ( $four_count == 0 ) {
663	0					0	next;
664							}
665							else {
666	17					27	my $gc_ratio = $gc_count / $four_count;
667	17					39	push @ratios, $gc_ratio;
668							}
669							}
670
671	13					33	return mean(@ratios);
672							}
673
674							sub pair_D {
675	60			60	0	361	my $seq_refs = shift;
676
677	60					68	my $seq_count = scalar @{$seq_refs};
	60					86
678	60	50				118	if ( $seq_count != 2 ) {
679	0					0	Carp::confess "Need two sequences\n";
680							}
681
682	60					95	my $legnth = length $seq_refs->[0];
683
684	60					108	my ( $comparable_bases, $differences ) = (0) x 2;
685
686	60					109	for my $pos ( 1 .. $legnth ) {
687	4943					7113	my $base0 = substr $seq_refs->[0], $pos - 1, 1;
688	4943					6671	my $base1 = substr $seq_refs->[1], $pos - 1, 1;
689
690	4943	100	100			15460	if ( $base0 =~ /[atcg]/i
691							&& $base1 =~ /[atcg]/i )
692							{
693	4858					5808	$comparable_bases++;
694	4858	100				8038	if ( $base0 ne $base1 ) {
695	915					1279	$differences++;
696							}
697							}
698							}
699
700	60	50				108	if ( $comparable_bases == 0 ) {
701	0					0	Carp::carp "comparable_bases == 0\n";
702	0					0	return 0;
703							}
704							else {
705	60					153	return $differences / $comparable_bases;
706							}
707							}
708
709							# Split D value to D1 (substitutions in first_seq), D2( substitutions in second_seq) and Dcomplex
710							# (substitutions can't be referred)
711							sub ref_pair_D {
712	7			7	0	2742	my $seq_refs = shift; # first, second, outgroup
713
714	7					25	my $seq_count = scalar @{$seq_refs};
	7					16
715	7	50				18	if ( $seq_count != 3 ) {
716	0					0	Carp::confess "Need three sequences\n";
717							}
718
719	7					15	my ( $d1, $d2, $dc ) = (0) x 3;
720	7					12	my $length = length $seq_refs->[0];
721
722	7	50				16	return ( $d1, $d2, $dc ) if $length == 0;
723
724	7					15	for my $pos ( 1 .. $length ) {
725	70					106	my $base0 = substr $seq_refs->[0], $pos - 1, 1;
726	70					91	my $base1 = substr $seq_refs->[1], $pos - 1, 1;
727	70					106	my $base_og = substr $seq_refs->[2], $pos - 1, 1;
728	70	100				128	if ( $base0 ne $base1 ) {
729	8	100	33			59	if ( $base0 =~ /[atcg]/i
			66
730							&& $base1 =~ /[atcg]/i
731							&& $base_og =~ /[atcg]/i )
732							{
733	7	100				17	if ( $base1 eq $base_og ) {
		100
734	3					7	$d1++;
735							}
736							elsif ( $base0 eq $base_og ) {
737	3					5	$d2++;
738							}
739							else {
740	1					3	$dc++;
741							}
742							}
743							else {
744	1					2	$dc++;
745							}
746							}
747							}
748
749	7					14	for ( $d1, $d2, $dc ) {
750	21					30	$_ /= $length;
751							}
752
753	7					22	return ( $d1, $d2, $dc );
754							}
755
756							sub multi_seq_stat {
757	11			11	0	1290	my $seq_refs = shift;
758
759	11					17	my $seq_count = scalar @{$seq_refs};
	11					18
760	11	50				28	if ( $seq_count < 2 ) {
761	0					0	Carp::confess "Need two or more sequences\n";
762							}
763
764	11					25	my $legnth = length $seq_refs->[0];
765
766							# For every positions, search for polymorphism_site
767	11					30	my ( $comparable_bases, $identities, $differences, $gaps, $ns, $align_errors, ) = (0) x 6;
768	11					28	for my $pos ( 1 .. $legnth ) {
769	746					1083	my @bases = ();
770	746					1238	for my $i ( 0 .. $seq_count - 1 ) {
771	2645					3815	my $base = substr( $seq_refs->[$i], $pos - 1, 1 );
772	2645					4159	push @bases, $base;
773							}
774	746					1126	@bases = uniq(@bases);
775
776	746	100		991		2055	if ( all { $_ =~ /[agct]/i } @bases ) {
	991	100				3180
777	725					932	$comparable_bases++;
778	725	100		936		1778	if ( all { $_ eq $bases[0] } @bases ) {
	936					2326
779	514					1278	$identities++;
780							}
781							else {
782	211					557	$differences++;
783							}
784							}
785	41			41		113	elsif ( any { $_ eq '-' } @bases ) {
786	20					53	$gaps++;
787							}
788							else {
789	1					3	$ns++;
790							}
791							}
792	11	50				28	if ( $comparable_bases == 0 ) {
793	0					0	print YAML::Syck::Dump { seqs => $seq_refs, };
794	0					0	Carp::carp "number_of_comparable_bases == 0!!\n";
795	0					0	return [ $legnth, $comparable_bases, $identities, $differences,
796							$gaps, $ns, $legnth, undef, ];
797							}
798
799	11					28	my @all_Ds;
800	11					32	for ( my $i = 0; $i < $seq_count; $i++ ) {
801	35					90	for ( my $j = $i + 1; $j < $seq_count; $j++ ) {
802	42					114	my $D = pair_D( [ $seq_refs->[$i], $seq_refs->[$j] ] );
803	42					133	push @all_Ds, $D;
804							}
805							}
806
807	11					29	my $D = mean(@all_Ds);
808
809	11					49	return [ $legnth, $comparable_bases, $identities, $differences,
810							$gaps, $ns, $align_errors, $D, ];
811							}
812
813							sub get_snps {
814	40			40	0	3042	my $seq_refs = shift;
815
816	40					83	my $align_length = length $seq_refs->[0];
817	40					64	my $seq_count = scalar @{$seq_refs};
	40					70
818
819							# SNPs
820	40					73	my $snp_bases_of = {};
821	40					89	for my $pos ( 1 .. $align_length ) {
822	40657					52216	my @bases;
823	40657					67806	for my $i ( 0 .. $seq_count - 1 ) {
824	388137					530368	my $base = substr( $seq_refs->[$i], $pos - 1, 1 );
825	388137					574835	push @bases, $base;
826							}
827
828	40657	100		383322		99280	if ( all { $_ =~ /[agct]/i } @bases ) {
	383322					981508
829	39884	100		376606		98363	if ( any { $_ ne $bases[0] } @bases ) {
	376606					762196
830	1617					8068	$snp_bases_of->{$pos} = \@bases;
831							}
832							}
833							}
834
835	40					68	my @sites;
836	40					77	for my $pos ( sort { $a <=> $b } keys %{$snp_bases_of} ) {
	9823					12115
	40					674
837
838	1617					2250	my @bases = @{ $snp_bases_of->{$pos} };
	1617					4148
839
840	1617					2225	my $target_base = $bases[0];
841	1617					2941	my $all_bases = join '', @bases;
842
843	1617					2207	my $query_base;
844							my $mutant_to;
845	1617					1942	my $snp_freq = 0;
846	1617					1933	my $snp_occured;
847	1617					3877	my @class = uniq( sort @bases );
848	1617	50				3513	if ( scalar @class < 2 ) {
		100
849	0					0	Carp::confess "no snp\n";
850							}
851							elsif ( scalar @class > 2 ) {
852	84					122	$snp_freq = -1;
853	84					111	$snp_occured = 'unknown';
854	84					118	$query_base = "";
855	84					115	$mutant_to = "";
856							}
857							else {
858	1533					2420	for (@bases) {
859	10462	100				15292	if ( $target_base ne $_ ) {
860	3619					4350	$snp_freq++;
861	3619					4861	$snp_occured .= '0';
862							}
863							else {
864	6843					9358	$snp_occured .= '1';
865							}
866							}
867	1533					2149	($query_base) = grep { $_ ne $target_base } @bases;
	10462					16272
868	1533					2577	$mutant_to = $target_base . '<->' . $query_base;
869							}
870
871							# here freq is the minor allele freq
872	1617					3022	$snp_freq = List::Util::min( $snp_freq, $seq_count - $snp_freq );
873
874	1617					8704	push @sites,
875							{
876							snp_pos => $pos,
877							snp_target_base => $target_base,
878							snp_query_base => $query_base,
879							snp_all_bases => $all_bases,
880							snp_mutant_to => $mutant_to,
881							snp_freq => $snp_freq,
882							snp_occured => $snp_occured,
883							};
884							}
885
886	40					1119	return \@sites;
887							}
888
889							sub polarize_snp {
890	13			13	0	67	my $sites = shift;
891	13					25	my $outgroup_seq = shift;
892
893	13					24	for my $site ( @{$sites} ) {
	13					25
894	195					360	my $outgroup_base = substr $outgroup_seq, $site->{snp_pos} - 1, 1;
895
896	195					400	my @nts = split '', $site->{snp_all_bases};
897	195					254	my @class;
898	195					278	for my $nt (@nts) {
899	582					737	my $class_bool = 0;
900	582					821	for (@class) {
901	571	100				1070	if ( $_ eq $nt ) { $class_bool = 1; }
	188					275
902							}
903	582	100				969	unless ($class_bool) {
904	394					694	push @class, $nt;
905							}
906							}
907
908	195					286	my ( $mutant_to, $snp_freq, $snp_occured );
909
910	195	50				391	if ( scalar @class < 2 ) {
		100
911	0					0	Carp::confess "Not a real SNP\n";
912							}
913							elsif ( scalar @class == 2 ) {
914	191					271	for my $nt (@nts) {
915	570	100				914	if ( $nt eq $outgroup_base ) {
916	255					372	$snp_occured .= '0';
917							}
918							else {
919	315					419	$snp_occured .= '1';
920	315					387	$snp_freq++;
921	315					506	$mutant_to = "$outgroup_base->$nt";
922							}
923							}
924							}
925							else {
926	4					7	$snp_freq = -1;
927	4					7	$mutant_to = 'Complex';
928	4					8	$snp_occured = 'unknown';
929							}
930
931							# outgroup_base is not equal to any nts
932	195	100				416	if ( $snp_occured eq ( '1' x ( length $snp_occured ) ) ) {
933	28					41	$snp_freq = -1;
934	28					40	$mutant_to = 'Complex';
935	28					35	$snp_occured = 'unknown';
936							}
937
938	195					455	$site->{snp_outgroup_base} = $outgroup_base;
939	195					278	$site->{snp_mutant_to} = $mutant_to;
940	195					241	$site->{snp_freq} = $snp_freq;
941	195					397	$site->{snp_occured} = $snp_occured;
942							}
943
944	13					33	return $sites;
945							}
946
947							sub get_indels {
948	27			27	0	2076	my $seq_refs = shift;
949
950	27					35	my $seq_count = scalar @{$seq_refs};
	27					55
951
952	27					165	my $indel_set = AlignDB::IntSpan->new();
953	27					389	for my $i ( 0 .. $seq_count - 1 ) {
954	99					4789	my $seq_indel_set = indel_intspan( $seq_refs->[$i] );
955	99					258	$indel_set->merge($seq_indel_set);
956							}
957
958	27					3862	my @sites;
959
960							# indels
961	27					76	for my $cur_indel ( $indel_set->spans ) {
962	52					462	my ( $indel_start, $indel_end ) = @{$cur_indel};
	52					101
963	52					84	my $indel_length = $indel_end - $indel_start + 1;
964
965	52					75	my @indel_seqs;
966	52					67	for my $seq ( @{$seq_refs} ) {
	52					89
967	194					375	push @indel_seqs, ( substr $seq, $indel_start - 1, $indel_length );
968							}
969	52					120	my $indel_all_seqs = join "\|", @indel_seqs;
970
971	52					78	my $indel_type;
972	52					106	my @uniq_indel_seqs = uniq(@indel_seqs);
973
974							# seqs with least '-' char wins
975	118					237	my ($indel_seq) = map { $_->[0] }
976	80					183	sort { $a->[1] <=> $b->[1] }
977	52					101	map { [ $_, tr/-/-/ ] } @uniq_indel_seqs;
	118					332
978
979	52	50				205	if ( scalar @uniq_indel_seqs < 2 ) {
		100
		50
980	0					0	Carp::confess "no indel!\n";
981	0					0	next;
982							}
983							elsif ( scalar @uniq_indel_seqs > 2 ) {
984	14					30	$indel_type = 'C';
985							}
986							elsif ( $indel_seq =~ /-/ ) {
987	0					0	$indel_type = 'C';
988							}
989							else {
990							# 'D': means deletion relative to target/first seq
991							# target is ----
992							# 'I': means insertion relative to target/first seq
993							# target is AAAA
994	38	100				84	if ( $indel_seqs[0] eq $indel_seq ) {
995	27					46	$indel_type = 'I';
996							}
997							else {
998	11					26	$indel_type = 'D';
999							}
1000							}
1001
1002	52					136	my $indel_freq = 0;
1003	52					71	my $indel_occured;
1004	52	100				99	if ( $indel_type eq 'C' ) {
1005	14					22	$indel_freq = -1;
1006	14					26	$indel_occured = 'unknown';
1007							}
1008							else {
1009	38					65	for (@indel_seqs) {
1010
1011							# same as target '1', not '0'
1012	138	100				231	if ( $indel_seqs[0] eq $_ ) {
1013	84					111	$indel_freq++;
1014	84					134	$indel_occured .= '1';
1015							}
1016							else {
1017	54					90	$indel_occured .= '0';
1018							}
1019							}
1020							}
1021
1022							# here freq is the minor allele freq
1023	52					114	$indel_freq = List::Util::min( $indel_freq, $seq_count - $indel_freq );
1024
1025	52					368	push @sites,
1026							{
1027							indel_start => $indel_start,
1028							indel_end => $indel_end,
1029							indel_length => $indel_length,
1030							indel_seq => $indel_seq,
1031							indel_all_seqs => $indel_all_seqs,
1032							indel_freq => $indel_freq,
1033							indel_occured => $indel_occured,
1034							indel_type => $indel_type,
1035							};
1036							}
1037
1038	27					347	return \@sites;
1039							}
1040
1041							sub polarize_indel {
1042	3			3	0	9	my $sites = shift;
1043	3					7	my $outgroup_seq = shift;
1044
1045	3					9	my $outgroup_indel_set = indel_intspan($outgroup_seq);
1046
1047	3					7	for my $site ( @{$sites} ) {
	3					8
1048	5					18	my @indel_seqs = split /\\|/, $site->{indel_all_seqs};
1049
1050							my $indel_outgroup_seq = substr $outgroup_seq, $site->{indel_start} - 1,
1051	5					27	$site->{indel_length};
1052
1053	5					12	my ( $indel_type, $indel_occured, $indel_freq );
1054
1055							my $indel_set
1056	5					14	= AlignDB::IntSpan->new()->add_pair( $site->{indel_start}, $site->{indel_end} );
1057
1058							# this line is different to previous subroutines
1059	5					326	my @uniq_indel_seqs = uniq( @indel_seqs, $indel_outgroup_seq );
1060
1061							# seqs with least '-' char wins
1062	11					21	my ($indel_seq) = map { $_->[0] }
1063	7					21	sort { $a->[1] <=> $b->[1] }
1064	5					14	map { [ $_, tr/-/-/ ] } @uniq_indel_seqs;
	11					33
1065
1066	5	50				31	if ( scalar @uniq_indel_seqs < 2 ) {
		100
		50
1067	0					0	Carp::confess "no indel!\n";
1068							}
1069							elsif ( scalar @uniq_indel_seqs > 2 ) {
1070	1					3	$indel_type = 'C';
1071							}
1072							elsif ( $indel_seq =~ /-/ ) {
1073	0					0	$indel_type = 'C';
1074							}
1075							else {
1076
1077	4	50	66			54	if ( ( $indel_outgroup_seq !~ /\-/ )
		100
		50
1078							and ( $indel_seq ne $indel_outgroup_seq ) )
1079							{
1080
1081							# this section should already be judged in previes
1082							# uniq_indel_seqs section, but I keep it here for safe
1083
1084							# reference indel content does not contain '-' and is not equal
1085							# to the one of alignment
1086							# AAA
1087							# A-A
1088							# ref ACA
1089	0					0	$indel_type = 'C';
1090							}
1091							elsif ( $outgroup_indel_set->intersect($indel_set)->is_not_empty ) {
1092	3					1101	my $island = $outgroup_indel_set->find_islands($indel_set);
1093	3	100				6759	if ( $island->equal($indel_set) ) {
1094
1095							# NNNN
1096							# N--N
1097							# ref N--N
1098	2					159	$indel_type = 'I';
1099							}
1100							else {
1101							# reference indel island is larger or smaller
1102							# NNNN
1103							# N-NN
1104							# ref N--N
1105							# or
1106							# NNNN
1107							# N--N
1108							# ref N-NN
1109	1					70	$indel_type = 'C';
1110							}
1111							}
1112							elsif ( $outgroup_indel_set->intersect($indel_set)->is_empty ) {
1113
1114							# NNNN
1115							# N--N
1116							# ref NNNN
1117	1					774	$indel_type = 'D';
1118							}
1119							else {
1120	0					0	Carp::confess "Errors when polarizing indel!\n";
1121							}
1122							}
1123
1124	5	100				19	if ( $indel_type eq 'C' ) {
1125	2					5	$indel_occured = 'unknown';
1126	2					4	$indel_freq = -1;
1127							}
1128							else {
1129	3					9	for my $seq (@indel_seqs) {
1130	8	100				19	if ( $seq eq $indel_outgroup_seq ) {
1131	3					9	$indel_occured .= '0';
1132							}
1133							else {
1134	5					11	$indel_occured .= '1';
1135	5					11	$indel_freq++;
1136							}
1137							}
1138							}
1139
1140	5					15	$site->{indel_outgroup_seq} = $indel_outgroup_seq;
1141	5					11	$site->{indel_type} = $indel_type;
1142	5					9	$site->{indel_occured} = $indel_occured;
1143	5					18	$site->{indel_freq} = $indel_freq;
1144							}
1145
1146	3					13	return $sites;
1147							}
1148
1149							sub chr_to_align {
1150	8			8	1	75	my AlignDB::IntSpan $intspan = shift;
1151	8					15	my $pos = shift;
1152
1153	8		50			21	my $chr_start = shift \|\| 1;
1154	8		50			20	my $chr_strand = shift \|\| "+";
1155
1156	8					25	my $chr_end = $chr_start + $intspan->size - 1;
1157
1158	8	50	33			627	if ( $pos < $chr_start \|\| $pos > $chr_end ) {
1159	0					0	Carp::confess "[$pos] out of ranges [$chr_start,$chr_end]\n";
1160							}
1161
1162	8					11	my $align_pos;
1163	8	100				21	if ( $chr_strand eq "+" ) {
1164	5					9	$align_pos = $pos - $chr_start + 1;
1165	5					18	$align_pos = $intspan->at($align_pos);
1166							}
1167							else {
1168	3					9	$align_pos = $pos - $chr_start + 1;
1169	3					8	$align_pos = $intspan->at( -$align_pos );
1170							}
1171
1172	8					816	return $align_pos;
1173							}
1174
1175							sub align_to_chr {
1176	734			734	1	7773	my AlignDB::IntSpan $intspan = shift;
1177	734					974	my $pos = shift;
1178
1179	734		50			1586	my $chr_start = shift \|\| 1;
1180	734		50			4719	my $chr_strand = shift \|\| "+";
1181
1182	734					4958	my $chr_end = $chr_start + $intspan->size - 1;
1183
1184	734	50				101997	if ( $pos < 1 ) {
1185	0					0	Carp::confess "align pos out of ranges\n";
1186							}
1187
1188	734					960	my $chr_pos;
1189	734	100				1561	if ( $intspan->contains($pos) ) {
		100
		100
1190	728					34618	$chr_pos = $intspan->index($pos);
1191							}
1192							elsif ( $pos < $intspan->min ) {
1193	2					117	$chr_pos = 1;
1194							}
1195							elsif ( $pos > $intspan->max ) {
1196	2					167	$chr_pos = $intspan->size;
1197							}
1198							else {
1199							# pin to left base
1200	2					145	my @spans = $intspan->spans;
1201	2					54	for my $i ( 0 .. $#spans ) {
1202	4	100				10	if ( $spans[$i]->[1] < $pos ) {
1203	2					4	next;
1204							}
1205							else {
1206	2					6	$pos = $spans[ $i - 1 ]->[1];
1207	2					4	last;
1208							}
1209							}
1210	2					6	$chr_pos = $intspan->index($pos);
1211							}
1212
1213	734	100				81440	if ( $chr_strand eq "+" ) {
1214	727					1081	$chr_pos = $chr_pos + $chr_start - 1;
1215							}
1216							else {
1217	7					12	$chr_pos = $chr_end - $chr_pos + 1;
1218							}
1219
1220	734					1443	return $chr_pos;
1221							}
1222
1223							sub calc_ld {
1224	9			9	1	5275	my $strA = shift;
1225	9					16	my $strB = shift;
1226
1227	9					15	my $size = length $strA;
1228
1229	9	50				21	if ( $size != length $strB ) {
1230	0					0	Carp::confess "Lengths not equal for [$strA] and [$strA]\n";
1231	0					0	return;
1232							}
1233
1234	9					18	for ( $strA, $strB ) {
1235	18	50				54	if (/[^10]/) {
1236	0					0	Carp::confess "[$_] contains illegal chars\n";
1237	0					0	return;
1238							}
1239							}
1240
1241	9					17	my $A_count = $strA =~ tr/1/1/;
1242	9					20	my $fA = $A_count / $size;
1243	9					15	my $fa = 1 - $fA;
1244
1245	9					13	my $B_count = $strB =~ tr/1/1/;
1246	9					15	my $fB = $B_count / $size;
1247	9					12	my $fb = 1 - $fB;
1248
1249	9	50		36		48	if ( any { $_ == 0 } ( $fA, $fa, $fB, $fb ) ) {
	36					82
1250	0					0	return ( undef, undef );
1251							}
1252
1253							# '1' in strA and '1' in strB as fAB
1254	9					28	my ( $AB_count, $fAB ) = ( 0, 0 );
1255	9					18	for my $i ( 1 .. $size ) {
1256	70					99	my $ichar = substr $strA, $i - 1, 1;
1257	70					94	my $schar = substr $strB, $i - 1, 1;
1258	70	100	100			181	if ( $ichar eq '1' and $schar eq '1' ) {
1259	23					33	$AB_count++;
1260							}
1261							}
1262	9					14	$fAB = $AB_count / $size;
1263
1264	9					16	my $DAB = $fAB - $fA * $fB;
1265
1266	9					13	my ( $r, $dprime );
1267	9					18	$r = $DAB / sqrt( $fA * $fa * $fB * $fb );
1268
1269	9	100				17	if ( $DAB < 0 ) {
1270	1					4	$dprime = $DAB / List::Util::min( $fA * $fB, $fa * $fb );
1271							}
1272							else {
1273	8					24	$dprime = $DAB / List::Util::min( $fA * $fb, $fa * $fB );
1274							}
1275
1276	9					28	return ( $r, $dprime );
1277							}
1278
1279							sub poa_consensus {
1280	0			0	0		my $seq_refs = shift;
1281
1282	0						my $aln_prog = "poa";
1283
1284							# temp in and out
1285	0						my $temp_in = Path::Tiny->tempfile("seq_in_XXXXXXXX");
1286	0						my $temp_out = Path::Tiny->tempfile("seq_out_XXXXXXXX");
1287
1288							# msa may change the order of sequences
1289	0						my @indexes = 0 .. scalar( @{$seq_refs} - 1 );
	0
1290							{
1291	0						my $fh = $temp_in->openw();
	0
1292	0						for my $i (@indexes) {
1293	0						printf {$fh} ">seq_%d\n", $i;
	0
1294	0						printf {$fh} "%s\n", $seq_refs->[$i];
	0
1295							}
1296	0						close $fh;
1297							}
1298
1299	0						my @args;
1300							{
1301	0						push @args, "-hb";
	0
1302	0						push @args, "-read_fasta " . $temp_in->absolute->stringify;
1303	0						push @args, "-clustal " . $temp_out->absolute->stringify;
1304	0						push @args, File::ShareDir::dist_file( 'App-Fasops', 'poa-blosum80.mat' );
1305							}
1306
1307	0						my $cmd_line = join " ", ( $aln_prog, @args );
1308	0						my $ok = IPC::Cmd::run( command => $cmd_line );
1309
1310	0	0					if ( !$ok ) {
1311	0						Carp::confess("Calling [$aln_prog] failed\n");
1312							}
1313
1314	0						my $consensus = join "", grep {/^CONSENS0/} $temp_out->lines( { chomp => 1, } );
	0
1315	0						$consensus =~ s/CONSENS0//g;
1316	0						$consensus =~ s/\s//g;
1317	0						$consensus =~ s/-//g;
1318
1319	0						return $consensus;
1320							}
1321
1322							1;
1323
1324							__END__