line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::MLST::OutputFasta; |
2
|
|
|
|
|
|
|
# ABSTRACT: Take in two hashes, both containing sequence names and sequences and output fasta files. |
3
|
|
|
|
|
|
|
$Bio::MLST::OutputFasta::VERSION = '2.1.1706216'; |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
|
6
|
10
|
|
|
10
|
|
103597
|
use Moose; |
|
10
|
|
|
|
|
311630
|
|
|
10
|
|
|
|
|
68
|
|
7
|
10
|
|
|
10
|
|
45327
|
use File::Basename; |
|
10
|
|
|
|
|
15
|
|
|
10
|
|
|
|
|
715
|
|
8
|
10
|
|
|
10
|
|
46
|
use File::Path qw(make_path); |
|
10
|
|
|
|
|
12
|
|
|
10
|
|
|
|
|
405
|
|
9
|
10
|
|
|
10
|
|
823
|
use Bio::PrimarySeq; |
|
10
|
|
|
|
|
39284
|
|
|
10
|
|
|
|
|
184
|
|
10
|
10
|
|
|
10
|
|
516
|
use Bio::SeqIO; |
|
10
|
|
|
|
|
20220
|
|
|
10
|
|
|
|
|
178
|
|
11
|
10
|
|
|
10
|
|
355
|
use Bio::MLST::Types; |
|
10
|
|
|
|
|
13
|
|
|
10
|
|
|
|
|
4494
|
|
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
# Input hashes of sequence name => sequence. Either may be undef
# (Maybe[HashRef]), but both must be passed to the constructor.
has [qw(matching_sequences non_matching_sequences)] => (
  is       => 'ro',
  isa      => 'Maybe[HashRef]',
  required => 1,
);

# Directory the FASTA files are written into; create_files() creates it.
has output_directory => (
  is       => 'ro',
  isa      => 'Str',
  required => 1,
);

# Input FASTA file; its base name is used to name the output files.
has input_fasta_file => (
  is       => 'ro',
  isa      => 'Bio::MLST::File',
  required => 1,
);

# Base name of input_fasta_file with the final extension removed (built lazily).
has _fasta_filename => (
  is      => 'ro',
  isa     => 'Str',
  lazy    => 1,
  builder => '_build__fasta_filename',
);

# Set by create_files(): all sequences joined in order of sorted sequence name.
has concat_sequence => (
  is  => 'rw',
  isa => 'Maybe[Str]',
);
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
# Builder for the lazy _fasta_filename attribute.
#
# Returns the base name of input_fasta_file with its directory part and its
# final extension (the last ".something") stripped off.
sub _build__fasta_filename
{
  my $self = shift;

  # fileparse() returns (basename, directory, suffix); only the first is wanted.
  my ($basename) = fileparse($self->input_fasta_file, qr/\.[^.]*$/);
  return $basename;
}
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
# Concatenate the sequences held in a hash reference into a single string.
#
# $combined_sequences - hash reference of name => sequence
#
# The names are sorted with Perl's default (string) sort and the matching
# sequences are joined in that order with no separator.
sub _sort_and_join_sequences
{
  my($self, $combined_sequences) = @_;

  return join(
    "",
    map { $combined_sequences->{$_} } sort keys %{$combined_sequences}
  );
}
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
# Create the output directory, record the concatenated sequence and write one
# FASTA file per usable non-matching sequence.
#
# Steps:
#   1. output_directory is created with make_path().
#   2. If either input hash has entries, the two are merged (a non-matching
#      entry replaces a matching entry of the same name), joined by
#      _sort_and_join_sequences() and stored in concat_sequence.
#   3. Each non-matching sequence that is at least 2 characters long and is
#      not made up solely of 'N' is written to
#      <output_directory>/<fasta filename>.unknown_allele.<sequence name>.fa
#
# Returns 1.
sub create_files
{
  my($self) = @_;
  make_path($self->output_directory);

  # Both attributes are Maybe[HashRef], so either may be undef. Normalise to an
  # empty hash up front: dereferencing undef dies under strict refs, which
  # previously happened when only non_matching_sequences was supplied.
  my $matching     = $self->matching_sequences     || {};
  my $non_matching = $self->non_matching_sequences || {};

  if(%{$matching} || %{$non_matching})
  {
    # Non-matching entries are listed last so they win on duplicate names.
    my %combined_sequences = (%{$matching}, %{$non_matching});
    $self->concat_sequence($self->_sort_and_join_sequences(\%combined_sequences));
  }

  if(%{$non_matching})
  {
    # The file name prefix is the same for every sequence; look it up once.
    my $fasta_filename = $self->_fasta_filename;

    # create 1 FASTA file for each unknown allele with a close match to another allele
    for my $sequence_name (keys %{$non_matching})
    {
      my $sequence = $non_matching->{$sequence_name};
      next if(!defined($sequence) || length($sequence) < 2);
      next if($self->_does_sequence_contain_all_unknowns($sequence));

      my $non_matching_output_filename = join('/',($self->output_directory, $fasta_filename.'.unknown_allele.'.$sequence_name.'.fa'));
      my $out = Bio::SeqIO->new(-file => "+>$non_matching_output_filename" , '-format' => 'Fasta');
      $out->write_seq(Bio::PrimarySeq->new(-seq => $sequence, -id => $sequence_name));
    }
  }

  return 1;
}
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
# Report whether a sequence is made up of nothing but the character 'N'.
#
# $sequence - sequence string to inspect
#
# Returns 1 when the sequence is one or more upper-case 'N' characters and 0
# for anything else, including the empty string.
sub _does_sequence_contain_all_unknowns
{
  my($self, $sequence) = @_;

  return ($sequence =~ m/^N+$/) ? 1 : 0;
}
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
|
82
|
10
|
|
|
10
|
|
49
|
no Moose; |
|
10
|
|
|
|
|
10
|
|
|
10
|
|
|
|
|
57
|
|
83
|
|
|
|
|
|
|
__PACKAGE__->meta->make_immutable; |
84
|
|
|
|
|
|
|
1; |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
__END__ |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
=pod |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
=encoding UTF-8 |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
=head1 NAME |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
Bio::MLST::OutputFasta - Take in two hashes, both containing sequence names and sequences and output fasta files. |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
=head1 VERSION |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
version 2.1.1706216 |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=head1 SYNOPSIS |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
Take in two hashes, both containing sequence names and sequences and output fasta files. |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
use Bio::MLST::OutputFasta; |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
my $output_fasta = Bio::MLST::OutputFasta->new( |
107
|
|
|
|
|
|
|
matching_sequences => \%matching_sequences, |
108
|
|
|
|
|
|
|
non_matching_sequences => \%non_matching_sequences, |
109
|
|
|
|
|
|
|
output_directory => '/path/to/output', |
110
|
|
|
|
|
|
|
input_fasta_file => '/path/to/fasta' |
111
|
|
|
|
|
|
|
); |
112
|
|
|
|
|
|
|
$output_fasta->create_files(); |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=head1 METHODS |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
=head2 create_files |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
Create output fasta files. |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=head1 AUTHOR |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
Andrew J. Page <ap13@sanger.ac.uk> |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
This software is Copyright (c) 2012 by Wellcome Trust Sanger Institute. |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
This is free software, licensed under: |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
The GNU General Public License, Version 3, June 2007 |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
=cut |