line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::MLST::CDC::Convert; |
2
|
|
|
|
|
|
|
# ABSTRACT: Take in a fasta file of emmST sequences and convert it into an MLST format |
3
|
|
|
|
|
|
|
$Bio::MLST::CDC::Convert::VERSION = '2.1.1630910'; |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
|
6
|
3
|
|
|
3
|
|
4833
|
use Moose; |
|
3
|
|
|
|
|
4
|
|
|
3
|
|
|
|
|
24
|
|
7
|
3
|
|
|
3
|
|
16078
|
use File::Basename; |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
245
|
|
8
|
3
|
|
|
3
|
|
18
|
use File::Path qw(make_path); |
|
3
|
|
|
|
|
3
|
|
|
3
|
|
|
|
|
131
|
|
9
|
3
|
|
|
3
|
|
15
|
use Bio::PrimarySeq; |
|
3
|
|
|
|
|
3
|
|
|
3
|
|
|
|
|
69
|
|
10
|
3
|
|
|
3
|
|
13
|
use Bio::SeqIO; |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
53
|
|
11
|
3
|
|
|
3
|
|
11
|
use Bio::MLST::Types; |
|
3
|
|
|
|
|
3
|
|
|
3
|
|
|
|
|
56
|
|
12
|
3
|
|
|
3
|
|
14
|
use Text::CSV; |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
31
|
|
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
with 'Bio::MLST::Download::Downloadable'; |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
has 'species' => ( is => 'ro', isa => 'Str', required => 1 ); |
17
|
|
|
|
|
|
|
has 'input_file' => ( is => 'ro', isa => 'Str', required => 1 ); |
18
|
|
|
|
|
|
|
has 'gene_name' => ( is => 'ro', isa => 'Str', required => 1 ); |
19
|
|
|
|
|
|
|
has 'base_directory' => ( is => 'ro', isa => 'Str', required => 1 ); |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
has 'destination_directory' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build_destination_directory' ); |
22
|
|
|
|
|
|
|
has '_output_allele_filename' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build__output_allele_filename' ); |
23
|
|
|
|
|
|
|
has '_output_profile_filename' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build__output_profile_filename' ); |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
sub _build__output_allele_filename |
26
|
|
|
|
|
|
|
{ |
27
|
2
|
|
|
2
|
|
4
|
my ($self) = @_; |
28
|
2
|
|
|
|
|
54
|
join('/',($self->destination_directory, 'alleles',$self->gene_name.'.tfa')); |
29
|
|
|
|
|
|
|
} |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
sub _build__output_profile_filename |
32
|
|
|
|
|
|
|
{ |
33
|
2
|
|
|
2
|
|
6
|
my ($self) = @_; |
34
|
2
|
|
|
|
|
84
|
join('/',($self->destination_directory, 'profiles',$self->_sub_directory.'.txt')); |
35
|
|
|
|
|
|
|
} |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
sub _build_destination_directory |
38
|
|
|
|
|
|
|
{ |
39
|
2
|
|
|
2
|
|
6
|
my ($self) = @_; |
40
|
2
|
|
|
|
|
50
|
my $destination_directory = join('/',($self->base_directory,$self->_sub_directory)); |
41
|
2
|
|
|
|
|
390
|
make_path($destination_directory); |
42
|
2
|
|
|
|
|
292
|
make_path(join('/',($destination_directory,'alleles'))); |
43
|
2
|
|
|
|
|
250
|
make_path(join('/',($destination_directory,'profiles'))); |
44
|
2
|
|
|
|
|
64
|
return $destination_directory; |
45
|
|
|
|
|
|
|
} |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
sub _sub_directory |
48
|
|
|
|
|
|
|
{ |
49
|
4
|
|
|
4
|
|
6
|
my ($self) = @_; |
50
|
4
|
|
|
|
|
108
|
my $combined_name = join('_',($self->species)); |
51
|
4
|
|
|
|
|
16
|
$combined_name =~ s!\.$!!gi; |
52
|
4
|
|
|
|
|
26
|
$combined_name =~ s!\W!_!gi; |
53
|
4
|
|
|
|
|
80
|
return $combined_name; |
54
|
|
|
|
|
|
|
} |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
sub create_mlst_files |
58
|
|
|
|
|
|
|
{ |
59
|
2
|
|
|
2
|
1
|
4
|
my ($self) = @_; |
60
|
|
|
|
|
|
|
|
61
|
2
|
|
|
|
|
66
|
$self->_download_file($self->input_file,$self->destination_directory); |
62
|
|
|
|
|
|
|
|
63
|
2
|
|
|
|
|
70
|
my $fasta_obj = Bio::SeqIO->new( -file => join('/',($self->destination_directory, $self->_get_filename_from_url($self->input_file))) , -format => 'Fasta'); |
64
|
2
|
|
|
|
|
7850
|
my $out_fasta_obj = Bio::SeqIO->new(-file => "+>".$self->_output_allele_filename , -format => 'Fasta'); |
65
|
|
|
|
|
|
|
|
66
|
2
|
|
|
|
|
1022
|
my @sequence_names; |
67
|
2
|
|
|
|
|
2
|
my $counter = 1; |
68
|
2
|
|
|
|
|
6
|
while(my $seq = $fasta_obj->next_seq()) |
69
|
|
|
|
|
|
|
{ |
70
|
14
|
|
|
|
|
2620
|
my $normalised_name = $self->gene_name."-".$counter; |
71
|
14
|
|
|
|
|
48
|
push(@sequence_names,[$seq->id,$counter]); |
72
|
14
|
|
|
|
|
200
|
$seq->id($normalised_name); |
73
|
|
|
|
|
|
|
|
74
|
14
|
|
|
|
|
140
|
$out_fasta_obj->write_seq($seq); |
75
|
14
|
|
|
|
|
2474
|
$counter++; |
76
|
|
|
|
|
|
|
} |
77
|
|
|
|
|
|
|
|
78
|
2
|
|
|
|
|
86
|
$self->_create_profile(\@sequence_names); |
79
|
2
|
|
|
|
|
248
|
return $self; |
80
|
|
|
|
|
|
|
} |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
sub _create_profile |
83
|
|
|
|
|
|
|
{ |
84
|
2
|
|
|
2
|
|
4
|
my ($self,$sequence_names) = @_; |
85
|
2
|
50
|
|
|
|
74
|
open(my $profile, '+>', $self->_output_profile_filename ) or die 'Couldnt open output profile file'; |
86
|
|
|
|
|
|
|
|
87
|
2
|
|
|
|
|
36
|
my $csv_out = Text::CSV->new({binary=>1, always_quote=>1, sep_char=>"\t", eol=>"\n"}); |
88
|
2
|
|
|
|
|
292
|
$csv_out->print($profile,['ST',$self->gene_name]); |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
|
91
|
2
|
|
|
|
|
338
|
for my $sequence_type_details (@{$sequence_names}) |
|
2
|
|
|
|
|
8
|
|
92
|
|
|
|
|
|
|
{ |
93
|
14
|
|
|
|
|
604
|
$csv_out->print($profile,$sequence_type_details); |
94
|
|
|
|
|
|
|
} |
95
|
|
|
|
|
|
|
} |
96
|
|
|
|
|
|
|
|
97
|
3
|
|
|
3
|
|
1737
|
no Moose; |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
17
|
|
98
|
|
|
|
|
|
|
__PACKAGE__->meta->make_immutable; |
99
|
|
|
|
|
|
|
1; |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
__END__ |
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
=pod |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
=encoding UTF-8 |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
=head1 NAME |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
Bio::MLST::CDC::Convert - Take in a fasta file of emmST sequences and convert it into an MLST format |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
=head1 VERSION |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
version 2.1.1630910 |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=head1 SYNOPSIS |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
Take in a fasta file of emmST sequences and convert it into an MLST format, producing an allele file, and a profile. |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
use Bio::MLST::CDC::Convert; |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
my $convert_fasta = Bio::MLST::CDC::Convert->new( |
122
|
|
|
|
|
|
|
species => 'Streptococcus pyogenes emmST', |
123
|
|
|
|
|
|
|
input_file => 't/data/CDC_emmST_partial.tfa', |
124
|
|
|
|
|
|
|
gene_name => 'emmST', |
125
|
|
|
|
|
|
|
base_directory => '/path/to/output/dir' |
126
|
|
|
|
|
|
|
); |
127
|
|
|
|
|
|
|
$convert_fasta->create_mlst_files(); |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
=head1 METHODS |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
=head2 create_mlst_files |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
Create an allele file and a profile, in the MLST directory structure. |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
=head1 AUTHOR |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
Andrew J. Page <ap13@sanger.ac.uk> |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
This software is Copyright (c) 2012 by Wellcome Trust Sanger Institute. |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
This is free software, licensed under: |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
The GNU General Public License, Version 3, June 2007 |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=cut |