line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::Roary::ExtractProteomeFromGFFs; |
2
|
|
|
|
|
|
|
$Bio::Roary::ExtractProteomeFromGFFs::VERSION = '3.11.0'; |
3
|
|
|
|
|
|
|
# ABSTRACT: Take in GFF files and create protein sequences in FASTA format |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
|
6
|
5
|
|
|
5
|
|
85023
|
use Moose; |
|
5
|
|
|
|
|
399175
|
|
|
5
|
|
|
|
|
27
|
|
7
|
5
|
|
|
5
|
|
28396
|
use Bio::Roary::Exceptions; |
|
5
|
|
|
|
|
11
|
|
|
5
|
|
|
|
|
108
|
|
8
|
5
|
|
|
5
|
|
1522
|
use Bio::Roary::ExtractProteomeFromGFF; |
|
5
|
|
|
|
|
17
|
|
|
5
|
|
|
|
|
216
|
|
9
|
5
|
|
|
5
|
|
44
|
use File::Basename; |
|
5
|
|
|
|
|
9
|
|
|
5
|
|
|
|
|
373
|
|
10
|
5
|
|
|
5
|
|
30
|
use Cwd qw(getcwd); |
|
5
|
|
|
|
|
10
|
|
|
5
|
|
|
|
|
187
|
|
11
|
5
|
|
|
5
|
|
26
|
use File::Temp; |
|
5
|
|
|
|
|
9
|
|
|
5
|
|
|
|
|
2085
|
|
12
|
|
|
|
|
|
|
with 'Bio::Roary::JobRunner::Role'; |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
# GFF input files to process. Required at construction time.
has gff_files => (
    is       => 'ro',
    isa      => 'ArrayRef',
    required => 1,
);

# Sorted list of the protein FASTA paths; computed on first access by
# _build_fasta_files from the values of fasta_files_to_gff_files.
has fasta_files => (
    is      => 'ro',
    isa     => 'ArrayRef',
    lazy    => 1,
    builder => '_build_fasta_files',
);

# Hash keyed by GFF filename whose values are the protein FASTA paths.
# Computed on first access by _build_fasta_files_to_gff_files, which is also
# the step that runs the extraction jobs.
has fasta_files_to_gff_files => (
    is      => 'ro',
    isa     => 'HashRef',
    lazy    => 1,
    builder => '_build_fasta_files_to_gff_files',
);

# Forwarded as --apply_unknowns_filter to extract_proteome_from_gff.
has apply_unknowns_filter => (
    is      => 'rw',
    isa     => 'Bool',
    default => 1,
);

# Forwarded as the 'queue' argument when the job runner is constructed.
has _queue => (
    is      => 'rw',
    isa     => 'Str',
    default => 'small',
);

# Forwarded as --translation_table to extract_proteome_from_gff.
has translation_table => (
    is      => 'rw',
    isa     => 'Int',
    default => 11,
);

# When true, a progress line is printed for every GFF file queued.
has verbose => (
    is      => 'rw',
    isa     => 'Bool',
    default => 0,
);

# Temporary directory created beneath the current working directory with
# CLEANUP => 1; forwarded as -d to extract_proteome_from_gff and used as the
# directory part of every FASTA path this class reports.
has working_directory => (
    is      => 'ro',
    isa     => 'File::Temp::Dir',
    default => sub {
        return File::Temp->newdir( DIR => getcwd(), CLEANUP => 1 );
    },
);
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
# Creates one Bio::Roary::ExtractProteomeFromGFF object per entry in
# gff_files.
#
# Returns: HashRef mapping each GFF filename (exactly as given in gff_files)
# to the extractor object constructed with that file as its gff_file.
#
# NOTE(review): no attribute visible in this file names this sub as its
# builder -- confirm whether anything still calls it.
sub _build__extract_proteome_objects {
    my $self = shift;

    my $extractor_for = {};
    foreach my $gff_file ( @{ $self->gff_files } ) {
        $extractor_for->{$gff_file} =
          Bio::Roary::ExtractProteomeFromGFF->new( gff_file => $gff_file );
    }

    return $extractor_for;
}
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
# Builder for the lazy 'fasta_files' attribute.
#
# Returns: ArrayRef holding the values of fasta_files_to_gff_files in
# default (string) sort order.
sub _build_fasta_files {
    my $self = shift;

    my $fasta_file_for = $self->fasta_files_to_gff_files;

    return [ sort( values %{$fasta_file_for} ) ];
}
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
# Builder for the lazy 'fasta_files_to_gff_files' attribute.
#
# Queues one `extract_proteome_from_gff` command per entry in gff_files,
# hands the whole batch to a new job runner object, and returns after its
# run() method has returned.
#
# Returns: HashRef keyed by the GFF filename exactly as supplied in
# gff_files; each value is the path
# "<working_directory>/<GFF basename>.proteome.faa" at which this code
# expects the matching protein FASTA file.
#
# NOTE(review): two GFF files that share a basename but live in different
# directories map to the same output path -- confirm callers never pass
# such input.
# NOTE(review): the GFF filename is interpolated into the command string
# unquoted; how the job runner executes that string is not visible here, so
# paths containing whitespace or shell metacharacters may need escaping.
sub _build_fasta_files_to_gff_files {
    my ($self) = @_;

    # Values that do not change between files are read once up front.
    my $output_suffix     = 'proteome.faa';
    my $output_directory  = $self->working_directory;
    my $translation_table = $self->translation_table;
    my $verbose           = $self->verbose;

    # A false Bool may be '' or undef; interpolating that would leave the
    # option with no value on the command line, so always emit 0 or 1.
    my $unknowns_filter_flag = $self->apply_unknowns_filter ? 1 : 0;

    my %fasta_files;
    my @commands_to_run;
    for my $filename ( @{ $self->gff_files } ) {
        print "Extracting proteins from $filename\n" if $verbose;

        # Only the basename is needed; the output is placed in the working
        # directory regardless of where the GFF file lives.
        my ($gff_filename_without_directory) = fileparse($filename);

        $fasta_files{$filename} =
            $output_directory . '/'
          . $gff_filename_without_directory . '.'
          . $output_suffix;

        push @commands_to_run,
            'extract_proteome_from_gff'
          . ' --translation_table ' . $translation_table
          . ' --apply_unknowns_filter ' . $unknowns_filter_flag
          . ' -d ' . $output_directory
          . " -o $output_suffix $filename";
    }

    # Farm out the computation and block until it is ready.
    my $job_runner_obj = $self->_job_runner_class->new(
        commands_to_run => \@commands_to_run,
        memory_in_mb    => $self->memory_in_mb,
        queue           => $self->_queue,
        cpus            => $self->cpus,
    );
    $job_runner_obj->run();

    return \%fasta_files;
}
64
|
|
|
|
|
|
|
|
65
|
5
|
|
|
5
|
|
35
|
no Moose; |
|
5
|
|
|
|
|
6
|
|
|
5
|
|
|
|
|
34
|
|
66
|
|
|
|
|
|
|
# Make this class's metaclass immutable now that all attributes and methods are defined.
__PACKAGE__->meta->make_immutable; |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
1; |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
__END__ |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
=pod |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
=encoding UTF-8 |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
=head1 NAME |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
Bio::Roary::ExtractProteomeFromGFFs - Take in GFF files and create protein sequences in FASTA format |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
=head1 VERSION |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
version 3.11.0 |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
=head1 SYNOPSIS |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
Take in GFF files and create protein sequences in FASTA format |
87
|
|
|
|
|
|
|
use Bio::Roary::ExtractProteomeFromGFFs; |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
my $plot_groups_obj = Bio::Roary::ExtractProteomeFromGFFs->new( |
90
|
|
|
|
|
|
|
gff_files => $gff_files, |
91
|
|
|
|
|
|
|
); |
92
|
|
|
|
|
|
|
$plot_groups_obj->fasta_files(); |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
=head1 AUTHOR |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
Andrew J. Page <ap13@sanger.ac.uk> |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
This software is Copyright (c) 2013 by Wellcome Trust Sanger Institute. |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
This is free software, licensed under: |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
The GNU General Public License, Version 3, June 2007 |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
=cut |