line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::Roary::ChunkFastaFile; |
2
|
|
|
|
|
|
|
$Bio::Roary::ChunkFastaFile::VERSION = '3.11.0'; |
3
|
|
|
|
|
|
|
# ABSTRACT: Take in a FASTA file and chunk it up into smaller pieces. |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
|
6
|
4
|
|
|
4
|
|
95703
|
use Moose; |
|
4
|
|
|
|
|
423564
|
|
|
4
|
|
|
|
|
29
|
|
7
|
4
|
|
|
4
|
|
25528
|
use Bio::SeqIO; |
|
4
|
|
|
|
|
163029
|
|
|
4
|
|
|
|
|
123
|
|
8
|
4
|
|
|
4
|
|
361
|
use Bio::Roary::Exceptions; |
|
4
|
|
|
|
|
6
|
|
|
4
|
|
|
|
|
74
|
|
9
|
4
|
|
|
4
|
|
19
|
use Cwd; |
|
4
|
|
|
|
|
7
|
|
|
4
|
|
|
|
|
210
|
|
10
|
4
|
|
|
4
|
|
20
|
use File::Temp; |
|
4
|
|
|
|
|
6
|
|
|
4
|
|
|
|
|
1531
|
|
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
has 'fasta_file' => ( is => 'ro', isa => 'Str', required => 1 ); |
13
|
|
|
|
|
|
|
has 'target_chunk_size' => ( is => 'ro', isa => 'Int', default => 200000 ); |
14
|
|
|
|
|
|
|
has 'sequence_file_names' => ( is => 'ro', isa => 'ArrayRef', lazy => 1, builder => '_build_sequence_file_names' ); |
15
|
|
|
|
|
|
|
has '_working_directory' => |
16
|
|
|
|
|
|
|
( is => 'ro', isa => 'File::Temp::Dir', default => sub { File::Temp->newdir( DIR => getcwd, CLEANUP => 1 ); } ); |
17
|
|
|
|
|
|
|
has '_working_directory_name' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build__working_directory_name' ); |
18
|
|
|
|
|
|
|
has '_input_seqio' => ( is => 'ro', isa => 'Bio::SeqIO', lazy => 1, builder => '_build__input_seqio' ); |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
sub _build__working_directory_name { |
21
|
5
|
|
|
5
|
|
13
|
my ($self) = @_; |
22
|
5
|
|
|
|
|
130
|
return $self->_working_directory->dirname(); |
23
|
|
|
|
|
|
|
} |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
sub _build__input_seqio { |
26
|
5
|
|
|
5
|
|
13
|
my ($self) = @_; |
27
|
5
|
|
|
|
|
132
|
return Bio::SeqIO->new( -file => $self->fasta_file, -format => 'Fasta' ); |
28
|
|
|
|
|
|
|
} |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
sub _create_next_chunk_file_name { |
31
|
20
|
|
|
20
|
|
42
|
my ( $self, $chunk_number ) = @_; |
32
|
20
|
|
|
|
|
587
|
return join( '/', ( $self->_working_directory_name, $chunk_number . '.seq' ) ); |
33
|
|
|
|
|
|
|
} |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
sub _create_next_chunk_seqio { |
36
|
10
|
|
|
10
|
|
21
|
my ( $self, $chunk_number ) = @_; |
37
|
10
|
|
|
|
|
38
|
return Bio::SeqIO->new( -file => ">".$self->_create_next_chunk_file_name($chunk_number), -format => 'Fasta' ); |
38
|
|
|
|
|
|
|
} |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
sub _build_sequence_file_names { |
41
|
5
|
|
|
5
|
|
21
|
my ($self) = @_; |
42
|
5
|
|
|
|
|
16
|
my @sequence_file_names; |
43
|
5
|
|
|
|
|
10
|
my $chunk_number = 0; |
44
|
5
|
|
|
|
|
13
|
my $current_chunk_length = 0; |
45
|
5
|
|
|
|
|
24
|
my $current_chunk_seqio = $self->_create_next_chunk_seqio($chunk_number); |
46
|
5
|
|
|
|
|
65992
|
push( @sequence_file_names, $self->_create_next_chunk_file_name($chunk_number) ); |
47
|
|
|
|
|
|
|
|
48
|
5
|
|
|
|
|
147
|
while ( my $input_seq = $self->_input_seqio->next_seq() ) { |
49
|
30
|
100
|
|
|
|
6222
|
if ( $current_chunk_length > $self->target_chunk_size ) { |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
# next chunk |
52
|
5
|
|
|
|
|
8
|
$chunk_number++; |
53
|
5
|
|
|
|
|
5
|
$current_chunk_length = 0; |
54
|
5
|
|
|
|
|
11
|
$current_chunk_seqio = $self->_create_next_chunk_seqio($chunk_number); |
55
|
5
|
|
|
|
|
3149
|
push( @sequence_file_names, $self->_create_next_chunk_file_name($chunk_number) ); |
56
|
|
|
|
|
|
|
} |
57
|
30
|
|
|
|
|
108
|
$current_chunk_seqio->write_seq($input_seq); |
58
|
30
|
|
|
|
|
6275
|
$current_chunk_length += $input_seq->length(); |
59
|
|
|
|
|
|
|
} |
60
|
5
|
|
|
|
|
191
|
return \@sequence_file_names; |
61
|
|
|
|
|
|
|
} |
62
|
|
|
|
|
|
|
|
63
|
4
|
|
|
4
|
|
28
|
no Moose; |
|
4
|
|
|
|
|
12
|
|
|
4
|
|
|
|
|
34
|
|
64
|
|
|
|
|
|
|
__PACKAGE__->meta->make_immutable; |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
1; |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
__END__ |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
=pod |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
=encoding UTF-8 |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
=head1 NAME |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
Bio::Roary::ChunkFastaFile - Take in a FASTA file and chunk it up into smaller pieces. |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
=head1 VERSION |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
version 3.11.0 |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=head1 SYNOPSIS |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
Take in a FASTA file and chunk it up into smaller pieces. |
85
|
|
|
|
|
|
|
use Bio::Roary::ChunkFastaFile; |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
my $obj = Bio::Roary::ChunkFastaFile->new( |
88
|
|
|
|
|
|
|
fasta_file => 'abc.fa', |
89
|
|
|
|
|
|
|
); |
90
|
|
|
|
|
|
|
$obj->sequence_file_names; |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
=head1 AUTHOR |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
Andrew J. Page <ap13@sanger.ac.uk> |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
This software is Copyright (c) 2013 by Wellcome Trust Sanger Institute. |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
This is free software, licensed under: |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
The GNU General Public License, Version 3, June 2007 |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
=cut |