line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::Tradis::CommandLine::TradisAnalysis; |
2
|
|
|
|
|
|
|
$Bio::Tradis::CommandLine::TradisAnalysis::VERSION = '1.3.2'; |
3
|
|
|
|
|
|
|
# ABSTRACT: Perform full tradis analysis |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
|
6
|
1
|
|
|
1
|
|
101007
|
use Moose; |
|
1
|
|
|
|
|
394490
|
|
|
1
|
|
|
|
|
8
|
|
7
|
1
|
|
|
1
|
|
7711
|
use Getopt::Long qw(GetOptionsFromArray); |
|
1
|
|
|
|
|
7367
|
|
|
1
|
|
|
|
|
4
|
|
8
|
1
|
|
|
1
|
|
153
|
use Cwd qw(abs_path cwd); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
41
|
|
9
|
1
|
|
|
1
|
|
313
|
use Bio::Tradis::RunTradis; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
40
|
|
10
|
1
|
|
|
1
|
|
581
|
use TryCatch; |
|
1
|
|
|
|
|
358454
|
|
|
1
|
|
|
|
|
6
|
|
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# --- Construction arguments --------------------------------------------------

# Raw command-line arguments; parsed by BUILD.
has args => (
    is       => 'ro',
    isa      => 'ArrayRef',
    required => 1,
);

# Name of the invoking script.
has script_name => (
    is       => 'ro',
    isa      => 'Str',
    required => 1,
);

# --- Options filled in from the command line (see usage_text) ----------------

# -f : text file listing the fastq files to analyse.
has fastqfile => (
    is       => 'rw',
    isa      => 'Str',
    required => 0,
);

# -t : tag to search for.
has tag => (
    is       => 'rw',
    isa      => 'Str',
    required => 0,
);

# -mm : number of mismatches allowed when matching the tag.
has mismatch => (
    is       => 'rw',
    isa      => 'Int',
    required => 0,
    default  => 0,
);

# -td : tag direction.
has tagdirection => (
    is       => 'rw',
    isa      => 'Str',
    required => 0,
    default  => '3',
);

# -r : reference genome.
has reference => (
    is       => 'rw',
    isa      => 'Str',
    required => 0,
);

# -h : show the usage text.
has help => (
    is       => 'rw',
    isa      => 'Bool',
    required => 0,
);

# -m : mapping quality cutoff score.
has mapping_score => (
    is       => 'rw',
    isa      => 'Int',
    required => 0,
    default  => 30,
);

# --smalt_k / --smalt_s / --smalt_y / --smalt_r : custom SMALT parameters.
has smalt_k => (
    is       => 'rw',
    isa      => 'Maybe[Int]',
    required => 0,
);

has smalt_s => (
    is       => 'rw',
    isa      => 'Maybe[Int]',
    required => 0,
);

has smalt_y => (
    is       => 'rw',
    isa      => 'Maybe[Num]',
    required => 0,
    default  => 0.96,
);

has smalt_r => (
    is       => 'rw',
    isa      => 'Maybe[Int]',
    required => 0,
    default  => -1,
);

# -n : number of threads.
has smalt_n => (
    is       => 'rw',
    isa      => 'Maybe[Int]',
    required => 0,
    default  => 1,
);

# -e : essentiality experiment.
has essentiality => (
    is       => 'rw',
    isa      => 'Bool',
    required => 0,
    default  => 0,
);

# -v : verbose debugging output.
has verbose => (
    is      => 'rw',
    isa     => 'Bool',
    default => 0,
);

# --samtools_exec : samtools executable to use.
has samtools_exec => (
    is      => 'rw',
    isa     => 'Str',
    default => 'samtools',
);

# --- Lazily built internals --------------------------------------------------

# Directory that receives the output; built on first use.
has _output_directory => (
    is       => 'rw',
    isa      => 'Str',
    required => 0,
    lazy     => 1,
    builder  => '_build__output_directory',
);

# Write handle on the stats file; opened on first use.
has _stats_handle => (
    is       => 'ro',
    isa      => 'FileHandle',
    required => 0,
    lazy     => 1,
    builder  => '_build__stats_handle',
);
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
# Constructor hook (Moose calls BUILD once the object has been created).
#
# Parses $self->args with Getopt::Long and copies every option that was
# actually supplied onto the attribute of the same name, so the attribute
# defaults stay in force for anything left off the command line.
#
# When -e is given, smalt_r and the mapping score each default to 0 unless
# the user set them explicitly.
#
# If any of -f, -t or -r is missing the usage text is shown; usage_text
# prints it and exits, so the die below is only reached if that changes.
sub BUILD {
    my ($self) = @_;

    my (
        $fastqfile, $tag,     $td,      $mismatch, $ref,
        $smalt_n,   $essentiality,
        $map_score, $smalt_k, $smalt_s, $smalt_y,  $smalt_r,
        $help,      $verbose, $samtools_exec
    );

    GetOptionsFromArray(
        $self->args,
        'f|fastqfile=s'     => \$fastqfile,
        't|tag=s'           => \$tag,
        'td|tagdirection=i' => \$td,
        'mm|mismatch=i'     => \$mismatch,
        'r|reference=s'     => \$ref,
        'm|mapping_score=i' => \$map_score,
        'sk|smalt_k=i'      => \$smalt_k,
        'ss|smalt_s=i'      => \$smalt_s,
        'sy|smalt_y=f'      => \$smalt_y,
        'n|smalt_n=i'       => \$smalt_n,
        'sr|smalt_r=i'      => \$smalt_r,
        'v|verbose'         => \$verbose,
        'samtools_exec=s'   => \$samtools_exec,
        'e|essentiality'    => \$essentiality,
        'h|help'            => \$help
    );

    # Essentiality analysis requires different defaults. Record the flag
    # whenever it is given, and apply each default independently so that an
    # explicit --smalt_r or -m is respected without discarding the other
    # essentiality default.
    if ( defined($essentiality) ) {
        $self->essentiality($essentiality);
        $smalt_r   = 0 unless defined($smalt_r);
        $map_score = 0 unless defined($map_score);
    }

    # abs_path returns undef when the path cannot be resolved; fail with a
    # readable message instead of a type-constraint error from the setter.
    if ( defined($fastqfile) ) {
        my $resolved = abs_path($fastqfile);
        defined($resolved)
          or die "Cannot resolve path to fastq list '$fastqfile'\n";
        $self->fastqfile($resolved);
    }
    if ( defined($ref) ) {
        my $resolved = abs_path($ref);
        defined($resolved)
          or die "Cannot resolve path to reference '$ref'\n";
        $self->reference($resolved);
    }

    $self->tag( uc($tag) )               if ( defined($tag) );
    $self->tagdirection($td)             if ( defined($td) );
    $self->mismatch($mismatch)           if ( defined($mismatch) );
    $self->mapping_score($map_score)     if ( defined($map_score) );
    $self->smalt_k($smalt_k)             if ( defined($smalt_k) );
    $self->smalt_s($smalt_s)             if ( defined($smalt_s) );
    $self->smalt_y($smalt_y)             if ( defined($smalt_y) );
    $self->smalt_r($smalt_r)             if ( defined($smalt_r) );
    $self->smalt_n($smalt_n)             if ( defined($smalt_n) );
    $self->help($help)                   if ( defined($help) );
    $self->verbose($verbose)             if ( defined($verbose) );
    $self->samtools_exec($samtools_exec) if ( defined($samtools_exec) );

    # print usage text if required parameters are not present
    ( $fastqfile && $tag && $ref ) or die $self->usage_text;
}
102
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
# Runs the TraDIS pipeline for every fastq named in the list file
# ($self->fastqfile).
#
# Steps:
#   1. show the usage text (which exits) when help was requested;
#   2. read the list and check that every entry exists, resolving relative
#      entries against the directory of the list file;
#   3. run Bio::Tradis::RunTradis on each entry, warning (not failing) when
#      one raises a TagFilterError;
#   4. throw a TagFilterError if no entry completed, otherwise tidy the stats.
#
# Dies if the list cannot be opened or an entry does not exist.
# Returns 1 on success.
sub run {
    my ($self) = @_;

    # Test for truth rather than definedness so that help => 0 does not
    # trigger the usage text.
    if ( $self->help ) {

        #if ( scalar( @{ $self->args } ) == 0 ) {
        $self->usage_text;
    }

    #parse list of files and run pipeline for each one if they all exist
    my $fq = $self->fastqfile;
    open( my $list_fh, "<", $fq ) or die "Cannot find $fq: $!";
    my @filelist = <$list_fh>;
    close($list_fh);    # everything is in memory; do not hold it for the run
    my $file_dir = $self->get_file_dir;

    #check files exist before running
    my @fastq_paths;
    my $line_no = 0;
    foreach my $entry (@filelist) {
        chomp($entry);
        $line_no++;

        # A blank line would otherwise resolve to the list's own directory.
        next if $entry =~ /^\s*$/;

        my $full_path = ( $entry =~ /^\// ) ? $entry : "$file_dir/$entry";
        unless ( -e $full_path ) {
            die "File $full_path does not exist ($fq, line $line_no)\n";
        }
        push @fastq_paths, $full_path;
    }

    #if all files exist, continue with analysis
    my $at_least_one_good_fastq = 0;
    foreach my $full_path (@fastq_paths) {
        my $analysis = Bio::Tradis::RunTradis->new(
            fastqfile        => $full_path,
            tag              => $self->tag,
            tagdirection     => $self->tagdirection,
            mismatch         => $self->mismatch,
            reference        => $self->reference,
            mapping_score    => $self->mapping_score,
            output_directory => $self->_output_directory,
            _stats_handle    => $self->_stats_handle,
            smalt_k          => $self->smalt_k,
            smalt_s          => $self->smalt_s,
            smalt_y          => $self->smalt_y,
            smalt_r          => $self->smalt_r,
            smalt_n          => $self->smalt_n,
            verbose          => $self->verbose,
            samtools_exec    => $self->samtools_exec
        );
        try {
            $analysis->run_tradis;
            $at_least_one_good_fastq = 1;
        }
        catch (Bio::Tradis::Exception::TagFilterError $e) {
            my $tag = $self->tag;
            warn "There was a problem filtering '$full_path' by '$tag'; it looks like the tag was not found in any read\n";
        }
    }
    if ( !$at_least_one_good_fastq ) {
        Bio::Tradis::Exception::TagFilterError->throw( error => "None of the input files contained the specified tag. Please check that your inputs are valid fastq files and that at least one read in one of them starts with the specified tag\n" );
    }

    $self->_tidy_stats;

    #$self->_combine_plots;
    return 1;
}
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
# Builder for the lazy _output_directory attribute: the current working
# directory at the time the attribute is first read.
sub _build__output_directory {
    my $working_dir = cwd();
    return $working_dir;
}
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
# Returns the path of the stats file: the name of the fastq list file with
# its directory removed and its extension replaced by "stats", placed in the
# output directory.
sub _stats_filename {
    my ($self) = @_;
    my $dir              = $self->get_file_dir;
    my $output_directory = $self->_output_directory;
    my $stats_filename   = $self->fastqfile;

    # Strip the leading directory. The directory is quoted and anchored so
    # that regex metacharacters in it (e.g. "+" or "(") are taken literally.
    $stats_filename =~ s/^\Q$dir\E\///;

    # Swap whatever follows the last dot for "stats".
    $stats_filename =~ s/[^\.]+$/stats/;
    return "$output_directory/$stats_filename";
}
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
# Builder for the lazy _stats_handle attribute: opens the stats file
# (see _stats_filename) for writing, truncating any existing file, and
# returns the handle.
#
# Dies with the file name and OS error if the file cannot be opened, rather
# than returning an undefined handle.
sub _build__stats_handle {
    my ($self) = @_;
    my $stats_filename = $self->_stats_filename();
    open( my $stats, ">", $stats_filename )
      or die "Cannot open stats file '$stats_filename' for writing: $!\n";
    return $stats;
}
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
# Rewrites the stats file so that only the first header line (a line
# starting with "File") is kept; every other line is copied unchanged.
#
# The cleaned copy is written to tmp.stats in the output directory and then
# moved over the original. Dies if either file cannot be opened, the copy
# cannot be flushed, or the move fails.
sub _tidy_stats {
    my ($self) = @_;
    my $output_directory = $self->_output_directory;
    my $stats_filename   = $self->_stats_filename();
    my $tmp_filename     = "$output_directory/tmp.stats";

    open( my $stats_fh, '<', $stats_filename )
      or die "Cannot read stats file '$stats_filename': $!\n";
    open( my $tmp_fh, '>', $tmp_filename )
      or die "Cannot write temporary stats file '$tmp_filename': $!\n";

    my $header_seen = 0;
    while ( my $line = <$stats_fh> ) {
        if ( $line =~ /^File/ ) {
            next if $header_seen;    # drop repeated headers
            $header_seen = 1;
        }
        print {$tmp_fh} $line;
    }
    close($stats_fh);

    # Buffered write errors only surface at close.
    close($tmp_fh)
      or die "Cannot finish writing '$tmp_filename': $!\n";

    # List form bypasses the shell, so spaces or metacharacters in either
    # path are passed to mv verbatim.
    system( 'mv', $tmp_filename, $stats_filename ) == 0
      or die "Cannot move '$tmp_filename' to '$stats_filename'\n";
    return;
}
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
# Placeholder: reads the fastq list path but does nothing with it yet.
# Always returns 1.
sub _combine_plots {
    my $self     = shift;
    my $filelist = $self->fastqfile;    # fetched but currently unused

    return 1;
}
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
# Returns the directory part of $self->fastqfile: every '/'-separated
# component except the last, joined back together with '/'. A path with a
# single component yields the empty string.
sub get_file_dir {
    my $self  = shift;
    my @parts = split( m{/}, $self->fastqfile );

    # Keep everything up to, but not including, the final component.
    return join( '/', @parts[ 0 .. $#parts - 1 ] );
}
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
# Prints the command-line help to the currently selected output handle and
# terminates the program. This sub never returns to its caller.
sub usage_text {
    my $usage = <<'USAGE';
Run a TraDIS analysis. This involves:
1: filtering the data with tags matching that passed via -t option
2: removing the tags from the sequences
3: mapping
4: creating an insertion site plot
5: creating a stats summary

Usage: bacteria_tradis [options]

Options:
-f : text file listing fastq files with tradis tags attached
-t : tag to search for
-r : reference genome in fasta format (.fa)
-td : tag direction - 3 or 5 (optional. default = 3)
-mm : number of mismatches allowed when matching tag (optional. default = 0)
-m : mapping quality cutoff score (optional. default = 30)
--smalt_k : custom k-mer value for SMALT mapping (optional)
--smalt_s : custom step size for SMALT mapping (optional)
--smalt_y : custom y parameter for SMALT (optional. default = 0.96)
--smalt_r : custom r parameter for SMALT (optional. default = -1)
-n : number of threads to use for SMALT and samtools sort (optional. default = 1)
-e : set defaults for essentiality experiment (smalt_r = 0, -m = 0)
-v : verbose debugging output
USAGE
    print $usage;
    exit;
}
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
__PACKAGE__->meta->make_immutable; |
264
|
1
|
|
|
1
|
|
1220
|
no Moose; |
|
1
|
|
|
|
|
5
|
|
|
1
|
|
|
|
|
11
|
|
265
|
|
|
|
|
|
|
1; |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
__END__ |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
=pod |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
=encoding UTF-8 |
272
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
=head1 NAME |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
Bio::Tradis::CommandLine::TradisAnalysis - Perform full tradis analysis |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
=head1 VERSION |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
version 1.3.2 |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
=head1 SYNOPSIS |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
Takes a text file listing fastq files, a reference and a tag and generates insertion |
284
|
|
|
|
|
|
|
site plots for use in Artemis |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
=head1 AUTHOR |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
Carla Cummins <path-help@sanger.ac.uk> |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
This software is Copyright (c) 2013 by Wellcome Trust Sanger Institute. |
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
This is free software, licensed under: |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
The GNU General Public License, Version 3, June 2007 |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
=cut |