line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Bio::Tradis::RunTradis; |
2
|
|
|
|
|
|
|
$Bio::Tradis::RunTradis::VERSION = '1.3.3'; |
3
|
|
|
|
|
|
|
# ABSTRACT: Perform all steps required for a tradis analysis |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
|
6
|
2
|
|
|
2
|
|
105356
|
use Cwd; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
109
|
|
7
|
2
|
|
|
2
|
|
402
|
use Moose; |
|
2
|
|
|
|
|
412846
|
|
|
2
|
|
|
|
|
12
|
|
8
|
2
|
|
|
2
|
|
13312
|
use File::Temp; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
154
|
|
9
|
2
|
|
|
2
|
|
11
|
use File::Path 'rmtree'; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
84
|
|
10
|
2
|
|
|
2
|
|
782
|
use Bio::Tradis::FilterTags; |
|
2
|
|
|
|
|
6
|
|
|
2
|
|
|
|
|
80
|
|
11
|
2
|
|
|
2
|
|
915
|
use Bio::Tradis::RemoveTags; |
|
2
|
|
|
|
|
7
|
|
|
2
|
|
|
|
|
71
|
|
12
|
2
|
|
|
2
|
|
897
|
use Bio::Tradis::Map; |
|
2
|
|
|
|
|
8
|
|
|
2
|
|
|
|
|
81
|
|
13
|
2
|
|
|
2
|
|
954
|
use Bio::Tradis::TradisPlot; |
|
2
|
|
|
|
|
6
|
|
|
2
|
|
|
|
|
84
|
|
14
|
2
|
|
|
2
|
|
1041
|
use Bio::Tradis::Exception; |
|
2
|
|
|
|
|
6
|
|
|
2
|
|
|
|
|
47
|
|
15
|
2
|
|
|
2
|
|
507
|
use Bio::Tradis::Samtools; |
|
2
|
|
|
|
|
8
|
|
|
2
|
|
|
|
|
4401
|
|
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 ); |
18
|
|
|
|
|
|
|
has 'fastqfile' => ( is => 'rw', isa => 'Str', required => 1 ); |
19
|
|
|
|
|
|
|
has '_unzipped_fastq' => |
20
|
|
|
|
|
|
|
( is => 'rw', isa => 'Str', lazy => 1, builder => '_build__unzipped_fastq' ); |
21
|
|
|
|
|
|
|
has 'tag' => ( is => 'ro', isa => 'Str', required => 1 ); |
22
|
|
|
|
|
|
|
has 'tagdirection' => |
23
|
|
|
|
|
|
|
( is => 'ro', isa => 'Str', required => 1, default => '3' ); |
24
|
|
|
|
|
|
|
has 'mismatch' => ( is => 'rw', isa => 'Int', required => 1, default => 0 ); |
25
|
|
|
|
|
|
|
has 'mapping_score' => |
26
|
|
|
|
|
|
|
( is => 'ro', isa => 'Int', required => 1, default => 30 ); |
27
|
|
|
|
|
|
|
has 'reference' => ( is => 'rw', isa => 'Str', required => 1 ); |
28
|
|
|
|
|
|
|
has 'outfile' => ( |
29
|
|
|
|
|
|
|
is => 'rw', |
30
|
|
|
|
|
|
|
isa => 'Str', |
31
|
|
|
|
|
|
|
required => 0, |
32
|
|
|
|
|
|
|
default => sub { |
33
|
|
|
|
|
|
|
my ($self) = @_; |
34
|
|
|
|
|
|
|
my @dirs = split( '/', $self->fastqfile ); |
35
|
|
|
|
|
|
|
my $o = pop(@dirs); |
36
|
|
|
|
|
|
|
$o =~ s/fastq/out/; |
37
|
|
|
|
|
|
|
return $o; |
38
|
|
|
|
|
|
|
} |
39
|
|
|
|
|
|
|
); |
40
|
|
|
|
|
|
|
has 'smalt_k' => ( is => 'rw', isa => 'Maybe[Int]', required => 0 ); |
41
|
|
|
|
|
|
|
has 'smalt_s' => ( is => 'rw', isa => 'Maybe[Int]', required => 0 ); |
42
|
|
|
|
|
|
|
has 'smalt_y' => ( is => 'rw', isa => 'Maybe[Num]', required => 0, default => 0.96 ); |
43
|
|
|
|
|
|
|
has 'smalt_r' => ( is => 'rw', isa => 'Maybe[Int]', required => 0, default => -1); |
44
|
|
|
|
|
|
|
has 'smalt_n' => ( is => 'rw', isa => 'Maybe[Int]', required => 0, default => 1); |
45
|
|
|
|
|
|
|
has 'samtools_exec' => ( is => 'rw', isa => 'Str', default => 'samtools' ); |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
has '_temp_directory' => ( |
48
|
|
|
|
|
|
|
is => 'rw', |
49
|
|
|
|
|
|
|
isa => 'Str', |
50
|
|
|
|
|
|
|
required => 0, |
51
|
|
|
|
|
|
|
lazy => 1, |
52
|
|
|
|
|
|
|
builder => '_build__temp_directory' |
53
|
|
|
|
|
|
|
); |
54
|
|
|
|
|
|
|
has 'output_directory' => ( |
55
|
|
|
|
|
|
|
is => 'rw', |
56
|
|
|
|
|
|
|
isa => 'Str', |
57
|
|
|
|
|
|
|
required => 0, |
58
|
|
|
|
|
|
|
lazy => 1, |
59
|
|
|
|
|
|
|
builder => '_build_output_directory' |
60
|
|
|
|
|
|
|
); |
61
|
|
|
|
|
|
|
has '_stats_handle' => ( is => 'ro', isa => 'FileHandle', required => 1 ); |
62
|
|
|
|
|
|
|
has '_sequence_info' => ( |
63
|
|
|
|
|
|
|
is => 'rw', |
64
|
|
|
|
|
|
|
isa => 'HashRef', |
65
|
|
|
|
|
|
|
required => 0, |
66
|
|
|
|
|
|
|
lazy => 1, |
67
|
|
|
|
|
|
|
builder => '_build__sequence_info' |
68
|
|
|
|
|
|
|
); |
69
|
|
|
|
|
|
|
has '_current_directory' => ( |
70
|
|
|
|
|
|
|
is => 'rw', |
71
|
|
|
|
|
|
|
isa => 'Str', |
72
|
|
|
|
|
|
|
required => 0, |
73
|
|
|
|
|
|
|
lazy => 1, |
74
|
|
|
|
|
|
|
builder => '_build__current_directory' |
75
|
|
|
|
|
|
|
); |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
sub _is_gz { |
78
|
2
|
|
|
2
|
|
7
|
my ($self) = @_; |
79
|
2
|
|
|
|
|
53
|
my $fq = $self->fastqfile; |
80
|
|
|
|
|
|
|
|
81
|
2
|
50
|
|
|
|
16
|
if ( $fq =~ /\.gz/ ) { |
82
|
0
|
|
|
|
|
0
|
return 1; |
83
|
|
|
|
|
|
|
} |
84
|
|
|
|
|
|
|
else { |
85
|
2
|
|
|
|
|
10
|
return 0; |
86
|
|
|
|
|
|
|
} |
87
|
|
|
|
|
|
|
} |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
sub _build__unzipped_fastq { |
90
|
2
|
|
|
2
|
|
6
|
my ($self) = @_; |
91
|
2
|
|
|
|
|
55
|
my $fq = $self->fastqfile; |
92
|
2
|
|
|
|
|
50
|
my $temporary_directory = $self->_temp_directory; |
93
|
|
|
|
|
|
|
|
94
|
2
|
50
|
|
|
|
12
|
if ( $self->_is_gz ) { |
95
|
0
|
|
|
|
|
0
|
$fq =~ /([^\/]+)$/; |
96
|
0
|
|
|
|
|
0
|
my $newfq = $1; |
97
|
0
|
|
|
|
|
0
|
$newfq =~ s/\.gz//; |
98
|
0
|
0
|
|
|
|
0
|
if ( !-e "$temporary_directory/$newfq" ) { |
99
|
0
|
|
|
|
|
0
|
`gunzip -c $fq > $temporary_directory/$newfq`; |
100
|
|
|
|
|
|
|
} |
101
|
0
|
|
|
|
|
0
|
return "$temporary_directory/$newfq"; |
102
|
|
|
|
|
|
|
} |
103
|
|
|
|
|
|
|
else { |
104
|
2
|
|
|
|
|
60
|
return $fq; |
105
|
|
|
|
|
|
|
} |
106
|
|
|
|
|
|
|
} |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
sub _build__stats_handle { |
109
|
0
|
|
|
0
|
|
0
|
my ($self) = @_; |
110
|
0
|
|
|
|
|
0
|
my $outfile = $self->outfile; |
111
|
|
|
|
|
|
|
|
112
|
0
|
|
|
|
|
0
|
open( my $stats, ">", "$outfile.stats" ); |
113
|
0
|
|
|
|
|
0
|
return $stats; |
114
|
|
|
|
|
|
|
} |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
sub _build__sequence_info { |
117
|
0
|
|
|
0
|
|
0
|
my ($self) = @_; |
118
|
0
|
|
|
|
|
0
|
my $temporary_directory = $self->_temp_directory; |
119
|
0
|
|
|
|
|
0
|
open( GREP, |
120
|
|
|
|
|
|
|
"grep \@SQ $temporary_directory/mapped.sam | awk '{print \$2, \$3}' |" |
121
|
|
|
|
|
|
|
); |
122
|
0
|
|
|
|
|
0
|
my %sns = (); |
123
|
0
|
|
|
|
|
0
|
while ( my $sn = <GREP> ) { |
124
|
0
|
|
|
|
|
0
|
chomp($sn); |
125
|
0
|
|
|
|
|
0
|
$sn =~ /SN:(\S+)\s+LN:(\d+)/; |
126
|
0
|
|
|
|
|
0
|
$sns{$1} = $2; |
127
|
|
|
|
|
|
|
} |
128
|
0
|
|
|
|
|
0
|
return \%sns; |
129
|
|
|
|
|
|
|
} |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
sub _build__temp_directory { |
132
|
1
|
|
|
1
|
|
4
|
my ($self) = @_; |
133
|
1
|
|
|
|
|
44
|
my $tmp_dir = File::Temp->newdir( 'tmp_run_tradis_XXXXX', |
134
|
|
|
|
|
|
|
CLEANUP => 0, |
135
|
|
|
|
|
|
|
DIR => $self->output_directory ); |
136
|
1
|
|
|
|
|
710
|
return $tmp_dir->dirname; |
137
|
|
|
|
|
|
|
} |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
sub _build_output_directory { |
140
|
0
|
|
|
0
|
|
0
|
return cwd(); |
141
|
|
|
|
|
|
|
} |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
sub _build__current_directory { |
144
|
0
|
|
|
0
|
|
0
|
my ($self) = @_; |
145
|
0
|
|
|
|
|
0
|
my $fq = $self->fastqfile; |
146
|
|
|
|
|
|
|
|
147
|
0
|
|
|
|
|
0
|
my @dirs = split( '/', $fq ); |
148
|
0
|
|
|
|
|
0
|
pop(@dirs); |
149
|
0
|
|
|
|
|
0
|
return join( '/', @dirs ); |
150
|
|
|
|
|
|
|
} |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
sub run_tradis { |
153
|
1
|
|
|
1
|
0
|
5
|
my ($self) = @_; |
154
|
1
|
|
|
|
|
51
|
my $temporary_directory = $self->_temp_directory; |
155
|
1
|
|
|
|
|
41
|
my $fq = $self->fastqfile; |
156
|
|
|
|
|
|
|
|
157
|
1
|
|
|
|
|
38
|
my $ref = $self->reference; |
158
|
1
|
50
|
|
|
|
31
|
Bio::Tradis::Exception::RefNotFound->throw( error => "$ref not found\n" ) unless( -e $ref ); |
159
|
|
|
|
|
|
|
|
160
|
1
|
50
|
|
|
|
41
|
print STDERR "::::::::::::::::::\n$fq\n::::::::::::::::::\n\n" if($self->verbose); |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
# Step 1: Filter tags that match user input tag |
163
|
1
|
50
|
|
|
|
34
|
print STDERR "..........Step 1: Filter tags that match user input tag\n" if($self->verbose); |
164
|
1
|
|
|
|
|
8
|
$self->_filter; |
165
|
|
|
|
|
|
|
|
166
|
1
|
50
|
|
|
|
44
|
print STDERR "..........Step 1.1: Check that at least one read started with the tag\n" if($self->verbose); |
167
|
1
|
|
|
|
|
8
|
$self->_check_filter; |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
# Step 2: Remove the tag from the sequence and quality strings |
170
|
1
|
50
|
|
|
|
40
|
print STDERR |
171
|
|
|
|
|
|
|
"..........Step 2: Remove the tag from the sequence and quality strings\n" if($self->verbose); |
172
|
1
|
|
|
|
|
10
|
$self->_remove; |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
# Step 3: Map file to reference |
175
|
1
|
50
|
|
|
|
38
|
print STDERR "..........Step 3: Map file to reference\n" if($self->verbose); |
176
|
1
|
|
|
|
|
8
|
$self->_map; |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
# Step 4: Convert output from SAM to BAM, sort and index |
179
|
1
|
50
|
|
|
|
54
|
print STDERR |
180
|
|
|
|
|
|
|
"..........Step 3.5: Convert output from SAM to BAM and sort\n" if($self->verbose); |
181
|
1
|
|
|
|
|
11
|
$self->_sam2bam; |
182
|
1
|
|
|
|
|
13
|
$self->_sort_bam; |
183
|
0
|
|
|
|
|
0
|
$self->_bamcheck; |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
# Step 5: Generate plot |
186
|
0
|
0
|
|
|
|
0
|
print STDERR "..........Step 4: Generate plot\n" if($self->verbose); |
187
|
0
|
|
|
|
|
0
|
$self->_make_plot; |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
# Step 6: Generate statistics |
190
|
0
|
0
|
|
|
|
0
|
print STDERR "..........Step 5: Generate statistics\n" if($self->verbose); |
191
|
0
|
|
|
|
|
0
|
$self->_stats; |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
# Step 7: Move files to current directory |
194
|
0
|
0
|
|
|
|
0
|
print STDERR "..........Step 6: Move files to current directory\n" if($self->verbose); |
195
|
0
|
|
|
|
|
0
|
my $outfile = $self->outfile; |
196
|
0
|
|
|
|
|
0
|
my $output_directory = $self->output_directory; |
197
|
0
|
|
|
|
|
0
|
system("mv $temporary_directory/$outfile* $output_directory"); |
198
|
0
|
|
|
|
|
0
|
system("mv $temporary_directory/mapped.sort.bam $output_directory/$outfile.mapped.bam"); |
199
|
0
|
|
|
|
|
0
|
system("mv $temporary_directory/mapped.sort.bam.bai $output_directory/$outfile.mapped.bam.bai"); |
200
|
0
|
|
|
|
|
0
|
system("mv $temporary_directory/mapped.bamcheck $output_directory/$outfile.mapped.bamcheck"); |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
# Clean up |
203
|
0
|
0
|
|
|
|
0
|
print STDERR "..........Clean up\n" if($self->verbose); |
204
|
|
|
|
|
|
|
|
205
|
0
|
|
|
|
|
0
|
rmtree($temporary_directory); |
206
|
|
|
|
|
|
|
|
207
|
0
|
|
|
|
|
0
|
return 1; |
208
|
|
|
|
|
|
|
} |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
sub _filter { |
211
|
2
|
|
|
2
|
|
7
|
my ($self) = @_; |
212
|
2
|
|
|
|
|
78
|
my $temporary_directory = $self->_temp_directory; |
213
|
2
|
|
|
|
|
76
|
my $fqfile = $self->_unzipped_fastq; |
214
|
2
|
|
|
|
|
55
|
my $tag = $self->tag; |
215
|
2
|
|
|
|
|
56
|
my $mm = $self->mismatch; |
216
|
|
|
|
|
|
|
|
217
|
2
|
|
|
|
|
82
|
my $filter = Bio::Tradis::FilterTags->new( |
218
|
|
|
|
|
|
|
fastqfile => $fqfile, |
219
|
|
|
|
|
|
|
tag => $tag, |
220
|
|
|
|
|
|
|
mismatch => $mm, |
221
|
|
|
|
|
|
|
outfile => "$temporary_directory/filter.fastq" |
222
|
|
|
|
|
|
|
)->filter_tags; |
223
|
|
|
|
|
|
|
} |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
sub _check_filter { |
226
|
8
|
|
|
8
|
|
579
|
my ($self) = @_; |
227
|
8
|
|
|
|
|
362
|
my $temporary_directory = $self->_temp_directory; |
228
|
8
|
|
|
|
|
29
|
my $filtered_file_filename = "$temporary_directory/filter.fastq"; |
229
|
8
|
100
|
|
|
|
430
|
open my $filtered_file, '<', $filtered_file_filename or |
230
|
|
|
|
|
|
|
Bio::Tradis::Exception::TagFilterError->throw( error => "There was a problem filtering reads by the specified tag. Please check all input files are Fastq formatted and that at least one read in each starts with the specified tag\n" ); |
231
|
7
|
|
|
|
|
15
|
my @first_read_data; |
232
|
7
|
|
|
|
|
119
|
while( my $line = <$filtered_file> ) { |
233
|
22
|
100
|
|
|
|
73
|
last if $. > 4; |
234
|
20
|
|
|
|
|
32
|
chomp($line); |
235
|
20
|
|
|
|
|
1209
|
push @first_read_data, $line; |
236
|
|
|
|
|
|
|
} |
237
|
7
|
|
|
|
|
19
|
my $number_of_read_lines = scalar @first_read_data; |
238
|
7
|
100
|
|
|
|
30
|
if ( $number_of_read_lines ne 4) { |
239
|
|
|
|
|
|
|
# There wasn't enough data for a complete read |
240
|
3
|
|
|
|
|
35
|
Bio::Tradis::Exception::TagFilterError->throw( error => "There was a problem filtering reads by the specified tag. Please check all input files are Fastq formatted and that at least one read in each starts with the specified tag\n" ); |
241
|
|
|
|
|
|
|
} |
242
|
4
|
|
|
|
|
12
|
my $read_plus_sign = $first_read_data[2]; |
243
|
4
|
100
|
|
|
|
17
|
if ( $read_plus_sign ne '+' ) { |
244
|
|
|
|
|
|
|
# The first 'read' didn't have a '+' on the third line, suspicious |
245
|
1
|
|
|
|
|
16
|
Bio::Tradis::Exception::TagFilterError->throw( error => "There was a problem filtering reads by the specified tag. Please check all input files are Fastq formatted and that at least one read in each starts with the specified tag\n" ); |
246
|
|
|
|
|
|
|
} |
247
|
|
|
|
|
|
|
# I'm not proposing further (more detailed) validation here |
248
|
3
|
|
|
|
|
49
|
close $filtered_file; |
249
|
|
|
|
|
|
|
} |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
sub _remove { |
252
|
2
|
|
|
2
|
|
12
|
my ($self) = @_; |
253
|
2
|
|
|
|
|
93
|
my $temporary_directory = $self->_temp_directory; |
254
|
2
|
|
|
|
|
59
|
my $tag = $self->tag; |
255
|
2
|
|
|
|
|
63
|
my $mm = $self->mismatch; |
256
|
|
|
|
|
|
|
|
257
|
2
|
|
|
|
|
93
|
my $rm_tags = Bio::Tradis::RemoveTags->new( |
258
|
|
|
|
|
|
|
fastqfile => "$temporary_directory/filter.fastq", |
259
|
|
|
|
|
|
|
tag => $tag, |
260
|
|
|
|
|
|
|
mismatch => $mm, |
261
|
|
|
|
|
|
|
outfile => "$temporary_directory/tags_removed.fastq" |
262
|
|
|
|
|
|
|
)->remove_tags; |
263
|
|
|
|
|
|
|
} |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
sub _map { |
266
|
2
|
|
|
2
|
|
6
|
my ($self) = @_; |
267
|
2
|
|
|
|
|
63
|
my $temporary_directory = $self->_temp_directory; |
268
|
|
|
|
|
|
|
|
269
|
2
|
|
|
|
|
56
|
my $ref = $self->reference; |
270
|
|
|
|
|
|
|
|
271
|
2
|
|
|
|
|
71
|
my $mapping = Bio::Tradis::Map->new( |
272
|
|
|
|
|
|
|
fastqfile => "$temporary_directory/tags_removed.fastq", |
273
|
|
|
|
|
|
|
reference => "$ref", |
274
|
|
|
|
|
|
|
refname => "$temporary_directory/ref.index", |
275
|
|
|
|
|
|
|
outfile => "$temporary_directory/mapped.sam", |
276
|
|
|
|
|
|
|
smalt_k => $self->smalt_k, |
277
|
|
|
|
|
|
|
smalt_s => $self->smalt_s, |
278
|
|
|
|
|
|
|
smalt_y => $self->smalt_y, |
279
|
|
|
|
|
|
|
smalt_r => $self->smalt_r, |
280
|
|
|
|
|
|
|
smalt_n => $self->smalt_n |
281
|
|
|
|
|
|
|
); |
282
|
2
|
|
|
|
|
20
|
$mapping->index_ref; |
283
|
2
|
|
|
|
|
34
|
$mapping->do_mapping; |
284
|
|
|
|
|
|
|
} |
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
sub _sam2bam { |
287
|
2
|
|
|
2
|
|
14
|
my ($self) = @_; |
288
|
2
|
|
|
|
|
83
|
my $temporary_directory = $self->_temp_directory; |
289
|
|
|
|
|
|
|
|
290
|
2
|
|
|
|
|
78
|
system( |
291
|
|
|
|
|
|
|
$self->samtools_exec." view -b -o $temporary_directory/mapped.bam -S $temporary_directory/mapped.sam" |
292
|
|
|
|
|
|
|
); |
293
|
2
|
|
|
|
|
67
|
return 1; |
294
|
|
|
|
|
|
|
} |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
sub _sort_bam { |
297
|
2
|
|
|
2
|
|
12
|
my ($self) = @_; |
298
|
2
|
|
|
|
|
136
|
my $temporary_directory = $self->_temp_directory; |
299
|
|
|
|
|
|
|
|
300
|
2
|
|
|
|
|
72
|
my $samtools_obj = Bio::Tradis::Samtools->new(exec => $self->samtools_exec, threads => $self->smalt_n); |
301
|
2
|
|
|
|
|
28
|
$samtools_obj->run_sort("$temporary_directory/mapped.bam","$temporary_directory/mapped.sort.bam"); |
302
|
0
|
|
|
|
|
|
$samtools_obj->run_index("$temporary_directory/mapped.sort.bam"); |
303
|
0
|
|
|
|
|
|
return 1; |
304
|
|
|
|
|
|
|
} |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
sub _bamcheck { |
307
|
0
|
|
|
0
|
|
|
my ($self) = @_; |
308
|
0
|
|
|
|
|
|
my $temporary_directory = $self->_temp_directory; |
309
|
|
|
|
|
|
|
|
310
|
0
|
|
|
|
|
|
system( |
311
|
|
|
|
|
|
|
$self->samtools_exec." stats $temporary_directory/mapped.sort.bam > $temporary_directory/mapped.bamcheck" |
312
|
|
|
|
|
|
|
); |
313
|
0
|
|
|
|
|
|
return 1; |
314
|
|
|
|
|
|
|
} |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
sub _make_plot { |
317
|
0
|
|
|
0
|
|
|
my ($self) = @_; |
318
|
0
|
|
|
|
|
|
my $temporary_directory = $self->_temp_directory; |
319
|
0
|
|
|
|
|
|
my $ref = $self->reference; |
320
|
0
|
|
|
|
|
|
my $outfile = $self->outfile; |
321
|
0
|
|
|
|
|
|
my $tr_d = $self->tagdirection; |
322
|
|
|
|
|
|
|
|
323
|
0
|
|
|
|
|
|
my $plot = Bio::Tradis::TradisPlot->new( |
324
|
|
|
|
|
|
|
mappedfile => "$temporary_directory/mapped.sort.bam", |
325
|
|
|
|
|
|
|
mapping_score => $self->mapping_score, |
326
|
|
|
|
|
|
|
outfile => "$temporary_directory/$outfile" |
327
|
|
|
|
|
|
|
)->plot; |
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
# if tag direction is 5, reverse plot columns |
330
|
0
|
0
|
|
|
|
|
if ( $self->tagdirection eq '5' ) { |
331
|
0
|
0
|
|
|
|
|
print STDERR "Tag direction = 5. Reversing plot..\n" if($self->verbose); |
332
|
0
|
|
|
|
|
|
$self->_reverse_plots; |
333
|
|
|
|
|
|
|
} |
334
|
0
|
|
|
|
|
|
return 1; |
335
|
|
|
|
|
|
|
} |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
sub _reverse_plots { |
338
|
0
|
|
|
0
|
|
|
my ($self) = @_; |
339
|
0
|
|
|
|
|
|
my $temporary_directory = $self->_temp_directory; |
340
|
0
|
|
|
|
|
|
my $outfile = $self->outfile; |
341
|
0
|
|
|
|
|
|
my @seqnames = keys %{ $self->_sequence_info }; |
|
0
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
|
343
|
0
|
|
|
|
|
|
my @current_plots = |
344
|
|
|
|
|
|
|
glob("$temporary_directory/$outfile.*.insert_site_plot.gz"); |
345
|
|
|
|
|
|
|
|
346
|
0
|
|
|
|
|
|
foreach my $plotname (@current_plots) { |
347
|
0
|
0
|
|
|
|
|
print STDERR "Reversing $plotname\n" if($self->verbose); |
348
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
#my $plotname = $self->_plotname($sn); |
350
|
0
|
|
|
|
|
|
system("gunzip -c $plotname > $temporary_directory/tmp.plot"); |
351
|
0
|
|
|
|
|
|
system( |
352
|
|
|
|
|
|
|
"awk '{ t = \$1; \$1 = \$2; \$2 = t; print; }' $temporary_directory/tmp.plot > rv_plot" |
353
|
|
|
|
|
|
|
); |
354
|
0
|
|
|
|
|
|
system("gzip -c rv_plot > $plotname"); |
355
|
|
|
|
|
|
|
} |
356
|
0
|
|
|
|
|
|
unlink("$temporary_directory/tmp.plot"); |
357
|
0
|
|
|
|
|
|
unlink("rv_plot"); |
358
|
|
|
|
|
|
|
} |
359
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
sub _stats { |
361
|
0
|
|
|
0
|
|
|
my ($self) = @_; |
362
|
0
|
|
|
|
|
|
my $outfile = $self->outfile; |
363
|
0
|
|
|
|
|
|
my $temporary_directory = $self->_temp_directory; |
364
|
0
|
|
|
|
|
|
my $fq = $self->_unzipped_fastq; |
365
|
0
|
|
|
|
|
|
my $seq_info = $self->_sequence_info; |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
#write header to stats file |
368
|
0
|
|
|
|
|
|
$self->_write_stats_header; |
369
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
# Add file name and number of reads in it |
371
|
0
|
|
|
|
|
|
my @fql = split( "/", $fq ); |
372
|
0
|
|
|
|
|
|
my $stats = "$fql[-1],"; |
373
|
0
|
|
|
|
|
|
my $total_reads = `wc $fq | awk '{print \$1/4}'`; |
374
|
0
|
|
|
|
|
|
chomp($total_reads); |
375
|
0
|
|
|
|
|
|
$stats .= "$total_reads,"; |
376
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
# Matching reads |
378
|
0
|
|
|
|
|
|
my $matching = |
379
|
|
|
|
|
|
|
`wc $temporary_directory/filter.fastq | awk '{print \$1/4}'`; |
380
|
0
|
|
|
|
|
|
chomp($matching); |
381
|
0
|
|
|
|
|
|
$stats .= "$matching,"; |
382
|
0
|
|
|
|
|
|
$stats .= ( $matching / $total_reads ) * 100 . ","; |
383
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
# Mapped reads |
385
|
0
|
|
|
|
|
|
my $mapped = $self->_number_of_mapped_reads; |
386
|
0
|
|
|
|
|
|
$stats .= "$mapped,"; |
387
|
0
|
|
|
|
|
|
$stats .= ( $mapped / $matching ) * 100 . ","; |
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
# Unique insertion sites |
390
|
0
|
|
|
|
|
|
my ( $total_uis, $total_seq_len ); |
391
|
0
|
|
|
|
|
|
foreach my $si ( keys %{$seq_info} ) { |
|
0
|
|
|
|
|
|
|
392
|
0
|
|
|
|
|
|
my $plotname = $self->_plotname($si); |
393
|
0
|
|
|
|
|
|
system( |
394
|
|
|
|
|
|
|
"gunzip -c $temporary_directory/$plotname > $temporary_directory/tmp.plot" |
395
|
|
|
|
|
|
|
); |
396
|
0
|
|
|
|
|
|
my $uis = `grep -c -v "0 0" $temporary_directory/tmp.plot`; |
397
|
0
|
|
|
|
|
|
chomp($uis); |
398
|
0
|
|
|
|
|
|
$total_uis += $uis; |
399
|
0
|
|
|
|
|
|
$stats .= "$uis,"; |
400
|
0
|
|
|
|
|
|
my $seqlen = ${$seq_info}{$si}; |
|
0
|
|
|
|
|
|
|
401
|
0
|
|
|
|
|
|
$total_seq_len += $seqlen; |
402
|
0
|
|
|
|
|
|
my $uis_per_seqlen = "NaN"; |
403
|
0
|
0
|
|
|
|
|
$uis_per_seqlen = $seqlen / $uis if ( $uis > 0 ); |
404
|
0
|
|
|
|
|
|
chomp($uis_per_seqlen); |
405
|
0
|
|
|
|
|
|
$stats .= "$uis_per_seqlen,"; |
406
|
|
|
|
|
|
|
} |
407
|
0
|
|
|
|
|
|
$stats .= "$total_uis,"; |
408
|
0
|
|
|
|
|
|
my $t_uis_p_l = "NaN"; |
409
|
0
|
0
|
|
|
|
|
$t_uis_p_l = $total_seq_len / $total_uis if ( $total_uis > 0 ); |
410
|
0
|
|
|
|
|
|
$stats .= "$t_uis_p_l\n"; |
411
|
0
|
|
|
|
|
|
print { $self->_stats_handle } $stats; |
|
0
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
} |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
sub _write_stats_header { |
415
|
0
|
|
|
0
|
|
|
my ($self) = @_; |
416
|
0
|
|
|
|
|
|
my @seqnames = keys %{ $self->_sequence_info }; |
|
0
|
|
|
|
|
|
|
417
|
0
|
|
|
|
|
|
my @fields = ( |
418
|
|
|
|
|
|
|
"File", |
419
|
|
|
|
|
|
|
"Total Reads", |
420
|
|
|
|
|
|
|
"Reads Matched", |
421
|
|
|
|
|
|
|
"\% Matched", |
422
|
|
|
|
|
|
|
"Reads Mapped", |
423
|
|
|
|
|
|
|
"\% Mapped" |
424
|
|
|
|
|
|
|
); |
425
|
0
|
|
|
|
|
|
print { $self->_stats_handle } join( ",", @fields ) . ","; |
|
0
|
|
|
|
|
|
|
426
|
0
|
|
|
|
|
|
foreach my $sn (@seqnames) { |
427
|
0
|
|
|
|
|
|
print { $self->_stats_handle } "Unique Insertion Sites : $sn,"; |
|
0
|
|
|
|
|
|
|
428
|
0
|
|
|
|
|
|
print { $self->_stats_handle } "Seq Len/UIS : $sn,"; |
|
0
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
} |
430
|
0
|
|
|
|
|
|
print { $self->_stats_handle } "Total Unique Insertion Sites,"; |
|
0
|
|
|
|
|
|
|
431
|
0
|
|
|
|
|
|
print { $self->_stats_handle } "Total Seq Len/Total UIS\n"; |
|
0
|
|
|
|
|
|
|
432
|
|
|
|
|
|
|
} |
433
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
sub _plotname { |
435
|
0
|
|
|
0
|
|
|
my ( $self, $seq_name ) = @_; |
436
|
0
|
|
|
|
|
|
my $outfile = $self->outfile; |
437
|
|
|
|
|
|
|
|
438
|
0
|
|
|
|
|
|
$seq_name =~ s/[^\w\d\.]/_/g; |
439
|
0
|
|
|
|
|
|
my $plotfile_name = "$outfile.$seq_name.insert_site_plot.gz"; |
440
|
0
|
|
|
|
|
|
return $plotfile_name; |
441
|
|
|
|
|
|
|
} |
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
sub _number_of_mapped_reads { |
444
|
0
|
|
|
0
|
|
|
my ($self) = @_; |
445
|
0
|
|
|
|
|
|
my $temporary_directory = $self->_temp_directory; |
446
|
|
|
|
|
|
|
|
447
|
0
|
|
|
|
|
|
my $pars = |
448
|
|
|
|
|
|
|
Bio::Tradis::Parser::Bam->new( |
449
|
|
|
|
|
|
|
file => "$temporary_directory/mapped.bam" ); |
450
|
0
|
|
|
|
|
|
my $c = 0; |
451
|
0
|
|
|
|
|
|
while ( $pars->next_read ) { |
452
|
0
|
0
|
|
|
|
|
if ( $pars->is_mapped ) { |
453
|
0
|
|
|
|
|
|
$c++; |
454
|
|
|
|
|
|
|
} |
455
|
|
|
|
|
|
|
} |
456
|
0
|
|
|
|
|
|
return $c; |
457
|
|
|
|
|
|
|
} |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
__PACKAGE__->meta->make_immutable; |
460
|
2
|
|
|
2
|
|
19
|
no Moose; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
14
|
|
461
|
|
|
|
|
|
|
1; |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
__END__ |
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
=pod |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
=encoding UTF-8 |
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
=head1 NAME |
470
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
Bio::Tradis::RunTradis - Perform all steps required for a tradis analysis |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
=head1 VERSION |
474
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
version 1.3.3 |
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
=head1 SYNOPSIS |
478
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
Takes a fastq file with tags already attached, filters the tags matching user input, |
480
|
|
|
|
|
|
|
removes the tags, maps to a reference (.fa) and generates insertion site plots for use in |
481
|
|
|
|
|
|
|
Artemis (or other genome browsers), mapped BAM files for each lane and a statistical summary of the analysis. |
482
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
use Bio::Tradis::RunTradis; |
484
|
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
my $pipeline = Bio::Tradis::RunTradis->new( |
486
|
|
|
|
|
|
|
fastqfile => 'abc', |
487
|
|
|
|
|
|
|
reference => 'abc', |
488
|
|
|
|
|
|
|
tag => 'abc', |
489
|
|
|
|
|
|
|
tagdirection => '5'|'3' |
490
|
|
|
|
|
|
|
); |
491
|
|
|
|
|
|
|
$pipeline->run_tradis(); |
492
|
|
|
|
|
|
|
|
493
|
|
|
|
|
|
|
=head1 PARAMETERS |
494
|
|
|
|
|
|
|
|
495
|
|
|
|
|
|
|
=head2 Required |
496
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
=over |
498
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
=item * C<fastqfile> - file containing a list of fastqs (gzipped or raw) to run the |
500
|
|
|
|
|
|
|
complete analysis on. This includes all (including |
501
|
|
|
|
|
|
|
intermediary format conversion and sorting) steps starting from |
502
|
|
|
|
|
|
|
filtering. |
503
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
=item * C<tag> - TraDIS tag to filter and then remove |
505
|
|
|
|
|
|
|
|
506
|
|
|
|
|
|
|
=item * C<reference> - path to/name of reference genome in fasta format (.fa) |
507
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
=back |
509
|
|
|
|
|
|
|
|
510
|
|
|
|
|
|
|
=head2 Optional |
511
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
=over |
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
=item * C<mismatch> - number of mismatches to allow when filtering/removing the tag. Default = 0 |
515
|
|
|
|
|
|
|
|
516
|
|
|
|
|
|
|
=item * C<tagdirection> - direction of the tag, 5' or 3'. Default = 3 |
517
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
=item * C<mapping_score> - cutoff value for mapping score when creating insertion site plots. Default = 30 |
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
=back |
521
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
=head1 METHODS |
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
C<run_tradis> - run complete analysis with given parameters |
525
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
=head1 AUTHOR |
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
Carla Cummins <path-help@sanger.ac.uk> |
529
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
531
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
This software is Copyright (c) 2013 by Wellcome Trust Sanger Institute. |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
This is free software, licensed under: |
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
The GNU General Public License, Version 3, June 2007 |
537
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
=cut |