line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# -*-CPerl-*- |
2
|
|
|
|
|
|
|
# Last changed Time-stamp: <2015-01-05 15:49:29 mtw> |
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
package Bio::ViennaNGS::BamStatSummary; |
5
|
|
|
|
|
|
|
|
6
|
1
|
|
|
1
|
|
971
|
use 5.12.0; |
|
1
|
|
|
|
|
8
|
|
|
1
|
|
|
|
|
34
|
|
7
|
1
|
|
|
1
|
|
3
|
use version; our $VERSION = qv('0.12_08'); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
4
|
|
8
|
1
|
|
|
1
|
|
271
|
use Moose; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
use Carp; |
10
|
|
|
|
|
|
|
use POSIX qw(floor); |
11
|
|
|
|
|
|
|
use Statistics::R; |
12
|
|
|
|
|
|
|
use Data::Dumper; |
13
|
|
|
|
|
|
|
use Path::Class; |
14
|
|
|
|
|
|
|
use namespace::autoclean; |
15
|
|
|
|
|
|
|
use Bio::ViennaNGS::BamStat; |
16
|
|
|
|
|
|
|
use Tie::Hash::Indexed; |
17
|
|
|
|
|
|
|
use File::Basename; |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
has 'data' => ( |
20
|
|
|
|
|
|
|
is => 'ro', |
21
|
|
|
|
|
|
|
isa => 'ArrayRef [Bio::ViennaNGS::BamStat]', |
22
|
|
|
|
|
|
|
default => sub { [] }, |
23
|
|
|
|
|
|
|
); |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
has 'countStat' => ( |
26
|
|
|
|
|
|
|
is => 'rw', |
27
|
|
|
|
|
|
|
isa => 'HashRef', |
28
|
|
|
|
|
|
|
predicate => 'has_countStat', |
29
|
|
|
|
|
|
|
default => sub { {} }, |
30
|
|
|
|
|
|
|
# auto_deref => '1', |
31
|
|
|
|
|
|
|
); |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
has 'outpath' => ( |
34
|
|
|
|
|
|
|
is => 'rw', |
35
|
|
|
|
|
|
|
isa => 'Str', |
36
|
|
|
|
|
|
|
required => '1', |
37
|
|
|
|
|
|
|
); |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
has 'rlib' => ( |
40
|
|
|
|
|
|
|
is => 'rw', |
41
|
|
|
|
|
|
|
isa => 'Str', |
42
|
|
|
|
|
|
|
required => '1', |
43
|
|
|
|
|
|
|
); |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
has 'files' => ( |
46
|
|
|
|
|
|
|
is => 'rw', |
47
|
|
|
|
|
|
|
isa => 'ArrayRef', |
48
|
|
|
|
|
|
|
required => 1, |
49
|
|
|
|
|
|
|
predicate => 'has_files', |
50
|
|
|
|
|
|
|
); |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
has 'control_match' => ( # provides stats how many mapped bases match the reference genome |
53
|
|
|
|
|
|
|
is => 'rw', |
54
|
|
|
|
|
|
|
isa => 'Bool', |
55
|
|
|
|
|
|
|
default => '1', |
56
|
|
|
|
|
|
|
predicate => 'has_control_match', |
57
|
|
|
|
|
|
|
); |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
has 'control_clip' => ( # provides stats how many bases are soft or hard clipped |
60
|
|
|
|
|
|
|
is => 'rw', |
61
|
|
|
|
|
|
|
isa => 'Bool', |
62
|
|
|
|
|
|
|
default => '1', |
63
|
|
|
|
|
|
|
predicate => 'has_control_clip', |
64
|
|
|
|
|
|
|
); |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
has 'control_split' => ( # provides stats how many/often mapped reads are split |
67
|
|
|
|
|
|
|
is => 'rw', |
68
|
|
|
|
|
|
|
isa => 'Bool', |
69
|
|
|
|
|
|
|
default => '1', |
70
|
|
|
|
|
|
|
predicate => 'has_control_split', |
71
|
|
|
|
|
|
|
); |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
has 'control_qual' => ( # provides stats on quality of the match |
74
|
|
|
|
|
|
|
is => 'rw', |
75
|
|
|
|
|
|
|
isa => 'Bool', |
76
|
|
|
|
|
|
|
default => '1', |
77
|
|
|
|
|
|
|
predicate => 'has_control_qual', |
78
|
|
|
|
|
|
|
); |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
has 'control_edit' => ( # provides stats on the edit distance between read and mapped reference |
81
|
|
|
|
|
|
|
is => 'rw', |
82
|
|
|
|
|
|
|
isa => 'Bool', |
83
|
|
|
|
|
|
|
default => '1', |
84
|
|
|
|
|
|
|
predicate => 'has_control_edit', |
85
|
|
|
|
|
|
|
); |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
has 'control_flag' => ( # analyses the sam bit flag for qual/strands/pair_vs_single reads |
88
|
|
|
|
|
|
|
is => 'rw', |
89
|
|
|
|
|
|
|
isa => 'Bool', |
90
|
|
|
|
|
|
|
default => '1', |
91
|
|
|
|
|
|
|
predicate => 'has_control_flag', |
92
|
|
|
|
|
|
|
); |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
has 'control_score' => ( # provides stats on per-base quality scores |
95
|
|
|
|
|
|
|
is => 'rw', |
96
|
|
|
|
|
|
|
isa => 'Bool', |
97
|
|
|
|
|
|
|
default => '1', |
98
|
|
|
|
|
|
|
predicate => 'has_control_score', |
99
|
|
|
|
|
|
|
); |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
has 'control_uniq' => ( # gives number and stats of multiplicity of readaligments |
102
|
|
|
|
|
|
|
is => 'rw', |
103
|
|
|
|
|
|
|
isa => 'Bool', |
104
|
|
|
|
|
|
|
default => '1', |
105
|
|
|
|
|
|
|
predicate => 'has_control_uniq', |
106
|
|
|
|
|
|
|
); |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
has 'is_segemehl' => ( # toggles to consider segemehl specific bam feature |
109
|
|
|
|
|
|
|
is => 'rw', |
110
|
|
|
|
|
|
|
isa => 'Bool', |
111
|
|
|
|
|
|
|
default => '0', |
112
|
|
|
|
|
|
|
predicate => 'has_is_segemehl', |
113
|
|
|
|
|
|
|
); |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
sub populate_data { |
116
|
|
|
|
|
|
|
my ($self) = @_; |
117
|
|
|
|
|
|
|
foreach my $bamfile (@{$self->files}){ |
118
|
|
|
|
|
|
|
carp ">> processing $bamfile\n"; |
119
|
|
|
|
|
|
|
my $bo = Bio::ViennaNGS::BamStat->new(bam => $bamfile); |
120
|
|
|
|
|
|
|
$bo->stat_singleBam(); |
121
|
|
|
|
|
|
|
push (@{$self->data}, $bo); |
122
|
|
|
|
|
|
|
} |
123
|
|
|
|
|
|
|
} |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
sub populate_countStat { |
127
|
|
|
|
|
|
|
my ($self) = @_; |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
tie my %hdr, 'Tie::Hash::Indexed'; |
130
|
|
|
|
|
|
|
%hdr = ( |
131
|
|
|
|
|
|
|
"sample" => "Sample", |
132
|
|
|
|
|
|
|
"total_alignments" => "# Total alignments", |
133
|
|
|
|
|
|
|
"mapped_reads" => "# Mapped reads", |
134
|
|
|
|
|
|
|
"umapped_reads" => "# Unique mapped reads", |
135
|
|
|
|
|
|
|
"mmapped_reads" => "# Multi mapped reads", |
136
|
|
|
|
|
|
|
"aligned_pairs" => "# Aligned in pairs", |
137
|
|
|
|
|
|
|
"aligned_mm" => "# Aligned mate missing", |
138
|
|
|
|
|
|
|
"aligned_se" => "# Aligned single end", |
139
|
|
|
|
|
|
|
"aligned_fwd" => "# Aligned forward strand", |
140
|
|
|
|
|
|
|
"aligned_rev" => "# Aligned reverse strand"); |
141
|
|
|
|
|
|
|
${$self->countStat}{'header'} = \%hdr; |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
foreach my $sample (@{$self->data}){ |
144
|
|
|
|
|
|
|
my ($basename,$dir,$ext) = fileparse($$sample{'bam'},qr/\.[^.]*/); |
145
|
|
|
|
|
|
|
${$self->countStat}{$basename}{'total_alignments'} = floor(0.5 + $$sample{'data_out'}->{'aln_count'}->{'total'} ); |
146
|
|
|
|
|
|
|
${$self->countStat}{$basename}{'mapped_reads'} = floor(0.5 + $$sample{'data_out'}->{'uniq'}->{'mapped_reads'} ); |
147
|
|
|
|
|
|
|
${$self->countStat}{$basename}{'umapped_reads'} = floor(0.5 + $$sample{'data_out'}->{'uniq'}->{'uniq_mapped_reads'} ); |
148
|
|
|
|
|
|
|
${$self->countStat}{$basename}{'mmapped_reads'} = |
149
|
|
|
|
|
|
|
floor(0.5 + $$sample{'data_out'}->{'uniq'}->{'mapped_reads'} - $$sample{'data_out'}->{'uniq'}->{'uniq_mapped_reads'} ); |
150
|
|
|
|
|
|
|
${$self->countStat}{$basename}{'aligned_pairs'} = floor(0.5 + ($$sample{'data_out'}->{'aln_count'}->{'mapped_pair'})/2 ); |
151
|
|
|
|
|
|
|
${$self->countStat}{$basename}{'aligned_mm'} = floor(0.5 + $$sample{'data_out'}->{'aln_count'}->{'unmapped_pair'} ); |
152
|
|
|
|
|
|
|
${$self->countStat}{$basename}{'aligned_se'} = floor(0.5 + $$sample{'data_out'}->{'aln_count'}->{'mapped_single'} ); |
153
|
|
|
|
|
|
|
${$self->countStat}{$basename}{'aligned_fwd'} = floor(0.5 + $$sample{'data_out'}->{'strand'}->{'forward'} ); |
154
|
|
|
|
|
|
|
${$self->countStat}{$basename}{'aligned_rev'} = floor(0.5 + $$sample{'data_out'}->{'strand'}->{'reverse'} ); |
155
|
|
|
|
|
|
|
} |
156
|
|
|
|
|
|
|
} |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
sub dump_countStat { |
159
|
|
|
|
|
|
|
my ($self,$how) = @_; |
160
|
|
|
|
|
|
|
my $mn = "mapping_stats.csv"; |
161
|
|
|
|
|
|
|
my $fn = file($self->outpath,$mn); |
162
|
|
|
|
|
|
|
open(OUT, "> $fn") or croak "cannot open OUT $!"; |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
print OUT join ("\t", values %{$self->countStat->{'header'} })."\n"; |
165
|
|
|
|
|
|
|
foreach my $sample (keys %{$self->countStat} ){ |
166
|
|
|
|
|
|
|
next if ($sample eq 'header'); |
167
|
|
|
|
|
|
|
my @line = (); |
168
|
|
|
|
|
|
|
foreach my $key ( keys %{$self->countStat->{'header'}} ) { |
169
|
|
|
|
|
|
|
if ($key eq 'sample'){ |
170
|
|
|
|
|
|
|
push (@line, $sample); |
171
|
|
|
|
|
|
|
next; |
172
|
|
|
|
|
|
|
} |
173
|
|
|
|
|
|
|
push @line, $self->countStat->{$sample}->{$key}; |
174
|
|
|
|
|
|
|
} |
175
|
|
|
|
|
|
|
print OUT join ("\t", @line)."\n"; |
176
|
|
|
|
|
|
|
} |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
# print Dumper($self->countStat); |
179
|
|
|
|
|
|
|
close (OUT); |
180
|
|
|
|
|
|
|
} |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
sub make_BarPlot{ |
183
|
|
|
|
|
|
|
my ($self) = @_; |
184
|
|
|
|
|
|
|
my @Rstat_data_count = (); |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
## collect data for read.table string |
187
|
|
|
|
|
|
|
push @Rstat_data_count, 'Samples', grep {!/header/} keys %{$self->countStat}; # first line with sample names |
188
|
|
|
|
|
|
|
$Rstat_data_count[-1]="$Rstat_data_count[-1]\n"; # end 1st line |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
push @Rstat_data_count, 'aligned_se'; |
191
|
|
|
|
|
|
|
foreach my $sample ( grep {!/header/} keys %{$self->countStat} ) { # 2nd line with single end aln counts |
192
|
|
|
|
|
|
|
push @Rstat_data_count, $self->countStat->{$sample}{'aligned_se'}; |
193
|
|
|
|
|
|
|
} |
194
|
|
|
|
|
|
|
$Rstat_data_count[-1]="$Rstat_data_count[-1]\n"; # end 2nd line |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
push @Rstat_data_count, 'aligned_pairs'; |
197
|
|
|
|
|
|
|
foreach my $sample ( grep {!/header/} keys %{$self->countStat} ) { # 3rd line with aligned pairs count |
198
|
|
|
|
|
|
|
push @Rstat_data_count, $self->countStat->{$sample}{'aligned_pairs'}; |
199
|
|
|
|
|
|
|
} |
200
|
|
|
|
|
|
|
$Rstat_data_count[-1]="$Rstat_data_count[-1]\n"; # end 3rd line |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
push @Rstat_data_count, 'aligned_mm'; |
203
|
|
|
|
|
|
|
foreach my $sample ( grep {!/header/} keys %{$self->countStat} ) { # 4th line with incomplete aligned pairs |
204
|
|
|
|
|
|
|
push @Rstat_data_count, $self->countStat->{$sample}{'aligned_mm'}; |
205
|
|
|
|
|
|
|
} |
206
|
|
|
|
|
|
|
$Rstat_data_count[-1]="$Rstat_data_count[-1]\n"; # end 4th line |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
## produce bar plot |
209
|
|
|
|
|
|
|
my $mn = "mapping_stats.pdf"; |
210
|
|
|
|
|
|
|
my $fn = file($self->outpath,$mn); |
211
|
|
|
|
|
|
|
my $datastring = join(" ", @Rstat_data_count); |
212
|
|
|
|
|
|
|
$self->plot_barplot($fn, "Mapped reads", $datastring); # produce plot with read.table string input |
213
|
|
|
|
|
|
|
} |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
sub plot_barplot { #plot barplot read.table text string |
216
|
|
|
|
|
|
|
my ($self, $filename,$ylab,$data_string) = @_; |
217
|
|
|
|
|
|
|
my ($bn,$odir,$ext) = fileparse($filename, qr /\..*/); |
218
|
|
|
|
|
|
|
#my $rlibpath = '/usr/bin/R'; |
219
|
|
|
|
|
|
|
my $rlibpath = $self->rlib; |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
$filename .= '.pdf' unless ($ext eq 'pdf'); |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
my $R = Statistics::R->new(); |
224
|
|
|
|
|
|
|
$R->startR; |
225
|
|
|
|
|
|
|
$R->set('rlib', $rlibpath); |
226
|
|
|
|
|
|
|
$R->set('log_dir', $odir); |
227
|
|
|
|
|
|
|
$R->run("pdf('${filename}')") ; |
228
|
|
|
|
|
|
|
$R->run("dat<-read.table(text = \"$data_string\", header = TRUE, row.names=1)") ; |
229
|
|
|
|
|
|
|
$R->run("dat_m<-as.matrix(dat)") ; |
230
|
|
|
|
|
|
|
$R->run("colors<-terrain.colors(nrow(dat_m), alpha = 1)") ; |
231
|
|
|
|
|
|
|
# $R->run("colors<-c('darkolivegreen4','darkolivegreen2','coral1')") ; |
232
|
|
|
|
|
|
|
# $R->run("colors<-hcl(seq(0, 360, length =nrow(dat_m)))") ; |
233
|
|
|
|
|
|
|
$R->run("barplot(dat_m, xlim=c(0,ncol(dat_m)+2), col=colors, legend.text = TRUE, args.legend = list(x = ncol(dat_m) + 2, y=max(colSums(dat_m)), bty = 'n' ), ylab='$ylab', xlab='Samples')") ; |
234
|
|
|
|
|
|
|
$R->run("dev.off()") ; |
235
|
|
|
|
|
|
|
$R->stopR; |
236
|
|
|
|
|
|
|
} |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
1; |