| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# -*-CPerl-*- |
|
2
|
|
|
|
|
|
|
# Last changed Time-stamp: <2015-02-06 16:28:21 mtw> |
|
3
|
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
package Bio::ViennaNGS::BamStatSummary; |
|
5
|
|
|
|
|
|
|
|
|
6
|
1
|
|
|
1
|
|
1303
|
use version; our $VERSION = qv('0.12_15'); |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
5
|
|
|
7
|
1
|
|
|
1
|
|
63
|
use Moose; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
9
|
|
|
8
|
1
|
|
|
1
|
|
5568
|
use Carp; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
60
|
|
|
9
|
1
|
|
|
1
|
|
5
|
use POSIX qw(floor); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
8
|
|
|
10
|
1
|
|
|
1
|
|
265
|
use Statistics::R; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
use Data::Dumper; |
|
12
|
|
|
|
|
|
|
use Path::Class; |
|
13
|
|
|
|
|
|
|
use namespace::autoclean; |
|
14
|
|
|
|
|
|
|
use Bio::ViennaNGS::BamStat; |
|
15
|
|
|
|
|
|
|
use Tie::Hash::Indexed; |
|
16
|
|
|
|
|
|
|
use File::Basename; |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
# ---------------------------------------------------------------------
# Attributes
# ---------------------------------------------------------------------

# One Bio::ViennaNGS::BamStat object per input file; filled by
# populate_data().
has 'data' => (
  is      => 'ro',
  isa     => 'ArrayRef [Bio::ViennaNGS::BamStat]',
  default => sub { [] },
);

# Summary table: the key 'header' holds the column labels, every other
# key is a sample name; filled by populate_countStat().
has 'countStat' => (
  is        => 'rw',
  isa       => 'HashRef',
  predicate => 'has_countStat',
  default   => sub { {} },
);

# Directory that receives the CSV and PDF output files.
has 'outpath' => (
  is       => 'rw',
  isa      => 'Str',
  required => '1',
);

# Value handed to the R session as the variable 'rlib' by the plot
# methods.
has 'rlib' => (
  is       => 'rw',
  isa      => 'Str',
  required => '1',
);

# Input files; each entry is passed to Bio::ViennaNGS::BamStat as 'bam'.
has 'files' => (
  is        => 'rw',
  isa       => 'ArrayRef',
  required  => 1,
  predicate => 'has_files',
);

# Boolean switches, all enabled by default:
#   control_match  provides stats how many mapped bases match the reference genome
#   control_clip   provides stats how many bases are soft or hard clipped
#   control_split  provides stats how many/often mapped reads are split
#   control_qual   provides stats on quality of the match
#   control_edit   provides stats on the edit distance between read and mapped reference
#   control_flag   analyses the sam bit flag for qual/strands/pair_vs_single reads
#   control_score  provides stats on per-base quality scores
#   control_uniq   gives number and stats of multiplicity of read alignments
# NOTE(review): each switch has a default, so its has_control_* predicate
# is presumably always true after construction; read control_* to get
# the actual flag value -- confirm against the callers.
foreach my $switch (qw(match clip split qual edit flag score uniq)) {
  has "control_$switch" => (
    is        => 'rw',
    isa       => 'Bool',
    default   => '1',
    predicate => "has_control_$switch",
  );
}

# Toggles consideration of segemehl specific BAM features; off by default.
has 'is_segemehl' => (
  is        => 'rw',
  isa       => 'Bool',
  default   => '0',
  predicate => 'has_is_segemehl',
);
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
sub populate_data {
  # Create a Bio::ViennaNGS::BamStat object for every entry of 'files',
  # run its stat_singleBam() method and append the object to 'data'.
  my ($self) = @_;

  for my $bam_path (@{ $self->files }) {
    my $bam_stat = Bio::ViennaNGS::BamStat->new(bam => $bam_path);
    $bam_stat->stat_singleBam();
    push @{ $self->data }, $bam_stat;
  }
}
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
sub populate_countStat {
  # Fill 'countStat' with an insertion-ordered header (column key =>
  # column label) and, for every object in 'data', one row of counts
  # keyed by the file's basename without extension. Each count is
  # rounded to the nearest integer via floor(0.5 + value).
  my ($self) = @_;
  my $countStat = $self->countStat;

  # Tie::Hash::Indexed keeps the columns in the order listed here.
  tie my %hdr, 'Tie::Hash::Indexed';
  %hdr = (
    "sample"           => "Sample",
    "total_alignments" => "# Total alignments",
    "mapped_reads"     => "# Mapped reads",
    "umapped_reads"    => "# Unique mapped reads",
    "mmapped_reads"    => "# Multi mapped reads",
    "aligned_pairs"    => "# Aligned in pairs",
    "aligned_mm"       => "# Aligned mate missing",
    "aligned_se"       => "# Aligned single end",
    "aligned_fwd"      => "# Aligned forward strand",
    "aligned_rev"      => "# Aligned reverse strand",
  );
  $countStat->{'header'} = \%hdr;

  for my $sample (@{ $self->data }) {
    my ($basename) = fileparse($sample->{'bam'}, qr/\.[^.]*/);
    my $row = ($countStat->{$basename} ||= {});

    $row->{'total_alignments'} =
      floor(0.5 + $sample->{'data_out'}->{'aln_count'}->{'total'});
    $row->{'mapped_reads'} =
      floor(0.5 + $sample->{'data_out'}->{'uniq'}->{'mapped_reads'});
    $row->{'umapped_reads'} =
      floor(0.5 + $sample->{'data_out'}->{'uniq'}->{'uniq_mapped_reads'});
    # multi mapped = all mapped reads minus the uniquely mapped ones
    $row->{'mmapped_reads'} =
      floor(0.5 + $sample->{'data_out'}->{'uniq'}->{'mapped_reads'}
                - $sample->{'data_out'}->{'uniq'}->{'uniq_mapped_reads'});
    # the stored 'mapped_pair' value is halved before rounding
    $row->{'aligned_pairs'} =
      floor(0.5 + ($sample->{'data_out'}->{'aln_count'}->{'mapped_pair'})/2);
    $row->{'aligned_mm'} =
      floor(0.5 + $sample->{'data_out'}->{'aln_count'}->{'unmapped_pair'});
    $row->{'aligned_se'} =
      floor(0.5 + $sample->{'data_out'}->{'aln_count'}->{'mapped_single'});
    $row->{'aligned_fwd'} =
      floor(0.5 + $sample->{'data_out'}->{'strand'}->{'forward'});
    $row->{'aligned_rev'} =
      floor(0.5 + $sample->{'data_out'}->{'strand'}->{'reverse'});
  }
}
|
155
|
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
sub dump_countStat {
  # Write 'countStat' as a tab-separated table to the file
  # "mapping_stats.csv" inside 'outpath': first the column labels, then
  # one line per sample with the columns in header order.
  #
  # Arguments: $how is accepted for backward compatibility but is not
  #            evaluated.
  # Returns:   1 on success.
  # Croaks:    if the output file cannot be opened or closed.
  my ($self,$how) = @_;
  my $fn        = file($self->outpath, "mapping_stats.csv");
  my $countStat = $self->countStat;

  # Three-argument open with a lexical handle: the path is never parsed
  # for mode characters and the handle cannot clash with other code.
  open(my $out, '>', "$fn") or croak "cannot open $fn for writing: $!";

  print {$out} join("\t", values %{ $countStat->{'header'} })."\n";

  my @columns = keys %{ $countStat->{'header'} };
  foreach my $sample (keys %{$countStat}) {
    next if ($sample eq 'header');
    # the 'sample' column carries the sample name itself
    my @line = map { $_ eq 'sample' ? $sample : $countStat->{$sample}->{$_} } @columns;
    print {$out} join("\t", @line)."\n";
  }

  # Buffered write errors only surface when the handle is closed.
  close($out) or croak "cannot close $fn: $!";
  return 1;
}
|
180
|
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
sub make_BarPlot{
  # Build a whitespace-separated table (first line: sample names, then
  # one line each for aligned_se, aligned_pairs and aligned_mm) from
  # 'countStat' and hand it to plot_barplot(), which writes
  # "mapping_stats.pdf" into 'outpath'.
  my ($self) = @_;
  my $countStat = $self->countStat;

  # Only the reserved key 'header' is skipped. An exact comparison is
  # used so that samples whose name merely contains "header" are kept,
  # matching what dump_countStat() does.
  my @samples = grep { $_ ne 'header' } keys %{$countStat};

  ## collect data for read.table string
  my @rows = ( join(" ", 'Samples', @samples) ); # first line with sample names
  foreach my $category (qw(aligned_se aligned_pairs aligned_mm)) {
    push @rows, join(" ", $category, map { $countStat->{$_}{$category} } @samples);
  }
  # Every line ends with a newline; continuation lines start with one blank.
  my $datastring = join("\n ", @rows)."\n";

  ## produce bar plot
  my $fn = file($self->outpath, "mapping_stats.pdf");
  $self->plot_barplot($fn, "Mapped reads", $datastring); # produce plot with read.table string input
}
|
213
|
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
sub plot_barplot { #plot barplot read.table text string
  # Render a stacked bar plot as PDF via Statistics::R.
  #
  # Arguments:
  #   $filename    - output path; ".pdf" is appended unless the file
  #                  name already ends in that extension
  #   $ylab        - label of the y axis
  #   $data_string - table text that R parses with read.table(text=...)
  #                  using the first line as header and the first
  #                  column as row names
  #
  # NOTE(review): $filename, $ylab and $data_string are interpolated
  # unescaped into R code, so quote characters in them would break the
  # generated commands.
  my ($self, $filename, $ylab, $data_string) = @_;
  # The suffix pattern matches the last extension only; a pattern such
  # as \..* would turn "x.y.pdf" into the extension ".y.pdf" and cause
  # ".pdf" to be appended a second time.
  my (undef, $odir, $ext) = fileparse($filename, qr/\.[^.]*/);
  my $rlibpath = $self->rlib;

  $filename .= '.pdf' unless ($ext eq '.pdf');

  my $R = Statistics::R->new();
  $R->startR;
  $R->set('rlib', $rlibpath);
  $R->set('log_dir', $odir);
  $R->run("pdf('${filename}')") ;
  $R->run("dat<-read.table(text = \"$data_string\", header = TRUE, row.names=1)") ;
  $R->run("dat_m<-as.matrix(dat)") ;
  # three fixed colours for the first rows, terrain colours for any further rows
  $R->run("colors<-c('lightblue','lightgreen','lightcoral', terrain.colors(nrow(dat_m)-3, alpha = 1))") ;
  $R->run("types<-row.names(dat_m)") ;
  $R->run("par(mar = c(15,3,5,5), oma = c(1, 1, 4, 1))") ;
  $R->run("barplot(dat_m, xlim=c(0,ncol(dat_m)), col=colors, ylab='$ylab', xlab='', las=3)") ;
  # overlay an empty full-figure region so the legend can sit at the top
  $R->run("par(fig = c(0, 1, 0, 1), oma = c(0, 0, 0, 0),mar = c(0, 0, 0, 0), new = TRUE)") ;
  $R->run("legend('top', types, horiz = TRUE, inset = c(0,0), bty = 'n', fill = colors, cex = 1.2 )") ;
  $R->run("dev.off()") ;
  $R->stopR;
}
|
242
|
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
sub make_BoxPlot{
  # Summarise the value list stored under the key $whattodo of every
  # object in 'data' with statsstring() and pass the result to
  # plot_bxplot(), which writes "<$whattodo>_stats.pdf" into 'outpath'.
  # Nothing is plotted when no summary values were collected.
  my ($self, $whattodo) = @_;
  my (@summaries, @sizes, @labels);

  ## collect data for the R list definition
  for my $sample (@{ $self->data }) {
    my ($basename) = fileparse($sample->{'bam'}, qr/\.[^.]*/);

    push @summaries, statsstring(@{ $sample->{$whattodo} }); # five values per sample
    push @sizes,     scalar(@{ $sample->{$whattodo} });      # number of observations
    push @labels,    "'$basename'";                          # quoted for use in R
  }

  ## produce box plot
  if (@summaries) {
    my $stats_csv  = join(",", @summaries);
    my $sizes_csv  = join(",", @sizes);
    my $labels_csv = join(",", @labels);
    my $data_string = "summarydata<-list(stats=matrix(c(".$stats_csv."),5,".scalar(@labels)."), n=c(".$sizes_csv."), names=c(".$labels_csv."))";

    my $fn = file($self->outpath, "${whattodo}_stats.pdf");
    $self->plot_bxplot($fn, $whattodo, $data_string);
  }
}
|
271
|
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
sub plot_bxplot{
  # Render a box plot as PDF via Statistics::R.
  #
  # Arguments:
  #   $filename           - output path; ".pdf" is appended unless the
  #                         file name already ends in that extension
  #   $ylab               - label of the y axis
  #   $datacommand_string - R statement that defines the variable
  #                         'summarydata' consumed by bxp()
  #
  # NOTE(review): $filename and $ylab are interpolated unescaped into R
  # code, so quote characters in them would break the generated
  # commands.
  my ($self, $filename, $ylab, $datacommand_string) = @_;
  # The suffix pattern matches the last extension only; a pattern such
  # as \..* would turn "x.y.pdf" into the extension ".y.pdf" and cause
  # ".pdf" to be appended a second time.
  my (undef, $odir, $ext) = fileparse($filename, qr/\.[^.]*/);
  my $rlibpath = $self->rlib;

  $filename .= '.pdf' unless ($ext eq '.pdf');

  my $R = Statistics::R->new();
  $R->startR;
  $R->set('rlib', $rlibpath);
  $R->set('log_dir', $odir);
  $R->run("pdf('${filename}')") ;
  $R->run("$datacommand_string") ;
  $R->run("bxp(summarydata, medcol = 'red', ylab='$ylab', xlab='',las=3)") ;
  $R->run("dev.off()") ;
  $R->stopR;
}
|
289
|
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
sub statsstring{
  # usage: @five = statsstring(@values);
  #
  # Returns the five values minimum, first quartile, median, third
  # quartile and maximum of the numeric arguments, in that order. All
  # but the median are formatted with two decimals; the median is
  # returned as computed. The quartiles are the elements at index
  # int(n/4) and int(3n/4) of the sorted values. Without arguments five
  # zeros are returned.
  my @vals = sort {$a <=> $b} @_;
  my @statstring = ();

  if(@vals){
    my $n   = scalar(@vals);
    my $mid = int($n/2);

    # The list is sorted, so both extremes are available by index.
    push @statstring, sprintf("%.2f", $vals[0]);                ## min
    push @statstring, sprintf("%.2f", $vals[int($n/4)]);        ## 1.quartile
    push @statstring, ($n % 2)
      ? $vals[$mid]                                             ## odd median
      : ($vals[$mid-1] + $vals[$mid])/2;                        ## even median
    push @statstring, sprintf("%.2f", $vals[int(($n*3)/4)]);    ## 3.quartile
    push @statstring, sprintf("%.2f", $vals[-1]);               ## max
  }
  else{
    @statstring=qw/0 0 0 0 0/;
  }
  return(@statstring);
}
|
313
|
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
sub max { # usage: $largest = max(\@a);
  # Returns the numerically largest element of the referenced array,
  # or undef when the array is empty.
  my ($arrayref) = @_;
  my $largest = $arrayref->[0];
  for my $idx (1 .. $#{$arrayref}) {
    my $candidate = $arrayref->[$idx];
    $largest = $candidate if $candidate > $largest;
  }
  return $largest;
}
|
320
|
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
sub min { # usage: $smallest = min(\@a);
  # Returns the numerically smallest element of the referenced array,
  # or undef when the array is empty.
  my ($arrayref) = @_;
  my $smallest = $arrayref->[0];
  for my $idx (1 .. $#{$arrayref}) {
    my $candidate = $arrayref->[$idx];
    $smallest = $candidate if $candidate < $smallest;
  }
  return $smallest;
}
|
327
|
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
1; |
|
329
|
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
__END__ |
|
331
|
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
=head1 NAME |
|
334
|
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
Bio::ViennaNGS::BamStatSummary - Moose interface to analyze, summarize |
|
336
|
|
|
|
|
|
|
and compare BAM mapping statistics data structure produced by |
|
337
|
|
|
|
|
|
|
Bio::ViennaNGS::BamStat |
|
338
|
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
340
|
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
use Bio::ViennaNGS::BamStatSummary; |
|
342
|
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
$bamsummary->populate_data(); |
|
344
|
|
|
|
|
|
|
$bamsummary->populate_countStat(); |
|
345
|
|
|
|
|
|
|
$bamsummary->dump_countStat("csv"); |
|
346
|
|
|
|
|
|
|
$bamsummary->make_BarPlot(); |
|
347
|
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
$bamsummary->make_BoxPlot("data_edit" ) if( $bamsummary->has_control_edit ); |
|
349
|
|
|
|
|
|
|
$bamsummary->make_BoxPlot("data_clip" ) if( $bamsummary->has_control_clip ); |
|
350
|
|
|
|
|
|
|
$bamsummary->make_BoxPlot("data_match") if( $bamsummary->has_control_match ); |
|
351
|
|
|
|
|
|
|
$bamsummary->make_BoxPlot("data_qual" ) if( $bamsummary->has_control_qual ); |
|
352
|
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
355
|
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
This module provides a L<Moose> interface to process the mapping |
|
357
|
|
|
|
|
|
|
statistics of single BAM files. It uses the data structure as produced |
|
358
|
|
|
|
|
|
|
by L<Bio::ViennaNGS::BamStat>, summarizes the data and compares |
|
359
|
|
|
|
|
|
|
different BAM files. Output is written both as CSV files and graphical |
|
360
|
|
|
|
|
|
|
representation of the results. Internally, this module builds on |
|
361
|
|
|
|
|
|
|
L<Statistics::R>. |
|
362
|
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
=head1 DEPENDENCIES |
|
365
|
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
=over |
|
367
|
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
=item L<Statistics::R> |
|
369
|
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
=item L<Path::Class> |
|
371
|
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
=item L<Tie::Hash::Indexed> |
|
373
|
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
=item L<Moose> |
|
375
|
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
=back |
|
377
|
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
379
|
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
=over |
|
381
|
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
=item L<Bio::ViennaNGS> |
|
383
|
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
=item L<Bio::ViennaNGS::BamStat> |
|
385
|
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
=back |
|
387
|
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
=head1 AUTHORS |
|
389
|
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
=over |
|
391
|
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
=item Fabian Amman E<lt>fabian@tbi.univie.ac.atE<gt> |
|
393
|
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
=item Michael T. Wolfinger E<lt>michael@wolfinger.euE<gt> |
|
395
|
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
=back |
|
397
|
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
|
399
|
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
Copyright (C) 2015 by Michael T. Wolfinger |
|
401
|
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
|
403
|
|
|
|
|
|
|
it under the same terms as Perl itself, either Perl version 5.10.0 or, |
|
404
|
|
|
|
|
|
|
at your option, any later version of Perl 5 you may have available. |
|
405
|
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
This software is distributed in the hope that it will be useful, but |
|
407
|
|
|
|
|
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of |
|
408
|
|
|
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
|
409
|
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
=cut |