line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# -*-CPerl-*- |
2
|
|
|
|
|
|
|
# Last changed Time-stamp: <2015-02-06 16:28:21 mtw> |
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
package Bio::ViennaNGS::BamStatSummary; |
5
|
|
|
|
|
|
|
|
6
|
1
|
|
|
1
|
|
1303
|
use version; our $VERSION = qv('0.12_15'); |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
5
|
|
7
|
1
|
|
|
1
|
|
63
|
use Moose; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
9
|
|
8
|
1
|
|
|
1
|
|
5568
|
use Carp; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
60
|
|
9
|
1
|
|
|
1
|
|
5
|
use POSIX qw(floor); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
8
|
|
10
|
1
|
|
|
1
|
|
265
|
use Statistics::R; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
use Data::Dumper; |
12
|
|
|
|
|
|
|
use Path::Class; |
13
|
|
|
|
|
|
|
use namespace::autoclean; |
14
|
|
|
|
|
|
|
use Bio::ViennaNGS::BamStat; |
15
|
|
|
|
|
|
|
use Tie::Hash::Indexed; |
16
|
|
|
|
|
|
|
use File::Basename; |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
# Bio::ViennaNGS::BamStat objects, one per processed BAM file.
# populate_data() appends to this list.
has data => (
  is      => 'ro',
  isa     => 'ArrayRef [Bio::ViennaNGS::BamStat]',
  default => sub { return [] },
);

# Summary table written by populate_countStat(): the key 'header' maps
# column ids to column titles, every other key is a sample name mapping
# to that sample's counts.
# NOTE: a default is supplied, so has_countStat is true right after
# construction.
has countStat => (
  is        => 'rw',
  isa       => 'HashRef',
  default   => sub { return {} },
  predicate => 'has_countStat',
);

# Directory that receives the generated CSV and PDF files.
has outpath => (
  is       => 'rw',
  isa      => 'Str',
  required => '1',
);

# Value handed to every R session as the R variable 'rlib'.
has rlib => (
  is       => 'rw',
  isa      => 'Str',
  required => '1',
);

# BAM files to analyse; populate_data() processes them in this order.
has files => (
  is        => 'rw',
  isa       => 'ArrayRef',
  required  => 1,
  predicate => 'has_files',
);
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
# Boolean switches selecting which statistics are of interest.
# NOTE: every switch carries a default, so its has_* predicate is true
# right after construction; read the accessor itself (e.g.
# $obj->control_match) to learn whether a switch is on or off.
my %switch_on  = ( is => 'rw', isa => 'Bool', default => '1' );
my %switch_off = ( is => 'rw', isa => 'Bool', default => '0' );

# stats on how many mapped bases match the reference genome
has control_match => ( %switch_on, predicate => 'has_control_match' );

# stats on how many bases are soft or hard clipped
has control_clip  => ( %switch_on, predicate => 'has_control_clip' );

# stats on how many/how often mapped reads are split
has control_split => ( %switch_on, predicate => 'has_control_split' );

# stats on the quality of the match
has control_qual  => ( %switch_on, predicate => 'has_control_qual' );

# stats on the edit distance between read and mapped reference
has control_edit  => ( %switch_on, predicate => 'has_control_edit' );

# analysis of the SAM bit flag for qual/strands/pair_vs_single reads
has control_flag  => ( %switch_on, predicate => 'has_control_flag' );

# stats on per-base quality scores
has control_score => ( %switch_on, predicate => 'has_control_score' );

# number and stats of the multiplicity of read alignments
has control_uniq  => ( %switch_on, predicate => 'has_control_uniq' );

# toggles consideration of segemehl specific BAM features (off by default)
has is_segemehl   => ( %switch_off, predicate => 'has_is_segemehl' );
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
# Build one Bio::ViennaNGS::BamStat object per entry of the 'files'
# attribute, run stat_singleBam() on it and append it to the 'data'
# attribute. Objects are stored in the order of 'files'.
sub populate_data {
  my $self      = shift;
  my $collected = $self->data;

  for my $path (@{ $self->files }) {
    my $stat = Bio::ViennaNGS::BamStat->new(bam => $path);
    $stat->stat_singleBam();
    push @{$collected}, $stat;
  }
}
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
# Fill the 'countStat' table from the BamStat objects held in 'data'.
#
# The entry 'header' maps each column id to its printable title; the
# tied hash keeps the columns in the order they are listed here. Every
# other entry is keyed by the BAM file's base name (directory and last
# suffix removed) and holds the counts, each rounded to the nearest
# integer via floor(0.5 + x).
sub populate_countStat {
  my $self  = shift;
  my $table = $self->countStat;

  tie my %title_of, 'Tie::Hash::Indexed';
  %title_of = (
    "sample"           => "Sample",
    "total_alignments" => "# Total alignments",
    "mapped_reads"     => "# Mapped reads",
    "umapped_reads"    => "# Unique mapped reads",
    "mmapped_reads"    => "# Multi mapped reads",
    "aligned_pairs"    => "# Aligned in pairs",
    "aligned_mm"       => "# Aligned mate missing",
    "aligned_se"       => "# Aligned single end",
    "aligned_fwd"      => "# Aligned forward strand",
    "aligned_rev"      => "# Aligned reverse strand",
  );
  $table->{'header'} = \%title_of;

  for my $bamstat (@{ $self->data }) {
    my ($name) = fileparse($bamstat->{'bam'}, qr/\.[^.]*/);

    $table->{$name}{'total_alignments'} =
      floor(0.5 + $bamstat->{'data_out'}{'aln_count'}{'total'});
    $table->{$name}{'mapped_reads'} =
      floor(0.5 + $bamstat->{'data_out'}{'uniq'}{'mapped_reads'});
    $table->{$name}{'umapped_reads'} =
      floor(0.5 + $bamstat->{'data_out'}{'uniq'}{'uniq_mapped_reads'});
    # multi mapped = all mapped reads minus the uniquely mapped ones
    $table->{$name}{'mmapped_reads'} =
      floor(0.5 + $bamstat->{'data_out'}{'uniq'}{'mapped_reads'}
                - $bamstat->{'data_out'}{'uniq'}{'uniq_mapped_reads'});
    # 'mapped_pair' is halved to obtain the number of pairs
    $table->{$name}{'aligned_pairs'} =
      floor(0.5 + ($bamstat->{'data_out'}{'aln_count'}{'mapped_pair'})/2);
    $table->{$name}{'aligned_mm'} =
      floor(0.5 + $bamstat->{'data_out'}{'aln_count'}{'unmapped_pair'});
    $table->{$name}{'aligned_se'} =
      floor(0.5 + $bamstat->{'data_out'}{'aln_count'}{'mapped_single'});
    $table->{$name}{'aligned_fwd'} =
      floor(0.5 + $bamstat->{'data_out'}{'strand'}{'forward'});
    $table->{$name}{'aligned_rev'} =
      floor(0.5 + $bamstat->{'data_out'}{'strand'}{'reverse'});
  }
}
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
# Write the 'countStat' table to <outpath>/mapping_stats.csv.
#
# The first line holds the column titles from the 'header' entry, each
# following line holds one sample; fields are separated by tabs. Column
# order follows the 'header' hash, sample order follows the key order
# of the 'countStat' hash.
#
# Arguments:
#   $how - accepted for backward compatibility; not evaluated here.
#
# Croaks if the file cannot be opened or closed.
sub dump_countStat {
  my ($self, $how) = @_;
  my $stats  = $self->countStat;
  my $header = $stats->{'header'};
  my $fn     = file($self->outpath, "mapping_stats.csv");

  # Three-argument open with a lexical handle: the path is taken
  # literally and can never be misread as part of the open mode.
  open(my $out, '>', "$fn") or croak "cannot open $fn for writing: $!";
  print {$out} join("\t", values %{$header}), "\n";

  foreach my $sample (keys %{$stats}) {
    next if ($sample eq 'header');
    # the 'sample' column carries the sample name itself
    my @line = map { $_ eq 'sample' ? $sample : $stats->{$sample}->{$_} }
      keys %{$header};
    print {$out} join("\t", @line), "\n";
  }

  # buffered write errors only surface when the handle is closed
  close($out) or croak "cannot close $fn: $!";
}
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
# Draw a stacked bar plot of the alignment counts of all samples into
# <outpath>/mapping_stats.pdf.
#
# The counts are taken from 'countStat' and handed to plot_barplot() as
# text for R's read.table(): a first row with the sample names, then
# one row each for 'aligned_se', 'aligned_pairs' and 'aligned_mm'.
# Fields are separated by blanks, rows end with a newline.
sub make_BarPlot {
  my ($self) = @_;
  my $stats = $self->countStat;

  # Only the reserved 'header' entry is skipped. An exact comparison
  # keeps samples whose name merely contains the word "header".
  # The key list is taken once so that all rows share one sample order.
  my @samples = grep { $_ ne 'header' } keys %{$stats};

  my @rows = ( join(" ", 'Samples', @samples) . "\n" );
  foreach my $column (qw(aligned_se aligned_pairs aligned_mm)) {
    push @rows,
      join(" ", $column, map { $stats->{$_}{$column} } @samples) . "\n";
  }

  ## produce bar plot
  my $fn = file($self->outpath, "mapping_stats.pdf");
  $self->plot_barplot($fn, "Mapped reads", join(" ", @rows));
}
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
# Render a stacked bar plot with R (via Statistics::R) into a PDF file.
#
# Arguments:
#   $filename    - output file; '.pdf' is appended unless the name
#                  already ends in '.pdf'
#   $ylab        - label of the y axis
#   $data_string - table text for R's read.table(): header row first,
#                  row names in the first column
#
# The first three rows are drawn in fixed colours, further rows get
# terrain colours. The legend is drawn in a second pass on top of the
# page; earlier variants that placed it through barplot's
# legend.text/args.legend were abandoned.
sub plot_barplot {
  my ($self, $filename, $ylab, $data_string) = @_;

  # Only the last suffix counts: with a greedy pattern 'a.b.pdf' would
  # report the suffix '.b.pdf' and receive a second '.pdf'.
  my (undef, $odir, $ext) = fileparse($filename, qr/\.[^.]*/);
  $filename .= '.pdf' unless ($ext eq '.pdf');

  my @commands = (
    "pdf('${filename}')",
    "dat<-read.table(text = \"$data_string\", header = TRUE, row.names=1)",
    "dat_m<-as.matrix(dat)",
    "colors<-c('lightblue','lightgreen','lightcoral', terrain.colors(nrow(dat_m)-3, alpha = 1))",
    "types<-row.names(dat_m)",
    "par(mar = c(15,3,5,5), oma = c(1, 1, 4, 1))",
    "barplot(dat_m, xlim=c(0,ncol(dat_m)), col=colors, ylab='$ylab', xlab='', las=3)",
    "par(fig = c(0, 1, 0, 1), oma = c(0, 0, 0, 0),mar = c(0, 0, 0, 0), new = TRUE)",
    "legend('top', types, horiz = TRUE, inset = c(0,0), bty = 'n', fill = colors, cex = 1.2 )",
    "dev.off()",
  );

  my $R = Statistics::R->new();
  $R->startR;
  $R->set('rlib', $self->rlib);
  $R->set('log_dir', $odir);
  foreach my $command (@commands) {
    $R->run($command);
  }
  $R->stopR;
}
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
# Draw one box per sample for the value list stored under the key
# $whattodo (e.g. "data_edit") of every BamStat object in 'data'. The
# plot is written to <outpath>/<whattodo>_stats.pdf.
#
# For each sample the five numbers returned by statsstring(), the
# number of values and the quoted base name of the BAM file are
# collected and assembled into an R assignment that defines
# 'summarydata' for plot_bxplot(). Nothing is plotted when no numbers
# were collected.
sub make_BoxPlot {
  my ($self, $whattodo) = @_;
  my (@five_numbers, @sizes, @labels);

  for my $bamstat (@{ $self->data }) {
    my ($label) = fileparse($bamstat->{'bam'}, qr/\.[^.]*/);

    push @five_numbers, statsstring(@{ $bamstat->{$whattodo} });
    push @sizes,        scalar(@{ $bamstat->{$whattodo} });
    push @labels,       "'$label'";
  }

  ## produce box plot
  if (@five_numbers) {
    # stats is a 5 x <number of samples> matrix, filled column by column
    my $r_assignment = sprintf(
      "summarydata<-list(stats=matrix(c(%s),5,%s), n=c(%s), names=c(%s))",
      join(",", @five_numbers),
      scalar(@labels),
      join(",", @sizes),
      join(",", @labels),
    );
    my $fn = file($self->outpath, "${whattodo}_stats.pdf");

    $self->plot_bxplot($fn, $whattodo, $r_assignment);
  }
}
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
# Render a box plot with R (via Statistics::R) into a PDF file.
#
# Arguments:
#   $filename           - output file; '.pdf' is appended unless the
#                         name already ends in '.pdf'
#   $ylab               - label of the y axis
#   $datacommand_string - R statement that defines 'summarydata', the
#                         list handed to R's bxp()
sub plot_bxplot {
  my ($self, $filename, $ylab, $datacommand_string) = @_;

  # Only the last suffix counts: with a greedy pattern 'a.b.pdf' would
  # report the suffix '.b.pdf' and receive a second '.pdf'.
  my (undef, $odir, $ext) = fileparse($filename, qr/\.[^.]*/);
  $filename .= '.pdf' unless ($ext eq '.pdf');

  my $R = Statistics::R->new();
  $R->startR;
  $R->set('rlib', $self->rlib);
  $R->set('log_dir', $odir);
  $R->run("pdf('${filename}')");
  $R->run("$datacommand_string");
  $R->run("bxp(summarydata, medcol = 'red', ylab='$ylab', xlab='',las=3)");
  $R->run("dev.off()");
  $R->stopR;
}
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
# usage: @five = statsstring(@values);
#
# Return the five numbers describing a box for a list of numeric
# values: minimum, 1st quartile, median, 3rd quartile and maximum.
# All of them except the median are formatted with two decimals. The
# quartiles are the elements at index int(n/4) and int(3n/4) of the
# sorted list, the median of an even-sized list is the mean of the two
# middle elements. An empty list yields five zeros.
sub statsstring {
  my @vals = sort { $a <=> $b } @_;
  my @statstring = ();

  if (@vals) {
    my $n   = scalar @vals;
    my $mid = int($n / 2);

    # the list is sorted, so its ends are the minimum and the maximum
    push @statstring, sprintf("%.2f", $vals[0]);                  ## min
    push @statstring, sprintf("%.2f", $vals[int($n / 4)]);        ## 1.quartile
    if ($n % 2) {
      push @statstring, $vals[$mid];                              ## odd median
    }
    else {
      push @statstring, ($vals[$mid - 1] + $vals[$mid]) / 2;      ## even median
    }
    push @statstring, sprintf("%.2f", $vals[int(($n * 3) / 4)]);  ## 3.quartile
    push @statstring, sprintf("%.2f", $vals[-1]);                 ## max
  }
  else {
    @statstring = qw/0 0 0 0 0/;
  }
  return(@statstring);
}
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
# usage: $largest = max(\@values);
#
# Return the numerically largest element of the referenced array, or
# undef if the array is empty.
sub max {
  my ($arrayref) = @_;
  my $largest = $arrayref->[0];

  for my $index (0 .. $#{$arrayref}) {
    my $candidate = $arrayref->[$index];
    if ($candidate > $largest) {
      $largest = $candidate;
    }
  }
  return $largest;
}
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
# usage: $smallest = min(\@values);
#
# Return the numerically smallest element of the referenced array, or
# undef if the array is empty.
sub min {
  my ($arrayref) = @_;
  my $smallest = $arrayref->[0];

  for my $index (0 .. $#{$arrayref}) {
    my $candidate = $arrayref->[$index];
    if ($candidate < $smallest) {
      $smallest = $candidate;
    }
  }
  return $smallest;
}
327
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
1; |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
__END__ |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
=head1 NAME |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
Bio::ViennaNGS::BamStatSummary - Moose interface to analyze, summarize |
336
|
|
|
|
|
|
|
and compare BAM mapping statistics data structure produced by |
337
|
|
|
|
|
|
|
Bio::ViennaNGS::BamStat |
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
=head1 SYNOPSIS |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
use Bio::ViennaNGS::BamStatSummary; |
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
$bamsummary->populate_data(); |
344
|
|
|
|
|
|
|
$bamsummary->populate_countStat(); |
345
|
|
|
|
|
|
|
$bamsummary->dump_countStat("csv"); |
346
|
|
|
|
|
|
|
$bamsummary->make_BarPlot(); |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
$bamsummary->make_BoxPlot("data_edit" ) if( $bamsummary->has_control_edit ); |
349
|
|
|
|
|
|
|
$bamsummary->make_BoxPlot("data_clip" ) if( $bamsummary->has_control_clip ); |
350
|
|
|
|
|
|
|
$bamsummary->make_BoxPlot("data_match") if( $bamsummary->has_control_match ); |
351
|
|
|
|
|
|
|
$bamsummary->make_BoxPlot("data_qual" ) if( $bamsummary->has_control_qual ); |
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
=head1 DESCRIPTION |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
This module provides a L<Moose> interface to process the mapping |
357
|
|
|
|
|
|
|
statistics of single BAM files. It uses the data structure as produced |
358
|
|
|
|
|
|
|
by L<Bio::ViennaNGS::BamStat>, summarizes the data and compares |
359
|
|
|
|
|
|
|
different BAM files. Output is written both as CSV files and graphical |
360
|
|
|
|
|
|
|
representation of the results. Internally, this module builds on |
361
|
|
|
|
|
|
|
L<Statistics::R>. |
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
=head1 DEPENDENCIES |
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
=over |
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
=item L<Statistics::R> |
369
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
=item L<Path::Class> |
371
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
=item L<Tie::Hash::Indexed> |
373
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
=item L<Moose> |
375
|
|
|
|
|
|
|
|
376
|
|
|
|
|
|
|
=back |
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
=head1 SEE ALSO |
379
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
=over |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
=item L<Bio::ViennaNGS> |
383
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
=item L<Bio::ViennaNGS::BamStat> |
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
=back |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
=head1 AUTHORS |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
=over |
391
|
|
|
|
|
|
|
|
392
|
|
|
|
|
|
|
=item Fabian Amman E<lt>fabian@tbi.univie.ac.atE<gt> |
393
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
=item Michael T. Wolfinger E<lt>michael@wolfinger.euE<gt> |
395
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
=back |
397
|
|
|
|
|
|
|
|
398
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
399
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
Copyright (C) 2015 by Michael T. Wolfinger |
401
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
403
|
|
|
|
|
|
|
it under the same terms as Perl itself, either Perl version 5.10.0 or, |
404
|
|
|
|
|
|
|
at your option, any later version of Perl 5 you may have available. |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
This software is distributed in the hope that it will be useful, but |
407
|
|
|
|
|
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of |
408
|
|
|
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
=cut |