| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | # | 
| 2 |  |  |  |  |  |  | # ChromBed - A class for extracting short read information for one chromosome. | 
| 3 |  |  |  |  |  |  | # | 
| 4 |  |  |  |  |  |  |  | 
| 5 | 1 |  |  | 1 |  | 5 | use strict; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 34 |  | 
| 6 | 1 |  |  | 1 |  | 26 | use 5.006; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 49 |  | 
| 7 |  |  |  |  |  |  | package MyShortRead::ChromBed; | 
| 8 | 1 |  |  | 1 |  | 5 | use POSIX qw(ceil); | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 8 |  | 
| 9 | 1 |  |  | 1 |  | 65 | use MyShortRead::MyShortRead qw(bin_genome_count bin_genome_count_direction); | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 48 |  | 
| 10 | 1 |  |  | 1 |  | 6 | use MyBioinfo::Common qw(mean median); | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 1590 |  | 
| 11 |  |  |  |  |  |  | our $VERSION = '1.00'; | 
| 12 |  |  |  |  |  |  |  | 
| 13 |  |  |  |  |  |  | # Class constructor. | 
| 14 |  |  |  |  |  |  | sub new{ | 
| 15 | 0 |  |  | 0 | 0 |  | my $class = shift; | 
| 16 | 0 |  |  |  |  |  | my $self = {}; | 
| 17 | 0 |  |  |  |  |  | $self->{chrom_name} = undef; | 
| 18 | 0 |  |  |  |  |  | $self->{chrom_len} = undef; | 
| 19 | 0 |  |  |  |  |  | $self->{file_name} = undef;	# chromosome file name. | 
| 20 | 0 |  |  |  |  |  | $self->{bin_num} = undef;	# number of bins for the chromosome. | 
| 21 | 0 |  |  |  |  |  | $self->{bin_count} = [];	# vector for bin read count. | 
| 22 | 0 |  |  |  |  |  | $self->{bin_count_F} = [];	# bin Forward strand read count. | 
| 23 | 0 |  |  |  |  |  | $self->{bin_count_R} = [];	# bin Reverse strand read count. | 
| 24 | 0 |  |  |  |  |  | $self->{window_num} = undef;	# number of sliding windows. | 
| 25 | 0 |  |  |  |  |  | $self->{window_count} = [];	# vector for window read count. | 
| 26 | 0 |  |  |  |  |  | $self->{window_count_F} = [];	# window read count for Forward strand. | 
| 27 | 0 |  |  |  |  |  | $self->{window_count_R} = [];	# window read count for Reverse strand. | 
| 28 | 0 |  |  |  |  |  | bless($self, $class); | 
| 29 |  |  |  |  |  |  |  | 
| 30 |  |  |  |  |  |  | # User supplied chromosome name, length and file name. | 
| 31 | 0 | 0 |  |  |  |  | if(@_ >= 3){ | 
| 32 | 0 |  |  |  |  |  | $self->{chrom_name} = $_[0]; | 
| 33 | 0 |  |  |  |  |  | $self->{chrom_len} = $_[1]; | 
| 34 | 0 |  |  |  |  |  | $self->{file_name} = $_[2]; | 
| 35 |  |  |  |  |  |  | } | 
| 36 |  |  |  |  |  |  |  | 
| 37 | 0 |  |  |  |  |  | return $self; | 
| 38 |  |  |  |  |  |  | } | 
| 39 |  |  |  |  |  |  |  | 
| 40 |  |  |  |  |  |  | # Set chromosome information: name, length and file name. | 
| 41 |  |  |  |  |  |  | # Clean memory data. | 
| 42 |  |  |  |  |  |  | sub set_chr_info{ | 
| 43 | 0 |  |  | 0 | 0 |  | my $self = shift; | 
| 44 | 0 | 0 |  |  |  |  | if(@_ >= 3){ | 
| 45 | 0 |  |  |  |  |  | $self->{chrom_name} = $_[0]; | 
| 46 | 0 |  |  |  |  |  | $self->{chrom_len} = $_[1]; | 
| 47 | 0 |  |  |  |  |  | $self->{file_name} = $_[2]; | 
| 48 | 0 |  |  |  |  |  | $self->{bin_num} = undef; | 
| 49 | 0 |  |  |  |  |  | $self->{bin_count} = []; | 
| 50 |  |  |  |  |  |  | } | 
| 51 |  |  |  |  |  |  | } | 
| 52 |  |  |  |  |  |  |  | 
| 53 |  |  |  |  |  |  | # Create bin count vector for the specified chromosome file. | 
| 54 |  |  |  |  |  |  | sub do_bin_count{ | 
| 55 | 0 |  |  | 0 | 0 |  | my $self = shift; | 
| 56 | 0 | 0 |  |  |  |  | if(@_ < 2) {warn "Bin or fragment size not specified. Bin count not performed. Exit.\n"; return;} | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 57 | 0 | 0 | 0 |  |  |  | if(!(defined $self->{chrom_name} and defined $self->{chrom_len} and defined $self->{file_name})){ | 
|  |  |  | 0 |  |  |  |  | 
| 58 | 0 |  |  |  |  |  | warn "Chromosome information missing! Cannot continue. Exit.\n"; | 
| 59 | 0 |  |  |  |  |  | return; | 
| 60 |  |  |  |  |  |  | } | 
| 61 | 0 |  |  |  |  |  | my($bin_size,$frag_size) = @_; | 
| 62 |  |  |  |  |  |  | # Read bin size and calculate bin number. | 
| 63 | 0 |  |  |  |  |  | $self->{bin_num} = ceil($self->{chrom_len} / $bin_size); | 
| 64 |  |  |  |  |  |  | # Create bin count vector. | 
| 65 | 0 |  |  |  |  |  | bin_genome_count($self->{file_name},$bin_size,$self->{chrom_len},$frag_size,$self->{bin_count}); | 
| 66 |  |  |  |  |  |  | } | 
| 67 |  |  |  |  |  |  |  | 
| 68 |  |  |  |  |  |  | # Create bin count with direction for the specified chromosome file. | 
| 69 |  |  |  |  |  |  | sub do_bin_count_direction{ | 
| 70 | 0 |  |  | 0 | 0 |  | my $self = shift; | 
| 71 | 0 | 0 |  |  |  |  | if(@_ < 2) {warn "Bin or fragment size not specified. Bin count not performed. Exit.\n"; return;} | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 72 | 0 | 0 | 0 |  |  |  | if(!(defined $self->{chrom_name} and defined $self->{chrom_len} and defined $self->{file_name})){ | 
|  |  |  | 0 |  |  |  |  | 
| 73 | 0 |  |  |  |  |  | warn "Chromosome information missing! Cannot continue. Exit.\n"; | 
| 74 | 0 |  |  |  |  |  | return; | 
| 75 |  |  |  |  |  |  | } | 
| 76 | 0 |  |  |  |  |  | my($bin_size,$frag_size) = @_; | 
| 77 |  |  |  |  |  |  | # Read bin size and calculate bin number. | 
| 78 | 0 |  |  |  |  |  | $self->{bin_num} = ceil($self->{chrom_len} / $bin_size); | 
| 79 |  |  |  |  |  |  | # Create bin count vector. | 
| 80 | 0 |  |  |  |  |  | bin_genome_count_direction($self->{file_name},$bin_size,$self->{chrom_len},$frag_size,$self->{bin_count_F},$self->{bin_count_R}); | 
| 81 |  |  |  |  |  |  | } | 
| 82 |  |  |  |  |  |  |  | 
| 83 |  |  |  |  |  |  | # Smooth bin count signals by applying window functions. | 
| 84 |  |  |  |  |  |  | sub smooth_bin_count{ | 
| 85 | 0 |  |  | 0 | 0 |  | my $self = shift; | 
| 86 | 0 | 0 |  |  |  |  | if(@_ < 2) {warn "Smoothing width or function not specified. Exit.\n"; return;} | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 87 | 0 | 0 |  |  |  |  | if(!defined $self->{bin_count}){ | 
| 88 | 0 |  |  |  |  |  | warn "Bin count vector not exists. Do bin count first! Exit.\n"; | 
| 89 | 0 |  |  |  |  |  | return; | 
| 90 |  |  |  |  |  |  | } | 
| 91 | 0 |  |  |  |  |  | my($smooth_width,$fun_str) = @_; | 
| 92 | 0 | 0 |  |  |  |  | if($smooth_width <= 1){ | 
| 93 | 0 |  |  |  |  |  | warn "Smooth width must be larger than one. Exit.\n"; | 
| 94 | 0 |  |  |  |  |  | return; | 
| 95 |  |  |  |  |  |  | } | 
| 96 | 0 | 0 |  |  |  |  | if($smooth_width % 2 != 1){ | 
| 97 | 0 |  |  |  |  |  | warn "Smooth width must be odd number. Convert it to nearest odd number.\n"; | 
| 98 | 0 |  |  |  |  |  | $smooth_width++; | 
| 99 |  |  |  |  |  |  | } | 
| 100 | 0 |  |  |  |  |  | my $half = ($smooth_width-1)/2; | 
| 101 | 0 | 0 |  |  |  |  | if($self->{bin_num} <= $half){ | 
| 102 | 0 |  |  |  |  |  | warn "Bin vector too small to calculate smoothed signals.\n"; | 
| 103 | 0 |  |  |  |  |  | return; | 
| 104 |  |  |  |  |  |  | } | 
| 105 | 0 |  |  |  |  |  | my @mov_arr;	# Array containing bin counts of moving window. | 
| 106 |  |  |  |  |  |  | # Build the initial moving array. | 
| 107 | 0 |  |  |  |  |  | for my $i(0..$half) {push @mov_arr, $self->{bin_count}[$i];} | 
|  | 0 |  |  |  |  |  |  | 
| 108 | 0 | 0 |  |  |  |  | if($fun_str eq 'mean') {$self->{bin_count}[0] = mean(@mov_arr);} | 
|  | 0 | 0 |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 109 |  |  |  |  |  |  | elsif($fun_str eq 'median') {$self->{bin_count}[0] = median(@mov_arr);} | 
| 110 |  |  |  |  |  |  | # Iteratively updating moving array and calculate smoothed signals. | 
| 111 | 0 |  |  |  |  |  | for(my $i = 1; $i < $self->{bin_num}; $i++){ | 
| 112 | 0 | 0 |  |  |  |  | if($i - $half > 0) {shift @mov_arr;}	# Remove the left element. | 
|  | 0 |  |  |  |  |  |  | 
| 113 | 0 | 0 |  |  |  |  | if($i + $half < $self->{bin_num}) {push @mov_arr, $self->{bin_count}[$i+$half];}	# Add new element to the right. | 
|  | 0 |  |  |  |  |  |  | 
| 114 | 0 | 0 |  |  |  |  | if($fun_str eq 'mean') {$self->{bin_count}[$i] = mean(@mov_arr);} | 
|  | 0 | 0 |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 115 |  |  |  |  |  |  | elsif($fun_str eq 'median') {$self->{bin_count}[$i] = median(@mov_arr);} | 
| 116 |  |  |  |  |  |  | } | 
| 117 |  |  |  |  |  |  | } | 
| 118 |  |  |  |  |  |  |  | 
| 119 |  |  |  |  |  |  | # Create sliding window count vector. | 
| 120 |  |  |  |  |  |  | sub create_window_count{ | 
| 121 | 0 |  |  | 0 | 0 |  | my $self = shift; | 
| 122 | 0 | 0 |  |  |  |  | if(@_ < 1) {warn "Number of bins per window not specified. Exit.\n"; return;} | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 123 | 0 |  |  |  |  |  | my $bins_per_window = shift; | 
| 124 | 0 | 0 |  |  |  |  | if(@{$self->{bin_count}} < $bins_per_window){ | 
|  | 0 |  |  |  |  |  |  | 
| 125 | 0 |  |  |  |  |  | warn "Bin count vector not long enough to calculate window count.\n"; | 
| 126 | 0 |  |  |  |  |  | return; | 
| 127 |  |  |  |  |  |  | } | 
| 128 |  |  |  |  |  |  | # Initialize window count vector. | 
| 129 | 0 |  |  |  |  |  | $self->{window_num} = $self->{bin_num} - $bins_per_window + 1; | 
| 130 | 0 |  |  |  |  |  | $#{$self->{window_count}} = $self->{window_num} - 1; | 
|  | 0 |  |  |  |  |  |  | 
| 131 |  |  |  |  |  |  | # $mov_window_count is the moving window count. | 
| 132 | 0 |  |  |  |  |  | my $mov_window_count = 0; | 
| 133 | 0 |  |  |  |  |  | for my $i(0..$bins_per_window-1) {$mov_window_count += $self->{bin_count}[$i];} | 
|  | 0 |  |  |  |  |  |  | 
| 134 | 0 |  |  |  |  |  | $self->{window_count}[0] = $mov_window_count; | 
| 135 |  |  |  |  |  |  | # $i is the index to current window. | 
| 136 | 0 |  |  |  |  |  | for(my $i=1; $i < $self->{window_num}; $i++){ | 
| 137 | 0 |  |  |  |  |  | $mov_window_count = $mov_window_count - $self->{bin_count}[$i-1] + $self->{bin_count}[$i-1+$bins_per_window]; | 
| 138 | 0 |  |  |  |  |  | $self->{window_count}[$i] = $mov_window_count; | 
| 139 |  |  |  |  |  |  | } | 
| 140 |  |  |  |  |  |  | } | 
| 141 |  |  |  |  |  |  |  | 
| 142 |  |  |  |  |  |  | # Create sliding window count vector with direction. | 
| 143 |  |  |  |  |  |  | sub create_window_count_direction{ | 
| 144 | 0 |  |  | 0 | 0 |  | my $self = shift; | 
| 145 | 0 | 0 |  |  |  |  | if(@_ < 1) {warn "Number of bins per window not specified. Exit.\n"; return;} | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 146 | 0 |  |  |  |  |  | my $bins_per_window = shift; | 
| 147 | 0 | 0 | 0 |  |  |  | if(@{$self->{bin_count_F}} < $bins_per_window or @{$self->{bin_count_R}} < $bins_per_window){ | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 148 | 0 |  |  |  |  |  | warn "Bin count vector not long enough to calculate window count.\n"; | 
| 149 | 0 |  |  |  |  |  | return; | 
| 150 |  |  |  |  |  |  | } | 
| 151 |  |  |  |  |  |  | # Initialize window count vector. | 
| 152 | 0 |  |  |  |  |  | $self->{window_num} = $self->{bin_num} - $bins_per_window + 1; | 
| 153 | 0 |  |  |  |  |  | $#{$self->{window_count_F}} = $self->{window_num} - 1; | 
|  | 0 |  |  |  |  |  |  | 
| 154 | 0 |  |  |  |  |  | $#{$self->{window_count_R}} = $self->{window_num} - 1; | 
|  | 0 |  |  |  |  |  |  | 
| 155 |  |  |  |  |  |  | # $mov_window_count is the moving window count. | 
| 156 | 0 |  |  |  |  |  | my $mov_window_count_F = 0; | 
| 157 | 0 |  |  |  |  |  | my $mov_window_count_R = 0; | 
| 158 | 0 |  |  |  |  |  | for my $i(0..$bins_per_window-1) {$mov_window_count_F += $self->{bin_count_F}[$i];} | 
|  | 0 |  |  |  |  |  |  | 
| 159 | 0 |  |  |  |  |  | for my $i(0..$bins_per_window-1) {$mov_window_count_R += $self->{bin_count_R}[$i];} | 
|  | 0 |  |  |  |  |  |  | 
| 160 | 0 |  |  |  |  |  | $self->{window_count_F}[0] = $mov_window_count_F; | 
| 161 | 0 |  |  |  |  |  | $self->{window_count_R}[0] = $mov_window_count_R; | 
| 162 |  |  |  |  |  |  | # $i is the index to current window. | 
| 163 | 0 |  |  |  |  |  | for(my $i=1; $i < $self->{window_num}; $i++){ | 
| 164 | 0 |  |  |  |  |  | $mov_window_count_F = $mov_window_count_F - $self->{bin_count_F}[$i-1] + $self->{bin_count_F}[$i-1+$bins_per_window]; | 
| 165 | 0 |  |  |  |  |  | $mov_window_count_R = $mov_window_count_R - $self->{bin_count_R}[$i-1] + $self->{bin_count_R}[$i-1+$bins_per_window]; | 
| 166 | 0 |  |  |  |  |  | $self->{window_count_F}[$i] = $mov_window_count_F; | 
| 167 | 0 |  |  |  |  |  | $self->{window_count_R}[$i] = $mov_window_count_R; | 
| 168 |  |  |  |  |  |  | } | 
| 169 |  |  |  |  |  |  | } | 
| 170 |  |  |  |  |  |  |  | 
| 171 |  |  |  |  |  |  | # Delete chromosome file but keep memory data. | 
| 172 |  |  |  |  |  |  | sub del_file{ | 
| 173 | 0 |  |  | 0 | 0 |  | my $self = shift; | 
| 174 | 0 | 0 |  |  |  |  | if(defined $self->{file_name}){ | 
| 175 | 0 |  |  |  |  |  | my $fn = $self->{file_name}; | 
| 176 | 0 | 0 |  |  |  |  | unlink $fn or warn "Cannot delete file $fn: $!\n"; | 
| 177 |  |  |  |  |  |  | } | 
| 178 |  |  |  |  |  |  | } | 
| 179 |  |  |  |  |  |  |  | 
| 180 |  |  |  |  |  |  | 1; | 
| 181 |  |  |  |  |  |  |  | 
| 182 |  |  |  |  |  |  | __END__ |