| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package AlignDB::GC; | 
| 2 | 4 |  |  | 4 |  | 81796 | use Moose; | 
|  | 4 |  |  |  |  | 1200733 |  | 
|  | 4 |  |  |  |  | 26 |  | 
| 3 | 4 |  |  | 4 |  | 18941 | use List::Util; | 
|  | 4 |  |  |  |  | 8 |  | 
|  | 4 |  |  |  |  | 219 |  | 
| 4 | 4 |  |  | 4 |  | 2355 | use YAML::Syck; | 
|  | 4 |  |  |  |  | 5645 |  | 
|  | 4 |  |  |  |  | 205 |  | 
| 5 | 4 |  |  | 4 |  | 543 | use AlignDB::IntSpan; | 
|  | 4 |  |  |  |  | 4721 |  | 
|  | 4 |  |  |  |  | 7666 |  | 
| 6 |  |  |  |  |  |  |  | 
| 7 |  |  |  |  |  |  | our $VERSION = '1.1.0'; | 
| 8 |  |  |  |  |  |  |  | 
| 9 |  |  |  |  |  |  | has 'wave_window_size' => ( is => 'rw', isa => 'Int',  default => 100, ); | 
| 10 |  |  |  |  |  |  | has 'wave_window_step' => ( is => 'rw', isa => 'Int',  default => 50, ); | 
| 11 |  |  |  |  |  |  | has 'vicinal_size'     => ( is => 'rw', isa => 'Int',  default => 500, ); | 
| 12 |  |  |  |  |  |  | has 'fall_range'       => ( is => 'rw', isa => 'Num',  default => 0.1, ); | 
| 13 |  |  |  |  |  |  | has 'gsw_size'         => ( is => 'rw', isa => 'Int',  default => 100, ); | 
| 14 |  |  |  |  |  |  | has 'stat_window_size' => ( is => 'rw', isa => 'Int',  default => 100, ); | 
| 15 |  |  |  |  |  |  | has 'stat_window_step' => ( is => 'rw', isa => 'Int',  default => 100, ); | 
| 16 |  |  |  |  |  |  | has 'skip_mdcw'        => ( is => 'rw', isa => 'Bool', default => 0, ); | 
| 17 |  |  |  |  |  |  |  | 
| 18 |  |  |  |  |  |  | sub segment { | 
| 19 | 3 |  |  | 3 | 0 | 4346 | my $self           = shift; | 
| 20 | 3 |  |  |  |  | 4 | my $comparable_set = shift; | 
| 21 | 3 |  |  |  |  | 2 | my $segment_size   = shift; | 
| 22 | 3 |  |  |  |  | 4 | my $segment_step   = shift; | 
| 23 |  |  |  |  |  |  |  | 
| 24 |  |  |  |  |  |  | # if not given $segment_size, do a whole alignment analysis | 
| 25 | 3 |  |  |  |  | 4 | my @segment_windows; | 
| 26 | 3 | 100 |  |  |  | 5 | if ($segment_size) { | 
| 27 | 1 |  |  |  |  | 5 | @segment_windows = $self->sliding_window( $comparable_set, $segment_size, $segment_step ); | 
| 28 |  |  |  |  |  |  | } | 
| 29 |  |  |  |  |  |  | else { | 
| 30 | 2 |  |  |  |  | 2 | push @segment_windows, $comparable_set; | 
| 31 |  |  |  |  |  |  | } | 
| 32 |  |  |  |  |  |  |  | 
| 33 | 3 |  |  |  |  | 8 | return @segment_windows; | 
| 34 |  |  |  |  |  |  | } | 
| 35 |  |  |  |  |  |  |  | 
| 36 |  |  |  |  |  |  | sub sliding_window { | 
| 37 | 11 |  |  | 11 | 0 | 4179 | my $self = shift; | 
| 38 | 11 |  |  |  |  | 12 | my AlignDB::IntSpan $comparable_set = shift; | 
| 39 |  |  |  |  |  |  |  | 
| 40 | 11 |  | 66 |  |  | 99 | my $window_size = shift || $self->wave_window_size; | 
| 41 | 11 |  | 66 |  |  | 84 | my $window_step = shift || $self->wave_window_step; | 
| 42 |  |  |  |  |  |  |  | 
| 43 | 11 |  |  |  |  | 25 | my $comparable_number = $comparable_set->size; | 
| 44 |  |  |  |  |  |  |  | 
| 45 | 11 |  |  |  |  | 112 | my @sliding_windows; | 
| 46 | 11 |  |  |  |  | 9 | my $sliding_start = 1; | 
| 47 | 11 |  |  |  |  | 11 | while (1) { | 
| 48 | 64 |  |  |  |  | 52 | my $sliding_end = $sliding_start + $window_size - 1; | 
| 49 | 64 | 100 |  |  |  | 93 | last if $sliding_end > $comparable_number; | 
| 50 | 53 |  |  |  |  | 117 | my $sliding_window = $comparable_set->slice( $sliding_start, $sliding_end ); | 
| 51 | 53 |  |  |  |  | 2442 | $sliding_start += $window_step; | 
| 52 |  |  |  |  |  |  |  | 
| 53 | 53 |  |  |  |  | 44 | push @sliding_windows, $sliding_window; | 
| 54 |  |  |  |  |  |  | } | 
| 55 |  |  |  |  |  |  |  | 
| 56 | 11 |  |  |  |  | 27 | return @sliding_windows; | 
| 57 |  |  |  |  |  |  | } | 
| 58 |  |  |  |  |  |  |  | 
| 59 |  |  |  |  |  |  | sub _calc_gc_ratio { | 
| 60 | 48 |  |  | 48 |  | 41 | my @seqs = @_; | 
| 61 |  |  |  |  |  |  |  | 
| 62 | 48 |  |  |  |  | 36 | my @ratios; | 
| 63 | 48 |  |  |  |  | 39 | for my $seq (@seqs) { | 
| 64 |  |  |  |  |  |  |  | 
| 65 |  |  |  |  |  |  | # Count all four bases | 
| 66 | 48 |  |  |  |  | 48 | my $a_count = $seq =~ tr/Aa/Aa/; | 
| 67 | 48 |  |  |  |  | 35 | my $g_count = $seq =~ tr/Gg/Gg/; | 
| 68 | 48 |  |  |  |  | 41 | my $c_count = $seq =~ tr/Cc/Cc/; | 
| 69 | 48 |  |  |  |  | 26 | my $t_count = $seq =~ tr/Tt/Tt/; | 
| 70 |  |  |  |  |  |  |  | 
| 71 | 48 |  |  |  |  | 45 | my $four_count = $a_count + $g_count + $c_count + $t_count; | 
| 72 | 48 |  |  |  |  | 35 | my $gc_count   = $g_count + $c_count; | 
| 73 |  |  |  |  |  |  |  | 
| 74 | 48 | 50 |  |  |  | 55 | if ( $four_count == 0 ) { | 
| 75 | 0 |  |  |  |  | 0 | next; | 
| 76 |  |  |  |  |  |  | } | 
| 77 |  |  |  |  |  |  | else { | 
| 78 | 48 |  |  |  |  | 41 | my $gc_ratio = $gc_count / $four_count; | 
| 79 | 48 |  |  |  |  | 52 | push @ratios, $gc_ratio; | 
| 80 |  |  |  |  |  |  | } | 
| 81 |  |  |  |  |  |  | } | 
| 82 |  |  |  |  |  |  |  | 
| 83 | 48 |  |  |  |  | 53 | return _mean(@ratios); | 
| 84 |  |  |  |  |  |  | } | 
| 85 |  |  |  |  |  |  |  | 
| 86 |  |  |  |  |  |  | sub segment_gc_stat { | 
| 87 | 6 |  |  | 6 | 0 | 4270 | my $self                         = shift; | 
| 88 | 6 |  |  |  |  | 6 | my $seqs_ref                     = shift; | 
| 89 | 6 |  |  |  |  | 6 | my AlignDB::IntSpan $segment_set = shift; | 
| 90 |  |  |  |  |  |  |  | 
| 91 | 6 |  | 66 |  |  | 123 | my $window_size = shift || $self->stat_window_size; | 
| 92 | 6 |  | 66 |  |  | 92 | my $window_step = shift || $self->stat_window_step; | 
| 93 |  |  |  |  |  |  |  | 
| 94 | 6 |  |  |  |  | 14 | my $segment_number = $segment_set->size; | 
| 95 |  |  |  |  |  |  |  | 
| 96 |  |  |  |  |  |  | # There should be at least 2 sample, i.e., two sliding windows in the segment | 
| 97 | 6 | 50 |  |  |  | 67 | if ( $segment_number < $window_size + $window_step ) { | 
| 98 | 0 |  |  |  |  | 0 | return ( undef, undef, undef, undef ); | 
| 99 |  |  |  |  |  |  | } | 
| 100 |  |  |  |  |  |  |  | 
| 101 | 6 |  |  |  |  | 6 | my @seqs = @{$seqs_ref}; | 
|  | 6 |  |  |  |  | 7 |  | 
| 102 |  |  |  |  |  |  |  | 
| 103 | 6 |  |  |  |  | 14 | my @sliding_windows = $self->sliding_window( $segment_set, $window_size, $window_step ); | 
| 104 | 6 |  |  |  |  | 4 | my @sliding_gcs; | 
| 105 |  |  |  |  |  |  |  | 
| 106 | 6 |  |  |  |  | 8 | foreach my $sliding_set (@sliding_windows) { | 
| 107 | 22 |  |  |  |  | 16 | my @sliding_seqs = map { $sliding_set->substr_span($_) } @seqs; | 
|  | 22 |  |  |  |  | 40 |  | 
| 108 | 22 |  |  |  |  | 373 | my $sliding_gc = _calc_gc_ratio(@sliding_seqs); | 
| 109 | 22 |  |  |  |  | 26 | push @sliding_gcs, $sliding_gc; | 
| 110 |  |  |  |  |  |  | } | 
| 111 |  |  |  |  |  |  |  | 
| 112 | 6 |  |  |  |  | 8 | my $gc_mean = _mean(@sliding_gcs); | 
| 113 | 6 |  |  |  |  | 11 | my $gc_std  = _stddev(@sliding_gcs); | 
| 114 | 6 | 100 | 33 |  |  | 37 | my $gc_cv | 
|  |  | 50 |  |  |  |  |  | 
| 115 |  |  |  |  |  |  | = $gc_mean == 0 || $gc_mean == 1 ? undef | 
| 116 |  |  |  |  |  |  | : $gc_mean <= 0.5 ? $gc_std / $gc_mean | 
| 117 |  |  |  |  |  |  | :                   $gc_std / ( 1 - $gc_mean ); | 
| 118 | 6 | 50 |  |  |  | 162 | my $gc_mdcw = $self->skip_mdcw ? undef : _mdcw(@sliding_gcs); | 
| 119 |  |  |  |  |  |  |  | 
| 120 | 6 |  |  |  |  | 25 | return ( $gc_mean, $gc_std, $gc_cv, $gc_mdcw ); | 
| 121 |  |  |  |  |  |  | } | 
| 122 |  |  |  |  |  |  |  | 
| 123 |  |  |  |  |  |  | sub _center_resize { | 
| 124 | 2 |  |  | 2 |  | 2 | my AlignDB::IntSpan $old_set    = shift; | 
| 125 | 2 |  |  |  |  | 3 | my AlignDB::IntSpan $parent_set = shift; | 
| 126 | 2 |  |  |  |  | 1 | my $resize                      = shift; | 
| 127 |  |  |  |  |  |  |  | 
| 128 |  |  |  |  |  |  | # find the middles of old_set | 
| 129 | 2 |  |  |  |  | 6 | my $half_size           = int( $old_set->size / 2 ); | 
| 130 | 2 |  |  |  |  | 30 | my $midleft             = $old_set->at($half_size); | 
| 131 | 2 |  |  |  |  | 49 | my $midright            = $old_set->at( $half_size + 1 ); | 
| 132 | 2 |  |  |  |  | 37 | my $midleft_parent_idx  = $parent_set->index($midleft); | 
| 133 | 2 |  |  |  |  | 26 | my $midright_parent_idx = $parent_set->index($midright); | 
| 134 |  |  |  |  |  |  |  | 
| 135 | 2 | 50 | 33 |  |  | 29 | return unless $midleft_parent_idx and $midright_parent_idx; | 
| 136 |  |  |  |  |  |  |  | 
| 137 |  |  |  |  |  |  | # map to parent | 
| 138 | 2 |  |  |  |  | 4 | my $parent_size  = $parent_set->size; | 
| 139 | 2 |  |  |  |  | 18 | my $half_resize  = int( $resize / 2 ); | 
| 140 | 2 |  |  |  |  | 2 | my $new_left_idx = $midleft_parent_idx - $half_resize + 1; | 
| 141 | 2 | 50 |  |  |  | 6 | $new_left_idx = 1 if $new_left_idx < 1; | 
| 142 | 2 |  |  |  |  | 2 | my $new_right_idx = $midright_parent_idx + $half_resize - 1; | 
| 143 | 2 | 50 |  |  |  | 4 | $new_right_idx = $parent_size if $new_right_idx > $parent_size; | 
| 144 |  |  |  |  |  |  |  | 
| 145 | 2 |  |  |  |  | 5 | my $new_set = $parent_set->slice( $new_left_idx, $new_right_idx ); | 
| 146 |  |  |  |  |  |  |  | 
| 147 | 2 |  |  |  |  | 107 | return $new_set; | 
| 148 |  |  |  |  |  |  | } | 
| 149 |  |  |  |  |  |  |  | 
| 150 |  |  |  |  |  |  | sub segment_gc_stat_one { | 
| 151 | 2 |  |  | 2 | 0 | 3749 | my $self           = shift; | 
| 152 | 2 |  |  |  |  | 2 | my $seqs_ref       = shift; | 
| 153 | 2 |  |  |  |  | 3 | my $comparable_set = shift; | 
| 154 | 2 |  |  |  |  | 1 | my $segment_set    = shift; | 
| 155 |  |  |  |  |  |  |  | 
| 156 | 2 |  |  |  |  | 3 | my $stat_segment_size = 500; | 
| 157 |  |  |  |  |  |  |  | 
| 158 | 2 |  |  |  |  | 4 | my $resize_set = _center_resize( $segment_set, $comparable_set, $stat_segment_size ); | 
| 159 | 2 | 50 |  |  |  | 7 | next unless $resize_set; | 
| 160 |  |  |  |  |  |  |  | 
| 161 | 2 |  |  |  |  | 30 | my @segment_seqs = map { $segment_set->substr_span($_) } @{$seqs_ref}; | 
|  | 2 |  |  |  |  | 6 |  | 
|  | 2 |  |  |  |  | 3 |  | 
| 162 |  |  |  |  |  |  |  | 
| 163 | 2 |  |  |  |  | 41 | my $gc_mean = _calc_gc_ratio(@segment_seqs); | 
| 164 | 2 |  |  |  |  | 3 | my ( $gc_std, $gc_mdcw ) = ( undef, undef ); | 
| 165 |  |  |  |  |  |  |  | 
| 166 | 2 |  |  |  |  | 7 | my ( undef, undef, $gc_cv, undef ) = $self->segment_gc_stat( $seqs_ref, $resize_set, 100, 100 ); | 
| 167 |  |  |  |  |  |  |  | 
| 168 | 2 |  |  |  |  | 7 | return ( $gc_mean, $gc_std, $gc_cv, $gc_mdcw ); | 
| 169 |  |  |  |  |  |  | } | 
| 170 |  |  |  |  |  |  |  | 
| 171 |  |  |  |  |  |  | sub _mean { | 
| 172 | 66 |  |  | 66 |  | 57 | @_ = grep { defined $_ } @_; | 
|  | 108 |  |  |  |  | 138 |  | 
| 173 | 66 | 50 |  |  |  | 97 | return unless @_; | 
| 174 | 66 | 100 |  |  |  | 128 | return $_[0] unless @_ > 1; | 
| 175 | 16 |  |  |  |  | 59 | return List::Util::sum(@_) / scalar(@_); | 
| 176 |  |  |  |  |  |  | } | 
| 177 |  |  |  |  |  |  |  | 
| 178 |  |  |  |  |  |  | sub _variance { | 
| 179 | 6 |  |  | 6 |  | 6 | @_ = grep { defined $_ } @_; | 
|  | 22 |  |  |  |  | 29 |  | 
| 180 | 6 | 50 |  |  |  | 11 | return   unless @_; | 
| 181 | 6 | 50 |  |  |  | 8 | return 0 unless @_ > 1; | 
| 182 | 6 |  |  |  |  | 8 | my $mean = _mean(@_); | 
| 183 | 6 |  |  |  |  | 8 | return List::Util::sum( map { ( $_ - $mean )**2 } @_ ) / $#_; | 
|  | 22 |  |  |  |  | 48 |  | 
| 184 |  |  |  |  |  |  | } | 
| 185 |  |  |  |  |  |  |  | 
| 186 |  |  |  |  |  |  | sub _stddev { | 
| 187 | 6 |  |  | 6 |  | 6 | @_ = grep { defined $_ } @_; | 
|  | 22 |  |  |  |  | 25 |  | 
| 188 | 6 | 50 |  |  |  | 10 | return   unless @_; | 
| 189 | 6 | 50 |  |  |  | 9 | return 0 unless @_ > 1; | 
| 190 | 6 |  |  |  |  | 14 | return sqrt( _variance(@_) ); | 
| 191 |  |  |  |  |  |  | } | 
| 192 |  |  |  |  |  |  |  | 
| 193 |  |  |  |  |  |  | sub _mdcw { | 
| 194 | 6 |  |  | 6 |  | 8 | my @array = @_; | 
| 195 |  |  |  |  |  |  |  | 
| 196 | 6 | 50 |  |  |  | 11 | if ( @array <= 1 ) { | 
| 197 | 0 |  |  |  |  | 0 | warn "There should be more than 1 elements in the array.\n"; | 
| 198 | 0 |  |  |  |  | 0 | return; | 
| 199 |  |  |  |  |  |  | } | 
| 200 |  |  |  |  |  |  |  | 
| 201 | 6 |  |  |  |  | 5 | my @dcws; | 
| 202 | 6 |  |  |  |  | 13 | for ( 1 .. $#array ) { | 
| 203 | 16 |  |  |  |  | 16 | my $dcw = $array[$_] - $array[ $_ - 1 ]; | 
| 204 | 16 |  |  |  |  | 20 | push @dcws, abs($dcw); | 
| 205 |  |  |  |  |  |  | } | 
| 206 |  |  |  |  |  |  |  | 
| 207 | 6 |  |  |  |  | 7 | return _mean(@dcws); | 
| 208 |  |  |  |  |  |  | } | 
| 209 |  |  |  |  |  |  |  | 
| 210 |  |  |  |  |  |  | # find local maxima, minima | 
| 211 |  |  |  |  |  |  | sub find_extreme_step1 { | 
| 212 | 1 |  |  | 1 | 0 | 2 | my $self    = shift; | 
| 213 | 1 |  |  |  |  | 1 | my $windows = shift; | 
| 214 |  |  |  |  |  |  |  | 
| 215 | 1 |  |  |  |  | 1 | my $count = scalar @$windows; | 
| 216 |  |  |  |  |  |  |  | 
| 217 | 1 |  |  |  |  | 28 | my $wave_window_size = $self->wave_window_size; | 
| 218 | 1 |  |  |  |  | 21 | my $wave_window_step = $self->wave_window_step; | 
| 219 | 1 |  |  |  |  | 21 | my $vicinal_size     = $self->vicinal_size; | 
| 220 | 1 |  |  |  |  | 3 | my $vicinal_number   = int( $vicinal_size / $wave_window_step ); | 
| 221 |  |  |  |  |  |  |  | 
| 222 | 1 |  |  |  |  | 3 | for my $i ( 0 .. $count - 1 ) { | 
| 223 |  |  |  |  |  |  |  | 
| 224 |  |  |  |  |  |  | #----------------------------# | 
| 225 |  |  |  |  |  |  | # right vicinal window | 
| 226 |  |  |  |  |  |  | #----------------------------# | 
| 227 | 24 |  |  |  |  | 16 | my @right_vicinal_windows; | 
| 228 | 24 |  |  |  |  | 17 | my ( $right_low, $right_high, $right_flag ); | 
| 229 | 24 |  |  |  |  | 28 | foreach ( $i + 1 .. $i + $vicinal_number ) { | 
| 230 | 1200 | 100 |  |  |  | 1266 | next unless $_ < $count; | 
| 231 | 276 |  |  |  |  | 210 | push @right_vicinal_windows, $windows->[$_]->{sw_gc}; | 
| 232 |  |  |  |  |  |  | } | 
| 233 |  |  |  |  |  |  |  | 
| 234 | 24 | 100 |  |  |  | 28 | if ( scalar @right_vicinal_windows == 0 ) { | 
| 235 | 1 |  |  |  |  | 2 | $right_low  = 0; | 
| 236 | 1 |  |  |  |  | 5 | $right_high = 0; | 
| 237 |  |  |  |  |  |  | } | 
| 238 |  |  |  |  |  |  | else { | 
| 239 | 23 | 100 |  |  |  | 51 | if ( $windows->[$i]->{sw_gc} >= List::Util::max(@right_vicinal_windows) ) { | 
| 240 | 5 |  |  |  |  | 4 | $right_low = 1; | 
| 241 |  |  |  |  |  |  | } | 
| 242 | 23 | 100 |  |  |  | 42 | if ( $windows->[$i]->{sw_gc} <= List::Util::min(@right_vicinal_windows) ) { | 
| 243 | 3 |  |  |  |  | 2 | $right_high = 1; | 
| 244 |  |  |  |  |  |  | } | 
| 245 |  |  |  |  |  |  | } | 
| 246 |  |  |  |  |  |  |  | 
| 247 | 24 | 50 | 66 |  |  | 57 | if ( $right_low and $right_high ) { | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 248 | 0 |  |  |  |  | 0 | print " " x 4, "Can't determine right vicinity.\n"; | 
| 249 | 0 |  |  |  |  | 0 | $right_flag = 'N';    # non | 
| 250 |  |  |  |  |  |  | } | 
| 251 |  |  |  |  |  |  | elsif ($right_low) { | 
| 252 | 5 |  |  |  |  | 6 | $right_flag = 'L';    # low | 
| 253 |  |  |  |  |  |  | } | 
| 254 |  |  |  |  |  |  | elsif ($right_high) { | 
| 255 | 3 |  |  |  |  | 4 | $right_flag = 'H';    # high | 
| 256 |  |  |  |  |  |  | } | 
| 257 |  |  |  |  |  |  | else { | 
| 258 | 16 |  |  |  |  | 12 | $right_flag = 'N'; | 
| 259 |  |  |  |  |  |  | } | 
| 260 |  |  |  |  |  |  |  | 
| 261 | 24 |  |  |  |  | 17 | $windows->[$i]->{right_flag} = $right_flag; | 
| 262 |  |  |  |  |  |  |  | 
| 263 |  |  |  |  |  |  | #----------------------------# | 
| 264 |  |  |  |  |  |  | # left vicinal window | 
| 265 |  |  |  |  |  |  | #----------------------------# | 
| 266 | 24 |  |  |  |  | 15 | my @left_vicinal_windows; | 
| 267 | 24 |  |  |  |  | 15 | my ( $left_low, $left_high, $left_flag ); | 
| 268 | 24 |  |  |  |  | 30 | foreach ( $i - $vicinal_number .. $i - 1 ) { | 
| 269 | 1200 | 100 |  |  |  | 1262 | next if $_ < 0; | 
| 270 | 276 |  |  |  |  | 215 | push @left_vicinal_windows, $windows->[$_]->{sw_gc}; | 
| 271 |  |  |  |  |  |  | } | 
| 272 |  |  |  |  |  |  |  | 
| 273 | 24 | 100 |  |  |  | 27 | if ( scalar @left_vicinal_windows == 0 ) { | 
| 274 | 1 |  |  |  |  | 4 | $left_low  = 0; | 
| 275 | 1 |  |  |  |  | 1 | $left_high = 0; | 
| 276 |  |  |  |  |  |  | } | 
| 277 |  |  |  |  |  |  | else { | 
| 278 | 23 | 100 |  |  |  | 44 | if ( $windows->[$i]->{sw_gc} >= List::Util::max(@left_vicinal_windows) ) { | 
| 279 | 7 |  |  |  |  | 7 | $left_low = 1; | 
| 280 |  |  |  |  |  |  | } | 
| 281 | 23 | 100 |  |  |  | 40 | if ( $windows->[$i]->{sw_gc} <= List::Util::min(@left_vicinal_windows) ) { | 
| 282 | 6 |  |  |  |  | 5 | $left_high = 1; | 
| 283 |  |  |  |  |  |  | } | 
| 284 |  |  |  |  |  |  | } | 
| 285 |  |  |  |  |  |  |  | 
| 286 | 24 | 100 | 100 |  |  | 60 | if ( $left_low and $left_high ) { | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 287 | 2 |  |  |  |  | 227 | print " " x 4, "Can't determine left vicinity.\n"; | 
| 288 | 2 |  |  |  |  | 5 | $left_flag = 'N'; | 
| 289 |  |  |  |  |  |  | } | 
| 290 |  |  |  |  |  |  | elsif ($left_low) { | 
| 291 | 5 |  |  |  |  | 5 | $left_flag = 'L'; | 
| 292 |  |  |  |  |  |  | } | 
| 293 |  |  |  |  |  |  | elsif ($left_high) { | 
| 294 | 4 |  |  |  |  | 3 | $left_flag = 'H'; | 
| 295 |  |  |  |  |  |  | } | 
| 296 |  |  |  |  |  |  | else { | 
| 297 | 13 |  |  |  |  | 8 | $left_flag = 'N'; | 
| 298 |  |  |  |  |  |  | } | 
| 299 |  |  |  |  |  |  |  | 
| 300 | 24 |  |  |  |  | 21 | $windows->[$i]->{left_flag} = $left_flag; | 
| 301 |  |  |  |  |  |  |  | 
| 302 |  |  |  |  |  |  | # high or low window | 
| 303 | 24 |  |  |  |  | 16 | my ($high_low_flag); | 
| 304 |  |  |  |  |  |  |  | 
| 305 | 24 | 100 | 100 |  |  | 67 | if ( $left_flag eq 'H' and $right_flag eq 'H' ) { | 
|  |  | 100 | 100 |  |  |  |  | 
| 306 | 3 |  |  |  |  | 2 | $high_low_flag = 'T';    # trough | 
| 307 |  |  |  |  |  |  | } | 
| 308 |  |  |  |  |  |  | elsif ( $left_flag eq 'L' and $right_flag eq 'L' ) { | 
| 309 | 4 |  |  |  |  | 4 | $high_low_flag = 'C';    # crest | 
| 310 |  |  |  |  |  |  | } | 
| 311 |  |  |  |  |  |  | else { | 
| 312 | 17 |  |  |  |  | 9 | $high_low_flag = 'N'; | 
| 313 |  |  |  |  |  |  | } | 
| 314 | 24 |  |  |  |  | 43 | $windows->[$i]->{high_low_flag} = $high_low_flag; | 
| 315 |  |  |  |  |  |  | } | 
| 316 |  |  |  |  |  |  |  | 
| 317 | 1 |  |  |  |  | 1 | return; | 
| 318 |  |  |  |  |  |  | } | 
| 319 |  |  |  |  |  |  |  | 
| 320 |  |  |  |  |  |  | sub find_extreme_step2 { | 
| 321 | 1 |  |  | 1 | 0 | 1 | my $self    = shift; | 
| 322 | 1 |  |  |  |  | 1 | my $windows = shift; | 
| 323 |  |  |  |  |  |  |  | 
| 324 | 1 |  |  |  |  | 2 | my $count = scalar @$windows; | 
| 325 |  |  |  |  |  |  |  | 
| 326 | 1 |  |  |  |  | 29 | my $fall_range       = $self->fall_range; | 
| 327 | 1 |  |  |  |  | 20 | my $vicinal_size     = $self->vicinal_size; | 
| 328 | 1 |  |  |  |  | 21 | my $wave_window_step = $self->wave_window_step; | 
| 329 | 1 |  |  |  |  | 2 | my $vicinal_number   = int( $vicinal_size / $wave_window_step ); | 
| 330 |  |  |  |  |  |  |  | 
| 331 | 1 |  |  |  |  | 1 | REDO: while (1) { | 
| 332 | 7 |  |  |  |  | 4 | my @extreme; | 
| 333 | 7 |  |  |  |  | 13 | for my $i ( 0 .. $count - 1 ) { | 
| 334 | 168 |  |  |  |  | 105 | my $flag = $windows->[$i]->{high_low_flag}; | 
| 335 | 168 | 100 | 100 |  |  | 382 | if ( $flag eq 'T' or $flag eq 'C' ) { | 
| 336 | 28 |  |  |  |  | 22 | push @extreme, $i; | 
| 337 |  |  |  |  |  |  | } | 
| 338 |  |  |  |  |  |  | } | 
| 339 |  |  |  |  |  |  |  | 
| 340 |  |  |  |  |  |  | # Ensure there are no crest--crest or trough--trough | 
| 341 | 7 |  |  |  |  | 11 | for my $i ( 0 .. scalar @extreme - 1 ) { | 
| 342 | 20 |  |  |  |  | 15 | my $wave = $windows->[ $extreme[$i] ]->{high_low_flag}; | 
| 343 | 20 |  |  |  |  | 18 | my $gc   = $windows->[ $extreme[$i] ]->{sw_gc}; | 
| 344 | 20 |  |  |  |  | 14 | my ( $left_wave, $right_wave ) = ( ' ', ' ' ); | 
| 345 | 20 |  |  |  |  | 10 | my ( $left_gc,   $right_gc )   = ( 0,   0 ); | 
| 346 |  |  |  |  |  |  |  | 
| 347 | 20 | 100 |  |  |  | 26 | if ( $i - 1 >= 0 ) { | 
| 348 | 13 |  |  |  |  | 10 | $left_gc   = $windows->[ $extreme[ $i - 1 ] ]->{sw_gc}; | 
| 349 | 13 |  |  |  |  | 9 | $left_wave = $windows->[ $extreme[ $i - 1 ] ]->{high_low_flag}; | 
| 350 |  |  |  |  |  |  | } | 
| 351 | 20 | 100 |  |  |  | 26 | if ( $i + 1 < scalar @extreme ) { | 
| 352 | 17 |  |  |  |  | 15 | $right_gc   = $windows->[ $extreme[ $i + 1 ] ]->{sw_gc}; | 
| 353 | 17 |  |  |  |  | 14 | $right_wave = $windows->[ $extreme[ $i + 1 ] ]->{high_low_flag}; | 
| 354 |  |  |  |  |  |  | } | 
| 355 |  |  |  |  |  |  |  | 
| 356 | 20 | 100 |  |  |  | 26 | if ( $wave eq 'T' ) { | 
|  |  | 50 |  |  |  |  |  | 
| 357 | 7 | 100 |  |  |  | 11 | if ( $wave eq $left_wave ) { | 
| 358 | 1 | 50 |  |  |  | 3 | if ( $gc <= $left_gc ) { | 
| 359 | 1 |  |  |  |  | 2 | $windows->[ $extreme[ $i - 1 ] ]->{high_low_flag} = 'N'; | 
| 360 | 1 |  |  |  |  | 3 | next REDO; | 
| 361 |  |  |  |  |  |  | } | 
| 362 |  |  |  |  |  |  | } | 
| 363 | 6 | 100 |  |  |  | 9 | if ( $wave eq $right_wave ) { | 
| 364 | 1 | 50 |  |  |  | 3 | if ( $gc < $right_gc ) { | 
| 365 | 0 |  |  |  |  | 0 | $windows->[ $extreme[ $i + 1 ] ]->{high_low_flag} = 'N'; | 
| 366 | 0 |  |  |  |  | 0 | next REDO; | 
| 367 |  |  |  |  |  |  | } | 
| 368 |  |  |  |  |  |  | } | 
| 369 |  |  |  |  |  |  | } | 
| 370 |  |  |  |  |  |  | elsif ( $wave eq 'C' ) { | 
| 371 | 13 | 100 |  |  |  | 14 | if ( $wave eq $left_wave ) { | 
| 372 | 3 | 50 |  |  |  | 4 | if ( $gc >= $left_gc ) { | 
| 373 | 3 |  |  |  |  | 4 | $windows->[ $extreme[ $i - 1 ] ]->{high_low_flag} = 'N'; | 
| 374 | 3 |  |  |  |  | 5 | next REDO; | 
| 375 |  |  |  |  |  |  | } | 
| 376 |  |  |  |  |  |  | } | 
| 377 | 10 | 100 |  |  |  | 15 | if ( $wave eq $right_wave ) { | 
| 378 | 3 | 50 |  |  |  | 5 | if ( $gc > $right_gc ) { | 
| 379 | 0 |  |  |  |  | 0 | $windows->[ $extreme[ $i + 1 ] ]->{high_low_flag} = 'N'; | 
| 380 | 0 |  |  |  |  | 0 | next REDO; | 
| 381 |  |  |  |  |  |  | } | 
| 382 |  |  |  |  |  |  | } | 
| 383 |  |  |  |  |  |  | } | 
| 384 |  |  |  |  |  |  | else { | 
| 385 | 0 |  |  |  |  | 0 | warn "high_low_flag signal errors\n"; | 
| 386 |  |  |  |  |  |  | } | 
| 387 |  |  |  |  |  |  | } | 
| 388 |  |  |  |  |  |  |  | 
| 389 |  |  |  |  |  |  | # delete nesting crest--trough | 
| 390 | 3 |  |  |  |  | 4 | for my $i ( 0 .. scalar @extreme - 1 ) { | 
| 391 | 7 |  |  |  |  | 6 | my $wave = $windows->[ $extreme[$i] ]->{high_low_flag}; | 
| 392 | 7 |  |  |  |  | 6 | my $gc   = $windows->[ $extreme[$i] ]->{sw_gc}; | 
| 393 | 7 |  |  |  |  | 26 | my ( $wave1, $wave2, $wave3 ) = ( ' ', ' ', ' ' ); | 
| 394 | 7 |  |  |  |  | 4 | my ( $gc1,   $gc2,   $gc3 )   = ( 0,   0,   0 ); | 
| 395 |  |  |  |  |  |  |  | 
| 396 | 7 | 100 |  |  |  | 9 | if ( $i + 3 < scalar @extreme ) { | 
| 397 |  |  |  |  |  |  |  | 
| 398 |  |  |  |  |  |  | # next 1 | 
| 399 | 1 |  |  |  |  | 2 | $wave1 = $windows->[ $extreme[ $i + 1 ] ]->{high_low_flag}; | 
| 400 | 1 |  |  |  |  | 2 | $gc1   = $windows->[ $extreme[ $i + 1 ] ]->{sw_gc}; | 
| 401 |  |  |  |  |  |  |  | 
| 402 |  |  |  |  |  |  | # next 2 | 
| 403 | 1 |  |  |  |  | 1 | $wave2 = $windows->[ $extreme[ $i + 2 ] ]->{high_low_flag}; | 
| 404 | 1 |  |  |  |  | 2 | $gc2   = $windows->[ $extreme[ $i + 2 ] ]->{sw_gc}; | 
| 405 |  |  |  |  |  |  |  | 
| 406 |  |  |  |  |  |  | # next 3 | 
| 407 | 1 |  |  |  |  | 2 | $wave3 = $windows->[ $extreme[ $i + 3 ] ]->{high_low_flag}; | 
| 408 | 1 |  |  |  |  | 1 | $gc3   = $windows->[ $extreme[ $i + 3 ] ]->{sw_gc}; | 
| 409 |  |  |  |  |  |  | } | 
| 410 |  |  |  |  |  |  | else { | 
| 411 | 6 |  |  |  |  | 7 | next; | 
| 412 |  |  |  |  |  |  | } | 
| 413 |  |  |  |  |  |  |  | 
| 414 | 1 | 50 |  |  |  | 4 | if ( abs( $gc1 - $gc2 ) < $fall_range ) { | 
| 415 | 0 | 0 | 0 |  |  | 0 | if (    List::Util::max( $gc1, $gc2 ) < List::Util::max( $gc, $gc3 ) | 
| 416 |  |  |  |  |  |  | and List::Util::min( $gc1, $gc2 ) > List::Util::min( $gc, $gc3 ) ) | 
| 417 |  |  |  |  |  |  | { | 
| 418 | 0 |  |  |  |  | 0 | $windows->[ $extreme[ $i + 1 ] ]->{high_low_flag} = 'N'; | 
| 419 | 0 |  |  |  |  | 0 | $windows->[ $extreme[ $i + 2 ] ]->{high_low_flag} = 'N'; | 
| 420 | 0 |  |  |  |  | 0 | next REDO; | 
| 421 |  |  |  |  |  |  | } | 
| 422 |  |  |  |  |  |  | } | 
| 423 |  |  |  |  |  |  | } | 
| 424 |  |  |  |  |  |  |  | 
| 425 |  |  |  |  |  |  | # delete small fall-range crest--trough | 
| 426 | 3 |  |  |  |  | 4 | for my $i ( 0 .. scalar @extreme - 1 ) { | 
| 427 | 7 |  |  |  |  | 7 | my $wave = $windows->[ $extreme[$i] ]->{high_low_flag}; | 
| 428 | 7 |  |  |  |  | 6 | my $gc   = $windows->[ $extreme[$i] ]->{sw_gc}; | 
| 429 | 7 |  |  |  |  | 5 | my ( $wave1, $wave2 ) = ( ' ', ' ' ); | 
| 430 | 7 |  |  |  |  | 4 | my ( $gc1,   $gc2 )   = ( 0,   0 ); | 
| 431 |  |  |  |  |  |  |  | 
| 432 | 7 | 100 |  |  |  | 9 | if ( $i + 2 < scalar @extreme ) { | 
| 433 |  |  |  |  |  |  |  | 
| 434 |  |  |  |  |  |  | # next 1 | 
| 435 | 2 |  |  |  |  | 4 | $wave1 = $windows->[ $extreme[ $i + 1 ] ]->{high_low_flag}; | 
| 436 | 2 |  |  |  |  | 2 | $gc1   = $windows->[ $extreme[ $i + 1 ] ]->{sw_gc}; | 
| 437 |  |  |  |  |  |  |  | 
| 438 |  |  |  |  |  |  | # next 2 | 
| 439 | 2 |  |  |  |  | 3 | $wave2 = $windows->[ $extreme[ $i + 2 ] ]->{high_low_flag}; | 
| 440 | 2 |  |  |  |  | 1 | $gc2   = $windows->[ $extreme[ $i + 2 ] ]->{sw_gc}; | 
| 441 |  |  |  |  |  |  | } | 
| 442 |  |  |  |  |  |  | else { | 
| 443 | 5 |  |  |  |  | 5 | next; | 
| 444 |  |  |  |  |  |  | } | 
| 445 |  |  |  |  |  |  |  | 
| 446 | 2 | 50 | 33 |  |  | 6 | if (    abs( $gc1 - $gc ) < $fall_range | 
| 447 |  |  |  |  |  |  | and abs( $gc2 - $gc1 ) < $fall_range ) | 
| 448 |  |  |  |  |  |  | { | 
| 449 | 0 |  |  |  |  | 0 | $windows->[ $extreme[ $i + 1 ] ]->{high_low_flag} = 'N'; | 
| 450 | 0 |  |  |  |  | 0 | next REDO; | 
| 451 |  |  |  |  |  |  | } | 
| 452 |  |  |  |  |  |  | } | 
| 453 |  |  |  |  |  |  |  | 
| 454 |  |  |  |  |  |  | # delete vicinal crest--trough | 
| 455 | 3 |  |  |  |  | 5 | for my $i ( 0 .. scalar @extreme - 1 ) { | 
| 456 | 3 |  |  |  |  | 4 | my $wave = $windows->[ $extreme[$i] ]->{high_low_flag}; | 
| 457 | 3 |  |  |  |  | 1 | my $gc   = $windows->[ $extreme[$i] ]->{sw_gc}; | 
| 458 |  |  |  |  |  |  |  | 
| 459 | 3 | 100 |  |  |  | 6 | if ( $i + 1 < scalar @extreme ) { | 
| 460 | 2 | 50 |  |  |  | 3 | if ( $extreme[ $i + 1 ] - $extreme[$i] < $vicinal_number ) { | 
| 461 | 2 |  |  |  |  | 2 | $windows->[ $extreme[ $i + 1 ] ]->{high_low_flag} = 'N'; | 
| 462 | 2 |  |  |  |  | 4 | next REDO; | 
| 463 |  |  |  |  |  |  | } | 
| 464 |  |  |  |  |  |  | } | 
| 465 |  |  |  |  |  |  | } | 
| 466 |  |  |  |  |  |  |  | 
| 467 | 1 |  |  |  |  | 1 | my @extreme2; | 
| 468 | 1 |  |  |  |  | 2 | for my $i ( 0 .. $count - 1 ) { | 
| 469 | 24 |  |  |  |  | 19 | my $flag = $windows->[$i]->{high_low_flag}; | 
| 470 | 24 | 100 | 66 |  |  | 56 | if ( $flag eq 'T' or $flag eq 'C' ) { | 
| 471 | 1 |  |  |  |  | 1 | push @extreme2, $i; | 
| 472 |  |  |  |  |  |  | } | 
| 473 |  |  |  |  |  |  | } | 
| 474 |  |  |  |  |  |  |  | 
| 475 | 1 | 50 |  |  |  | 4 | if ( scalar @extreme == scalar @extreme2 ) { | 
| 476 | 1 |  |  |  |  | 2 | last; | 
| 477 |  |  |  |  |  |  | } | 
| 478 |  |  |  |  |  |  | } | 
| 479 |  |  |  |  |  |  |  | 
| 480 | 1 |  |  |  |  | 2 | return; | 
| 481 |  |  |  |  |  |  | } | 
| 482 |  |  |  |  |  |  |  | 
| 483 |  |  |  |  |  |  | sub gc_wave { | 
| 484 | 1 |  |  | 1 | 0 | 2854 | my $self           = shift; | 
| 485 | 1 |  |  |  |  | 1 | my $seqs_ref       = shift; | 
| 486 | 1 |  |  |  |  | 2 | my $comparable_set = shift; | 
| 487 |  |  |  |  |  |  |  | 
| 488 | 1 |  |  |  |  | 1 | my @seqs = @{$seqs_ref}; | 
|  | 1 |  |  |  |  | 1 |  | 
| 489 |  |  |  |  |  |  |  | 
| 490 | 1 |  |  |  |  | 4 | my @sliding_windows = $self->sliding_window($comparable_set); | 
| 491 |  |  |  |  |  |  |  | 
| 492 | 1 |  |  |  |  | 2 | my @sliding_attrs; | 
| 493 | 1 |  |  |  |  | 3 | for my $i ( 0 .. $#sliding_windows ) { | 
| 494 | 24 |  |  |  |  | 15 | my $sliding_window = $sliding_windows[$i]; | 
| 495 |  |  |  |  |  |  |  | 
| 496 | 24 |  |  |  |  | 21 | my @sw_seqs = map { $sliding_window->substr_span($_) } @seqs; | 
|  | 24 |  |  |  |  | 35 |  | 
| 497 | 24 |  |  |  |  | 350 | my $sw_gc = _calc_gc_ratio(@sw_seqs); | 
| 498 |  |  |  |  |  |  |  | 
| 499 | 24 |  |  |  |  | 31 | my $sw_length = $sliding_window->size; | 
| 500 | 24 |  |  |  |  | 269 | my $sw_span   = scalar $sliding_window->spans; | 
| 501 | 24 |  |  |  |  | 192 | my $sw_indel  = $sw_span - 1; | 
| 502 |  |  |  |  |  |  |  | 
| 503 | 24 |  |  |  |  | 56 | $sliding_attrs[$i] = { | 
| 504 |  |  |  |  |  |  | sw_set    => $sliding_window, | 
| 505 |  |  |  |  |  |  | sw_length => $sw_length, | 
| 506 |  |  |  |  |  |  | sw_gc     => $sw_gc, | 
| 507 |  |  |  |  |  |  | sw_indel  => $sw_indel, | 
| 508 |  |  |  |  |  |  | }; | 
| 509 |  |  |  |  |  |  | } | 
| 510 |  |  |  |  |  |  |  | 
| 511 | 1 |  |  |  |  | 4 | $self->find_extreme_step1( \@sliding_attrs ); | 
| 512 | 1 |  |  |  |  | 4 | $self->find_extreme_step2( \@sliding_attrs ); | 
| 513 |  |  |  |  |  |  |  | 
| 514 | 1 |  |  |  |  | 1 | my @slidings; | 
| 515 | 1 |  |  |  |  | 2 | for (@sliding_attrs) { | 
| 516 |  |  |  |  |  |  | push @slidings, | 
| 517 |  |  |  |  |  |  | { | 
| 518 |  |  |  |  |  |  | set           => $_->{sw_set}, | 
| 519 |  |  |  |  |  |  | high_low_flag => $_->{high_low_flag}, | 
| 520 |  |  |  |  |  |  | gc            => $_->{sw_gc}, | 
| 521 | 24 |  |  |  |  | 42 | }; | 
| 522 |  |  |  |  |  |  | } | 
| 523 |  |  |  |  |  |  |  | 
| 524 | 1 |  |  |  |  | 13 | return @slidings; | 
| 525 |  |  |  |  |  |  | } | 
| 526 |  |  |  |  |  |  |  | 
| 527 |  |  |  |  |  |  | 1; | 
| 528 |  |  |  |  |  |  |  | 
| 529 |  |  |  |  |  |  | __END__ | 
| 530 |  |  |  |  |  |  |  | 
| 531 |  |  |  |  |  |  | =pod | 
| 532 |  |  |  |  |  |  |  | 
| 533 |  |  |  |  |  |  | =encoding UTF-8 | 
| 534 |  |  |  |  |  |  |  | 
| 535 |  |  |  |  |  |  | =head1 NAME | 
| 536 |  |  |  |  |  |  |  | 
| 537 |  |  |  |  |  |  | AlignDB::GC - GC-related analysises | 
| 538 |  |  |  |  |  |  |  | 
| 539 |  |  |  |  |  |  | =head1 ATTRIBUTES | 
| 540 |  |  |  |  |  |  |  | 
| 541 |  |  |  |  |  |  | extreme sliding window size | 
| 542 |  |  |  |  |  |  | extreme sliding window step | 
| 543 |  |  |  |  |  |  | vicinal size | 
| 544 |  |  |  |  |  |  | minimal fall range | 
| 545 |  |  |  |  |  |  | gsw window size | 
| 546 |  |  |  |  |  |  | extreme sliding window size | 
| 547 |  |  |  |  |  |  | extreme sliding window step | 
| 548 |  |  |  |  |  |  | skip calc mdcw | 
| 549 |  |  |  |  |  |  |  | 
| 550 |  |  |  |  |  |  | =head1 METHODS | 
| 551 |  |  |  |  |  |  |  | 
| 552 |  |  |  |  |  |  | segment | 
| 553 |  |  |  |  |  |  | sliding_window | 
| 554 |  |  |  |  |  |  | gc_wave | 
| 555 |  |  |  |  |  |  |  | 
| 556 |  |  |  |  |  |  | =head1 AUTHOR | 
| 557 |  |  |  |  |  |  |  | 
| 558 |  |  |  |  |  |  | Qiang Wang <wang-q@outlook.com> | 
| 559 |  |  |  |  |  |  |  | 
| 560 |  |  |  |  |  |  | =head1 COPYRIGHT AND LICENSE | 
| 561 |  |  |  |  |  |  |  | 
| 562 |  |  |  |  |  |  | This software is copyright (c) 2008 by Qiang Wang. | 
| 563 |  |  |  |  |  |  |  | 
| 564 |  |  |  |  |  |  | This is free software; you can redistribute it and/or modify it under | 
| 565 |  |  |  |  |  |  | the same terms as the Perl 5 programming language system itself. | 
| 566 |  |  |  |  |  |  |  | 
| 567 |  |  |  |  |  |  | =cut |