File Coverage

lib/Bio/Tradis/Analysis/InsertSite.pm
Criterion Covered Total %
statement 43 93 46.2
branch 0 10 0.0
condition 0 3 0.0
subroutine 10 15 66.6
pod 0 1 0.0
total 53 122 43.4


line stmt bran cond sub pod time code
1             package Bio::Tradis::Analysis::InsertSite;
2             # ABSTRACT: Take in a bam file and plot the start position of each read
3             $Bio::Tradis::Analysis::InsertSite::VERSION = '1.3.2';
4              
5              
6 4     4   89207 use Moose;
  4         404035  
  4         36  
7 4     4   25188 use Bio::Tradis::Parser::Bam;
  4         13  
  4         136  
8 4     4   1587 use Bio::Tradis::Parser::Cigar;
  4         12  
  4         3840  
9              
10             has 'filename' => ( is => 'rw', isa => 'Str', required => 1 );
11             has 'output_base_filename' => ( is => 'rw', isa => 'Str', required => 1 );
12             has 'mapping_score' => ( is => 'ro', isa => 'Int', required => 1 );
13             has 'samtools_exec' => ( is => 'ro', isa => 'Str', default => 'samtools' );
14             has '_output_file_handles' => ( is => 'rw', isa => 'HashRef', lazy_build => 1 );
15             has '_sequence_names' => ( is => 'rw', isa => 'ArrayRef', lazy_build => 1 );
16             has '_sequence_base_counters' =>
17             ( is => 'rw', isa => 'HashRef', lazy_build => 1 );
18             has '_sequence_information' =>
19             ( is => 'rw', isa => 'HashRef', lazy_build => 1 );
20              
21             has '_frequency_of_read_start' => (
22             is => 'rw',
23             isa => 'HashRef',
24             lazy => 1,
25             builder => '_build__frequency_of_read_start'
26             );
27              
28             sub _build__sequence_information {
29 3     3   7 my ($self) = @_;
30 3         72 my %all_sequences_info =
31             Bio::Tradis::Parser::Bam->new( file => $self->filename, samtools_exec => $self->samtools_exec )->seq_info;
32 3         258 return \%all_sequences_info;
33             }
34              
35             sub _build__sequence_names {
36 3     3   13 my ($self) = @_;
37 3         6 my @sequence_names = keys %{ $self->_sequence_information };
  3         93  
38 3         75 return \@sequence_names;
39             }
40              
41             sub _build__sequence_base_counters {
42 0     0   0 my ($self) = @_;
43 0         0 my %sequence_base_counters;
44 0         0 for my $sequence_name ( @{ $self->_sequence_names } ) {
  0         0  
45 0         0 $sequence_base_counters{$sequence_name} = 0;
46             }
47 0         0 return \%sequence_base_counters;
48             }
49              
50             sub _build__output_file_handles {
51 3     3   9 my ($self) = @_;
52 3         88 my $out = $self->output_base_filename;
53 3         10 chomp $out;
54            
55 3         5 my %output_file_handles;
56 3         5 for my $sequence_name ( @{ $self->_sequence_names } ) {
  3         61  
57 0         0 my $file_sequence_name = $sequence_name;
58 0         0 $file_sequence_name =~ s/[^\w\d\.]/_/g;
59 0         0 my $cmd = "gzip > $out.$file_sequence_name.insert_site_plot.gz";
60 0 0       0 open( $output_file_handles{$sequence_name}, '|-', $cmd )
61             || Bio::Tradis::Analysis::Exceptions::FailedToCreateOutputFileHandle
62             ->throw( error =>
63             "Couldnt create output file handle for saving insertsite plot results for "
64             . $sequence_name . " in "
65             . $self->filename
66             . " and output base "
67             . $self->output_base_filename );
68             }
69              
70 3         63 return \%output_file_handles;
71             }
72              
73             sub _number_of_forward_reads {
74 0     0   0 my ( $self, $sequence_name, $read_coord ) = @_;
75 0         0 return $self->_number_of_reads( $sequence_name, $read_coord, 1 );
76             }
77              
78             sub _number_of_reverse_reads {
79 0     0   0 my ( $self, $sequence_name, $read_coord ) = @_;
80 0         0 return $self->_number_of_reads( $sequence_name, $read_coord, -1 );
81             }
82              
83             sub _number_of_reads {
84 0     0   0 my ( $self, $sequence_name, $read_coord, $direction ) = @_;
85 0 0 0     0 if (
86             defined(
87             $self->_frequency_of_read_start->{$sequence_name}{$read_coord}
88             )
89             && defined(
90             $self->_frequency_of_read_start->{$sequence_name}{$read_coord}
91             {$direction}
92             )
93             )
94             {
95             return $self->_frequency_of_read_start->{$sequence_name}{$read_coord}
96 0         0 {$direction};
97             }
98 0         0 return 0;
99             }
100              
101             # work out if padding is needed and return it as a formatted string
102             sub _create_padding_string {
103 0     0   0 my ( $self, $previous_counter, $current_counter ) = @_;
104 0         0 my $padding_string = "";
105 0         0 for ( my $i = $previous_counter + 1 ; $i < $current_counter ; $i++ ) {
106 0         0 $padding_string .= "0 0\n";
107             }
108 0         0 return $padding_string;
109             }
110              
111             sub _print_padding_at_end_of_sequence {
112 3     3   12 my ($self) = @_;
113 3         7 for my $sequence_name ( @{ $self->_sequence_names } ) {
  3         78  
114             my $sequence_length =
115 0         0 $self->_sequence_information->{$sequence_name}->{'LN'};
116 0 0       0 next unless ( $sequence_length =~ /^[\d]+$/ );
117 0         0 $sequence_length++;
118             my $padding_string =
119             $self->_create_padding_string(
120 0         0 $self->_sequence_base_counters->{$sequence_name},
121             $sequence_length );
122 0         0 $self->_sequence_base_counters->{$sequence_name} = $sequence_length;
123 0         0 print { $self->_output_file_handles->{$sequence_name} } $padding_string;
  0         0  
124             }
125             }
126              
127             sub _close_output_file_handles {
128 3     3   10 my ($self) = @_;
129 3         6 for my $output_file_handle ( values %{ $self->_output_file_handles } ) {
  3         72  
130 0         0 close($output_file_handle);
131             }
132 3         7 return;
133             }
134              
135             sub _build__frequency_of_read_start {
136 3     3   9 my ($self) = @_;
137 3         6 my %frequency_of_read_start;
138 3         66 my $samtools_command = join(' ', ($self->samtools_exec, 'view', '-F', 4, '-q', $self->mapping_score, $self->filename));
139              
140 3         5505 open(my $samtools_view_fh,"-|" ,$samtools_command);
141 3         251 while(<$samtools_view_fh>)
142             {
143 0         0 my $sam_line = $_;
144              
145 0         0 my @read_details = split("\t", $sam_line);
146 0         0 my $seqid = $read_details[2];
147 0         0 my $cigar_parser = Bio::Tradis::Parser::Cigar->new(cigar => $read_details[5], coordinate => $read_details[3]);
148 0         0 my $strand = 1;
149 0 0       0 $strand = -1 if(($read_details[1] & 0x10) == 0x10);
150            
151 0 0       0 if ( $strand == 1 ) {
152             $frequency_of_read_start{$seqid}{ $cigar_parser->start }
153 0         0 { $strand }++;
154             }
155             else {
156             $frequency_of_read_start{$seqid}{ $cigar_parser->end }
157 0         0 { $strand }++;
158             }
159             }
160 3         192 return \%frequency_of_read_start;
161             }
162              
163             sub create_plots {
164 3     3 0 7303 my ($self) = @_;
165 3         7 my %read_starts = %{ $self->_frequency_of_read_start };
  3         81  
166 3         13 for my $sequence_name ( keys %read_starts ) {
167 0         0 my %sequence_read_coords = %{ $read_starts{$sequence_name} };
  0         0  
168 0         0 for my $read_coord ( sort { $a <=> $b } ( keys %sequence_read_coords ) )
  0         0  
169             {
170             my $padding_string =
171             $self->_create_padding_string(
172 0         0 $self->_sequence_base_counters->{$sequence_name}, $read_coord );
173 0         0 $self->_sequence_base_counters->{$sequence_name} = $read_coord;
174              
175 0         0 my $forward_reads =
176             $self->_number_of_forward_reads( $sequence_name, $read_coord );
177 0         0 my $reverse_reads =
178             $self->_number_of_reverse_reads( $sequence_name, $read_coord );
179              
180 0         0 print { $self->_output_file_handles->{$sequence_name} }
  0         0  
181             $padding_string . $forward_reads . " " . $reverse_reads . "\n";
182             }
183             }
184              
185 3         16 $self->_print_padding_at_end_of_sequence;
186 3         15 $self->_close_output_file_handles;
187 3         21 return 1;
188             }
189              
190             1;
191              
192             __END__
193              
194             =pod
195              
196             =encoding UTF-8
197              
198             =head1 NAME
199              
200             Bio::Tradis::Analysis::InsertSite - Take in a bam file and plot the start position of each read
201              
202             =head1 VERSION
203              
204             version 1.3.2
205              
206             =head1 SYNOPSIS
207              
208             Takes in a mapped BAM file and plot the start position of each read
209              
210             use Bio::Tradis::Analysis::InsertSite;
211             my $insertsite_plots_from_bam = Bio::Tradis::Analysis::InsertSite->new(
212             filename => 'my_file.bam',
213             output_base_filename => 'my_output_file'
214             );
215             $insertsite_plots_from_bam->create_plots();
216              
217             =head1 NAME
218              
219             InsertSite.pm - Take in a bam file and plot the start position of each read
220              
221             =head1 AUTHOR
222              
223             Carla Cummins <path-help@sanger.ac.uk>
224              
225             =head1 COPYRIGHT AND LICENSE
226              
227             This software is Copyright (c) 2013 by Wellcome Trust Sanger Institute.
228              
229             This is free software, licensed under:
230              
231             The GNU General Public License, Version 3, June 2007
232              
233             =cut