File Coverage

blib/lib/FASTX/Abi.pm
Criterion Covered Total %
statement 114 120 95.0
branch 39 50 78.0
condition 3 6 50.0
subroutine 10 10 100.0
pod 3 3 100.0
total 169 189 89.4


line stmt bran cond sub pod time code
1             package FASTX::Abi;
2 11     11   6504 use 5.016;
  11         31  
3 11     11   48 use warnings;
  11         15  
  11         275  
4 11     11   46 use Carp qw(confess);
  11         19  
  11         435  
5 11     11   7449 use Bio::Trace::ABIF;
  11         140437  
  11         437  
6 11     11   1548 use Data::Dumper;
  11         16591  
  11         433  
7 11     11   54 use File::Basename;
  11         15  
  11         12551  
8              
9             $FASTX::Abi::VERSION = '0.11';
10              
11             #ABSTRACT: Read Sanger trace file (chromatograms) in FASTQ format. For traces called with I option, the ambiguities will be split into two sequences to allow usage from NGS tools that usually do not understand IUPAC ambiguities.
12              
13             our @valid_new_attributes = (
14             'filename', # *REQUIRED* input trace filepath
15             'trim_ends', # bool (default: 1) trim low quality ends
16             'wnd', # int (default: 16) sliding window for quality trim
17             'min_qual', # int (default: 22) threshold for low quality calls
18             'bad_bases', # int (default: 2) maximum number of low quality bases per window
19             'keep_abi', # bool (default: 0) import the Bio::Trace::ABIF object in FASTX::Abi (otherwise deleted after import)
20             );
21             our @valid_obj_attributes = (
22             'diff', # number of ambiguous bases
23             'diff_array', # array of ambiguous bases position
24             'sequence_name', # sequence name from filename
25             'instrument', # Instrument
26             'avg_peak_spacing', # Avg Peak Spacing in chromas
27             'version', # version chromatograms
28             'chromas', # Bio::Trace::ABIF object
29              
30             'hetero', # ambiguity
31             'seq1', # Sequence 1 (non ambiguous, allele1)
32             'seq2', # Sequence 2 (non ambiguous, allele2)
33             'sequence', # Sequence, trimmed
34             'quality', # Quality, trimmed
35             'raw_sequence', # Raw sequence
36             'raw_quality', # Raw quality
37             'iso_seq', # Sequence are equal
38             'discard', # Low quality sequence
39             );
40              
41             our %iupac = (
42             'R' => 'AG',
43             'Y' => 'CT',
44             'M' => 'CA',
45             'K' => 'TG',
46             'W' => 'TA',
47             'S' => 'CG'
48             );
49              
50              
51             sub new {
52             # Instantiate object
53 33     33 1 16959 my ($class, $args) = @_;
54              
55             my $self = {
56             filename => $args->{filename}, # Chromatogram file name
57             trim_ends => $args->{trim_ends}, # Trim low quality ends (bool)
58             min_qual => $args->{min_qual}, # Minimum quality
59             wnd => $args->{wnd}, # Window for end trimming
60             bad_bases => $args->{bad_bases}, # Number of low qual bases per $window_width
61             keep_abi => $args->{keep_abi}, # Do not destroy $self->{chromas} after use
62 33         128 };
63              
64             #check valid inputs:
65 33         59 for my $input (sort keys %{ $args } ) {
  33         124  
66 44 100       591 if ( ! grep( /^$input$/, @valid_new_attributes ) ) {
67 1         190 confess("Method new() does not accept \"$input\" attribute. Valid attributes are:\n", join(', ', @valid_new_attributes));
68             }
69             }
70              
71             # CHECK INPUT FILE
72             # -----------------------------------
73 32 50       83 if (not defined $self->{filename}) {
74 0         0 confess("ABI file must be provided when creating new object");
75             }
76              
77 32 100       469 if (not -e $self->{filename}) {
78 1         107 confess("ABI file not found: ", $self->{filename});
79             }
80 31         58 my $abif;
81             my $try = eval
82 31         45 {
83 31         134 $abif = Bio::Trace::ABIF->new();
84 31 50       415 $abif->open_abif($self->{filename}) or confess "Error in file: ", $self->{filename};
85 31         83335 1;
86             };
87              
88 31 50       59 if (not $try) {
89 0         0 confess("Bio::Trace::ABIF was unable to read: ", $self->{filename});
90             }
91 31         92 my $object = bless $self, $class;
92 31         219 $object->{chromas} = $abif;
93              
94 31         80 my @ext = ('.abi','.ab1','.ABI','.abI','.AB1','.ab');
95 31         2624 my ($seqname) = basename($self->{filename}, @ext);
96 31         112 $object->{sequence_name} = $seqname;
97              
98             # DEFAULTS
99             # -----------------------------------
100 31 100       79 $object->{trim_ends} = 1 unless defined $object->{trim_ends};
101 31 50       65 $object->{wnd} = 10 unless defined $object->{wnd};
102 31 100       94 $object->{min_qual} = 20 unless defined $object->{min_qual};
103 31 50       68 $object->{bad_bases} = 4 unless defined $object->{bad_bases};
104 31 100       63 $object->{keep_abi} = 0 unless defined $object->{keep_abi};
105 31         57 $object->{discard} = 0;
106              
107             # GET SEQUENCE FROM AB1 FILE
108             # -----------------------------------
109 31         58 my $seq = _get_sequence($self);
110 31 100       3107 if ($self->{keep_abi} == 0) {
111 30         49 $self->{chromas} = undef;
112             }
113              
114             #check valid attributes:
115 31         35 for my $input (sort keys %{ $self} ) {
  31         341  
116             # [this is a developer's safety net]
117             # uncoverable condition false
118 682 50       10790 if ( ! grep( /^$input$/, @valid_new_attributes, @valid_obj_attributes ) ) {
119 0         0 confess("Method new() does not accept \"$input\" attribute. Valid attributes are:\n", join(', ', @valid_new_attributes, @valid_obj_attributes));
120             }
121             }
122              
123              
124 31         170 return $object;
125             }
126              
127              
128             sub get_fastq {
129 12     12 1 530 my ($self, $name, $quality_value) = @_;
130              
131 12 50       52 if (not defined $name) {
    50          
132 0         0 $name = $self->{sequence_name};
133             } elsif ($name=~/\s+/) {
134 0         0 $name =~s/\s+/_/g;
135             }
136              
137 12         19 my $quality = $self->{quality};
138 12 100       22 if (defined $quality_value) {
139 9 100 66     40 if ($quality_value =~/^\d+$/ and $quality_value >= 10) {
    100          
140 3 50       8 my $q = chr(($quality_value <= 93 ? $quality_value : 93) + 33);
141 3         8 $quality = $q x length($quality);
142             } elsif (length($quality_value) == 1) {
143 3         8 $quality = $quality_value x length($quality);
144             } else {
145 3         343 confess("Supplied quality is neither a valid integer or a single char: <$quality_value>\n");
146             }
147             }
148              
149 9         13 my $output = '';
150 9 100       16 if ( $self->{iso_seq} ) {
151             $output .= '@' . $name . "\n" .
152 6         83 $self->{seq1} . "\n+\n" .
153             $quality . "\n";
154             } else {
155             $output .= '@' . $name . "_1\n" .
156 3         10 $self->{seq1} . "\n+\n" .
157             $quality . "\n";
158             $output .= '@' . $name . "_2\n" .
159 3         16 $self->{seq2} . "\n+\n" .
160             $quality . "\n";
161             }
162 9         21 return $output;
163             }
164              
165              
166             sub get_trace_info {
167 1     1 1 71 my $self = shift;
168 1         2 my $data;
169 1         2 $data->{instrument} = $self->{instrument};
170 1         2 $data->{version} = $self->{version};
171 1         1 $data->{avg_peak_spacing} = $self->{avg_peak_spacing};
172              
173 1         3 return $data;
174             }
175              
176              
177             sub _get_sequence {
178 31     31   57 my $self = shift;
179 31         42 my $abif = $self->{chromas};
180              
181 31         90 $self->{raw_sequence} = $abif->sequence();
182              
183             # Get quality values
184 31         2336 my @qv = $abif->quality_values();
185             # Encode quality in FASTQ chars
186 31 50       5862 my @fqv = map {chr(int(($_<=93? $_ : 93)*4/6) + 33)} @qv;
  25224         38547  
187              
188             # FASTQ
189 31         648 my $q = join('', @fqv);
190              
191              
192 31         63 $self->{raw_quality} = $q;
193              
194 31         42 $self->{sequence} = $self->{raw_sequence};
195 31         45 $self->{quality} = $self->{raw_quality};
196              
197             # Trim
198 31 100       85 if ($self->{trim_ends}) {
199             #The Sequencing Analysis program determines the clear range of the sequence by trimming bases from the 5' to 3'
200             #ends until fewer than 4 bases out of 20 have a quality value less than 20.
201             #You can change these parameters by explicitly passing arguments to this method
202             #(the default values are $window_width = 20, $bad_bases_threshold = 4, $quality_threshold = 20).
203             # Note that Sequencing Analysis counts the bases starting from one, so you have to add one to the return values to get consistent results.
204              
205             my ($begin_pos, $end_pos) = $abif->clear_range(
206             $self->{wnd},
207             $self->{bad_bases},
208             $self->{min_qual},
209              
210 28         95 );
211              
212             # This can be tested with low quality chromatograms
213             # *TODO* to ask for some bad trace
214              
215             # uncoverable branch false
216             # uncoverable condition left
217             # uncoverable condition right
218              
219 28 50 33     24677 if ($begin_pos>0 and $end_pos>0) {
220 28         59 my $hi_qual_length = $end_pos-$begin_pos+1;
221 28         71 $self->{sequence} = substr($self->{sequence}, $begin_pos, $hi_qual_length);
222 28         89 $self->{quality} = substr($self->{quality} , $begin_pos, $hi_qual_length);
223             } else {
224 0         0 $self->{discard} = 1;
225             }
226             }
227              
228             # Check hetero bases
229 31 100       1326 if ($self->{sequence}!~/[ACGT][RYMKWS]+[ACGT]/i) {
230 21         37 $self->{hetero} = 0;
231             } else {
232 10         18 $self->{hetero} = 1;
233             }
234              
235             # Check
236 31         53 $self->{diff_array} = ();
237 31         118 $self->{diff} = 0;
238 31         42 my $seq1 = '';
239 31         33 my $seq2 = '';
240 31         79 for (my $i = 0; $i{sequence}); $i++) {
241 19983         21186 my $q0 = substr($self->{quality}, $i, 1);
242 19983         20109 my $s0 = substr($self->{sequence}, $i,1);
243              
244             # Ambiguity detected:
245 19983 100       22393 if ($iupac{$s0}) {
246 29         80 my ($base1, $base2) = split //, $iupac{$s0};
247 29         44 $seq1.=$base1;
248 29         29 $seq2.=$base2;
249 29         31 $self->{diff}++;
250 29         34 push(@{ $self->{diff_array} }, $i);
  29         92  
251             } else {
252 19954         18747 $seq1.=$s0;
253 19954         27038 $seq2.=$s0;
254              
255             }
256             }
257 31         64 $self->{seq1} = $seq1;
258 31         52 $self->{seq2} = $seq2;
259              
260 31 100       53 if ($seq1 eq $seq2) {
261 21         44 $self->{iso_seq} = 1
262             } else {
263 10         12 $self->{iso_seq} = 0;
264             }
265              
266              
267 31         116 $self->{instrument} = $self->{chromas}->official_instrument_name();
268 31         2221 $self->{version} = $self->{chromas}->abif_version();
269 31         887 $self->{avg_peak_spacing} = $self->{chromas}->avg_peak_spacing();
270              
271             }
272              
273             1;
274              
275             __END__