File Coverage

blib/lib/FASTX/Seq.pm
Criterion Covered Total %
statement 194 206 94.1
branch 71 96 73.9
condition 35 54 64.8
subroutine 34 36 94.4
pod 29 29 100.0
total 363 421 86.2


line stmt bran cond sub pod time code
1             package FASTX::Seq;
2             #ABSTRACT: A class for representing a sequence for FASTX::Reader
3              
4 46     46   217085 use 5.012;
  46         202  
5 46     46   239 use warnings;
  46         87  
  46         1400  
6 46     46   252 use Carp qw(confess);
  46         108  
  46         2114  
7 46     46   2345 use Data::Dumper;
  46         22182  
  46         2394  
8             $Data::Dumper::Sortkeys = 1;
9 46     46   315 use File::Basename;
  46         120  
  46         147888  
10              
11             $FASTX::Seq::VERSION = $FASTX::Reader::VERSION;
12             $FASTX::Seq::DEFAULT_QUALITY = 'I';
13             $FASTX::Seq::DEFAULT_LINE_LEN = 0;
14             $FASTX::Seq::DEFAULT_OFFSET = 33;
15             require Exporter;
16             our @ISA = qw(Exporter);
17              
18              
19              
20             sub new {
21 76     76 1 5266 my $class = shift @_;
22 76         176 my ($seq, $name, $comment, $qual, $offset, $line_len, $default_quality);
23              
24             # Descriptive instantiation with parameters -param => value
25 76 100       373 if (substr($_[0], 0, 1) eq '-') {
    50          
26 14         65 my %data = @_;
27             # Try parsing
28 14         51 for my $i (keys %data) {
29 51 100       285 if ($i =~ /^-(seq|sequence)/) {
    100          
    100          
    100          
    100          
    100          
    50          
30 13         31 $seq = $data{$i};
31             } elsif ($i =~ /^-(name|id)/) {
32 11         28 $name = $data{$i};
33             } elsif ($i =~ /^-(comment|desc|description)/) {
34 7         15 $comment = $data{$i};
35             } elsif ($i =~ /^-qual(ity)?/) {
36 13         35 $qual = $data{$i};
37             } elsif ($i =~ /^-offset/) {
38 4         12 $offset = $data{$i};
39             } elsif ($i =~ /^-line_len/) {
40 2         6 $line_len = $data{$i};
41             } elsif ($i =~ /^-default_quality/) {
42 0         0 $default_quality = $data{$i};
43             } else {
44 1         263 confess "ERROR FASTX::Seq: Unknown parameter $i\n";
45             }
46             }
47             } elsif (not defined $seq) {
48             # Positional instantiation
49             # check number of arguments
50 62 100 66     571 confess "ERROR FASTX::Seq: Wrong number of arguments\n" if (scalar(@_) < 1 || scalar(@_) > 4);
51 61         179 ($seq, $name, $comment, $qual) = @_;
52             }
53            
54             # Required NOT empty
55 74 50       187 if (not defined $seq) {
56 0         0 confess "ERROR FASTX::Seq: Sequence missing, record cannot be created\n";
57             }
58              
59             # If quality is provided, must match sequence length
60 74 100 66     482 if ( defined $seq && defined $qual
      100        
61             && (length($seq) != length($qual))) {
62 2         404 confess "Sequence/quality length mismatch";
63             }
64            
65 72         230 my $self = bless {}, $class;
66            
67              
68 72   100     341 $self->{name} = $name // undef;
69 72         154 $self->{seq} = $seq;
70 72   100     243 $self->{comment} = $comment // undef;
71 72   100     273 $self->{qual} = $qual // undef;
72            
73             # Store defaults
74 72   33     408 $self->{default_quality} = $default_quality // $FASTX::Seq::DEFAULT_QUALITY;
75 72   66     257 $self->{line_len} = $line_len // $FASTX::Seq::DEFAULT_LINE_LEN;
76 72   66     247 $self->{offset} = $offset // $FASTX::Seq::DEFAULT_OFFSET;
77            
78 72         316 return $self;
79            
80             }
81              
82              
83             sub copy {
84 6     6 1 26 my ($self) = @_;
85 6         19 my $copy = __PACKAGE__->new($self->seq, $self->name, $self->comment, $self->qual);
86 6         23 $copy->{default_quality} = $self->default_quality;
87 6         17 $copy->line_len = $self->line_len;
88 6         20 $copy->offset = $self->offset;
89 6         23 return $copy;
90             }
91              
92              
93             sub seq : lvalue {
94             # Update sequence
95 77     77 1 4429 my ($self, $new_val) = @_;
96 77 100       168 $self->{seq} = $new_val if (defined $new_val);
97 77         352 return $self->{seq};
98             }
99              
100              
101              
102             sub name : lvalue {
103             # Update name
104 10     10 1 24 my ($self, $new_val) = @_;
105 10 50       25 $self->{seq} = $new_val if (defined $new_val);
106 10         38 return $self->{name};
107             }
108              
109              
110              
111             sub qual : lvalue {
112             # Update quality
113 48     48 1 737 my ($self, $new_val) = @_;
114 48 50       106 $self->{qual} = $new_val if (defined $new_val);
115 48         205 return $self->{qual};
116             }
117              
118              
119             sub comment : lvalue {
120             # Update comment
121 7     7 1 17 my ($self, $new_val) = @_;
122 7 50       16 $self->{comment} = $new_val if (defined $new_val);
123 7         24 return $self->{comment};
124             }
125              
126              
127             sub offset : lvalue {
128             # Update offset
129 21     21 1 306 my ($self, $new_val) = @_;
130 21 50 66     65 confess "ERROR FASTX::Seq: offset must be a positive integer" if (defined $new_val && $new_val !~ /^\d+$/);
131 21 100       47 $self->{offset} = $new_val if (defined $new_val);
132 21         60 return $self->{offset};
133             }
134              
135              
136             sub line_len : lvalue {
137             # Update line_len
138 12     12 1 37 my ($self, $new_val) = @_;
139 12 50 33     47 confess "ERROR FASTX::Seq: line_len must be a positive integer" if (defined $new_val && $new_val !~ /^\d+$/);
140 12 50       29 $self->{line_len} = $new_val if (defined $new_val);
141 12         47 return $self->{line_len};
142             }
143              
144              
145             sub default_quality : lvalue {
146             # Update default_quality
147 6     6 1 17 my ($self, $new_val) = @_;
148 6 50 33     20 confess "ERROR FASTX::Seq: default_quality must be a single character" if (defined $new_val && length($new_val) != 1);
149 6 50       13 $self->{default_quality} = $new_val if (defined $new_val);
150 6         16 return $self->{default_quality};
151             }
152              
153              
154             sub len {
155             # Update comment
156 23     23 1 5095 my ($self) = @_;
157 23         101 return length($self->{seq});
158             }
159              
160              
161              
162              
163              
164             sub rev {
165             # Update comment
166 2     2 1 4 my ($self) = @_;
167 2         5 $self->{seq} = reverse($self->{seq});
168 2 50       8 $self->{qual} = reverse($self->{qual}) if (defined reverse($self->{qual}));
169 2         4 return $self;
170             }
171              
172              
173              
174             sub rc {
175             # Update comment
176 1     1 1 2 my ($self) = @_;
177 1         3 $self->rev();
178 1 50       5 if ($self->{seq} =~ /U/i) {
179 0         0 $self->{seq} =~ tr/ACGURYSWKMBDHVacguryswkmbdhv/UGCAYRSWMKVHDBugcayrswmkvhdb/;
180             } else {
181 1         2 $self->{seq} =~ tr/ACGTRYSWKMBDHVacgtryswkmbdhv/TGCAYRSWMKVHDBtgcayrswmkvhdb/;
182             }
183             #$self->{qual} = reverse($self->{qual}) if (defined reverse($self->{qual}));
184 1         2 return $self;
185             }
186              
187              
188             sub slice {
189             # Update comment
190 7     7 1 32 my ($self, $from, $len) = @_;
191 7         10 my $new_seq;
192             my $new_qual;
193 7 100       19 if (defined($len)) {
194 4         18 $new_seq = substr($self->{seq}, $from, $len);
195 4 100       13 $new_qual = defined($self->{qual}) ? substr($self->{qual}, $from, $len) : undef;
196             } else {
197 3         12 $new_seq = substr($self->{seq}, $from);
198 3 100       12 $new_qual = defined($self->{qual}) ? substr($self->{qual}, $from) : undef;
199             }
200 7         23 return __PACKAGE__->new($new_seq, $self->{name}, $self->{comment}, $new_qual);
201             }
202              
203              
204             sub char2qual {
205 42     42 1 1042 my ($self, $quality_encoded, $offset) = @_;
206             # Check quality_encoded is a single character
207 42 50       88 confess "Quality encoded character must be a single character" if (length($quality_encoded) != 1);
208 42 50       69 $offset = defined $offset ? $offset : $FASTX::Seq::DEFAULT_OFFSET;
209 42         133 return unpack("C*", $quality_encoded) - $offset;
210             }
211              
212              
213             sub qual2char {
214 2     2 1 15 my ($self, $quality_integer, $offset) = @_;
215 2 50       13 confess "Quality integer must be an integer value" if ($quality_integer !~ /^\d+$/);
216 2 50       7 $offset = defined $offset ? $offset : $FASTX::Seq::DEFAULT_OFFSET;
217 2         11 return chr($quality_integer + $offset);
218             }
219              
220              
221              
222             sub qualities {
223 5     5 1 14 my ($self) = @_;
224 5         10 my @qualities;
225 5 50       14 if (defined $self->{qual}) {
226 5         15 for (my $i = 0; $i < length($self->{qual}); $i++) {
227 40         92 push @qualities, $self->char2qual(substr($self->{qual}, $i, 1), $self->{offset});
228             }
229             }
230 5         22 return @qualities;
231             }
232              
233              
234             sub min_qual {
235 1     1 1 1190 my ($self) = @_;
236 1         3 my @qualities = $self->qualities();
237             # Calculate minimum quality
238 1         6 @qualities = sort {$a <=> $b} @qualities;
  13         18  
239 1         3 return $qualities[0];
240             }
241              
242              
243              
244             sub max_qual {
245 1     1 1 320 my ($self) = @_;
246 1         4 my @qualities = $self->qualities();
247             # Calculate minimum quality
248 1         6 @qualities = sort {$b <=> $a} @qualities;
  13         17  
249 1         3 return $qualities[0];
250             }
251              
252              
253             sub trim_after : lvalue {
254 1     1 1 293 my ($self, $qual_value) = @_;
255 1 50       9 confess "Quality integer must be an integer value" if ($qual_value !~ /^\d+$/);
256             # Detect first base with quality lower or equal than $qual_value
257 1         3 my $i = 0;
258 1         16 my @qualities = $self->qualities();
259 1         5 for ($i = 0; $i < scalar(@qualities); $i++) {
260 7 100       15 if ($qualities[$i] <= $qual_value) {
261 1         3 last;
262             }
263             }
264             # Trim sequence and quality
265 1         7 my $slice = $self->slice(0, $i);
266 1         16 $self->{seq} = $slice->{seq};
267 1         5 $self->{qual} = $slice->{qual};
268 1         11 return $self;
269             }
270              
271              
272              
273             sub trim_until : lvalue {
274 1     1 1 5 my ($self, $qual_value) = @_;
275 1 50       9 confess "Quality integer must be an integer value" if ($qual_value !~ /^\d+$/);
276             # Detect first base with quality lower or equal than $qual_value
277 1         12 my $i = 0;
278 1         4 my @qualities = $self->qualities();
279 1         4 for ($i = 0; $i < scalar(@qualities); $i++) {
280 3 100       9 if ($qualities[$i] >= $qual_value) {
281 1         1 last;
282             }
283             }
284             # Trim sequence and quality
285 1         13 my $slice = $self->slice($i);
286 1         3 $self->{seq} = $slice->{seq};
287 1         1 $self->{qual} = $slice->{qual};
288            
289 1         5 return $self;
290             }
291              
292             sub _kmer2num {
293 2     2   5 my ($kmer) = @_;
294              
295 2         9 my %baseVal = ('T' => 0, 'C' => 1, 'A' => 2, 'G' => 3, 'U' => 0);
296 2         3 my $klen = length($kmer);
297 2         4 my $num = 0;
298              
299 2         7 for my $i (0..($klen - 1)) {
300 6 50       14 if (exists $baseVal{substr($kmer, $i, 1)}) {
301 6         16 my $p = 4 ** ($klen - 1 - $i);
302 6         13 $num += $p * $baseVal{substr($kmer, $i, 1)};
303             } else {
304 0         0 $num = -1;
305 0         0 last;
306             }
307             }
308              
309 2         6 return $num;
310             }
311              
312              
313             sub translate {
314 1     1 1 3 my ($self, $code_number) = @_;
315              
316 1         6 my $record = $self->copy();
317             # Default genetic code if not specified
318 1   50     3 $code_number //= 11;
319            
320 1         9 my @code_map = (
321             "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", #1
322             "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG",
323             "FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
324             "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
325             "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG",
326             "FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
327             "",
328             "",
329             "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
330             "FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
331             "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", #11 Bact, Ar and Plant Plast
332             "FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
333             "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG",
334             "FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
335             "FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", #15
336             "FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
337             "", "", "", "",
338             "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
339             "FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
340             "FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
341             "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG",
342             "FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
343             "FFLLSSSSYY**CC*WLLLAPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
344             "FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
345             "FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
346             "FFLLSSSSYYYYCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
347             "FFLLSSSSYYEECC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
348             "FFLLSSSSYYEECCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG"
349             );
350            
351 1         3 my $code = $code_map[$code_number - 1];
352             # Check sequence is nucleotidic or RNA
353 1 50       7 confess "ERROR FASTX::Seq: Sequence must be nucleotidic" if ($self->{seq} !~ /^[ACGTU]+$/i);
354 1         2 my @nucleotides = split //, $self->seq;
355 1         3 my @translated_sequence;
356            
357 1         4 for (my $i = 0; $i < scalar(@nucleotides) - 2; $i += 3) {
358 2         7 my $codon = $nucleotides[$i] . $nucleotides[$i+1] . $nucleotides[$i+2];
359 2         12 my $num = _kmer2num($codon); # You need to define this function
360 2 50       4 if ($num != -1) {
361 2         7 push @translated_sequence, substr($code, $num, 1);
362            
363             } else {
364 0         0 push @translated_sequence, '-';
365            
366             }
367             }
368            
369 1         4 my $translated_seq = join('', @translated_sequence);
370            
371 1         4 $record->seq($translated_seq);
372             # Remove qual if present
373 1         10 $record->qual(undef);
374 1         5 return $record;
375             }
376              
377              
378             sub string {
379             # Update comment
380 0     0 1 0 my ($self, @args) = @_;
381 0 0       0 if (defined $self->{qual}) {
382 0         0 return $self->asfastq(@args);
383             } else {
384 0         0 return $self->asfasta(@args);
385             }
386            
387             }
388              
389              
390             sub as_string {
391             # Update comment
392 0     0 1 0 my ($self) = @_;
393 0         0 return $self->string();
394             }
395              
396              
397              
398             sub asfasta {
399             # Update comment
400 15     15 1 3638 my ($self, $len) = @_;
401            
402 15   50     38 my $name = $self->{name} // "sequence";
403 15 100 66     82 my $comment = (defined $self->{comment} and length($self->{comment}) > 0) ? " " . $self->{comment} : "";
404 15         61 return ">" . $name . $comment . "\n" . _split_string($self->{seq}, $len) . "\n";
405             }
406              
407              
408             sub asfastq {
409             # Update comment
410 25     25 1 1547 my ($self, $user_quality) = @_;
411 25         54 my $quality = $self->{qual};
412              
413 25 100       66 if (defined $user_quality) {
    100          
414             # User requests new quality
415 15 100       32 if (length($user_quality) == 1 ) {
416             # And it's valid!
417 10         24 $quality = $user_quality x length($self->{seq});
418             } else {
419 5         284 say STDERR "[WARNING] FASTX::Seq->as_fastq(): Provide a _char_ as quality, not a value (", $user_quality,"): defaulting to $FASTX::Seq::DEFAULT_QUALITY";
420 5         32 $quality = $FASTX::Seq::DEFAULT_QUALITY x length($self->{seq});
421             }
422             } elsif (not defined $quality) {
423 6         70 $quality = $FASTX::Seq::DEFAULT_QUALITY x length($self->{seq});
424             }
425            
426            
427 25   50     73 my $name = $self->{name} // "sequence";
428 25 100 66     150 my $comment = (defined $self->{comment} and length($self->{comment}) > 0) ? " " . $self->{comment} : "";
429            
430 25         135 return "@" . $name . $comment . "\n" . $self->{seq} . "\n+\n" . $quality . "\n";
431             }
432              
433              
434              
435              
436             sub as_fasta {
437             # Update comment
438 5     5 1 17 my ($self, @args) = @_;
439 5         11 return $self->asfasta(@args);
440             }
441              
442              
443             sub as_fastq {
444             # Update comment
445 10     10 1 5819 my ($self, @args) = @_;
446 10         24 return $self->asfastq(@args);
447             }
448              
449              
450             sub is_fasta {
451             # Return true if record has no quality
452 13     13 1 62 my ($self) = @_;
453 13 100 66     93 return ((defined $self->{qual}) > 0 and length($self->{qual}) == length($self->{seq})) ? 0 : 1;
454             }
455              
456              
457             sub is_fastq {
458             # Update comment
459 7     7 1 26 my ($self) = @_;
460 7 100 66     51 return ((defined $self->{qual}) > 0 and length($self->{qual}) == length($self->{seq})) ? 1 : 0;
461             }
462              
463              
464             sub _split_string {
465 15     15   40 my ($string, $width) = @_;
466 15 100 66     67 if (not defined $width or $width == 0) {
467 10         77 return "$string";
468             }
469              
470 5         10 my $output_string;
471 5         16 for (my $i=0; $i
472 74         97 my $frag = substr($string, $i, $width);
473 74         140 $output_string.=$frag."\n";
474             }
475 5         8 chomp($output_string);
476 5         20 return $output_string;
477             }
478              
479             1;
480              
481             __END__