| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package FASTX::Seq; | 
| 2 |  |  |  |  |  |  | #ABSTRACT: A class for representing a sequence for FASTX::Reader | 
| 3 |  |  |  |  |  |  |  | 
| 4 | 46 |  |  | 46 |  | 217085 | use 5.012; | 
|  | 46 |  |  |  |  | 202 |  | 
| 5 | 46 |  |  | 46 |  | 239 | use warnings; | 
|  | 46 |  |  |  |  | 87 |  | 
|  | 46 |  |  |  |  | 1400 |  | 
| 6 | 46 |  |  | 46 |  | 252 | use Carp qw(confess); | 
|  | 46 |  |  |  |  | 108 |  | 
|  | 46 |  |  |  |  | 2114 |  | 
| 7 | 46 |  |  | 46 |  | 2345 | use Data::Dumper; | 
|  | 46 |  |  |  |  | 22182 |  | 
|  | 46 |  |  |  |  | 2394 |  | 
| 8 |  |  |  |  |  |  | $Data::Dumper::Sortkeys = 1; | 
| 9 | 46 |  |  | 46 |  | 315 | use File::Basename; | 
|  | 46 |  |  |  |  | 120 |  | 
|  | 46 |  |  |  |  | 147888 |  | 
| 10 |  |  |  |  |  |  |  | 
| 11 |  |  |  |  |  |  | $FASTX::Seq::VERSION           = $FASTX::Reader::VERSION; | 
| 12 |  |  |  |  |  |  | $FASTX::Seq::DEFAULT_QUALITY   = 'I'; | 
| 13 |  |  |  |  |  |  | $FASTX::Seq::DEFAULT_LINE_LEN  = 0; | 
| 14 |  |  |  |  |  |  | $FASTX::Seq::DEFAULT_OFFSET    = 33; | 
| 15 |  |  |  |  |  |  | require Exporter; | 
| 16 |  |  |  |  |  |  | our @ISA = qw(Exporter); | 
| 17 |  |  |  |  |  |  |  | 
| 18 |  |  |  |  |  |  |  | 
| 19 |  |  |  |  |  |  |  | 
| 20 |  |  |  |  |  |  | sub new { | 
| 21 | 76 |  |  | 76 | 1 | 5266 | my $class = shift @_; | 
| 22 | 76 |  |  |  |  | 176 | my ($seq, $name, $comment, $qual, $offset, $line_len, $default_quality); | 
| 23 |  |  |  |  |  |  |  | 
| 24 |  |  |  |  |  |  | # Descriptive instantiation with parameters -param => value | 
| 25 | 76 | 100 |  |  |  | 373 | if (substr($_[0], 0, 1) eq '-') { | 
|  |  | 50 |  |  |  |  |  | 
| 26 | 14 |  |  |  |  | 65 | my %data = @_; | 
| 27 |  |  |  |  |  |  | # Try parsing | 
| 28 | 14 |  |  |  |  | 51 | for my $i (keys %data) { | 
| 29 | 51 | 100 |  |  |  | 285 | if ($i =~ /^-(seq|sequence)/) { | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
|  |  | 50 |  |  |  |  |  | 
| 30 | 13 |  |  |  |  | 31 | $seq = $data{$i}; | 
| 31 |  |  |  |  |  |  | } elsif ($i =~ /^-(name|id)/) { | 
| 32 | 11 |  |  |  |  | 28 | $name = $data{$i}; | 
| 33 |  |  |  |  |  |  | } elsif ($i =~ /^-(comment|desc|description)/) { | 
| 34 | 7 |  |  |  |  | 15 | $comment = $data{$i}; | 
| 35 |  |  |  |  |  |  | } elsif ($i =~ /^-qual(ity)?/) { | 
| 36 | 13 |  |  |  |  | 35 | $qual = $data{$i}; | 
| 37 |  |  |  |  |  |  | } elsif ($i =~ /^-offset/) { | 
| 38 | 4 |  |  |  |  | 12 | $offset = $data{$i}; | 
| 39 |  |  |  |  |  |  | } elsif ($i =~ /^-line_len/) { | 
| 40 | 2 |  |  |  |  | 6 | $line_len = $data{$i}; | 
| 41 |  |  |  |  |  |  | } elsif ($i =~ /^-default_quality/) { | 
| 42 | 0 |  |  |  |  | 0 | $default_quality = $data{$i}; | 
| 43 |  |  |  |  |  |  | } else { | 
| 44 | 1 |  |  |  |  | 263 | confess "ERROR FASTX::Seq: Unknown parameter $i\n"; | 
| 45 |  |  |  |  |  |  | } | 
| 46 |  |  |  |  |  |  | } | 
| 47 |  |  |  |  |  |  | } elsif (not defined $seq) { | 
| 48 |  |  |  |  |  |  | # Positional instantiation | 
| 49 |  |  |  |  |  |  | # check number of arguments | 
| 50 | 62 | 100 | 66 |  |  | 571 | confess "ERROR FASTX::Seq: Wrong number of arguments\n" if (scalar(@_) < 1 || scalar(@_) > 4); | 
| 51 | 61 |  |  |  |  | 179 | ($seq, $name, $comment, $qual) = @_; | 
| 52 |  |  |  |  |  |  | } | 
| 53 |  |  |  |  |  |  |  | 
| 54 |  |  |  |  |  |  | # Required NOT empty | 
| 55 | 74 | 50 |  |  |  | 187 | if (not defined $seq) { | 
| 56 | 0 |  |  |  |  | 0 | confess "ERROR FASTX::Seq: Sequence missing, record cannot be created\n"; | 
| 57 |  |  |  |  |  |  | } | 
| 58 |  |  |  |  |  |  |  | 
| 59 |  |  |  |  |  |  | # If quality is provided, must match sequence length | 
| 60 | 74 | 100 | 66 |  |  | 482 | if ( defined $seq && defined $qual | 
|  |  |  | 100 |  |  |  |  | 
| 61 |  |  |  |  |  |  | && (length($seq) != length($qual))) { | 
| 62 | 2 |  |  |  |  | 404 | confess "Sequence/quality length mismatch"; | 
| 63 |  |  |  |  |  |  | } | 
| 64 |  |  |  |  |  |  |  | 
| 65 | 72 |  |  |  |  | 230 | my $self = bless {}, $class; | 
| 66 |  |  |  |  |  |  |  | 
| 67 |  |  |  |  |  |  |  | 
| 68 | 72 |  | 100 |  |  | 341 | $self->{name} = $name   // undef; | 
| 69 | 72 |  |  |  |  | 154 | $self->{seq}  = $seq; | 
| 70 | 72 |  | 100 |  |  | 243 | $self->{comment} = $comment // undef; | 
| 71 | 72 |  | 100 |  |  | 273 | $self->{qual} = $qual // undef; | 
| 72 |  |  |  |  |  |  |  | 
| 73 |  |  |  |  |  |  | # Store defaults | 
| 74 | 72 |  | 33 |  |  | 408 | $self->{default_quality} = $default_quality // $FASTX::Seq::DEFAULT_QUALITY; | 
| 75 | 72 |  | 66 |  |  | 257 | $self->{line_len} = $line_len // $FASTX::Seq::DEFAULT_LINE_LEN; | 
| 76 | 72 |  | 66 |  |  | 247 | $self->{offset} = $offset // $FASTX::Seq::DEFAULT_OFFSET; | 
| 77 |  |  |  |  |  |  |  | 
| 78 | 72 |  |  |  |  | 316 | return $self; | 
| 79 |  |  |  |  |  |  |  | 
| 80 |  |  |  |  |  |  | } | 
| 81 |  |  |  |  |  |  |  | 
| 82 |  |  |  |  |  |  |  | 
| 83 |  |  |  |  |  |  | sub copy { | 
| 84 | 6 |  |  | 6 | 1 | 26 | my ($self) = @_; | 
| 85 | 6 |  |  |  |  | 19 | my $copy = __PACKAGE__->new($self->seq, $self->name, $self->comment, $self->qual); | 
| 86 | 6 |  |  |  |  | 23 | $copy->{default_quality} = $self->default_quality; | 
| 87 | 6 |  |  |  |  | 17 | $copy->line_len = $self->line_len; | 
| 88 | 6 |  |  |  |  | 20 | $copy->offset = $self->offset; | 
| 89 | 6 |  |  |  |  | 23 | return $copy; | 
| 90 |  |  |  |  |  |  | } | 
| 91 |  |  |  |  |  |  |  | 
| 92 |  |  |  |  |  |  |  | 
| 93 |  |  |  |  |  |  | sub seq : lvalue { | 
| 94 |  |  |  |  |  |  | # Update sequence | 
| 95 | 77 |  |  | 77 | 1 | 4429 | my ($self, $new_val) = @_; | 
| 96 | 77 | 100 |  |  |  | 168 | $self->{seq} = $new_val if (defined $new_val); | 
| 97 | 77 |  |  |  |  | 352 | return $self->{seq}; | 
| 98 |  |  |  |  |  |  | } | 
| 99 |  |  |  |  |  |  |  | 
| 100 |  |  |  |  |  |  |  | 
| 101 |  |  |  |  |  |  |  | 
| 102 |  |  |  |  |  |  | sub name : lvalue { | 
| 103 |  |  |  |  |  |  | # Update name | 
| 104 | 10 |  |  | 10 | 1 | 24 | my ($self, $new_val) = @_; | 
| 105 | 10 | 50 |  |  |  | 25 | $self->{seq} = $new_val if (defined $new_val); | 
| 106 | 10 |  |  |  |  | 38 | return $self->{name}; | 
| 107 |  |  |  |  |  |  | } | 
| 108 |  |  |  |  |  |  |  | 
| 109 |  |  |  |  |  |  |  | 
| 110 |  |  |  |  |  |  |  | 
| 111 |  |  |  |  |  |  | sub qual : lvalue { | 
| 112 |  |  |  |  |  |  | # Update quality | 
| 113 | 48 |  |  | 48 | 1 | 737 | my ($self, $new_val) = @_; | 
| 114 | 48 | 50 |  |  |  | 106 | $self->{qual} = $new_val if (defined $new_val); | 
| 115 | 48 |  |  |  |  | 205 | return $self->{qual}; | 
| 116 |  |  |  |  |  |  | } | 
| 117 |  |  |  |  |  |  |  | 
| 118 |  |  |  |  |  |  |  | 
| 119 |  |  |  |  |  |  | sub comment : lvalue { | 
| 120 |  |  |  |  |  |  | # Update comment | 
| 121 | 7 |  |  | 7 | 1 | 17 | my ($self, $new_val) = @_; | 
| 122 | 7 | 50 |  |  |  | 16 | $self->{comment} = $new_val if (defined $new_val); | 
| 123 | 7 |  |  |  |  | 24 | return $self->{comment}; | 
| 124 |  |  |  |  |  |  | } | 
| 125 |  |  |  |  |  |  |  | 
| 126 |  |  |  |  |  |  |  | 
| 127 |  |  |  |  |  |  | sub offset : lvalue { | 
| 128 |  |  |  |  |  |  | # Update offset | 
| 129 | 21 |  |  | 21 | 1 | 306 | my ($self, $new_val) = @_; | 
| 130 | 21 | 50 | 66 |  |  | 65 | confess "ERROR FASTX::Seq: offset must be a positive integer" if (defined $new_val && $new_val !~ /^\d+$/); | 
| 131 | 21 | 100 |  |  |  | 47 | $self->{offset} = $new_val if (defined $new_val); | 
| 132 | 21 |  |  |  |  | 60 | return $self->{offset}; | 
| 133 |  |  |  |  |  |  | } | 
| 134 |  |  |  |  |  |  |  | 
| 135 |  |  |  |  |  |  |  | 
| 136 |  |  |  |  |  |  | sub line_len : lvalue { | 
| 137 |  |  |  |  |  |  | # Update line_len | 
| 138 | 12 |  |  | 12 | 1 | 37 | my ($self, $new_val) = @_; | 
| 139 | 12 | 50 | 33 |  |  | 47 | confess "ERROR FASTX::Seq: line_len must be a positive integer" if (defined $new_val && $new_val !~ /^\d+$/); | 
| 140 | 12 | 50 |  |  |  | 29 | $self->{line_len} = $new_val if (defined $new_val); | 
| 141 | 12 |  |  |  |  | 47 | return $self->{line_len}; | 
| 142 |  |  |  |  |  |  | } | 
| 143 |  |  |  |  |  |  |  | 
| 144 |  |  |  |  |  |  |  | 
| 145 |  |  |  |  |  |  | sub default_quality : lvalue { | 
| 146 |  |  |  |  |  |  | # Update default_quality | 
| 147 | 6 |  |  | 6 | 1 | 17 | my ($self, $new_val) = @_; | 
| 148 | 6 | 50 | 33 |  |  | 20 | confess "ERROR FASTX::Seq: default_quality must be a single character" if (defined $new_val && length($new_val) != 1); | 
| 149 | 6 | 50 |  |  |  | 13 | $self->{default_quality} = $new_val if (defined $new_val); | 
| 150 | 6 |  |  |  |  | 16 | return $self->{default_quality}; | 
| 151 |  |  |  |  |  |  | } | 
| 152 |  |  |  |  |  |  |  | 
| 153 |  |  |  |  |  |  |  | 
| 154 |  |  |  |  |  |  | sub len { | 
| 155 |  |  |  |  |  |  | # Update comment | 
| 156 | 23 |  |  | 23 | 1 | 5095 | my ($self) = @_; | 
| 157 | 23 |  |  |  |  | 101 | return length($self->{seq}); | 
| 158 |  |  |  |  |  |  | } | 
| 159 |  |  |  |  |  |  |  | 
| 160 |  |  |  |  |  |  |  | 
| 161 |  |  |  |  |  |  |  | 
| 162 |  |  |  |  |  |  |  | 
| 163 |  |  |  |  |  |  |  | 
| 164 |  |  |  |  |  |  | sub rev { | 
| 165 |  |  |  |  |  |  | # Update comment | 
| 166 | 2 |  |  | 2 | 1 | 4 | my ($self) = @_; | 
| 167 | 2 |  |  |  |  | 5 | $self->{seq} = reverse($self->{seq}); | 
| 168 | 2 | 50 |  |  |  | 8 | $self->{qual} = reverse($self->{qual}) if (defined reverse($self->{qual})); | 
| 169 | 2 |  |  |  |  | 4 | return $self; | 
| 170 |  |  |  |  |  |  | } | 
| 171 |  |  |  |  |  |  |  | 
| 172 |  |  |  |  |  |  |  | 
| 173 |  |  |  |  |  |  |  | 
| 174 |  |  |  |  |  |  | sub rc { | 
| 175 |  |  |  |  |  |  | # Update comment | 
| 176 | 1 |  |  | 1 | 1 | 2 | my ($self) = @_; | 
| 177 | 1 |  |  |  |  | 3 | $self->rev(); | 
| 178 | 1 | 50 |  |  |  | 5 | if ($self->{seq} =~ /U/i) { | 
| 179 | 0 |  |  |  |  | 0 | $self->{seq} =~ tr/ACGURYSWKMBDHVacguryswkmbdhv/UGCAYRSWMKVHDBugcayrswmkvhdb/; | 
| 180 |  |  |  |  |  |  | } else { | 
| 181 | 1 |  |  |  |  | 2 | $self->{seq} =~ tr/ACGTRYSWKMBDHVacgtryswkmbdhv/TGCAYRSWMKVHDBtgcayrswmkvhdb/; | 
| 182 |  |  |  |  |  |  | } | 
| 183 |  |  |  |  |  |  | #$self->{qual} = reverse($self->{qual}) if (defined reverse($self->{qual})); | 
| 184 | 1 |  |  |  |  | 2 | return $self; | 
| 185 |  |  |  |  |  |  | } | 
| 186 |  |  |  |  |  |  |  | 
| 187 |  |  |  |  |  |  |  | 
| 188 |  |  |  |  |  |  | sub slice { | 
| 189 |  |  |  |  |  |  | # Update comment | 
| 190 | 7 |  |  | 7 | 1 | 32 | my ($self, $from, $len) = @_; | 
| 191 | 7 |  |  |  |  | 10 | my $new_seq; | 
| 192 |  |  |  |  |  |  | my $new_qual; | 
| 193 | 7 | 100 |  |  |  | 19 | if (defined($len)) { | 
| 194 | 4 |  |  |  |  | 18 | $new_seq = substr($self->{seq}, $from, $len); | 
| 195 | 4 | 100 |  |  |  | 13 | $new_qual = defined($self->{qual}) ? substr($self->{qual}, $from, $len) : undef; | 
| 196 |  |  |  |  |  |  | } else { | 
| 197 | 3 |  |  |  |  | 12 | $new_seq = substr($self->{seq}, $from); | 
| 198 | 3 | 100 |  |  |  | 12 | $new_qual = defined($self->{qual}) ? substr($self->{qual}, $from) : undef; | 
| 199 |  |  |  |  |  |  | } | 
| 200 | 7 |  |  |  |  | 23 | return __PACKAGE__->new($new_seq, $self->{name}, $self->{comment}, $new_qual); | 
| 201 |  |  |  |  |  |  | } | 
| 202 |  |  |  |  |  |  |  | 
| 203 |  |  |  |  |  |  |  | 
| 204 |  |  |  |  |  |  | sub char2qual { | 
| 205 | 42 |  |  | 42 | 1 | 1042 | my ($self, $quality_encoded, $offset) = @_; | 
| 206 |  |  |  |  |  |  | # Check quality_encoded is a single character | 
| 207 | 42 | 50 |  |  |  | 88 | confess "Quality encoded character must be a single character" if (length($quality_encoded) != 1); | 
| 208 | 42 | 50 |  |  |  | 69 | $offset = defined $offset ? $offset : $FASTX::Seq::DEFAULT_OFFSET; | 
| 209 | 42 |  |  |  |  | 133 | return unpack("C*", $quality_encoded) - $offset; | 
| 210 |  |  |  |  |  |  | } | 
| 211 |  |  |  |  |  |  |  | 
| 212 |  |  |  |  |  |  |  | 
| 213 |  |  |  |  |  |  | sub qual2char { | 
| 214 | 2 |  |  | 2 | 1 | 15 | my ($self, $quality_integer, $offset) = @_; | 
| 215 | 2 | 50 |  |  |  | 13 | confess "Quality integer must be an integer value" if ($quality_integer !~ /^\d+$/); | 
| 216 | 2 | 50 |  |  |  | 7 | $offset = defined $offset ? $offset : $FASTX::Seq::DEFAULT_OFFSET; | 
| 217 | 2 |  |  |  |  | 11 | return chr($quality_integer + $offset); | 
| 218 |  |  |  |  |  |  | } | 
| 219 |  |  |  |  |  |  |  | 
| 220 |  |  |  |  |  |  |  | 
| 221 |  |  |  |  |  |  |  | 
| 222 |  |  |  |  |  |  | sub qualities { | 
| 223 | 5 |  |  | 5 | 1 | 14 | my ($self) = @_; | 
| 224 | 5 |  |  |  |  | 10 | my @qualities; | 
| 225 | 5 | 50 |  |  |  | 14 | if (defined $self->{qual}) { | 
| 226 | 5 |  |  |  |  | 15 | for (my $i = 0; $i < length($self->{qual}); $i++) { | 
| 227 | 40 |  |  |  |  | 92 | push @qualities, $self->char2qual(substr($self->{qual}, $i, 1), $self->{offset}); | 
| 228 |  |  |  |  |  |  | } | 
| 229 |  |  |  |  |  |  | } | 
| 230 | 5 |  |  |  |  | 22 | return @qualities; | 
| 231 |  |  |  |  |  |  | } | 
| 232 |  |  |  |  |  |  |  | 
| 233 |  |  |  |  |  |  |  | 
| 234 |  |  |  |  |  |  | sub min_qual { | 
| 235 | 1 |  |  | 1 | 1 | 1190 | my ($self) = @_; | 
| 236 | 1 |  |  |  |  | 3 | my @qualities = $self->qualities(); | 
| 237 |  |  |  |  |  |  | # Calculate minimum quality | 
| 238 | 1 |  |  |  |  | 6 | @qualities = sort {$a <=> $b} @qualities; | 
|  | 13 |  |  |  |  | 18 |  | 
| 239 | 1 |  |  |  |  | 3 | return $qualities[0]; | 
| 240 |  |  |  |  |  |  | } | 
| 241 |  |  |  |  |  |  |  | 
| 242 |  |  |  |  |  |  |  | 
| 243 |  |  |  |  |  |  |  | 
| 244 |  |  |  |  |  |  | sub max_qual { | 
| 245 | 1 |  |  | 1 | 1 | 320 | my ($self) = @_; | 
| 246 | 1 |  |  |  |  | 4 | my @qualities = $self->qualities(); | 
| 247 |  |  |  |  |  |  | # Calculate minimum quality | 
| 248 | 1 |  |  |  |  | 6 | @qualities = sort {$b <=> $a} @qualities; | 
|  | 13 |  |  |  |  | 17 |  | 
| 249 | 1 |  |  |  |  | 3 | return $qualities[0]; | 
| 250 |  |  |  |  |  |  | } | 
| 251 |  |  |  |  |  |  |  | 
| 252 |  |  |  |  |  |  |  | 
| 253 |  |  |  |  |  |  | sub trim_after  : lvalue { | 
| 254 | 1 |  |  | 1 | 1 | 293 | my ($self, $qual_value) = @_; | 
| 255 | 1 | 50 |  |  |  | 9 | confess "Quality integer must be an integer value" if ($qual_value !~ /^\d+$/); | 
| 256 |  |  |  |  |  |  | # Detect first base with quality lower or equal than $qual_value | 
| 257 | 1 |  |  |  |  | 3 | my $i = 0; | 
| 258 | 1 |  |  |  |  | 16 | my @qualities = $self->qualities(); | 
| 259 | 1 |  |  |  |  | 5 | for ($i = 0; $i < scalar(@qualities); $i++) { | 
| 260 | 7 | 100 |  |  |  | 15 | if ($qualities[$i] <= $qual_value) { | 
| 261 | 1 |  |  |  |  | 3 | last; | 
| 262 |  |  |  |  |  |  | } | 
| 263 |  |  |  |  |  |  | } | 
| 264 |  |  |  |  |  |  | # Trim sequence and quality | 
| 265 | 1 |  |  |  |  | 7 | my $slice = $self->slice(0, $i); | 
| 266 | 1 |  |  |  |  | 16 | $self->{seq} = $slice->{seq}; | 
| 267 | 1 |  |  |  |  | 5 | $self->{qual} = $slice->{qual}; | 
| 268 | 1 |  |  |  |  | 11 | return $self; | 
| 269 |  |  |  |  |  |  | } | 
| 270 |  |  |  |  |  |  |  | 
| 271 |  |  |  |  |  |  |  | 
| 272 |  |  |  |  |  |  |  | 
| 273 |  |  |  |  |  |  | sub trim_until  : lvalue { | 
| 274 | 1 |  |  | 1 | 1 | 5 | my ($self, $qual_value) = @_; | 
| 275 | 1 | 50 |  |  |  | 9 | confess "Quality integer must be an integer value" if ($qual_value !~ /^\d+$/); | 
| 276 |  |  |  |  |  |  | # Detect first base with quality lower or equal than $qual_value | 
| 277 | 1 |  |  |  |  | 12 | my $i = 0; | 
| 278 | 1 |  |  |  |  | 4 | my @qualities = $self->qualities(); | 
| 279 | 1 |  |  |  |  | 4 | for ($i = 0; $i < scalar(@qualities); $i++) { | 
| 280 | 3 | 100 |  |  |  | 9 | if ($qualities[$i] >= $qual_value) { | 
| 281 | 1 |  |  |  |  | 1 | last; | 
| 282 |  |  |  |  |  |  | } | 
| 283 |  |  |  |  |  |  | } | 
| 284 |  |  |  |  |  |  | # Trim sequence and quality | 
| 285 | 1 |  |  |  |  | 13 | my $slice = $self->slice($i); | 
| 286 | 1 |  |  |  |  | 3 | $self->{seq} = $slice->{seq}; | 
| 287 | 1 |  |  |  |  | 1 | $self->{qual} = $slice->{qual}; | 
| 288 |  |  |  |  |  |  |  | 
| 289 | 1 |  |  |  |  | 5 | return $self; | 
| 290 |  |  |  |  |  |  | } | 
| 291 |  |  |  |  |  |  |  | 
| 292 |  |  |  |  |  |  | sub _kmer2num { | 
| 293 | 2 |  |  | 2 |  | 5 | my ($kmer) = @_; | 
| 294 |  |  |  |  |  |  |  | 
| 295 | 2 |  |  |  |  | 9 | my %baseVal = ('T' => 0, 'C' => 1, 'A' => 2, 'G' => 3, 'U' => 0); | 
| 296 | 2 |  |  |  |  | 3 | my $klen = length($kmer); | 
| 297 | 2 |  |  |  |  | 4 | my $num = 0; | 
| 298 |  |  |  |  |  |  |  | 
| 299 | 2 |  |  |  |  | 7 | for my $i (0..($klen - 1)) { | 
| 300 | 6 | 50 |  |  |  | 14 | if (exists $baseVal{substr($kmer, $i, 1)}) { | 
| 301 | 6 |  |  |  |  | 16 | my $p = 4 ** ($klen - 1 - $i); | 
| 302 | 6 |  |  |  |  | 13 | $num += $p * $baseVal{substr($kmer, $i, 1)}; | 
| 303 |  |  |  |  |  |  | } else { | 
| 304 | 0 |  |  |  |  | 0 | $num = -1; | 
| 305 | 0 |  |  |  |  | 0 | last; | 
| 306 |  |  |  |  |  |  | } | 
| 307 |  |  |  |  |  |  | } | 
| 308 |  |  |  |  |  |  |  | 
| 309 | 2 |  |  |  |  | 6 | return $num; | 
| 310 |  |  |  |  |  |  | } | 
| 311 |  |  |  |  |  |  |  | 
| 312 |  |  |  |  |  |  |  | 
| 313 |  |  |  |  |  |  | sub translate { | 
| 314 | 1 |  |  | 1 | 1 | 3 | my ($self, $code_number) = @_; | 
| 315 |  |  |  |  |  |  |  | 
| 316 | 1 |  |  |  |  | 6 | my $record = $self->copy(); | 
| 317 |  |  |  |  |  |  | # Default genetic code if not specified | 
| 318 | 1 |  | 50 |  |  | 3 | $code_number //= 11; | 
| 319 |  |  |  |  |  |  |  | 
| 320 | 1 |  |  |  |  | 9 | my @code_map = ( | 
| 321 |  |  |  |  |  |  | "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", #1 | 
| 322 |  |  |  |  |  |  | "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG", | 
| 323 |  |  |  |  |  |  | "FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG", | 
| 324 |  |  |  |  |  |  | "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", | 
| 325 |  |  |  |  |  |  | "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG", | 
| 326 |  |  |  |  |  |  | "FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", | 
| 327 |  |  |  |  |  |  | "", | 
| 328 |  |  |  |  |  |  | "", | 
| 329 |  |  |  |  |  |  | "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG", | 
| 330 |  |  |  |  |  |  | "FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", | 
| 331 |  |  |  |  |  |  | "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", #11 Bact, Ar and Plant Plast | 
| 332 |  |  |  |  |  |  | "FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", | 
| 333 |  |  |  |  |  |  | "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG", | 
| 334 |  |  |  |  |  |  | "FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG", | 
| 335 |  |  |  |  |  |  | "FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", #15 | 
| 336 |  |  |  |  |  |  | "FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", | 
| 337 |  |  |  |  |  |  | "", "", "", "", | 
| 338 |  |  |  |  |  |  | "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG", | 
| 339 |  |  |  |  |  |  | "FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", | 
| 340 |  |  |  |  |  |  | "FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", | 
| 341 |  |  |  |  |  |  | "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG", | 
| 342 |  |  |  |  |  |  | "FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", | 
| 343 |  |  |  |  |  |  | "FFLLSSSSYY**CC*WLLLAPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", | 
| 344 |  |  |  |  |  |  | "FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", | 
| 345 |  |  |  |  |  |  | "FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", | 
| 346 |  |  |  |  |  |  | "FFLLSSSSYYYYCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", | 
| 347 |  |  |  |  |  |  | "FFLLSSSSYYEECC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", | 
| 348 |  |  |  |  |  |  | "FFLLSSSSYYEECCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG" | 
| 349 |  |  |  |  |  |  | ); | 
| 350 |  |  |  |  |  |  |  | 
| 351 | 1 |  |  |  |  | 3 | my $code = $code_map[$code_number - 1]; | 
| 352 |  |  |  |  |  |  | # Check sequence is nucleotidic or RNA | 
| 353 | 1 | 50 |  |  |  | 7 | confess "ERROR FASTX::Seq: Sequence must be nucleotidic" if ($self->{seq} !~ /^[ACGTU]+$/i); | 
| 354 | 1 |  |  |  |  | 2 | my @nucleotides = split //, $self->seq; | 
| 355 | 1 |  |  |  |  | 3 | my @translated_sequence; | 
| 356 |  |  |  |  |  |  |  | 
| 357 | 1 |  |  |  |  | 4 | for (my $i = 0; $i < scalar(@nucleotides) - 2; $i += 3) { | 
| 358 | 2 |  |  |  |  | 7 | my $codon = $nucleotides[$i] . $nucleotides[$i+1] . $nucleotides[$i+2]; | 
| 359 | 2 |  |  |  |  | 12 | my $num = _kmer2num($codon);  # You need to define this function | 
| 360 | 2 | 50 |  |  |  | 4 | if ($num != -1) { | 
| 361 | 2 |  |  |  |  | 7 | push @translated_sequence, substr($code, $num, 1); | 
| 362 |  |  |  |  |  |  |  | 
| 363 |  |  |  |  |  |  | } else { | 
| 364 | 0 |  |  |  |  | 0 | push @translated_sequence, '-'; | 
| 365 |  |  |  |  |  |  |  | 
| 366 |  |  |  |  |  |  | } | 
| 367 |  |  |  |  |  |  | } | 
| 368 |  |  |  |  |  |  |  | 
| 369 | 1 |  |  |  |  | 4 | my $translated_seq = join('', @translated_sequence); | 
| 370 |  |  |  |  |  |  |  | 
| 371 | 1 |  |  |  |  | 4 | $record->seq($translated_seq); | 
| 372 |  |  |  |  |  |  | # Remove qual if present | 
| 373 | 1 |  |  |  |  | 10 | $record->qual(undef); | 
| 374 | 1 |  |  |  |  | 5 | return $record; | 
| 375 |  |  |  |  |  |  | } | 
| 376 |  |  |  |  |  |  |  | 
| 377 |  |  |  |  |  |  |  | 
| 378 |  |  |  |  |  |  | sub string { | 
| 379 |  |  |  |  |  |  | # Update comment | 
| 380 | 0 |  |  | 0 | 1 | 0 | my ($self, @args) = @_; | 
| 381 | 0 | 0 |  |  |  | 0 | if (defined $self->{qual}) { | 
| 382 | 0 |  |  |  |  | 0 | return $self->asfastq(@args); | 
| 383 |  |  |  |  |  |  | } else { | 
| 384 | 0 |  |  |  |  | 0 | return $self->asfasta(@args); | 
| 385 |  |  |  |  |  |  | } | 
| 386 |  |  |  |  |  |  |  | 
| 387 |  |  |  |  |  |  | } | 
| 388 |  |  |  |  |  |  |  | 
| 389 |  |  |  |  |  |  |  | 
| 390 |  |  |  |  |  |  | sub as_string { | 
| 391 |  |  |  |  |  |  | # Update comment | 
| 392 | 0 |  |  | 0 | 1 | 0 | my ($self) = @_; | 
| 393 | 0 |  |  |  |  | 0 | return $self->string(); | 
| 394 |  |  |  |  |  |  | } | 
| 395 |  |  |  |  |  |  |  | 
| 396 |  |  |  |  |  |  |  | 
| 397 |  |  |  |  |  |  |  | 
| 398 |  |  |  |  |  |  | sub asfasta { | 
| 399 |  |  |  |  |  |  | # Update comment | 
| 400 | 15 |  |  | 15 | 1 | 3638 | my ($self, $len) = @_; | 
| 401 |  |  |  |  |  |  |  | 
| 402 | 15 |  | 50 |  |  | 38 | my $name  = $self->{name} // "sequence"; | 
| 403 | 15 | 100 | 66 |  |  | 82 | my $comment = (defined $self->{comment} and length($self->{comment}) > 0) ? " " . $self->{comment} : ""; | 
| 404 | 15 |  |  |  |  | 61 | return ">" . $name . $comment . "\n" . _split_string($self->{seq}, $len) . "\n"; | 
| 405 |  |  |  |  |  |  | } | 
| 406 |  |  |  |  |  |  |  | 
| 407 |  |  |  |  |  |  |  | 
| 408 |  |  |  |  |  |  | sub asfastq { | 
| 409 |  |  |  |  |  |  | # Update comment | 
| 410 | 25 |  |  | 25 | 1 | 1547 | my ($self, $user_quality) = @_; | 
| 411 | 25 |  |  |  |  | 54 | my $quality = $self->{qual}; | 
| 412 |  |  |  |  |  |  |  | 
| 413 | 25 | 100 |  |  |  | 66 | if (defined $user_quality) { | 
|  |  | 100 |  |  |  |  |  | 
| 414 |  |  |  |  |  |  | # User requests new quality | 
| 415 | 15 | 100 |  |  |  | 32 | if (length($user_quality) == 1 ) { | 
| 416 |  |  |  |  |  |  | # And it's valid! | 
| 417 | 10 |  |  |  |  | 24 | $quality = $user_quality x length($self->{seq}); | 
| 418 |  |  |  |  |  |  | } else { | 
| 419 | 5 |  |  |  |  | 284 | say STDERR "[WARNING] FASTX::Seq->as_fastq(): Provide a _char_ as quality, not a value (", $user_quality,"): defaulting to $FASTX::Seq::DEFAULT_QUALITY"; | 
| 420 | 5 |  |  |  |  | 32 | $quality = $FASTX::Seq::DEFAULT_QUALITY x length($self->{seq}); | 
| 421 |  |  |  |  |  |  | } | 
| 422 |  |  |  |  |  |  | } elsif (not defined $quality) { | 
| 423 | 6 |  |  |  |  | 70 | $quality = $FASTX::Seq::DEFAULT_QUALITY x length($self->{seq}); | 
| 424 |  |  |  |  |  |  | } | 
| 425 |  |  |  |  |  |  |  | 
| 426 |  |  |  |  |  |  |  | 
| 427 | 25 |  | 50 |  |  | 73 | my $name  = $self->{name} // "sequence"; | 
| 428 | 25 | 100 | 66 |  |  | 150 | my $comment = (defined $self->{comment} and length($self->{comment}) > 0) ? " " . $self->{comment} : ""; | 
| 429 |  |  |  |  |  |  |  | 
| 430 | 25 |  |  |  |  | 135 | return "@" . $name . $comment . "\n" . $self->{seq} . "\n+\n" . $quality  . "\n"; | 
| 431 |  |  |  |  |  |  | } | 
| 432 |  |  |  |  |  |  |  | 
| 433 |  |  |  |  |  |  |  | 
| 434 |  |  |  |  |  |  |  | 
| 435 |  |  |  |  |  |  |  | 
| 436 |  |  |  |  |  |  | sub as_fasta { | 
| 437 |  |  |  |  |  |  | # Update comment | 
| 438 | 5 |  |  | 5 | 1 | 17 | my ($self, @args) = @_; | 
| 439 | 5 |  |  |  |  | 11 | return $self->asfasta(@args); | 
| 440 |  |  |  |  |  |  | } | 
| 441 |  |  |  |  |  |  |  | 
| 442 |  |  |  |  |  |  |  | 
| 443 |  |  |  |  |  |  | sub as_fastq { | 
| 444 |  |  |  |  |  |  | # Update comment | 
| 445 | 10 |  |  | 10 | 1 | 5819 | my ($self, @args) = @_; | 
| 446 | 10 |  |  |  |  | 24 | return $self->asfastq(@args); | 
| 447 |  |  |  |  |  |  | } | 
| 448 |  |  |  |  |  |  |  | 
| 449 |  |  |  |  |  |  |  | 
| 450 |  |  |  |  |  |  | sub is_fasta { | 
| 451 |  |  |  |  |  |  | # Return true if record has no quality | 
| 452 | 13 |  |  | 13 | 1 | 62 | my ($self) = @_; | 
| 453 | 13 | 100 | 66 |  |  | 93 | return ((defined $self->{qual}) > 0 and length($self->{qual}) == length($self->{seq})) ? 0 : 1; | 
| 454 |  |  |  |  |  |  | } | 
| 455 |  |  |  |  |  |  |  | 
| 456 |  |  |  |  |  |  |  | 
| 457 |  |  |  |  |  |  | sub is_fastq { | 
| 458 |  |  |  |  |  |  | # Update comment | 
| 459 | 7 |  |  | 7 | 1 | 26 | my ($self) = @_; | 
| 460 | 7 | 100 | 66 |  |  | 51 | return ((defined $self->{qual}) > 0 and length($self->{qual}) == length($self->{seq})) ? 1 : 0; | 
| 461 |  |  |  |  |  |  | } | 
| 462 |  |  |  |  |  |  |  | 
| 463 |  |  |  |  |  |  |  | 
| 464 |  |  |  |  |  |  | sub _split_string { | 
| 465 | 15 |  |  | 15 |  | 40 | my ($string, $width) = @_; | 
| 466 | 15 | 100 | 66 |  |  | 67 | if (not defined $width or $width == 0) { | 
| 467 | 10 |  |  |  |  | 77 | return "$string"; | 
| 468 |  |  |  |  |  |  | } | 
| 469 |  |  |  |  |  |  |  | 
| 470 | 5 |  |  |  |  | 10 | my $output_string; | 
| 471 | 5 |  |  |  |  | 16 | for (my $i=0; $i | 
| 472 | 74 |  |  |  |  | 97 | my $frag = substr($string, $i, $width); | 
| 473 | 74 |  |  |  |  | 140 | $output_string.=$frag."\n"; | 
| 474 |  |  |  |  |  |  | } | 
| 475 | 5 |  |  |  |  | 8 | chomp($output_string); | 
| 476 | 5 |  |  |  |  | 20 | return $output_string; | 
| 477 |  |  |  |  |  |  | } | 
| 478 |  |  |  |  |  |  |  | 
| 479 |  |  |  |  |  |  | 1; | 
| 480 |  |  |  |  |  |  |  | 
| 481 |  |  |  |  |  |  | __END__ |