| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package App::SimulateReads::Role::IO; | 
| 2 |  |  |  |  |  |  | # ABSTRACT: Input and output custom wrappers. | 
| 3 |  |  |  |  |  |  |  | 
| 4 | 6 |  |  | 6 |  | 2840 | use App::SimulateReads::Base 'role'; | 
|  | 6 |  |  |  |  | 8 |  | 
|  | 6 |  |  |  |  | 51 |  | 
| 5 | 6 |  |  | 6 |  | 1598 | use PerlIO::gzip; | 
|  | 6 |  |  |  |  | 2759 |  | 
|  | 6 |  |  |  |  | 199 |  | 
| 6 | 6 |  |  | 6 |  | 36 | use Scalar::Util 'looks_like_number'; | 
|  | 6 |  |  |  |  | 12 |  | 
|  | 6 |  |  |  |  | 4256 |  | 
| 7 |  |  |  |  |  |  |  | 
| 8 |  |  |  |  |  |  | our $VERSION = '0.06'; # VERSION | 
| 9 |  |  |  |  |  |  |  | 
| 10 |  |  |  |  |  |  | #===  CLASS METHOD  ============================================================ | 
| 11 |  |  |  |  |  |  | #        CLASS: App::SimulateReads::Role::IO (Role) | 
| 12 |  |  |  |  |  |  | #       METHOD: my_open_r | 
| 13 |  |  |  |  |  |  | #   PARAMETERS: $file File | 
| 14 |  |  |  |  |  |  | #      RETURNS: $fh IO::File | 
| 15 |  |  |  |  |  |  | #  DESCRIPTION: Check whether the file name ends in .gz and open it for reading with the matching layer | 
| 16 |  |  |  |  |  |  | #       THROWS: If open fails, throws an error | 
| 17 |  |  |  |  |  |  | #     COMMENTS: none | 
| 18 |  |  |  |  |  |  | #     SEE ALSO: n/a | 
| 19 |  |  |  |  |  |  | #=============================================================================== | 
| 20 |  |  |  |  |  |  | sub my_open_r { | 
| 21 | 20 |  |  | 20 | 0 | 35 | my ($self, $file) = @_; | 
| 22 |  |  |  |  |  |  |  | 
| 23 | 20 |  |  |  |  | 25 | my $fh; | 
| 24 | 20 | 50 |  |  |  | 65 | my $mode = $file =~ /\.gz$/ ? "<:gzip" : "<"; | 
| 25 |  |  |  |  |  |  |  | 
| 26 | 20 | 50 |  |  |  | 590 | open $fh, $mode => $file | 
| 27 |  |  |  |  |  |  | or croak "Not possible to read $file: $!"; | 
| 28 |  |  |  |  |  |  |  | 
| 29 | 20 |  |  |  |  | 70 | return $fh; | 
| 30 |  |  |  |  |  |  | } ## --- end sub my_open_r | 
| 31 |  |  |  |  |  |  |  | 
| 32 |  |  |  |  |  |  | #===  CLASS METHOD  ============================================================ | 
| 33 |  |  |  |  |  |  | #        CLASS: App::SimulateReads::Role::IO (Role) | 
| 34 |  |  |  |  |  |  | #       METHOD: my_open_w | 
| 35 |  |  |  |  |  |  | #   PARAMETERS: $file Str, $is_gzipped Bool | 
| 36 |  |  |  |  |  |  | #      RETURNS: $fh IO::File | 
| 37 |  |  |  |  |  |  | #  DESCRIPTION: Opens a file for writing, gzip-compressed or not | 
| 38 |  |  |  |  |  |  | #       THROWS: If open fails, throws an error | 
| 39 |  |  |  |  |  |  | #     COMMENTS: none | 
| 40 |  |  |  |  |  |  | #     SEE ALSO: n/a | 
| 41 |  |  |  |  |  |  | #=============================================================================== | 
| 42 |  |  |  |  |  |  | sub my_open_w { | 
| 43 | 11 |  |  | 11 | 0 | 85 | my ($self, $file, $is_gzipped) = @_; | 
| 44 |  |  |  |  |  |  |  | 
| 45 | 11 |  |  |  |  | 32 | my $fh; | 
| 46 |  |  |  |  |  |  | my $mode; | 
| 47 |  |  |  |  |  |  |  | 
| 48 | 11 | 50 |  |  |  | 106 | if ($is_gzipped) { | 
| 49 | 0 |  |  |  |  | 0 | $mode = ">:gzip"; | 
| 50 |  |  |  |  |  |  | } else { | 
| 51 | 11 |  |  |  |  | 64 | $mode = ">"; | 
| 52 |  |  |  |  |  |  | } | 
| 53 |  |  |  |  |  |  |  | 
| 54 | 11 | 50 |  |  |  | 1615 | open $fh, $mode => $file | 
| 55 |  |  |  |  |  |  | or croak "Not possible to create $file: $!"; | 
| 56 |  |  |  |  |  |  |  | 
| 57 | 11 |  |  |  |  | 108 | return $fh; | 
| 58 |  |  |  |  |  |  | } ## --- end sub my_open_w | 
| 59 |  |  |  |  |  |  |  | 
| 60 |  |  |  |  |  |  | #===  CLASS METHOD  ============================================================ | 
| 61 |  |  |  |  |  |  | #        CLASS: App::SimulateReads::Role::IO (Role) | 
| 62 |  |  |  |  |  |  | #       METHOD: index_fasta | 
| 63 |  |  |  |  |  |  | #   PARAMETERS: $fasta My:Fasta | 
| 64 |  |  |  |  |  |  | #      RETURNS: HashRef[Hashref] | 
| 65 |  |  |  |  |  |  | #  DESCRIPTION: Indexes a fasta file: id => (seq, size) | 
| 66 |  |  |  |  |  |  | #       THROWS: If the file cannot be opened, or a sequence line precedes the first header, throws an error | 
| 67 |  |  |  |  |  |  | #     COMMENTS: none | 
| 68 |  |  |  |  |  |  | #     SEE ALSO: n/a | 
| 69 |  |  |  |  |  |  | #=============================================================================== | 
| 70 |  |  |  |  |  |  | sub index_fasta { | 
| 71 | 20 |  |  | 20 | 0 | 55 | my ($self, $fasta) = @_; | 
| 72 | 20 |  |  |  |  | 70 | my $fh = $self->my_open_r($fasta); | 
| 73 |  |  |  |  |  |  |  | 
| 74 |  |  |  |  |  |  | # indexed_fasta = ID => { seq, size } | 
| 75 | 20 |  |  |  |  | 45 | my %indexed_fasta; | 
| 76 |  |  |  |  |  |  | my $id; | 
| 77 | 20 |  |  |  |  | 365 | while (<$fh>) { | 
| 78 | 860 |  |  |  |  | 1015 | chomp; | 
| 79 | 860 | 50 |  |  |  | 1260 | next if /^;/; | 
| 80 | 860 | 100 |  |  |  | 1285 | if (/^>/) { | 
| 81 | 100 |  |  |  |  | 225 | my @fields = split /\|/; | 
| 82 | 100 |  |  |  |  | 200 | $id = (split / / => $fields[0])[0]; | 
| 83 | 100 |  |  |  |  | 245 | $id =~ s/^>//; | 
| 84 | 100 |  |  |  |  | 340 | $id = uc $id; | 
| 85 |  |  |  |  |  |  | } else { | 
| 86 | 760 | 50 |  |  |  | 970 | croak "Error reading fasta file '$fasta': Not defined id" | 
| 87 |  |  |  |  |  |  | unless defined $id; | 
| 88 | 760 |  |  |  |  | 1810 | $indexed_fasta{$id}{seq} .= $_; | 
| 89 |  |  |  |  |  |  | } | 
| 90 |  |  |  |  |  |  | } | 
| 91 |  |  |  |  |  |  |  | 
| 92 | 20 |  |  |  |  | 200 | for (keys %indexed_fasta) { | 
| 93 | 100 |  |  |  |  | 170 | $indexed_fasta{$_}{size} = length $indexed_fasta{$_}{seq}; | 
| 94 |  |  |  |  |  |  | } | 
| 95 |  |  |  |  |  |  |  | 
| 96 |  |  |  |  |  |  | $fh->close | 
| 97 | 20 | 50 |  |  |  | 145 | or croak "Cannot close file $fasta: $!\n"; | 
| 98 |  |  |  |  |  |  |  | 
| 99 | 20 |  |  |  |  | 300 | return \%indexed_fasta; | 
| 100 |  |  |  |  |  |  | } ## --- end sub index_fasta | 
| 101 |  |  |  |  |  |  |  | 
| 102 |  |  |  |  |  |  | #===  CLASS METHOD  ============================================================ | 
| 103 |  |  |  |  |  |  | #        CLASS: App::SimulateReads::Role::IO (Role) | 
| 104 |  |  |  |  |  |  | #       METHOD: index_weight_file | 
| 105 |  |  |  |  |  |  | #   PARAMETERS: $weight_file My:File | 
| 106 |  |  |  |  |  |  | #      RETURNS: $indexed_file Hashref[Num] | 
| 107 |  |  |  |  |  |  | #  DESCRIPTION: It indexes a tab separated file with a seqid and its weight | 
| 108 |  |  |  |  |  |  | #       THROWS: It tries to validate the file; if validation fails, it throws an exception | 
| 109 |  |  |  |  |  |  | #     COMMENTS: none | 
| 110 |  |  |  |  |  |  | #     SEE ALSO: n/a | 
| 111 |  |  |  |  |  |  | #=============================================================================== | 
| 112 |  |  |  |  |  |  | sub index_weight_file { | 
| 113 | 0 |  |  | 0 | 0 |  | my ($self, $weight_file) = @_; | 
| 114 | 0 |  |  |  |  |  | my $fh = $self->my_open_r($weight_file); | 
| 115 | 0 |  |  |  |  |  | my %indexed_file; | 
| 116 | 0 |  |  |  |  |  | my $line = 0; | 
| 117 | 0 |  |  |  |  |  | while (<$fh>) { | 
| 118 | 0 |  |  |  |  |  | $line++; | 
| 119 | 0 |  |  |  |  |  | chomp; | 
| 120 | 0 | 0 |  |  |  |  | next if /^\s*$/; | 
| 121 | 0 |  |  |  |  |  | my @fields = split /\t/; | 
| 122 | 0 | 0 |  |  |  |  | croak "Error parsing '$weight_file': seqid (first column) not found at line $line\n" unless defined $fields[0]; | 
| 123 | 0 | 0 |  |  |  |  | croak "Error parsing '$weight_file': weight (second column) not found at line $line\n" unless defined $fields[1]; | 
| 124 | 0 | 0 |  |  |  |  | croak "Error parsing '$weight_file': weight (second column) does not look like a number at line $line\n" if not looks_like_number($fields[1]); | 
| 125 | 0 | 0 |  |  |  |  | croak "Error parsing '$weight_file': weight (second column) lesser or equal to zero at line $line\n" if $fields[1] <= 0; | 
| 126 | 0 |  |  |  |  |  | $indexed_file{uc $fields[0]} = $fields[1]; | 
| 127 |  |  |  |  |  |  | } | 
| 128 |  |  |  |  |  |  |  | 
| 129 |  |  |  |  |  |  | $fh->close | 
| 130 | 0 | 0 |  |  |  |  | or croak "Cannot close file $weight_file: $!\n"; | 
| 131 |  |  |  |  |  |  |  | 
| 132 | 0 |  |  |  |  |  | return \%indexed_file; | 
| 133 |  |  |  |  |  |  | } ## --- end sub index_weight_file | 
| 134 |  |  |  |  |  |  |  | 
| 135 |  |  |  |  |  |  | __END__ | 
| 136 |  |  |  |  |  |  |  | 
| 137 |  |  |  |  |  |  | =pod | 
| 138 |  |  |  |  |  |  |  | 
| 139 |  |  |  |  |  |  | =encoding UTF-8 | 
| 140 |  |  |  |  |  |  |  | 
| 141 |  |  |  |  |  |  | =head1 NAME | 
| 142 |  |  |  |  |  |  |  | 
| 143 |  |  |  |  |  |  | App::SimulateReads::Role::IO - Input and output custom wrappers. | 
| 144 |  |  |  |  |  |  |  | 
| 145 |  |  |  |  |  |  | =head1 VERSION | 
| 146 |  |  |  |  |  |  |  | 
| 147 |  |  |  |  |  |  | version 0.06 | 
| 148 |  |  |  |  |  |  |  | 
| 149 |  |  |  |  |  |  | =head1 AUTHOR | 
| 150 |  |  |  |  |  |  |  | 
| 151 |  |  |  |  |  |  | Thiago L. A. Miller <tmiller@mochsl.org.br> | 
| 152 |  |  |  |  |  |  |  | 
| 153 |  |  |  |  |  |  | =head1 COPYRIGHT AND LICENSE | 
| 154 |  |  |  |  |  |  |  | 
| 155 |  |  |  |  |  |  | This software is Copyright (c) 2017 by Teaching and Research Institute from Sírio-Libanês Hospital. | 
| 156 |  |  |  |  |  |  |  | 
| 157 |  |  |  |  |  |  | This is free software, licensed under: | 
| 158 |  |  |  |  |  |  |  | 
| 159 |  |  |  |  |  |  | The GNU General Public License, Version 3, June 2007 | 
| 160 |  |  |  |  |  |  |  | 
| 161 |  |  |  |  |  |  | =cut |