File Coverage

lib/App/SimulateReads/Quality/Handle.pm
Criterion Covered Total %
statement 18 174 10.3
branch 0 46 0.0
condition 0 3 0.0
subroutine 6 17 35.2
pod 0 5 0.0
total 24 245 9.8


line stmt bran cond sub pod time code
1             package App::SimulateReads::Quality::Handle;
2             # ABSTRACT: Class to handle database schemas.
3              
4 6     6   37 use App::SimulateReads::Base 'class';
  6         16  
  6         36  
5 6     6   2463 use App::SimulateReads::Quality::Schema;
  6         23  
  6         229  
6 6     6   1649 use Path::Class 'file';
  6         165211  
  6         369  
7 6     6   2011 use IO::Compress::Gzip 'gzip';
  6         137790  
  6         432  
8 6     6   1937 use IO::Uncompress::Gunzip 'gunzip';
  6         62458  
  6         421  
9 6     6   48 use Storable qw/nfreeze thaw/;
  6         13  
  6         11177  
10              
11             with 'App::SimulateReads::Role::IO';
12              
13             our $VERSION = '0.06'; # VERSION
14            
15             #-------------------------------------------------------------------------------
16             # Hardcoded paths for quality_profile
17             #-------------------------------------------------------------------------------
18             my $DB = 'quality_profile.db';
19             my @DB_PATH = (
20             file(__FILE__)->dir->parent->parent->parent->parent->file('share'),
21             file(__FILE__)->dir->parent->parent->parent->file('auto', 'share', 'dist', 'App-SimulateReads')
22             );
23              
24             has 'schema' => (
25             is => 'ro',
26             isa => 'App::SimulateReads::Quality::Schema',
27             builder => '_build_schema',
28             lazy_build => 1,
29             );
30              
31             sub _build_schema {
32 0     0     my $self = shift;
33 0           my $db;
34              
35 0           for my $path (@DB_PATH) {
36 0           my $file = file($path, $DB);
37 0 0         if (-f $file) {
38 0           $db = $file;
39 0           last;
40             }
41             }
42              
43 0 0         croak "$DB not found in @DB_PATH" unless defined $db;
44 0           return App::SimulateReads::Quality::Schema->connect(
45             "dbi:SQLite:$db",
46             "",
47             "",
48             {
49             RaiseError => 1,
50             PrintError => 0,
51             on_connect_do => 'PRAGMA foreign_keys = ON'
52             }
53             );
54             }
55              
56             sub insertdb {
57 0     0 0   my ($self, $file, $sequencing_system, $size, $source, $is_user_provided, $type) = @_;
58 0           my $schema = $self->schema;
59              
60 0           log_msg ":: Checking if there is already a sequencing-system '$sequencing_system' ...";
61 0           my $seq_sys_rs = $schema->resultset('SequencingSystem')->find({ name => $sequencing_system });
62 0 0         if ($seq_sys_rs) {
63 0           log_msg ":: Found '$sequencing_system'";
64 0           log_msg ":: Searching for a quality entry '$sequencing_system:$size' ...";
65 0           my $quality_rs = $seq_sys_rs->search_related('qualities' => { size => $size })->single;
66 0 0         if ($quality_rs) {
67 0           croak "There is already a quality entry for $sequencing_system:$size";
68             }
69 0           log_msg ":: Not found '$sequencing_system:$size'";
70             } else {
71 0           log_msg ":: sequencing-system '$sequencing_system' not found";
72             }
73              
74 0           my ($arr, $deepth);
75              
76 0 0         if ($type !~ /^(fastq|raw)$/) {
77 0           croak "Unknown indexing type '$type': Valids are 'raw' and 'fastq'";
78             }
79              
80 0           log_msg ":: Indexing '$file'. It may take several minutes ...";
81 0           ($arr, $deepth) = $self->_index_quality_type($file, $size, $type);
82              
83 0           log_msg ":: Converting array to bytes ...";
84 0           my $bytes = nfreeze $arr;
85 0           log_msg ":: Compressing bytes ...";
86 0           gzip \$bytes => \my $compressed;
87              
88             # Begin transation
89 0           my $guard = $schema->txn_scope_guard;
90              
91 0 0         unless ($seq_sys_rs) {
92 0           log_msg ":: Creating sequencing-system entry for '$sequencing_system' ...";
93 0           $seq_sys_rs = $schema->resultset('SequencingSystem')->create({ name => $sequencing_system });
94             }
95              
96 0           log_msg ":: Storing quality matrix entry at '$sequencing_system:$size'...";
97 0           my $quality_rs = $seq_sys_rs->create_related( qualities => {
98             source => $source,
99             is_user_provided => $is_user_provided,
100             size => $size,
101             deepth => $deepth,
102             matrix => $compressed
103             });
104              
105             # End transation
106 0           $guard->commit;
107             }
108              
109             sub _index_quality {
110 0     0     my ($self, $quality_ref, $size) = @_;
111 0           my $deepth = scalar @$quality_ref;
112              
113 0           my @arr;
114 0           for (@$quality_ref) {
115 0           my @tmp = split //;
116 0           for (my $i = 0; $i < $size; $i++) {
117 0           push @{ $arr[$i] } => $tmp[$i];
  0            
118             }
119             }
120              
121 0           return (\@arr, $deepth);
122             }
123              
124             sub _index_quality_type {
125             # ALgorithm based in perlfaq:
126             # How do I select a random line from a file?
127             # "The Art of Computer Programming"
128              
129 0     0     my ($self, $file, $size, $type) = @_;
130 0           my $fh = $self->my_open_r($file);
131              
132 0           log_msg ":: Counting number of lines in '$file' ...";
133 0           my $num_lines = $self->_wcl($file);
134 0           log_msg ":: Number of lines: $num_lines";
135              
136 0           my $num_left = 0;
137 0           my $line = 0;
138 0           my $acm = 0;
139              
140 0           my $getter = do {
141 0           given ($type) {
142 0           when ('fastq') {
143 0           log_msg ":: Setting fastq validation and getter";
144 0           $num_left = int($num_lines / 4);
145              
146             sub {
147 0     0     my @stack;
148              
149 0           for (1..4) {
150 0           $line++;
151 0 0         defined(my $entry = <$fh>)
152             or croak "Truncated fastq entry in '$file' at line $line";
153 0           push @stack => $entry;
154             }
155              
156 0           chomp @stack;
157              
158 0 0 0       if ($stack[0] !~ /^\@/ || $stack[2] !~ /^\+/) {
159 0           croak "Fastq entry at '$file' line '", $line - 3, "' not seems to be a valid read";
160             }
161              
162 0 0         if (length $stack[3] != $size) {
163 0           croak "Fastq entry in '$file' at line '$line' do not have length $size";
164             }
165              
166 0           return $stack[3];
167             }
168 0           }
169 0           default {
170 0           log_msg ":: Setting raw validation and getter";
171 0           $num_left = $num_lines;
172              
173             sub {
174 0     0     $line++;
175 0           chomp(my $entry = <$fh>);
176              
177 0 0         if (length $entry != $size) {
178 0           croak "Error parsing '$file': Line $line do not have length $size";
179             }
180              
181 0           return $entry;
182             }
183 0           }
184             }
185             };
186              
187 0           log_msg ":: Calculating the number of entries to pick ...";
188 0 0         my $picks = $num_left < 1000 ? $num_left : 1000;
189 0           my $picks_left = $picks;
190              
191 0           my @quality;
192              
193 0           log_msg ":: Picking $picks entries in '$file' ...";
194 0           while ($picks_left > 0) {
195 0           my $entry = $getter->();
196              
197 0           my $rand = int(rand($num_left));
198 0 0         if ($rand < $picks_left) {
199 0           push @quality => $entry;
200 0           $picks_left--;
201 0 0         if (++$acm % int($picks/10) == 0) {
202 0           log_msg sprintf " ==> %d%% processed\n", ($acm / $picks) * 100;
203             }
204             }
205              
206 0           $num_left--;
207             }
208              
209             $fh->close
210 0 0         or croak "Cannot close file '$file'";
211              
212 0           return $self->_index_quality(\@quality, $size);
213             }
214              
215             sub _wcl {
216 0     0     my ($self, $file) = @_;
217 0           my $fh = $self->my_open_r($file);
218 0           my $num_lines = 0;
219 0           $num_lines++ while <$fh>;
220 0           $fh->close;
221 0           return $num_lines;
222             }
223              
224             sub retrievedb {
225 0     0 0   my ($self, $sequencing_system, $size) = @_;
226 0           my $schema = $self->schema;
227              
228 0           my $seq_sys_rs = $schema->resultset('SequencingSystem')->find({ name => $sequencing_system });
229 0 0         croak "'$sequencing_system' not found into database" unless defined $seq_sys_rs;
230              
231 0           my $quality_rs = $seq_sys_rs->search_related('qualities' => { size => $size })->single;
232 0 0         croak "Not found size '$size' for sequencing system '$sequencing_system'" unless defined $quality_rs;
233              
234 0           my $compressed = $quality_rs->matrix;
235 0           my $deepth = $quality_rs->deepth;
236 0 0         croak "Quality profile not found for '$sequencing_system:$size'" unless defined $compressed;
237              
238 0           gunzip \$compressed => \my $bytes;
239 0           my $matrix = thaw $bytes;
240 0           return ($matrix, $deepth);
241             }
242              
243             sub deletedb {
244 0     0 0   my ($self, $sequencing_system, $size) = @_;
245 0           my $schema = $self->schema;
246              
247 0           log_msg ":: Checking if there is a sequencing-system '$sequencing_system' ...";
248 0           my $seq_sys_rs = $schema->resultset('SequencingSystem')->find({ name => $sequencing_system });
249 0 0         croak "'$sequencing_system' not found into database" unless defined $seq_sys_rs;
250 0           log_msg ":: Found '$sequencing_system'";
251              
252 0           log_msg ":: Searching for a quality entry '$sequencing_system:$size' ...";
253 0           my $quality_rs = $seq_sys_rs->search_related('qualities' => { size => $size })->single;
254 0 0         croak "'$sequencing_system:$size' not found into database" unless defined $quality_rs;
255 0 0         croak "'$sequencing_system:$size' is not a user provided entry. Cannot be deleted" unless $quality_rs->is_user_provided;
256              
257 0           log_msg ":: Found. Removing '$sequencing_system:$size' entry ...";
258              
259             # Begin transation
260 0           my $guard = $schema->txn_scope_guard;
261            
262 0           $quality_rs->delete;
263 0 0         $seq_sys_rs->search_related('qualities' => undef)->single
264             or $seq_sys_rs->delete;
265              
266             # End transation
267 0           $guard->commit;
268             }
269              
270             sub restoredb {
271 0     0 0   my $self = shift;
272 0           my $schema = $self->schema;
273              
274 0           log_msg ":: Searching for user-provided entries ...";
275 0           my $user_provided = $schema->resultset('Quality')->search(
276             { is_user_provided => 1 },
277             { prefetch => ['sequencing_system'] }
278             );
279              
280 0           my $entry = $user_provided->next;
281 0 0         if ($entry) {
282 0           log_msg ":: Found:";
283 0           do {
284 0           log_msg ' ==> ' . $entry->sequencing_system->name . ':' . $entry->size;
285             } while ($entry = $user_provided->next);
286             } else {
287 0           croak "Not found user-provided entries. There is no need to restoring\n";
288             }
289              
290 0           log_msg ":: Removing all user-provided entries ...";
291              
292             # Begin transation
293 0           my $guard = $schema->txn_scope_guard;
294              
295             # Select all vendor qualities
296 0           my $inside_rs = $schema->resultset('Quality')->search(
297             { is_user_provided => 0 }
298             );
299              
300             # Select all sequencing_system that is not vendor-provided
301 0           my $seq_sys_rs = $schema->resultset('SequencingSystem')->search(
302             { 'me.id' => { -not_in => $inside_rs->get_column('sequencing_system_id')->as_query } }
303             );
304              
305             # Remove all sequencing_system that is not vendor-provided
306             # It will trigger delete casacade
307 0           $seq_sys_rs->delete_all;
308              
309             # Select all qualities that is user-provided
310 0           my $quality_rs = $schema->resultset('Quality')->search(
311             { is_user_provided => 1 }
312             );
313              
314             # Remove all user-provided qualities that is inside a vendor-provided
315             # sequencing_system but with a custom user-provided size
316 0           $quality_rs->delete;
317              
318             # End transation
319 0           $guard->commit;
320             }
321              
322             sub make_report {
323 0     0 0   my $self = shift;
324 0           my $schema = $self->schema;
325 0           my %report;
326              
327 0           my $quality_rs = $schema->resultset('Quality')->search(
328             undef,
329             { prefetch => ['sequencing_system'] }
330             );
331              
332 0           while (my $quality = $quality_rs->next) {
333 0 0         my %hash = (
334             size => $quality->size,
335             source => $quality->source,
336             provider => $quality->is_user_provided ? "user" : "vendor"
337             );
338 0           push @{ $report{$quality->sequencing_system->name} } => \%hash;
  0            
339             }
340              
341 0           return \%report;
342             }
343              
344             __END__
345              
346             =pod
347              
348             =encoding UTF-8
349              
350             =head1 NAME
351              
352             App::SimulateReads::Quality::Handle - Class to handle database schemas.
353              
354             =head1 VERSION
355              
356             version 0.06
357              
358             =head1 AUTHOR
359              
360             Thiago L. A. Miller <tmiller@mochsl.org.br>
361              
362             =head1 COPYRIGHT AND LICENSE
363              
364             This software is Copyright (c) 2017 by Teaching and Research Institute from Sírio-Libanês Hospital.
365              
366             This is free software, licensed under:
367              
368             The GNU General Public License, Version 3, June 2007
369              
370             =cut