File Coverage

lib/App/Sandy/PieceTable.pm
Criterion Covered Total %
statement 113 155 72.9
branch 29 56 51.7
condition 12 21 57.1
subroutine 17 18 94.4
pod 0 5 0.0
total 171 255 67.0


line stmt bran cond sub pod time code
1             package App::Sandy::PieceTable;
2             # ABSTRACT: Implement a piece table data structure class
3              
4 6     6   2708 use App::Sandy::Base 'class';
  6         16  
  6         43  
5              
6             with 'App::Sandy::Role::BSearch';
7              
8             our $VERSION = '0.22'; # VERSION
9              
10             has 'orig' => (
11             is => 'ro',
12             isa => 'ScalarRef',
13             required => 1
14             );
15              
16             has 'len' => (
17             is => 'ro',
18             isa => 'My:IntGt0',
19             lazy_build => 1,
20             builder => '_build_len'
21             );
22              
23             has 'piece_table' => (
24             traits => ['Array'],
25             is => 'ro',
26             isa => 'ArrayRef[My:Piece]',
27             lazy_build => 1,
28             builder => '_build_piece_table',
29             handles => {
30             _get_piece => 'get',
31             _splice_piece => 'splice',
32             _count_pieces => 'count',
33             _all_pieces => 'elements'
34             }
35             );
36              
37             has 'logical_len' => (
38             is => 'rw',
39             isa => 'My:IntGt0',
40             lazy_build => 1,
41             builder => '_build_logical_len'
42             );
43              
44             sub _build_len {
45 188     188   308 my $self = shift;
46 188         4758 my $orig = $self->orig;
47 188         4635 return length $$orig;
48             }
49              
50             sub _build_logical_len {
51 0     0   0 my $self = shift;
52 0         0 return $self->len;
53             }
54              
55             sub _build_piece_table {
56 188     188   347 my $self = shift;
57 188         5071 my $piece = $self->_piece_new($self->orig, 1, 0, $self->len, 0);
58 188         6394 return [$piece];
59             }
60              
61             sub _piece_new {
62 247     247   621 my ($self, $ref, $is_orig, $start, $len, $pos, $annot1, $annot2) = @_;
63              
64 247 50       1773 my $piece = {
65             'ref' => $ref, # reference to sequence
66             'is_orig' => $is_orig, # set 1 if it is the orig sequence
67             'start' => $start, # start position at reference
68             'len' => $len, # length
69             'pos' => $pos, # position at original sequence
70             'offset' => 0, # position at the changed sequence
71             'annot1' => $annot1, # custom annotation - slot 1
72             'annot2' => $annot2 ? [$annot2] : [] # custom annotation - slot 2
73             };
74              
75 247         546 return $piece;
76             }
77              
78             sub insert {
79 16     16 0 2471 my ($self, $ref, $pos, $annot) = @_;
80              
81             # Test if the position is inside the original sequence boundary
82 16 50       493 if ($pos > $self->len) {
83 0         0 croak "Trying to insert outside the original sequence";
84             }
85              
86             # My length
87 16         46 my $len = length $$ref;
88              
89             # Create piece data
90 16         69 my $new_piece = $self->_piece_new($ref, 0, 0, $len, $pos, $annot);
91              
92             # Insert at end position or split
93             # piece found at position 'pos'.
94 16 100       413 my $index = $pos == $self->len
95             ? $self->_count_pieces
96             : $self->_split_piece($pos);
97              
98 16         596 my $piece = $self->_get_piece($index);
99              
100 16 50 66     81 if (defined $piece && @{ $piece->{annot2} } && ($pos == $piece->{pos})) {
  15   33     66  
101 0         0 unshift @{ $new_piece->{annot2} } => @{ $piece->{annot2} };
  0         0  
  0         0  
102 0         0 $piece->{annot2} = [];
103             }
104              
105             # Then insert new_piece
106 16         638 $self->_splice_piece($index, 0, $new_piece);
107             }
108              
109             sub delete {
110 16     16 0 12062 my ($self, $pos, $len, $annot) = @_;
111              
112             # Test if the removed region is inside the original sequence boundary
113 16 50       568 if (($pos + $len) > $self->len) {
114 0         0 croak "Trying to delete a region outside the original sequence";
115             }
116              
117             # SPECIAL CASE: Delete at the very end
118 16 100       409 if (($pos + $len) == $self->len) {
119 1         13 return $self->_delete_at_end($len);
120             }
121              
122             # Split piece at $pos. It will correctly fix the original
123             # piece before the split and insert a new piece afterward.
124             # So I need to catch tha last and fix the start and len fields
125 15         77 my $index = $self->_split_piece($pos);
126 15         695 my $piece = $self->_get_piece($index);
127              
128             # Fix position and len
129 15         45 my $new_start = $pos + $len;
130 15         43 my $new_len = $piece->{len} - $len;
131              
132             # Update!
133 15         44 $piece->{start} = $piece->{pos} = $new_start;
134 15         23 $piece->{len} = $new_len;
135 15         28 push @{ $piece->{annot2} } => $annot;
  15         55  
136              
137             # If the new len is zero, then remove
138             # this piece
139 15 50       82 if ($new_len == 0) {
140 0         0 $self->_splice_piece($index, 1);
141 0         0 my $next_piece = $self->_get_piece($index);
142 0 0 0     0 if (defined $next_piece && @{ $piece->{annot2} }) {
  0         0  
143 0         0 unshift @{ $next_piece->{annot2} } => @{ $piece->{annot2} };
  0         0  
  0         0  
144             }
145             }
146             }
147              
148             sub _delete_at_end {
149 1     1   9 my ($self, $len) = @_;
150              
151             # Just catch the last piece and remove the
152             # last sequence length
153 1         46 my $index = $self->_count_pieces - 1;
154 1         42 my $piece = $self->_get_piece($index);
155              
156 1         12 my $new_len = $piece->{len} - $len;
157              
158             # Update!
159 1         8 $piece->{len} = $new_len;
160              
161             # If the new len is zero, then remove
162             # this piece
163 1 50       14 if ($new_len == 0) {
164 0         0 $self->_splice_piece($index, 1);
165             }
166             }
167              
168             sub change {
169             # A delete and insert operations.
170             # delete from pos until len and insert ref at pos
171 14     14 0 6064 my ($self, $ref, $pos, $len, $annot) = @_;
172              
173             # Test if the changing region is inside the original sequence boundary
174 14 50       500 if (($pos + $len) > $self->len) {
175 0         0 croak "Trying to change a region outside the original sequence";
176             }
177              
178             # My length
179 14         34 my $ref_len = length $$ref;
180              
181             # Create piece data
182 14         61 my $new_piece = $self->_piece_new($ref, 0, 0, $ref_len, $pos, $annot);
183              
184             # SPECIAL CASE: Change at the very end
185 14 100       379 if (($pos + $len) == $self->len) {
186 12         50 return $self->_change_at_end($new_piece, $len);
187             }
188              
189             # Split piece found at position 'pos'.
190             # Update old piece, insert piece and return
191             # index where I can find the piece to remove
192             # from and insert the change
193 2         9 my $index = $self->_split_piece($pos);
194              
195             # Catch the piece from where I will remove
196 2         91 my $piece = $self->_get_piece($index);
197              
198 2 50 33     13 if (@{ $piece->{annot2} } && ($pos == $piece->{pos})) {
  2         20  
199 0         0 unshift @{ $new_piece->{annot2} } => @{ $piece->{annot2} };
  0         0  
  0         0  
200 0         0 $piece->{annot2} = [];
201             }
202              
203             # Fix position and len
204 2         9 my $new_start = $pos + $len;
205 2         14 my $new_len = $piece->{len} - $len;
206              
207             # Update!
208 2         17 $piece->{start} = $piece->{pos} = $new_start;
209 2         7 $piece->{len} = $new_len;
210              
211             # If the new len is zero, then remove
212             # this piece
213 2 50       19 if ($new_len == 0) {
214 0         0 $self->_splice_piece($index, 1);
215             }
216              
217             # Then insert new_piece
218 2         373 $self->_splice_piece($index, 0, $new_piece);
219             }
220              
221             sub _change_at_end {
222 12     12   48 my ($self, $new_piece, $len) = @_;
223              
224             # Just catch the last piece and remove the
225             # last sequence length
226 12         409 my $index = $self->_count_pieces - 1;
227 12         476 my $piece = $self->_get_piece($index);
228              
229 12         47 my $new_len = $piece->{len} - $len;
230 12         25 $piece->{len} = $new_len;
231              
232             # If the new len is zero, then remove
233             # this piece
234 12 100       41 if ($new_len == 0) {
235 11         423 $self->_splice_piece($index--, 1);
236             }
237              
238             # Then insert new_piece
239 12         453 $self->_splice_piece($index + 1, 0, $new_piece);
240             }
241              
242             sub calculate_logical_offset {
243             # Before lookup() it is necessary to calculate
244             # the positions according to the shift caused by
245             # the structural variations.
246 189     189 0 953 my $self = shift;
247              
248 189         324 my $offset_acm = 0;
249              
250             # Insert each piece reference into a tree
251 189         7399 for my $piece ($self->_all_pieces) {
252             # Update piece offset
253 258         507 $piece->{offset} = $offset_acm;
254              
255             # Update offset acumulator
256 258         515 $offset_acm += $piece->{len};
257             }
258              
259             # Update logical offset with the corrected length
260 189         5313 $self->logical_len($offset_acm);
261             }
262              
263             sub lookup {
264             # Run 'calculate_logical_offset' before
265 2161     2161 0 4618 my ($self, $start, $len) = @_;
266              
267             state $comm = sub {
268 2221     2221   4098 my ($pos, $piece) = @_;
269 2221 100       6932 if ($self->_is_pos_inside_range($pos, $piece->{offset}, $piece->{len})) {
    100          
270 2161         6337 return 0;
271             } elsif ($pos > $piece->{offset}) {
272 29         93 return 1;
273             } else {
274 31         90 return -1;
275             }
276 2161         3623 };
277              
278 2161         61714 my $index = $self->with_bsearch($start, $self->piece_table,
279             $self->_count_pieces, $comm);
280              
281 2161 50       5308 if (not defined $index) {
282 0         0 croak "Not found index for range: start = $start, length = $len";
283             }
284              
285 2161         76348 my $piece = $self->_get_piece($index);
286 2161         4033 my @pieces;
287              
288             do {
289 2410         4740 push @pieces => $piece;
290 2410         81653 $piece = $self->_get_piece(++$index);
291 2161   100     3374 } while ($piece && $self->_do_overlap($start, $len, $piece->{offset}, $piece->{len}));
292              
293 2161         6849 return \@pieces;
294             }
295              
296             sub _do_overlap {
297 430     430   914 my ($self, $start1, $len1, $start2, $len2) = @_;
298 430         1163 my ($end1, $end2) = ($start1 + $len1 - 1, $start2 + $len2 - 1);
299 860   66     1771 return $start1 <= $end2 && $start2 <= $end1;
300             }
301              
302             sub _split_piece {
303 32     32   94 my ($self, $pos) = @_;
304              
305             # Catch orig index where pos is inside
306 32         106 my $index = $self->_piece_at($pos);
307              
308             # Get piece which will be updated
309 32         1158 my $old_piece = $self->_get_piece($index);
310              
311             # Split at the beggining of a piece,
312             # or this piece has length 1
313 32 100       116 if ($pos == $old_piece->{start}) {
314 3         8 return $index;
315             }
316              
317             # Calculate piece end
318 29         97 my $old_end = $old_piece->{start} + $old_piece->{len} - 1;
319              
320             # Calculate the corrected length according to the split
321 29         66 my $new_len = $pos - $old_piece->{start};
322              
323             # Update piece
324 29         55 $old_piece->{len} = $new_len;
325              
326             # Create the second part of the split after the break position
327             my $piece = $self->_piece_new($old_piece->{ref}, $old_piece->{is_orig},
328 29         96 $pos, $old_end - $pos + 1, $pos);
329              
330             # Insert second part after updated piece
331 29         1237 $self->_splice_piece(++$index, 0, $piece);
332              
333             # return corrected index that resolves to
334             # the position between the breaked piece
335 29         73 return $index;
336             }
337              
338             sub _is_pos_inside_range {
339 2266     2266   4220 my ($self, $pos, $start, $len) = @_;
340 2266         3832 my $end = $start + $len - 1;
341 2266   100     10667 return $pos >= $start && $pos <= $end;
342             }
343              
344             sub _piece_at {
345 32     32   105 my ($self, $pos) = @_;
346              
347             # State the function to compare at bsearch
348             state $comm = sub {
349 45     45   108 my ($pos, $piece) = @_;
350 45 100       146 if ($self->_is_pos_inside_range($pos, $piece->{pos}, $piece->{len})) {
    50          
351 32         103 return 0;
352             } elsif ($pos > $piece->{pos}) {
353 13         41 return 1;
354             } else {
355 0         0 return -1;
356             }
357 32         92 };
358              
359             # Search the piece index where $pos is inside the boundaries
360 32         941 my $index = $self->with_bsearch($pos, $self->piece_table,
361             $self->_count_pieces, $comm);
362              
363             # Maybe it is undef. I need to take care to not
364             # search to a position that was removed before.
365             # I can avoid it when parsing the snv file
366 32 50       90 if (not defined $index) {
367 0         0 croak "Not found pos = $pos into piece_table. Maybe the region was removed?";
368             }
369              
370             # Catch the piece at index
371 32         1198 my $piece = $self->_get_piece($index);
372              
373             # It needs to catch a orig piece, so if the piece
374             # is an insertion, it will search forward and backward
375             # from the actual index
376 32 50       99 if (not $piece->{is_orig}) {
377 0         0 my $new_index;
378              
379             # Search forward
380 0         0 for (my $i = $index + 1; $i < $self->_count_pieces; $i++) {
381 0         0 my $piece = $self->_get_piece($i);
382 0 0       0 if ($piece->{is_orig}) {
383 0 0       0 if ($self->_is_pos_inside_range($pos, $piece->{pos}, $piece->{len})) {
384 0         0 $new_index = $i;
385             }
386              
387 0         0 last;
388             }
389             }
390              
391 0 0       0 if (not defined $new_index) {
392             # Search backward
393 0         0 for (my $i = $index - 1; $i >= 0; $i--) {
394 0         0 my $piece = $self->_get_piece($i);
395 0 0       0 if ($piece->{is_orig}) {
396 0 0       0 if ($self->_is_pos_inside_range($pos, $piece->{pos}, $piece->{len})) {
397 0         0 $new_index = $i;
398             }
399              
400 0         0 last;
401             }
402             }
403              
404             # Not found at all :(
405 0 0       0 if (not defined $new_index) {
406 0         0 croak "There is no orig position to '$pos'";
407             }
408             }
409              
410 0         0 $index = $new_index;
411             }
412              
413 32         94 return $index;
414             }
415              
416             __END__
417              
418             =pod
419              
420             =encoding UTF-8
421              
422             =head1 NAME
423              
424             App::Sandy::PieceTable - Implement a piece table data structure class
425              
426             =head1 VERSION
427              
428             version 0.22
429              
430             =head1 AUTHORS
431              
432             =over 4
433              
434             =item *
435              
436             Thiago L. A. Miller <tmiller@mochsl.org.br>
437              
438             =item *
439              
440             J. Leonel Buzzo <lbuzzo@mochsl.org.br>
441              
442             =item *
443              
444             Felipe R. C. dos Santos <fsantos@mochsl.org.br>
445              
446             =item *
447              
448             Helena B. Conceição <hconceicao@mochsl.org.br>
449              
450             =item *
451              
452             Gabriela Guardia <gguardia@mochsl.org.br>
453              
454             =item *
455              
456             Fernanda Orpinelli <forpinelli@mochsl.org.br>
457              
458             =item *
459              
460             Pedro A. F. Galante <pgalante@mochsl.org.br>
461              
462             =back
463              
464             =head1 COPYRIGHT AND LICENSE
465              
466             This software is Copyright (c) 2018 by Teaching and Research Institute from Sírio-Libanês Hospital.
467              
468             This is free software, licensed under:
469              
470             The GNU General Public License, Version 3, June 2007
471              
472             =cut