File Coverage

lib/App/Sandy/PieceTable.pm
Criterion Covered Total %
statement 113 155 72.9
branch 29 56 51.7
condition 12 21 57.1
subroutine 17 18 94.4
pod 0 5 0.0
total 171 255 67.0


line stmt bran cond sub pod time code
1             package App::Sandy::PieceTable;
2             # ABSTRACT: Implement a piece table data structure class
3              
4 6     6   3321 use App::Sandy::Base 'class';
  6         17  
  6         37  
5              
6             with 'App::Sandy::Role::BSearch';
7              
8             our $VERSION = '0.25'; # VERSION
9              
10             has 'orig' => (
11             is => 'ro',
12             isa => 'ScalarRef',
13             required => 1
14             );
15              
16             has 'len' => (
17             is => 'ro',
18             isa => 'My:IntGt0',
19             lazy_build => 1,
20             builder => '_build_len'
21             );
22              
23             has 'piece_table' => (
24             traits => ['Array'],
25             is => 'ro',
26             isa => 'ArrayRef[My:Piece]',
27             lazy_build => 1,
28             builder => '_build_piece_table',
29             handles => {
30             _get_piece => 'get',
31             _splice_piece => 'splice',
32             _count_pieces => 'count',
33             _all_pieces => 'elements'
34             }
35             );
36              
37             has 'logical_len' => (
38             is => 'rw',
39             isa => 'My:IntGt0',
40             lazy_build => 1,
41             builder => '_build_logical_len'
42             );
43              
44             sub _build_len {
45 236     236   371 my $self = shift;
46 236         5705 my $orig = $self->orig;
47 236         5763 return length $$orig;
48             }
49              
50             sub _build_logical_len {
51 0     0   0 my $self = shift;
52 0         0 return $self->len;
53             }
54              
55             sub _build_piece_table {
56 236     236   377 my $self = shift;
57 236         6119 my $piece = $self->_piece_new($self->orig, 1, 0, $self->len, 0);
58 236         7627 return [$piece];
59             }
60              
61             sub _piece_new {
62 371     371   975 my ($self, $ref, $is_orig, $start, $len, $pos, $annot1, $annot2) = @_;
63              
64 371 50       2369 my $piece = {
65             'ref' => $ref, # reference to sequence
66             'is_orig' => $is_orig, # set 1 if it is the orig sequence
67             'start' => $start, # start position at reference
68             'len' => $len, # length
69             'pos' => $pos, # position at original sequence
70             'offset' => 0, # position at the changed sequence
71             'annot1' => $annot1, # custom annotation - slot 1
72             'annot2' => $annot2 ? [$annot2] : [] # custom annotation - slot 2
73             };
74              
75 371         843 return $piece;
76             }
77              
78             sub insert {
79 40     40 0 10150 my ($self, $ref, $pos, $annot) = @_;
80              
81             # Test if the position is inside the original sequence boundary
82 40 50       1180 if ($pos > $self->len) {
83 0         0 croak "Trying to insert outside the original sequence";
84             }
85              
86             # My length
87 40         75 my $len = length $$ref;
88              
89             # Create piece data
90 40         115 my $new_piece = $self->_piece_new($ref, 0, 0, $len, $pos, $annot);
91              
92             # Insert at end position or split
93             # piece found at position 'pos'.
94 40 100       990 my $index = $pos == $self->len
95             ? $self->_count_pieces
96             : $self->_split_piece($pos);
97              
98 40         1530 my $piece = $self->_get_piece($index);
99              
100 40 50 66     110 if (defined $piece && @{ $piece->{annot2} } && ($pos == $piece->{pos})) {
  35   33     130  
101 0         0 unshift @{ $new_piece->{annot2} } => @{ $piece->{annot2} };
  0         0  
  0         0  
102 0         0 $piece->{annot2} = [];
103             }
104              
105             # Then insert new_piece
106 40         1395 $self->_splice_piece($index, 0, $new_piece);
107             }
108              
109             sub delete {
110 40     40 0 27455 my ($self, $pos, $len, $annot) = @_;
111              
112             # Test if the removed region is inside the original sequence boundary
113 40 50       1345 if (($pos + $len) > $self->len) {
114 0         0 croak "Trying to delete a region outside the original sequence";
115             }
116              
117             # SPECIAL CASE: Delete at the very end
118 40 100       985 if (($pos + $len) == $self->len) {
119 5         125 return $self->_delete_at_end($len);
120             }
121              
122             # Split piece at $pos. It will correctly fix the original
123             # piece before the split and insert a new piece afterward.
124             # So I need to catch tha last and fix the start and len fields
125 35         115 my $index = $self->_split_piece($pos);
126 35         1190 my $piece = $self->_get_piece($index);
127              
128             # Fix position and len
129 35         125 my $new_start = $pos + $len;
130 35         80 my $new_len = $piece->{len} - $len;
131              
132             # Update!
133 35         75 $piece->{start} = $piece->{pos} = $new_start;
134 35         45 $piece->{len} = $new_len;
135 35         65 push @{ $piece->{annot2} } => $annot;
  35         85  
136              
137             # If the new len is zero, then remove
138             # this piece
139 35 50       135 if ($new_len == 0) {
140 0         0 $self->_splice_piece($index, 1);
141 0         0 my $next_piece = $self->_get_piece($index);
142 0 0 0     0 if (defined $next_piece && @{ $piece->{annot2} }) {
  0         0  
143 0         0 unshift @{ $next_piece->{annot2} } => @{ $piece->{annot2} };
  0         0  
  0         0  
144             }
145             }
146             }
147              
148             sub _delete_at_end {
149 5     5   20 my ($self, $len) = @_;
150              
151             # Just catch the last piece and remove the
152             # last sequence length
153 5         170 my $index = $self->_count_pieces - 1;
154 5         180 my $piece = $self->_get_piece($index);
155              
156 5         20 my $new_len = $piece->{len} - $len;
157              
158             # Update!
159 5         20 $piece->{len} = $new_len;
160              
161             # If the new len is zero, then remove
162             # this piece
163 5 50       35 if ($new_len == 0) {
164 0         0 $self->_splice_piece($index, 1);
165             }
166             }
167              
168             sub change {
169             # A delete and insert operations.
170             # delete from pos until len and insert ref at pos
171 30     30 0 15310 my ($self, $ref, $pos, $len, $annot) = @_;
172              
173             # Test if the changing region is inside the original sequence boundary
174 30 50       1020 if (($pos + $len) > $self->len) {
175 0         0 croak "Trying to change a region outside the original sequence";
176             }
177              
178             # My length
179 30         70 my $ref_len = length $$ref;
180              
181             # Create piece data
182 30         90 my $new_piece = $self->_piece_new($ref, 0, 0, $ref_len, $pos, $annot);
183              
184             # SPECIAL CASE: Change at the very end
185 30 100       780 if (($pos + $len) == $self->len) {
186 20         100 return $self->_change_at_end($new_piece, $len);
187             }
188              
189             # Split piece found at position 'pos'.
190             # Update old piece, insert piece and return
191             # index where I can find the piece to remove
192             # from and insert the change
193 10         35 my $index = $self->_split_piece($pos);
194              
195             # Catch the piece from where I will remove
196 10         365 my $piece = $self->_get_piece($index);
197              
198 10 50 33     20 if (@{ $piece->{annot2} } && ($pos == $piece->{pos})) {
  10         50  
199 0         0 unshift @{ $new_piece->{annot2} } => @{ $piece->{annot2} };
  0         0  
  0         0  
200 0         0 $piece->{annot2} = [];
201             }
202              
203             # Fix position and len
204 10         25 my $new_start = $pos + $len;
205 10         30 my $new_len = $piece->{len} - $len;
206              
207             # Update!
208 10         25 $piece->{start} = $piece->{pos} = $new_start;
209 10         20 $piece->{len} = $new_len;
210              
211             # If the new len is zero, then remove
212             # this piece
213 10 50       40 if ($new_len == 0) {
214 0         0 $self->_splice_piece($index, 1);
215             }
216              
217             # Then insert new_piece
218 10         355 $self->_splice_piece($index, 0, $new_piece);
219             }
220              
221             sub _change_at_end {
222 20     20   50 my ($self, $new_piece, $len) = @_;
223              
224             # Just catch the last piece and remove the
225             # last sequence length
226 20         690 my $index = $self->_count_pieces - 1;
227 20         660 my $piece = $self->_get_piece($index);
228              
229 20         60 my $new_len = $piece->{len} - $len;
230 20         60 $piece->{len} = $new_len;
231              
232             # If the new len is zero, then remove
233             # this piece
234 20 100       55 if ($new_len == 0) {
235 15         550 $self->_splice_piece($index--, 1);
236             }
237              
238             # Then insert new_piece
239 20         920 $self->_splice_piece($index + 1, 0, $new_piece);
240             }
241              
242             sub calculate_logical_offset {
243             # Before lookup() it is necessary to calculate
244             # the positions according to the shift caused by
245             # the structural variations.
246 201     201 0 1004 my $self = shift;
247              
248 201         349 my $offset_acm = 0;
249              
250             # Insert each piece reference into a tree
251 201         7118 for my $piece ($self->_all_pieces) {
252             # Update piece offset
253 306         489 $piece->{offset} = $offset_acm;
254              
255             # Update offset acumulator
256 306         546 $offset_acm += $piece->{len};
257             }
258              
259             # Update logical offset with the corrected length
260 201         5320 $self->logical_len($offset_acm);
261             }
262              
263             sub lookup {
264             # Run 'calculate_logical_offset' before
265 7799     7799 0 17501 my ($self, $start, $len) = @_;
266              
267             state $comm = sub {
268 7924     7924   13143 my ($pos, $piece) = @_;
269 7924 100       25136 if ($self->_is_pos_inside_range($pos, $piece->{offset}, $piece->{len})) {
    100          
270 7799         19534 return 0;
271             } elsif ($pos > $piece->{offset}) {
272 65         180 return 1;
273             } else {
274 60         180 return -1;
275             }
276 7799         12251 };
277              
278 7799         230334 my $index = $self->with_bsearch($start, $self->piece_table,
279             $self->_count_pieces, $comm);
280              
281 7799 50       17129 if (not defined $index) {
282 0         0 croak "Not found index for range: start = $start, length = $len";
283             }
284              
285 7799         269576 my $piece = $self->_get_piece($index);
286 7799         11702 my @pieces;
287              
288             do {
289 8179         14815 push @pieces => $piece;
290 8179         270764 $piece = $self->_get_piece(++$index);
291 7799   100     11820 } while ($piece && $self->_do_overlap($start, $len, $piece->{offset}, $piece->{len}));
292              
293 7799         22177 return \@pieces;
294             }
295              
296             sub _do_overlap {
297 590     590   1200 my ($self, $start1, $len1, $start2, $len2) = @_;
298 590         1550 my ($end1, $end2) = ($start1 + $len1 - 1, $start2 + $len2 - 1);
299 1180   66     2380 return $start1 <= $end2 && $start2 <= $end1;
300             }
301              
302             sub _split_piece {
303 80     80   175 my ($self, $pos) = @_;
304              
305             # Catch orig index where pos is inside
306 80         195 my $index = $self->_piece_at($pos);
307              
308             # Get piece which will be updated
309 80         2590 my $old_piece = $self->_get_piece($index);
310              
311             # Split at the beggining of a piece,
312             # or this piece has length 1
313 80 100       225 if ($pos == $old_piece->{start}) {
314 15         55 return $index;
315             }
316              
317             # Calculate piece end
318 65         150 my $old_end = $old_piece->{start} + $old_piece->{len} - 1;
319              
320             # Calculate the corrected length according to the split
321 65         110 my $new_len = $pos - $old_piece->{start};
322              
323             # Update piece
324 65         95 $old_piece->{len} = $new_len;
325              
326             # Create the second part of the split after the break position
327             my $piece = $self->_piece_new($old_piece->{ref}, $old_piece->{is_orig},
328 65         180 $pos, $old_end - $pos + 1, $pos);
329              
330             # Insert second part after updated piece
331 65         2615 $self->_splice_piece(++$index, 0, $piece);
332              
333             # return corrected index that resolves to
334             # the position between the breaked piece
335 65         165 return $index;
336             }
337              
338             sub _is_pos_inside_range {
339 8029     8029   14611 my ($self, $pos, $start, $len) = @_;
340 8029         12125 my $end = $start + $len - 1;
341 8029   100     38348 return $pos >= $start && $pos <= $end;
342             }
343              
344             sub _piece_at {
345 80     80   150 my ($self, $pos) = @_;
346              
347             # State the function to compare at bsearch
348             state $comm = sub {
349 105     105   205 my ($pos, $piece) = @_;
350 105 100       370 if ($self->_is_pos_inside_range($pos, $piece->{pos}, $piece->{len})) {
    50          
351 80         225 return 0;
352             } elsif ($pos > $piece->{pos}) {
353 25         80 return 1;
354             } else {
355 0         0 return -1;
356             }
357 80         155 };
358              
359             # Search the piece index where $pos is inside the boundaries
360 80         2175 my $index = $self->with_bsearch($pos, $self->piece_table,
361             $self->_count_pieces, $comm);
362              
363             # Maybe it is undef. I need to take care to not
364             # search to a position that was removed before.
365             # I can avoid it when parsing the snv file
366 80 50       185 if (not defined $index) {
367 0         0 croak "Not found pos = $pos into piece_table. Maybe the region was removed?";
368             }
369              
370             # Catch the piece at index
371 80         2815 my $piece = $self->_get_piece($index);
372              
373             # It needs to catch a orig piece, so if the piece
374             # is an insertion, it will search forward and backward
375             # from the actual index
376 80 50       195 if (not $piece->{is_orig}) {
377 0         0 my $new_index;
378              
379             # Search forward
380 0         0 for (my $i = $index + 1; $i < $self->_count_pieces; $i++) {
381 0         0 my $piece = $self->_get_piece($i);
382 0 0       0 if ($piece->{is_orig}) {
383 0 0       0 if ($self->_is_pos_inside_range($pos, $piece->{pos}, $piece->{len})) {
384 0         0 $new_index = $i;
385             }
386              
387 0         0 last;
388             }
389             }
390              
391 0 0       0 if (not defined $new_index) {
392             # Search backward
393 0         0 for (my $i = $index - 1; $i >= 0; $i--) {
394 0         0 my $piece = $self->_get_piece($i);
395 0 0       0 if ($piece->{is_orig}) {
396 0 0       0 if ($self->_is_pos_inside_range($pos, $piece->{pos}, $piece->{len})) {
397 0         0 $new_index = $i;
398             }
399              
400 0         0 last;
401             }
402             }
403              
404             # Not found at all :(
405 0 0       0 if (not defined $new_index) {
406 0         0 croak "There is no orig position to '$pos'";
407             }
408             }
409              
410 0         0 $index = $new_index;
411             }
412              
413 80         175 return $index;
414             }
415              
416             __END__
417              
418             =pod
419              
420             =encoding UTF-8
421              
422             =head1 NAME
423              
424             App::Sandy::PieceTable - Implement a piece table data structure class
425              
426             =head1 VERSION
427              
428             version 0.25
429              
430             =head1 AUTHORS
431              
432             =over 4
433              
434             =item *
435              
436             Thiago L. A. Miller <tmiller@mochsl.org.br>
437              
438             =item *
439              
440             J. Leonel Buzzo <lbuzzo@mochsl.org.br>
441              
442             =item *
443              
444             Felipe R. C. dos Santos <fsantos@mochsl.org.br>
445              
446             =item *
447              
448             Helena B. Conceição <hconceicao@mochsl.org.br>
449              
450             =item *
451              
452             Rodrigo Barreiro <rbarreiro@mochsl.org.br>
453              
454             =item *
455              
456             Gabriela Guardia <gguardia@mochsl.org.br>
457              
458             =item *
459              
460             Fernanda Orpinelli <forpinelli@mochsl.org.br>
461              
462             =item *
463              
464             Rafael Mercuri <rmercuri@mochsl.org.br>
465              
466             =item *
467              
468             Rodrigo Barreiro <rbarreiro@mochsl.org.br>
469              
470             =item *
471              
472             Pedro A. F. Galante <pgalante@mochsl.org.br>
473              
474             =back
475              
476             =head1 COPYRIGHT AND LICENSE
477              
478             This software is Copyright (c) 2023 by Teaching and Research Institute from Sírio-Libanês Hospital.
479              
480             This is free software, licensed under:
481              
482             The GNU General Public License, Version 3, June 2007
483              
484             =cut