File Coverage

Bio/DB/GFF/Adaptor/memory.pm
Criterion Covered Total %
statement 304 384 79.1
branch 104 170 61.1
condition 56 121 46.2
subroutine 32 36 88.8
pod 13 17 76.4
total 509 728 69.9


line stmt bran cond sub pod time code
1             package Bio::DB::GFF::Adaptor::memory;
2              
3             =head1 NAME
4              
5             Bio::DB::GFF::Adaptor::memory -- Bio::DB::GFF database adaptor for in-memory databases
6              
7             =head1 SYNOPSIS
8              
9             use Bio::DB::GFF;
10             my $db = Bio::DB::GFF->new(-adaptor=> 'memory',
11             -gff => 'my_features.gff',
12             -fasta => 'my_dna.fa'
13             );
14              
15             or
16              
17             my $db = Bio::DB::GFF->new(-adaptor=>'memory');
18             $db->load_gff_file('my_features.gff');
19             $db->load_fasta_file('my_dna.fa');
20              
21             See L for other methods.
22              
23             =head1 DESCRIPTION
24              
25             This adaptor implements an in-memory version of Bio::DB::GFF. It can be used to
26             store and retrieve SHORT GFF files. It inherits from Bio::DB::GFF.
27              
28             =head1 CONSTRUCTOR
29              
30             Use Bio::DB::GFF-Enew() to construct new instances of this class.
31             Three named arguments are recommended:
32              
33             Argument Description
34              
35             -adaptor Set to "memory" to create an instance of this class.
36             -gff Read the indicated file or directory of .gff file.
37             -fasta Read the indicated file or directory of fasta files.
38             -dir Indicates a directory containing .gff and .fa files
39              
40             If you use the -dir option and the indicated directory is writable by
41             the current process, then this library will create a FASTA file index
42             that greatly diminishes the memory usage of this module.
43              
44             Alternatively you may create an empty in-memory object using just the
45             -adaptor=E'memory' argument and then call the load_gff_file() and
46             load_fasta_file() methods to load GFF and/or sequence
47             information. This is recommended in CGI/mod_perl/fastCGI environments
48             because these methods do not modify STDIN, unlike the constructor.
49              
50             =head1 METHODS
51              
52             See L for inherited methods.
53              
54             =head1 BUGS
55              
56             none ;-)
57              
58             =head1 SEE ALSO
59              
60             L, L
61              
62             =head1 AUTHOR
63              
64             Shuly Avraham Eavraham@cshl.orgE.
65              
66             Copyright (c) 2002 Cold Spring Harbor Laboratory.
67              
68             This library is free software; you can redistribute it and/or modify
69             it under the same terms as Perl itself.
70              
71             =cut
72              
73 3     3   12 use strict;
  3         9  
  3         93  
74             # AUTHOR: Shulamit Avraham
75             # This module needs to be cleaned up and documented
76              
77             # Bio::DB::GFF::Adaptor::memory -- in-memory db adaptor
78             # implements the low level handling of data which stored in memory.
79             # This adaptor implements a specific in memory schema that is compatible with Bio::DB::GFF.
80             # Inherits from Bio::DB::GFF.
81              
82              
83 3     3   12 use Bio::DB::GFF::Util::Rearrange; # for rearrange()
  3         6  
  3         153  
84 3     3   1845 use Bio::DB::GFF::Adaptor::memory::iterator;
  3         3  
  3         81  
85 3     3   18 use File::Basename 'dirname';
  3         6  
  3         207  
86 3     3   1668 use Bio::DB::GFF::Adaptor::memory::feature_serializer qw(@hash2array_map);
  3         9  
  3         462  
87              
88              
89 3     3   15 use constant MAX_SEGMENT => 1_000_000_000; # the largest a segment can get
  3         3  
  3         144  
90              
91 3     3   9 use base qw(Bio::DB::GFF);
  3         3  
  3         9018  
92              
93             sub new {
94 5     5 1 10 my $class = shift ;
95 5         29 my ($file,$fasta,$dbdir,$preferred_groups) = rearrange([
96             [qw(GFF FILE)],
97             'FASTA',
98             [qw(DSN DB DIR DIRECTORY)],
99             'PREFERRED_GROUPS',
100             ],@_);
101              
102             # fill in object
103 5         23 my $self = bless{ data => [] },$class;
104 5 50       17 $self->preferred_groups($preferred_groups) if defined $preferred_groups;
105 5   33     22 $file ||= $dbdir;
106 5   33     29 $fasta ||= $dbdir;
107 5 50       10 $self->load_gff($file) if $file;
108 5 50       19 $self->load_or_store_fasta($fasta) if $fasta;
109 5         10 return $self;
110             }
111              
112             sub load_or_store_fasta {
113 0     0 0 0 my $self = shift;
114 0         0 my $fasta = shift;
115 0 0 0     0 if ((-f $fasta && -w dirname($fasta))
      0        
      0        
116             or
117             (-d $fasta && -w $fasta)) {
118 0         0 require Bio::DB::Fasta;
119 0 0       0 my $dna_db = eval {Bio::DB::Fasta->new($fasta);}
  0         0  
120             or warn "$@\nCan't open sequence file(s). Use -gff instead of -dir if you wish to load features without sequence.\n";
121 0 0       0 $dna_db && $self->dna_db($dna_db);
122             } else {
123 0         0 $self->load_fasta($fasta);
124             }
125             }
126              
127             sub dna_db {
128 120     120 0 77 my $self = shift;
129 120         120 my $d = $self->{dna_db};
130 120 50       174 $self->{dna_db} = shift if @_;
131 120         184 $d;
132             }
133              
134             sub insert_sequence {
135 2620     2620 0 1834 my $self = shift;
136 2620         2492 my($id,$offset,$seq) = @_;
137 2620         7003 $self->{dna}{$id} .= $seq;
138             }
139              
140             # low-level fetch of a DNA substring given its
141             # name, class and the desired range.
142             sub get_dna {
143 120     120 1 104 my $self = shift;
144 120         122 my ($id,$start,$stop,$class) = @_;
145 120 50       172 if (my $dna_db = $self->dna_db) {
146 0         0 return $dna_db->seq($id,$start=>$stop);
147             }
148 120 50       208 return '' unless $self->{dna};
149              
150 120 100 66     226 return $self->{dna}{$id} unless defined $start || defined $stop;
151 115 50       148 $start = 1 if !defined $start;
152              
153 115         102 my $reversed = 0;
154 115 100       179 if ($start > $stop) {
155 55         40 $reversed++;
156 55         71 ($start,$stop) = ($stop,$start);
157             }
158 115         635 my $dna = substr($self->{dna}{$id},$start-1,$stop-$start+1);
159 115 100       167 if ($reversed) {
160 55         110 $dna =~ tr/gatcGATC/ctagCTAG/;
161 55         100 $dna = reverse $dna;
162             }
163              
164 115         426 $dna;
165             }
166              
167             sub setup_load {
168 5     5 1 5 my $self = shift;
169 5         9 $self->{tmp} = {};
170 5         12 $self->{data} = [];
171 5         10 1;
172             }
173              
174             sub finish_load {
175 5     5 1 10 my $self = shift;
176 5         7 my $idx = 0;
177 5         8 foreach my $arrayref (values %{$self->{tmp}}) {
  5         28  
178 92         81 foreach (@$arrayref) {$_->{feature_id} = $idx++; }
  175         159  
179 92         37 push @{$self->{data}},@$arrayref;
  92         106  
180             }
181 5         12 1;
182             }
183              
184             # this method loads the feature as a hash into memory -
185             # keeps an array of features-hashes as an in-memory db
186             sub load_gff_line {
187 175     175 1 138 my $self = shift;
188 175         115 my $feature_hash = shift;
189 175 50 66     504 $feature_hash->{strand} = '' if $feature_hash->{strand} && $feature_hash->{strand} eq '.';
190 175 50 66     326 $feature_hash->{phase} = '' if $feature_hash->{phase} && $feature_hash->{phase} eq '.';
191 175 50       247 $feature_hash->{gclass} = 'Sequence' unless length $feature_hash->{gclass} > 0;
192             # sort by group please
193 175         124 push @{$self->{tmp}{$feature_hash->{gclass},$feature_hash->{gname}}},$feature_hash;
  175         565  
194             }
195              
196             # given sequence name, return (reference,start,stop,strand)
197             sub get_abscoords {
198 193     193 1 150 my $self = shift;
199 193         206 my ($name,$class,$refseq) = @_;
200 193         150 my %refs;
201             my $regexp;
202            
203 193 50       459 if ($name =~ /[*?]/) { # uh oh regexp time
204 0         0 $name = quotemeta($name);
205 0         0 $name =~ s/\\\*/.*/g;
206 0         0 $name =~ s/\\\?/.?/g;
207 0         0 $regexp++;
208             }
209              
210             # Find all features that have the requested name and class.
211             # Sort them by reference point.
212 193         162 for my $feature (@{$self->{data}}) {
  193         355  
213              
214 6560         3861 my $no_match_class_name;
215             my $empty_class_name;
216             my $class_matches = !defined($feature->{gclass}) ||
217             length($feature->{gclass}) == 0 ||
218 6560   66     23514 $feature->{gclass} eq $class;
219              
220 6560 50       6221 if (defined $feature->{gname}) {
221             my $matches = $class_matches
222 6560   66     8190 && ($regexp ? $feature->{gname} =~ /$name/i : lc($feature->{gname}) eq lc($name));
223 6560         4526 $no_match_class_name = !$matches; # to accomodate Shuly's interesting logic
224             }
225              
226             else{
227 0         0 $empty_class_name = 1;
228             }
229              
230 6560 100       6841 if ($no_match_class_name){
231 6170         4224 my $feature_attributes = $feature->{attributes};
232 6170         6914 my $attributes = {Alias => $name};
233 6170 100       6376 if (!$self->_matching_attributes($feature_attributes,$attributes)){
234 6165         7433 next;
235             }
236             }
237              
238 395         229 push @{$refs{$feature->{ref}}},$feature;
  395         735  
239             }
240              
241             # find out how many reference points we recovered
242 193 100       716 if (! %refs) {
243 8         56 $self->error("$name not found in database");
244 8         46 return;
245             }
246              
247             # compute min and max
248 185         251 my ($ref) = keys %refs;
249 185         174 my @found = @{$refs{$ref}};
  185         370  
250 185         160 my ($strand,$start,$stop);
251              
252 0         0 my @found_segments;
253 185         235 foreach my $ref (keys %refs) {
254 185 50 33     375 next if defined($refseq) and lc($ref) ne lc($refseq);
255 185         117 my @found = @{$refs{$ref}};
  185         304  
256 185         108 my ($strand,$start,$stop,$name);
257 185         190 foreach (@found) {
258 395   100     695 $strand ||= $_->{strand};
259 395 50 66     886 $strand = '+' if $strand && $strand eq '.';
260 395 100 66     980 $start = $_->{start} if !defined($start) || $start > $_->{start};
261 395 100 66     842 $stop = $_->{stop} if !defined($stop) || $stop < $_->{stop};
262 395   66     804 $name ||= $_->{gname};
263             }
264 185         559 push @found_segments,[$ref,$class,$start,$stop,$strand,$name];
265              
266             }
267              
268 185         899 return \@found_segments;
269             }
270              
271             sub search_notes {
272 0     0 1 0 my $self = shift;
273 0         0 my ($search_string,$limit) = @_;
274              
275 0         0 $search_string =~ tr/*?//d;
276              
277 0         0 my @results;
278 0         0 my @words = map {quotemeta($_)} $search_string =~ /(\w+)/g;
  0         0  
279 0         0 my $search = join '|',@words;
280              
281 0         0 for my $feature (@{$self->{data}}) {
  0         0  
282 0 0 0     0 next unless defined $feature->{gclass} && defined $feature->{gname}; # ignore NULL objects
283 0 0       0 next unless $feature->{attributes};
284 0         0 my @attributes = @{$feature->{attributes}};
  0         0  
285 0         0 my @values = map {$_->[1]} @attributes;
  0         0  
286 0         0 my $value = "@values";
287 0         0 my $matches = 0;
288 0         0 for my $w (@words) {
289 0         0 my @hits = $value =~ /($w)/ig;
290 0         0 $matches += @hits;
291             }
292 0 0       0 next unless $matches;
293              
294 0         0 my $relevance = 10 * $matches;
295 0         0 my $featname = Bio::DB::GFF::Featname->new($feature->{gclass}=>$feature->{gname});
296 0         0 my $note;
297 0         0 $note = join ' ',map {$_->[1]} grep {$_->[0] eq 'Note'} @{$feature->{attributes}};
  0         0  
  0         0  
  0         0  
298 0         0 $note .= join ' ',grep /$search/,map {$_->[1]} grep {$_->[0] ne 'Note'} @{$feature->{attributes}};
  0         0  
  0         0  
  0         0  
299 0         0 my $type = Bio::DB::GFF::Typename->new($feature->{method},$feature->{source});
300 0         0 push @results,[$featname,$note,$relevance,$type];
301 0 0 0     0 last if defined $limit && @results >= $limit;
302             }
303              
304             #added result filtering so that this method returns the expected results
305             #this section of code used to be in GBrowse's do_keyword_search method
306              
307 0         0 my $match_sub = 'sub {';
308 0         0 foreach (split /\s+/,$search_string) {
309 0         0 $match_sub .= "return unless \$_[0] =~ /\Q$_\E/i; ";
310             }
311 0         0 $match_sub .= "};";
312 0         0 my $match = eval $match_sub;
313              
314 0         0 my @matches = grep { $match->($_->[1]) } @results;
  0         0  
315              
316 0         0 return @matches;
317             }
318              
319             sub _delete_features {
320 20     20   20 my $self = shift;
321 20         63 my @feature_ids = sort {$b<=>$a} @_;
  77         108  
322 20         20 my $removed = 0;
323 20         30 foreach (@feature_ids) {
324 75 50 33     99 next unless $_ >= 0 && $_ < @{$self->{data}};
  75         178  
325 75         56 $removed += defined splice(@{$self->{data}},$_,1);
  75         103  
326             }
327 20         60 $removed;
328             }
329              
330             sub _delete {
331 21     21   27 my $self = shift;
332 21         23 my $delete_spec = shift;
333 21   50     52 my $ranges = $delete_spec->{segments} || [];
334 21   50     44 my $types = $delete_spec->{types} || [];
335 21         26 my $force = $delete_spec->{force};
336 21         22 my $range_type = $delete_spec->{range_type};
337              
338 21         22 my $deleted = 0;
339 21 100       60 if (@$ranges) {
    100          
340 10 100       25 my @args = @$types ? (-type=>$types) : ();
341 10         20 push @args,(-range_type => $range_type);
342 10         18 my %ids_to_remove = map {$_->id => 1} map {$_->features(@args)} @$ranges;
  40         67  
  10         20  
343 10         39 $deleted = $self->delete_features(keys %ids_to_remove);
344             } elsif (@$types) {
345 5         20 my %ids_to_remove = map {$_->id => 1} $self->features(-type=>$types);
  20         33  
346 5         15 $deleted = $self->delete_features(keys %ids_to_remove);
347             } else {
348 6 100       66 $self->throw("This operation would delete all feature data and -force not specified")
349             unless $force;
350 3         11 $deleted = @{$self->{data}};
  3         18  
351 3         10 @{$self->{data}} = ();
  3         43  
352             }
353 18         96 $deleted;
354             }
355              
356             # attributes -
357              
358             # Some GFF version 2 files use the groups column to store a series of
359             # attribute/value pairs. In this interpretation of GFF, the first such
360             # pair is treated as the primary group for the feature; subsequent pairs
361             # are treated as attributes. Two attributes have special meaning:
362             # "Note" is for backward compatibility and is used for unstructured text
363             # remarks. "Alias" is considered as a synonym for the feature name.
364             # If no name is provided, then attributes() returns a flattened hash, of
365             # attribute=>value pairs.
366              
367             sub do_attributes{
368 20     20 1 20 my $self = shift;
369 20         25 my ($feature_id,$tag) = @_;
370 20         22 my $attr ;
371              
372             #my $feature = ${$self->{data}}[$feature_id];
373 20         43 my $feature = $self->_basic_features_by_id($feature_id);
374              
375 20         17 my @result;
376 20         20 for my $attr (@{$feature->{attributes}}) {
  20         38  
377 50         70 my ($attr_name,$attr_value) = @$attr ;
378 50 100 100     172 if (defined($tag) && lc($attr_name) eq lc($tag)){push @result,$attr_value;}
  20 100       36  
379 15         28 elsif (!defined($tag)) {push @result,($attr_name,$attr_value);}
380             }
381 20         71 return @result;
382             }
383              
384              
385             #sub get_feature_by_attribute{
386             sub _feature_by_attribute{
387 5     5   8 my $self = shift;
388 5         10 my ($attributes,$callback) = @_;
389 5 50       13 $callback || $self->throw('must provide a callback argument');
390 5         7 my $count = 0;
391 5         8 my $feature_id = -1;
392 5         5 my $feature_group_id = undef;
393              
394 5         5 for my $feature (@{$self->{data}}) {
  5         17  
395              
396 175         100 $feature_id++;
397 175         94 for my $attr (@{$feature->{attributes}}) {
  175         261  
398 65         96 my ($attr_name,$attr_value) = @$attr ;
399             #there could be more than one set of attributes......
400 65         78 foreach (keys %$attributes) {
401 65 100 100     204 if (lc($_) eq lc($attr_name) && lc($attributes->{$_}) eq lc($attr_value)) {
402 10         23 $callback->($self->_hash_to_array($feature));
403 10         25 $count++;
404             }
405             }
406             }
407             }
408              
409             }
410              
411              
412             # This is the low-level method that is called to retrieve GFF lines from
413             # the database. It is responsible for retrieving features that satisfy
414             # range and feature type criteria, and passing the GFF fields to a
415             # callback subroutine.
416              
417             sub get_features{
418 85     85 1 112 my $self = shift;
419 85         92 my $count = 0;
420 85         95 my ($search,$options,$callback) = @_;
421              
422 85         65 my $found_features;
423              
424 85         195 $found_features = $self->_get_features_by_search_options($search,$options);
425              
426             # only true if the sort by group option was specified
427 0         0 @{$found_features} = sort {lc("$a->{gclass}:$a->{gname}") cmp lc("$b->{gclass}:$b->{gname}")}
  0         0  
428 85 50       173 @{$found_features} if $options->{sort_by_group} ;
  0         0  
429              
430 85         142 for my $feature (@{$found_features}) { # only true if the sort by group option was specified
  85         122  
431 470         317 $count++;
432 470         684 $callback->(
433             $self->_hash_to_array($feature)
434             );
435             }
436              
437 85         176 return $count;
438             }
439              
440              
441             # Low level implementation of fetching a named feature.
442             # GFF annotations are named using the group class and name fields.
443             # May return zero, one, or several Bio::DB::GFF::Feature objects.
444              
445             =head2 _feature_by_name
446              
447             Title : _feature_by_name
448             Usage : $db->get_features_by_name($name,$class,$callback)
449             Function: get a list of features by name and class
450             Returns : count of number of features retrieved
451             Args : name of feature, class of feature, and a callback
452             Status : protected
453              
454             This method is used internally. The callback arguments are those used
455             by make_feature().
456              
457             =cut
458              
459             sub _feature_by_name {
460 16     16   19 my $self = shift;
461 16         23 my ($class,$name,$location,$callback) = @_;
462 16 50       32 $callback || $self->throw('must provide a callback argument');
463 16         26 my $count = 0;
464 16         6 my $regexp;
465              
466 16 50       47 if ($name =~ /[*?]/) { # uh oh regexp time
467 0         0 $name = quotemeta($name);
468 0         0 $name =~ s/\\\*/.*/g;
469 0         0 $name =~ s/\\\?/.?/g;
470 0         0 $regexp++;
471             }
472              
473 16         21 for my $feature (@{$self->{data}}) {
  16         42  
474 560 100 33     1864 next unless ($regexp && $feature->{gname} =~ /$name/i) || lc($feature->{gname}) eq lc($name);
      66        
475 74 50 33     375 next if defined($feature->{gclass}) && length($feature->{gclass}) > 0 && $feature->{gclass} ne $class;
      33        
476              
477 74 50       113 if ($location) {
478 0 0       0 next if $location->[0] ne $feature->{ref};
479 0 0 0     0 next if $location->[1] && $location->[1] > $feature->{stop};
480 0 0 0     0 next if $location->[2] && $location->[2] < $feature->{start};
481             }
482 74         48 $count++;
483 74         120 $callback->($self->_hash_to_array($feature),0);
484             }
485 16         26 return $count;
486             }
487              
488             # Low level implementation of fetching a feature by it's id.
489             # The id of the feature as implemented in the in-memory db, is the location of the
490             # feature in the features hash array.
491             sub _feature_by_id{
492 5     5   10 my $self = shift;
493 5         5 my ($ids,$type,$callback) = @_;
494 5 50       15 $callback || $self->throw('must provide a callback argument');
495              
496 5         5 my $feature_group_id = undef;
497              
498 5         8 my $count = 0;
499 5 50       13 if ($type eq 'feature'){
500 5         15 for my $feature_id (@$ids){
501 5         17 my $feature = $self->_basic_features_by_id($feature_id);
502 5 50       18 $callback->($self->_hash_to_array($feature)) if $callback;
503 5         13 $count++;
504             }
505             }
506             }
507              
508             sub _basic_features_by_id{
509 25     25   23 my $self = shift;
510 25         35 my ($ids) = @_;
511            
512 25 50       80 $ids = [$ids] unless ref $ids =~ /ARRAY/;
513              
514 25         23 my @result;
515 25         36 for my $feature_id (@$ids){
516 25         25 push @result, ${$self->{data}}[$feature_id];
  25         68  
517             }
518 25 50       68 return wantarray() ? @result : $result[0];
519             }
520              
521             # This method is similar to get_features(), except that it returns an
522             # iterator across the query.
523             # See Bio::DB::GFF::Adaptor::memory::iterator.
524              
525             sub get_features_iterator {
526 15     15 1 18 my $self = shift;
527 15         18 my ($search,$options,$callback) = @_;
528 15 50       27 $callback || $self->throw('must provide a callback argument');
529              
530 15         33 my $results = $self->_get_features_by_search_options($search,$options);
531 15         39 my $results_array = $self->_convert_feature_hash_to_array($results);
532              
533 15         84 return Bio::DB::GFF::Adaptor::memory::iterator->new($results_array,$callback);
534             }
535              
536              
537             # This method is responsible for fetching the list of feature type names.
538             # The query may be limited to a particular range, in
539             # which case the range is indicated by a landmark sequence name and
540             # class and its subrange, if any. These arguments may be undef if it is
541             # desired to retrieve all feature types.
542              
543             # If the count flag is false, the method returns a simple list of
544             # Bio::DB::GFF::Typename objects. If $count is true, the method returns
545             # a list of $name=>$count pairs, where $count indicates the number of
546             # times this feature occurs in the range.
547              
548             sub get_types {
549 25     25 1 25 my $self = shift;
550 25         37 my ($srcseq,$class,$start,$stop,$want_count,$typelist) = @_;
551              
552 25         20 my(%result,%obj);
553              
554 25         26 for my $feature (@{$self->{data}}) {
  25         58  
555 875         776 my $feature_start = $feature->{start};
556 875         697 my $feature_stop = $feature->{stop};
557 875         679 my $feature_ref = $feature->{ref};
558 875         646 my $feature_class = $feature->{class};
559 875         650 my $feature_method = $feature->{method};
560 875         574 my $feature_source = $feature->{source};
561              
562 875 100       956 if (defined $srcseq){
563 525 100       769 next unless lc($feature_ref) eq lc($srcseq);
564             }
565              
566 605 50       644 if (defined $class){
567 0 0 0     0 next unless defined $feature_class && $feature_class eq $class ;
568             }
569              
570             # the requested range should OVERLAP the retrieved features
571 605 100 66     1192 if (defined $start or defined $stop) {
572 255 50       317 $start = 1 unless defined $start;
573 255 50       274 $stop = MAX_SEGMENT unless defined $stop;
574 255 50 33     677 next unless $feature_stop >= $start && $feature_start <= $stop;
575             }
576              
577 605 50 33     842 if (defined $typelist && @$typelist){
578 0 0       0 next unless $self->_matching_typelist($feature_method,$feature_source,$typelist);
579             }
580              
581 605         850 my $type = Bio::DB::GFF::Typename->new($feature_method,$feature_source);
582 605         684 $result{$type}++;
583 605         618 $obj{$type} = $type;
584              
585             } #end features loop
586              
587 25 100       234 return $want_count ? %result : values %obj;
588             }
589              
590             sub classes {
591 0     0 1 0 my $self = shift;
592 0         0 my %classes;
593 0         0 for my $feature (@{$self->{data}}) {
  0         0  
594 0         0 $classes{$feature->{gclass}}++;
595             }
596 0         0 my @classes = sort keys %classes;
597 0         0 return @classes;
598             }
599              
600             # Internal method that performs a search on the features array,
601             # sequentialy retrieves the features, and performs a check on each feature
602             # according to the search options.
603             sub _get_features_by_search_options{
604 100     100   92 my $count = 0;
605 100         89 my ($self, $search,$options) = @_;
606             my ($rangetype,$refseq,$class,$start,$stop,$types,$sparse,$order_by_group,$attributes) =
607 100         178 (@{$search}{qw(rangetype refseq refclass start stop types)},
608 100         117 @{$options}{qw(sparse sort_by_group ATTRIBUTES)}) ;
  100         156  
609              
610 100         88 my @found_features;
611 100         134 my $data = $self->{data};
612              
613 100         69 my $feature_id = -1 ;
614 100         82 my $feature_group_id = undef;
615              
616 100         85 for my $feature (@{$data}) {
  100         133  
617              
618 3270         1853 $feature_id++;
619              
620 3270         2620 my $feature_start = $feature->{start};
621 3270         2233 my $feature_stop = $feature->{stop};
622 3270         2413 my $feature_ref = $feature->{ref};
623              
624 3270 100       3534 if (defined $refseq){
625 2905 100       3987 next unless lc($feature_ref) eq lc($refseq);
626             }
627              
628 1330 100 66     2942 if (defined $start or defined $stop) {
629 455 50       530 $start = 0 unless defined($start);
630 455 50       490 $stop = MAX_SEGMENT unless defined($stop);
631              
632 455 100       536 if ($rangetype eq 'overlaps') {
    50          
    0          
633 385 100 100     1079 next unless $feature_stop >= $start && $feature_start <= $stop;
634             } elsif ($rangetype eq 'contains') {
635 70 100 66     233 next unless $feature_start >= $start && $feature_stop <= $stop;
636             } elsif ($rangetype eq 'contained_in') {
637 0 0 0     0 next unless $feature_start <= $start && $feature_stop >= $stop;
638             } else {
639 0 0 0     0 next unless $feature_start == $start && $feature_stop == $stop;
640             }
641              
642             }
643              
644 1210         985 my $feature_source = $feature->{source};
645 1210         876 my $feature_method = $feature->{method};
646              
647 1210 100 66     2953 if (defined $types && @$types){
648 375 100       480 next unless $self->_matching_typelist($feature_method,$feature_source,$types);
649             }
650              
651 970         762 my $feature_attributes = $feature->{attributes};
652 970 100       1050 if (defined $attributes){
653 255 100       259 next unless $self->_matching_attributes($feature_attributes,$attributes);
654             }
655              
656             # if we get here, then we have a feature that meets the criteria.
657             # Then we just push onto an array
658             # of found features and continue.
659              
660 740         490 my $found_feature = $feature ;
661 740         496 $found_feature->{feature_id} = $feature_id;
662 740         721 $found_feature->{group_id} = $feature_group_id;
663 740         904 push @found_features,$found_feature;
664             }
665              
666 100         218 return \@found_features;
667             }
668              
669              
670             sub _hash_to_array {
671 829     829   698 my ($self,$feature_hash) = @_;
672 829         668 my @array = @{$feature_hash}{@hash2array_map};
  829         3617  
673 829 100       2681 return wantarray ? @array : \@array;
674             }
675              
676             # this subroutine is needed for convertion of the feature from hash to array in order to
677             # pass it to the callback subroutine
678             sub _convert_feature_hash_to_array{
679 15     15   17 my ($self, $feature_hash_array) = @_;
680 15         22 my @features_array_array = map {scalar $self->_hash_to_array($_)} @$feature_hash_array;
  270         300  
681 15         32 return \@features_array_array;
682             }
683              
684             sub _matching_typelist{
685 375     375   349 my ($self, $feature_method,$feature_source,$typelist) = @_;
686 375         370 foreach (@$typelist) {
687 1090         993 my ($search_method,$search_source) = @$_;
688 1090 100       1683 next if lc($search_method) ne lc($feature_method);
689 135 50 66     254 next if defined($search_source) && lc($search_source) ne lc($feature_source);
690 135         254 return 1;
691             }
692 240         425 return 0;
693             }
694              
695             sub _matching_attributes {
696 6425     6425   4842 my ($self, $feature_attributes,$attributes) = @_ ;
697 6425         8063 foreach (keys %$attributes) {
698 6440 100       6013 return 0 if !_match_all_attr_in_feature($_,$attributes->{$_},$feature_attributes)
699             }
700 30         60 return 1;
701             }
702              
703             sub _match_all_attr_in_feature{
704 6440     6440   4699 my ($attr_name,$attr_value,$feature_attributes) = @_;
705 6440         5641 for my $attr (@$feature_attributes) {
706 2405         2865 my ($feature_attr_name,$feature_attr_value) = @$attr ;
707 2405 100 100     4716 next if ($attr_name ne $feature_attr_name || $attr_value ne $feature_attr_value);
708 45         97 return 1;
709             }
710 6395         14561 return 0;
711             }
712              
713              
714 5     5 1 15 sub do_initialize { 1; }
715 0     0 0   sub get_feature_by_group_id{ 1; }
716              
717             1;
718