File Coverage

Bio/DB/GFF/Adaptor/memory.pm

Criterion	Covered	Total	%
statement	305	384	79.4
branch	104	170	61.1
condition	57	121	47.1
subroutine	32	36	88.8
pod	13	17	76.4
total	511	728	70.1

line	stmt	bran	cond	sub	pod	time	code
1							package Bio::DB::GFF::Adaptor::memory;
2
3							=head1 NAME
4
5							Bio::DB::GFF::Adaptor::memory -- Bio::DB::GFF database adaptor for in-memory databases
6
7							=head1 SYNOPSIS
8
9							use Bio::DB::GFF;
10							my $db = Bio::DB::GFF->new(-adaptor=> 'memory',
11							-gff => 'my_features.gff',
12							-fasta => 'my_dna.fa'
13							);
14
15							or
16
17							my $db = Bio::DB::GFF->new(-adaptor=>'memory');
18							$db->load_gff_file('my_features.gff');
19							$db->load_fasta_file('my_dna.fa');
20
21							See L for other methods.
22
23							=head1 DESCRIPTION
24
25							This adaptor implements an in-memory version of Bio::DB::GFF. It can be used to
26							store and retrieve SHORT GFF files. It inherits from Bio::DB::GFF.
27
28							=head1 CONSTRUCTOR
29
30							Use Bio::DB::GFF-Enew() to construct new instances of this class.
31							Three named arguments are recommended:
32
33							Argument Description
34
35							-adaptor Set to "memory" to create an instance of this class.
36							-gff Read the indicated file or directory of .gff file.
37							-fasta Read the indicated file or directory of fasta files.
38							-dir Indicates a directory containing .gff and .fa files
39
40							If you use the -dir option and the indicated directory is writable by
41							the current process, then this library will create a FASTA file index
42							that greatly diminishes the memory usage of this module.
43
44							Alternatively you may create an empty in-memory object using just the
45							-adaptor=E'memory' argument and then call the load_gff_file() and
46							load_fasta_file() methods to load GFF and/or sequence
47							information. This is recommended in CGI/mod_perl/fastCGI environments
48							because these methods do not modify STDIN, unlike the constructor.
49
50							=head1 METHODS
51
52							See L for inherited methods.
53
54							=head1 BUGS
55
56							none ;-)
57
58							=head1 SEE ALSO
59
60							L, L
61
62							=head1 AUTHOR
63
64							Shuly Avraham Eavraham@cshl.orgE.
65
66							Copyright (c) 2002 Cold Spring Harbor Laboratory.
67
68							This library is free software; you can redistribute it and/or modify
69							it under the same terms as Perl itself.
70
71							=cut
72
73	3			3		21	use strict;
	3					9
	3					99
74							# AUTHOR: Shulamit Avraham
75							# This module needs to be cleaned up and documented
76
77							# Bio::DB::GFF::Adaptor::memory -- in-memory db adaptor
78							# implements the low level handling of data which stored in memory.
79							# This adaptor implements a specific in memory schema that is compatible with Bio::DB::GFF.
80							# Inherits from Bio::DB::GFF.
81
82
83	3			3		15	use Bio::DB::GFF::Util::Rearrange; # for rearrange()
	3					6
	3					165
84	3			3		1452	use Bio::DB::GFF::Adaptor::memory::iterator;
	3					6
	3					93
85	3			3		15	use File::Basename 'dirname';
	3					6
	3					213
86	3			3		1461	use Bio::DB::GFF::Adaptor::memory::feature_serializer qw(@hash2array_map);
	3					9
	3					369
87
88
89	3			3		15	use constant MAX_SEGMENT => 1_000_000_000; # the largest a segment can get
	3					3
	3					132
90
91	3			3		15	use base qw(Bio::DB::GFF);
	3					3
	3					9474
92
93							sub new {
94	5			5	1	15	my $class = shift ;
95	5					43	my ($file,$fasta,$dbdir,$preferred_groups) = rearrange([
96							[qw(GFF FILE)],
97							'FASTA',
98							[qw(DSN DB DIR DIRECTORY)],
99							'PREFERRED_GROUPS',
100							],@_);
101
102							# fill in object
103	5					27	my $self = bless{ data => [] },$class;
104	5	50				18	$self->preferred_groups($preferred_groups) if defined $preferred_groups;
105	5		33			29	$file \|\|= $dbdir;
106	5		33			28	$fasta \|\|= $dbdir;
107	5	50				12	$self->load_gff($file) if $file;
108	5	50				27	$self->load_or_store_fasta($fasta) if $fasta;
109	5					17	return $self;
110							}
111
112							sub load_or_store_fasta {
113	0			0	0	0	my $self = shift;
114	0					0	my $fasta = shift;
115	0	0	0			0	if ((-f $fasta && -w dirname($fasta))
			0
			0
116							or
117							(-d $fasta && -w $fasta)) {
118	0					0	require Bio::DB::Fasta;
119	0	0				0	my $dna_db = eval {Bio::DB::Fasta->new($fasta);}
	0					0
120							or warn "$@\nCan't open sequence file(s). Use -gff instead of -dir if you wish to load features without sequence.\n";
121	0	0				0	$dna_db && $self->dna_db($dna_db);
122							} else {
123	0					0	$self->load_fasta($fasta);
124							}
125							}
126
127							sub dna_db {
128	120			120	0	152	my $self = shift;
129	120					176	my $d = $self->{dna_db};
130	120	50				275	$self->{dna_db} = shift if @_;
131	120					236	$d;
132							}
133
134							sub insert_sequence {
135	2620			2620	0	3320	my $self = shift;
136	2620					4117	my($id,$offset,$seq) = @_;
137	2620					7867	$self->{dna}{$id} .= $seq;
138							}
139
140							# low-level fetch of a DNA substring given its
141							# name, class and the desired range.
142							sub get_dna {
143	120			120	1	199	my $self = shift;
144	120					204	my ($id,$start,$stop,$class) = @_;
145	120	50				229	if (my $dna_db = $self->dna_db) {
146	0					0	return $dna_db->seq($id,$start=>$stop);
147							}
148	120	50				265	return '' unless $self->{dna};
149
150	120	100	66			291	return $self->{dna}{$id} unless defined $start \|\| defined $stop;
151	115	50				217	$start = 1 if !defined $start;
152
153	115					143	my $reversed = 0;
154	115	100				198	if ($start > $stop) {
155	55					70	$reversed++;
156	55					115	($start,$stop) = ($stop,$start);
157							}
158	115					713	my $dna = substr($self->{dna}{$id},$start-1,$stop-$start+1);
159	115	100				218	if ($reversed) {
160	55					147	$dna =~ tr/gatcGATC/ctagCTAG/;
161	55					104	$dna = reverse $dna;
162							}
163
164	115					462	$dna;
165							}
166
167							sub setup_load {
168	5			5	1	13	my $self = shift;
169	5					15	$self->{tmp} = {};
170	5					16	$self->{data} = [];
171	5					16	1;
172							}
173
174							sub finish_load {
175	5			5	1	10	my $self = shift;
176	5					10	my $idx = 0;
177	5					5	foreach my $arrayref (values %{$self->{tmp}}) {
	5					28
178	92					115	foreach (@$arrayref) {$_->{feature_id} = $idx++; }
	175					218
179	92					94	push @{$self->{data}},@$arrayref;
	92					147
180							}
181	5					10	1;
182							}
183
184							# this method loads the feature as a hash into memory -
185							# keeps an array of features-hashes as an in-memory db
186							sub load_gff_line {
187	175			175	1	261	my $self = shift;
188	175					209	my $feature_hash = shift;
189	175	50	66			530	$feature_hash->{strand} = '' if $feature_hash->{strand} && $feature_hash->{strand} eq '.';
190	175	50	66			349	$feature_hash->{phase} = '' if $feature_hash->{phase} && $feature_hash->{phase} eq '.';
191	175	50				337	$feature_hash->{gclass} = 'Sequence' unless length $feature_hash->{gclass} > 0;
192							# sort by group please
193	175					204	push @{$self->{tmp}{$feature_hash->{gclass},$feature_hash->{gname}}},$feature_hash;
	175					675
194							}
195
196							# given sequence name, return (reference,start,stop,strand)
197							sub get_abscoords {
198	193			193	1	250	my $self = shift;
199	193					330	my ($name,$class,$refseq) = @_;
200	193					250	my %refs;
201							my $regexp;
202
203	193	50				530	if ($name =~ /[*?]/) { # uh oh regexp time
204	0					0	$name = quotemeta($name);
205	0					0	$name =~ s/\\\/./g;
206	0					0	$name =~ s/\\\?/.?/g;
207	0					0	$regexp++;
208							}
209
210							# Find all features that have the requested name and class.
211							# Sort them by reference point.
212	193					257	for my $feature (@{$self->{data}}) {
	193					410
213
214	6560					7467	my $no_match_class_name;
215							my $empty_class_name;
216							my $class_matches = !defined($feature->{gclass}) \|\|
217							length($feature->{gclass}) == 0 \|\|
218	6560		66			20674	$feature->{gclass} eq $class;
219
220	6560	50				9613	if (defined $feature->{gname}) {
221							my $matches = $class_matches
222	6560		100			10967	&& ($regexp ? $feature->{gname} =~ /$name/i : lc($feature->{gname}) eq lc($name));
223	6560					7710	$no_match_class_name = !$matches; # to accomodate Shuly's interesting logic
224							}
225
226							else{
227	0					0	$empty_class_name = 1;
228							}
229
230	6560	100				8771	if ($no_match_class_name){
231	6170					6974	my $feature_attributes = $feature->{attributes};
232	6170					9501	my $attributes = {Alias => $name};
233	6170	100				8782	if (!$self->_matching_attributes($feature_attributes,$attributes)){
234	6165					11113	next;
235							}
236							}
237
238	395					414	push @{$refs{$feature->{ref}}},$feature;
	395					908
239							}
240
241							# find out how many reference points we recovered
242	193	100				333	if (! %refs) {
243	8					54	$self->error("$name not found in database");
244	8					43	return;
245							}
246
247							# compute min and max
248	185					322	my ($ref) = keys %refs;
249	185					216	my @found = @{$refs{$ref}};
	185					344
250	185					378	my ($strand,$start,$stop);
251
252	185					0	my @found_segments;
253	185					260	foreach my $ref (keys %refs) {
254	185	50	33			356	next if defined($refseq) and lc($ref) ne lc($refseq);
255	185					252	my @found = @{$refs{$ref}};
	185					308
256	185					249	my ($strand,$start,$stop,$name);
257	185					260	foreach (@found) {
258	395		100			929	$strand \|\|= $_->{strand};
259	395	50	66			820	$strand = '+' if $strand && $strand eq '.';
260	395	100	66			879	$start = $_->{start} if !defined($start) \|\| $start > $_->{start};
261	395	100	66			789	$stop = $_->{stop} if !defined($stop) \|\| $stop < $_->{stop};
262	395		66			752	$name \|\|= $_->{gname};
263							}
264	185					666	push @found_segments,[$ref,$class,$start,$stop,$strand,$name];
265
266							}
267
268	185					831	return \@found_segments;
269							}
270
271							sub search_notes {
272	0			0	1	0	my $self = shift;
273	0					0	my ($search_string,$limit) = @_;
274
275	0					0	$search_string =~ tr/*?//d;
276
277	0					0	my @results;
278	0					0	my @words = map {quotemeta($_)} $search_string =~ /(\w+)/g;
	0					0
279	0					0	my $search = join '\|',@words;
280
281	0					0	for my $feature (@{$self->{data}}) {
	0					0
282	0	0	0			0	next unless defined $feature->{gclass} && defined $feature->{gname}; # ignore NULL objects
283	0	0				0	next unless $feature->{attributes};
284	0					0	my @attributes = @{$feature->{attributes}};
	0					0
285	0					0	my @values = map {$_->[1]} @attributes;
	0					0
286	0					0	my $value = "@values";
287	0					0	my $matches = 0;
288	0					0	for my $w (@words) {
289	0					0	my @hits = $value =~ /($w)/ig;
290	0					0	$matches += @hits;
291							}
292	0	0				0	next unless $matches;
293
294	0					0	my $relevance = 10 * $matches;
295	0					0	my $featname = Bio::DB::GFF::Featname->new($feature->{gclass}=>$feature->{gname});
296	0					0	my $note;
297	0					0	$note = join ' ',map {$_->[1]} grep {$_->[0] eq 'Note'} @{$feature->{attributes}};
	0					0
	0					0
	0					0
298	0					0	$note .= join ' ',grep /$search/,map {$_->[1]} grep {$_->[0] ne 'Note'} @{$feature->{attributes}};
	0					0
	0					0
	0					0
299	0					0	my $type = Bio::DB::GFF::Typename->new($feature->{method},$feature->{source});
300	0					0	push @results,[$featname,$note,$relevance,$type];
301	0	0	0			0	last if defined $limit && @results >= $limit;
302							}
303
304							#added result filtering so that this method returns the expected results
305							#this section of code used to be in GBrowse's do_keyword_search method
306
307	0					0	my $match_sub = 'sub {';
308	0					0	foreach (split /\s+/,$search_string) {
309	0					0	$match_sub .= "return unless \$_[0] =~ /\Q$_\E/i; ";
310							}
311	0					0	$match_sub .= "};";
312	0					0	my $match = eval $match_sub;
313
314	0					0	my @matches = grep { $match->($_->[1]) } @results;
	0					0
315
316	0					0	return @matches;
317							}
318
319							sub _delete_features {
320	20			20		37	my $self = shift;
321	20					77	my @feature_ids = sort {$b<=>$a} @_;
	85					147
322	20					31	my $removed = 0;
323	20					37	foreach (@feature_ids) {
324	75	50	33			134	next unless $_ >= 0 && $_ < @{$self->{data}};
	75					174
325	75					90	$removed += defined splice(@{$self->{data}},$_,1);
	75					138
326							}
327	20					75	$removed;
328							}
329
330							sub _delete {
331	21			21		37	my $self = shift;
332	21					28	my $delete_spec = shift;
333	21		50			52	my $ranges = $delete_spec->{segments} \|\| [];
334	21		50			51	my $types = $delete_spec->{types} \|\| [];
335	21					31	my $force = $delete_spec->{force};
336	21					30	my $range_type = $delete_spec->{range_type};
337
338	21					31	my $deleted = 0;
339	21	100				63	if (@$ranges) {
		100
340	10	100				23	my @args = @$types ? (-type=>$types) : ();
341	10					23	push @args,(-range_type => $range_type);
342	10					23	my %ids_to_remove = map {$_->id => 1} map {$_->features(@args)} @$ranges;
	40					85
	10					23
343	10					38	$deleted = $self->delete_features(keys %ids_to_remove);
344							} elsif (@$types) {
345	5					18	my %ids_to_remove = map {$_->id => 1} $self->features(-type=>$types);
	20					36
346	5					18	$deleted = $self->delete_features(keys %ids_to_remove);
347							} else {
348	6	100				84	$self->throw("This operation would delete all feature data and -force not specified")
349							unless $force;
350	3					7	$deleted = @{$self->{data}};
	3					16
351	3					9	@{$self->{data}} = ();
	3					43
352							}
353	18					118	$deleted;
354							}
355
356							# attributes -
357
358							# Some GFF version 2 files use the groups column to store a series of
359							# attribute/value pairs. In this interpretation of GFF, the first such
360							# pair is treated as the primary group for the feature; subsequent pairs
361							# are treated as attributes. Two attributes have special meaning:
362							# "Note" is for backward compatibility and is used for unstructured text
363							# remarks. "Alias" is considered as a synonym for the feature name.
364							# If no name is provided, then attributes() returns a flattened hash, of
365							# attribute=>value pairs.
366
367							sub do_attributes{
368	20			20	1	28	my $self = shift;
369	20					46	my ($feature_id,$tag) = @_;
370	20					23	my $attr ;
371
372							#my $feature = ${$self->{data}}[$feature_id];
373	20					43	my $feature = $self->_basic_features_by_id($feature_id);
374
375	20					29	my @result;
376	20					27	for my $attr (@{$feature->{attributes}}) {
	20					41
377	50					88	my ($attr_name,$attr_value) = @$attr ;
378	50	100	100			163	if (defined($tag) && lc($attr_name) eq lc($tag)){push @result,$attr_value;}
	20	100				34
379	15					28	elsif (!defined($tag)) {push @result,($attr_name,$attr_value);}
380							}
381	20					72	return @result;
382							}
383
384
385							#sub get_feature_by_attribute{
386							sub _feature_by_attribute{
387	5			5		10	my $self = shift;
388	5					10	my ($attributes,$callback) = @_;
389	5	50				13	$callback \|\| $self->throw('must provide a callback argument');
390	5					8	my $count = 0;
391	5					10	my $feature_id = -1;
392	5					5	my $feature_group_id = undef;
393
394	5					8	for my $feature (@{$self->{data}}) {
	5					16
395
396	175					176	$feature_id++;
397	175					177	for my $attr (@{$feature->{attributes}}) {
	175					263
398	65					90	my ($attr_name,$attr_value) = @$attr ;
399							#there could be more than one set of attributes......
400	65					105	foreach (keys %$attributes) {
401	65	100	100			201	if (lc($_) eq lc($attr_name) && lc($attributes->{$_}) eq lc($attr_value)) {
402	10					25	$callback->($self->_hash_to_array($feature));
403	10					30	$count++;
404							}
405							}
406							}
407							}
408
409							}
410
411
412							# This is the low-level method that is called to retrieve GFF lines from
413							# the database. It is responsible for retrieving features that satisfy
414							# range and feature type criteria, and passing the GFF fields to a
415							# callback subroutine.
416
417							sub get_features{
418	85			85	1	160	my $self = shift;
419	85					118	my $count = 0;
420	85					176	my ($search,$options,$callback) = @_;
421
422	85					102	my $found_features;
423
424	85					201	$found_features = $self->_get_features_by_search_options($search,$options);
425
426							# only true if the sort by group option was specified
427	0					0	@{$found_features} = sort {lc("$a->{gclass}:$a->{gname}") cmp lc("$b->{gclass}:$b->{gname}")}
	0					0
428	85	50				168	@{$found_features} if $options->{sort_by_group} ;
	0					0
429
430	85					94	for my $feature (@{$found_features}) { # only true if the sort by group option was specified
	85					151
431	470					564	$count++;
432	470					795	$callback->(
433							$self->_hash_to_array($feature)
434							);
435							}
436
437	85					209	return $count;
438							}
439
440
441							# Low level implementation of fetching a named feature.
442							# GFF annotations are named using the group class and name fields.
443							# May return zero, one, or several Bio::DB::GFF::Feature objects.
444
445							=head2 _feature_by_name
446
447							Title : _feature_by_name
448							Usage : $db->get_features_by_name($name,$class,$callback)
449							Function: get a list of features by name and class
450							Returns : count of number of features retrieved
451							Args : name of feature, class of feature, and a callback
452							Status : protected
453
454							This method is used internally. The callback arguments are those used
455							by make_feature().
456
457							=cut
458
459							sub _feature_by_name {
460	16			16		24	my $self = shift;
461	16					34	my ($class,$name,$location,$callback) = @_;
462	16	50				32	$callback \|\| $self->throw('must provide a callback argument');
463	16					24	my $count = 0;
464	16					29	my $regexp;
465
466	16	50				57	if ($name =~ /[*?]/) { # uh oh regexp time
467	0					0	$name = quotemeta($name);
468	0					0	$name =~ s/\\\/./g;
469	0					0	$name =~ s/\\\?/.?/g;
470	0					0	$regexp++;
471							}
472
473	16					22	for my $feature (@{$self->{data}}) {
	16					39
474	560	100	33			1563	next unless ($regexp && $feature->{gname} =~ /$name/i) \|\| lc($feature->{gname}) eq lc($name);
			66
475	74	50	33			348	next if defined($feature->{gclass}) && length($feature->{gclass}) > 0 && $feature->{gclass} ne $class;
			33
476
477	74	50				119	if ($location) {
478	0	0				0	next if $location->[0] ne $feature->{ref};
479	0	0	0			0	next if $location->[1] && $location->[1] > $feature->{stop};
480	0	0	0			0	next if $location->[2] && $location->[2] < $feature->{start};
481							}
482	74					79	$count++;
483	74					130	$callback->($self->_hash_to_array($feature),0);
484							}
485	16					32	return $count;
486							}
487
488							# Low level implementation of fetching a feature by it's id.
489							# The id of the feature as implemented in the in-memory db, is the location of the
490							# feature in the features hash array.
491							sub _feature_by_id{
492	5			5		10	my $self = shift;
493	5					15	my ($ids,$type,$callback) = @_;
494	5	50				15	$callback \|\| $self->throw('must provide a callback argument');
495
496	5					10	my $feature_group_id = undef;
497
498	5					5	my $count = 0;
499	5	50				15	if ($type eq 'feature'){
500	5					17	for my $feature_id (@$ids){
501	5					18	my $feature = $self->_basic_features_by_id($feature_id);
502	5	50				18	$callback->($self->_hash_to_array($feature)) if $callback;
503	5					20	$count++;
504							}
505							}
506							}
507
508							sub _basic_features_by_id{
509	25			25		47	my $self = shift;
510	25					40	my ($ids) = @_;
511
512	25	50				86	$ids = [$ids] unless ref $ids =~ /ARRAY/;
513
514	25					39	my @result;
515	25					48	for my $feature_id (@$ids){
516	25					32	push @result, ${$self->{data}}[$feature_id];
	25					68
517							}
518	25	50				65	return wantarray() ? @result : $result[0];
519							}
520
521							# This method is similar to get_features(), except that it returns an
522							# iterator across the query.
523							# See Bio::DB::GFF::Adaptor::memory::iterator.
524
525							sub get_features_iterator {
526	15			15	1	30	my $self = shift;
527	15					29	my ($search,$options,$callback) = @_;
528	15	50				24	$callback \|\| $self->throw('must provide a callback argument');
529
530	15					35	my $results = $self->_get_features_by_search_options($search,$options);
531	15					33	my $results_array = $self->_convert_feature_hash_to_array($results);
532
533	15					94	return Bio::DB::GFF::Adaptor::memory::iterator->new($results_array,$callback);
534							}
535
536
537							# This method is responsible for fetching the list of feature type names.
538							# The query may be limited to a particular range, in
539							# which case the range is indicated by a landmark sequence name and
540							# class and its subrange, if any. These arguments may be undef if it is
541							# desired to retrieve all feature types.
542
543							# If the count flag is false, the method returns a simple list of
544							# Bio::DB::GFF::Typename objects. If $count is true, the method returns
545							# a list of $name=>$count pairs, where $count indicates the number of
546							# times this feature occurs in the range.
547
548							sub get_types {
549	25			25	1	50	my $self = shift;
550	25					55	my ($srcseq,$class,$start,$stop,$want_count,$typelist) = @_;
551
552	25					47	my(%result,%obj);
553
554	25					37	for my $feature (@{$self->{data}}) {
	25					62
555	875					1281	my $feature_start = $feature->{start};
556	875					1044	my $feature_stop = $feature->{stop};
557	875					1039	my $feature_ref = $feature->{ref};
558	875					1013	my $feature_class = $feature->{class};
559	875					1002	my $feature_method = $feature->{method};
560	875					1036	my $feature_source = $feature->{source};
561
562	875	100				1190	if (defined $srcseq){
563	525	100				901	next unless lc($feature_ref) eq lc($srcseq);
564							}
565
566	605	50				807	if (defined $class){
567	0	0	0			0	next unless defined $feature_class && $feature_class eq $class ;
568							}
569
570							# the requested range should OVERLAP the retrieved features
571	605	100	66			1227	if (defined $start or defined $stop) {
572	255	50				341	$start = 1 unless defined $start;
573	255	50				338	$stop = MAX_SEGMENT unless defined $stop;
574	255	50	33			603	next unless $feature_stop >= $start && $feature_start <= $stop;
575							}
576
577	605	50	33			955	if (defined $typelist && @$typelist){
578	0	0				0	next unless $self->_matching_typelist($feature_method,$feature_source,$typelist);
579							}
580
581	605					1034	my $type = Bio::DB::GFF::Typename->new($feature_method,$feature_source);
582	605					960	$result{$type}++;
583	605					940	$obj{$type} = $type;
584
585							} #end features loop
586
587	25	100				221	return $want_count ? %result : values %obj;
588							}
589
590							sub classes {
591	0			0	1	0	my $self = shift;
592	0					0	my %classes;
593	0					0	for my $feature (@{$self->{data}}) {
	0					0
594	0					0	$classes{$feature->{gclass}}++;
595							}
596	0					0	my @classes = sort keys %classes;
597	0					0	return @classes;
598							}
599
600							# Internal method that performs a search on the features array,
601							# sequentialy retrieves the features, and performs a check on each feature
602							# according to the search options.
603							sub _get_features_by_search_options{
604	100			100		125	my $count = 0;
605	100					170	my ($self, $search,$options) = @_;
606							my ($rangetype,$refseq,$class,$start,$stop,$types,$sparse,$order_by_group,$attributes) =
607	100					177	(@{$search}{qw(rangetype refseq refclass start stop types)},
608	100					145	@{$options}{qw(sparse sort_by_group ATTRIBUTES)}) ;
	100					261
609
610	100					127	my @found_features;
611	100					165	my $data = $self->{data};
612
613	100					121	my $feature_id = -1 ;
614	100					126	my $feature_group_id = undef;
615
616	100					113	for my $feature (@{$data}) {
	100					158
617
618	3270					3344	$feature_id++;
619
620	3270					4063	my $feature_start = $feature->{start};
621	3270					3622	my $feature_stop = $feature->{stop};
622	3270					3898	my $feature_ref = $feature->{ref};
623
624	3270	100				4422	if (defined $refseq){
625	2905	100				4671	next unless lc($feature_ref) eq lc($refseq);
626							}
627
628	1330	100	66			2674	if (defined $start or defined $stop) {
629	455	50				601	$start = 0 unless defined($start);
630	455	50				633	$stop = MAX_SEGMENT unless defined($stop);
631
632	455	100				624	if ($rangetype eq 'overlaps') {
		50
		0
633	385	100	100			987	next unless $feature_stop >= $start && $feature_start <= $stop;
634							} elsif ($rangetype eq 'contains') {
635	70	100	66			176	next unless $feature_start >= $start && $feature_stop <= $stop;
636							} elsif ($rangetype eq 'contained_in') {
637	0	0	0			0	next unless $feature_start <= $start && $feature_stop >= $stop;
638							} else {
639	0	0	0			0	next unless $feature_start == $start && $feature_stop == $stop;
640							}
641
642							}
643
644	1210					1521	my $feature_source = $feature->{source};
645	1210					1374	my $feature_method = $feature->{method};
646
647	1210	100	66			2783	if (defined $types && @$types){
648	375	100				559	next unless $self->_matching_typelist($feature_method,$feature_source,$types);
649							}
650
651	970					1269	my $feature_attributes = $feature->{attributes};
652	970	100				1358	if (defined $attributes){
653	255	100				362	next unless $self->_matching_attributes($feature_attributes,$attributes);
654							}
655
656							# if we get here, then we have a feature that meets the criteria.
657							# Then we just push onto an array
658							# of found features and continue.
659
660	740					840	my $found_feature = $feature ;
661	740					852	$found_feature->{feature_id} = $feature_id;
662	740					999	$found_feature->{group_id} = $feature_group_id;
663	740					1244	push @found_features,$found_feature;
664							}
665
666	100					252	return \@found_features;
667							}
668
669
670							sub _hash_to_array {
671	829			829		1197	my ($self,$feature_hash) = @_;
672	829					1026	my @array = @{$feature_hash}{@hash2array_map};
	829					3963
673	829	100				3176	return wantarray ? @array : \@array;
674							}
675
676							# this subroutine is needed for convertion of the feature from hash to array in order to
677							# pass it to the callback subroutine
678							sub _convert_feature_hash_to_array{
679	15			15		33	my ($self, $feature_hash_array) = @_;
680	15					24	my @features_array_array = map {scalar $self->_hash_to_array($_)} @$feature_hash_array;
	270					360
681	15					29	return \@features_array_array;
682							}
683
684							sub _matching_typelist{
685	375			375		564	my ($self, $feature_method,$feature_source,$typelist) = @_;
686	375					464	foreach (@$typelist) {
687	1090					1329	my ($search_method,$search_source) = @$_;
688	1090	100				1728	next if lc($search_method) ne lc($feature_method);
689	135	50	66			242	next if defined($search_source) && lc($search_source) ne lc($feature_source);
690	135					264	return 1;
691							}
692	240					473	return 0;
693							}
694
695							sub _matching_attributes {
696	6425			6425		9072	my ($self, $feature_attributes,$attributes) = @_ ;
697	6425					10640	foreach (keys %$attributes) {
698	6442	100				9730	return 0 if !_match_all_attr_in_feature($_,$attributes->{$_},$feature_attributes)
699							}
700	30					71	return 1;
701							}
702
703							sub _match_all_attr_in_feature{
704	6442			6442		8282	my ($attr_name,$attr_value,$feature_attributes) = @_;
705	6442					7829	for my $attr (@$feature_attributes) {
706	2409					3423	my ($feature_attr_name,$feature_attr_value) = @$attr ;
707	2409	100	100			4394	next if ($attr_name ne $feature_attr_name \|\| $attr_value ne $feature_attr_value);
708	47					104	return 1;
709							}
710	6395					13846	return 0;
711							}
712
713
714	5			5	1	15	sub do_initialize { 1; }
715	0			0	0		sub get_feature_by_group_id{ 1; }
716
717							1;
718