File Coverage

blib/lib/Bio/DB/Das/Chado.pm

Criterion	Covered	Total	%
statement	42	678	6.1
branch	0	318	0.0
condition	0	131	0.0
subroutine	14	60	23.3
pod	32	38	84.2
total	88	1225	7.1

line	stmt	bran	cond	sub	pod	time	code
1							# $Id: Chado.pm,v 1.11 2009-08-25 19:29:43 scottcain Exp $
2
3							=head1 NAME
4
5							Bio::DB::Das::Chado - DAS-style access to a chado database
6
7							=head1 SYNOPSIS
8
9							# Open up a feature database
10							$db = Bio::DB::Das::Chado->new(
11							-dsn => 'dbi:Pg:dbname=gadfly;host=lajolla'
12							-user => 'jimbo',
13							-pass => 'supersecret',
14							);
15
16							@segments = $db->segment(-name => '2L',
17							-start => 1,
18							-end => 1000000);
19
20							# segments are Bio::Das::SegmentI - compliant objects
21
22							# fetch a list of features
23							@features = $db->features(-type=>['type1','type2','type3']);
24
25							# invoke a callback over features
26							$db->features(-type=>['type1','type2','type3'],
27							-callback => sub { ... }
28							);
29
30							# get all feature types
31							@types = $db->types;
32
33							# count types
34							%types = $db->types(-enumerate=>1);
35
36							@feature = $db->get_feature_by_name($class=>$name);
37							@feature = $db->get_feature_by_target($target_name);
38							@feature = $db->get_feature_by_attribute($att1=>$value1,$att2=>$value2);
39							$feature = $db->get_feature_by_id($id);
40
41							$error = $db->error;
42
43							=head1 DESCRIPTION
44
45							Bio::DB::Das::Chado allows DAS style access to a Chado database, getting
46							SeqFeatureI-compliant BioPerl objects and allowing GBrowse to access
47							a Chado database directly.
48
49							=head1 FEEDBACK
50
51							=head2 Mailing Lists
52
53							User feedback is an integral part of the evolution of this and other
54							GMOD modules. Send your comments and suggestions preferably to one
55							of the GMOD mailing lists. Your participation is much appreciated.
56
57							gmod-gbrowse@lists.sourceforge.com
58
59							=head2 Reporting Bugs
60
61							Report bugs to the GMOD bug tracking system at SourceForge to help us keep
62							track the bugs and their resolution.
63
64							http://sourceforge.net/tracker/?group_id=27707&atid=391291
65
66
67							=head1 AUTHOR
68
69							Scott Cain <scain@cpan.org>
70
71							=head1 LICENSE
72
73							This software may be redistributed under the same license as perl.
74
75							=head1 APPENDIX
76
77							The rest of the documentation details each of the object
78							methods. Internal methods are usually preceded with a _
79
80							=cut
81
82							#'
83
84							package Bio::DB::Das::Chado;
85	1			1		27346	use strict;
	1					2
	1					52
86
87	1			1		1023	use Bio::DB::Das::Chado::Segment;
	1					3
	1					48
88	1			1		31	use Bio::Root::Root;
	1					1
	1					23
89	1			1		1042	use Bio::DasI;
	1					2018
	1					37
90	1			1		8	use Bio::PrimarySeq;
	1					2
	1					31
91	1			1		5	use Bio::DB::GFF::Typename;
	1					2
	1					27
92	1			1		4598	use DBI;
	1					21941
	1					88
93	1			1		1255	use Bio::SeqFeature::Lite;
	1					5796
	1					54
94	1			1		8	use Carp qw(longmess);
	1					3
	1					73
95	1			1		7	use vars qw($VERSION @ISA);
	1					3
	1					60
96
97	1			1		6	use Data::Dumper;
	1					3
	1					54
98
99	1			1		7	use constant SEGCLASS => 'Bio::DB::Das::Chado::Segment';
	1					3
	1					73
100	1			1		5	use constant MAP_REFERENCE_TYPE => 'MapReferenceType'; #dgg
	1					3
	1					44
101	1			1		5	use constant DEBUG => 0;
	1					2
	1					11009
102
103							$VERSION = 0.34;
104							@ISA = qw(Bio::Root::Root Bio::DasI);
105
106							=head2 new
107
108							Title : new
109							Usage : $db = Bio::DB::Das::Chado(
110							-dsn => 'dbi:Pg:dbname=gadfly;host=lajolla'
111							-user => 'jimbo',
112							-pass => 'supersecret',
113							);
114
115							Function: Open up a Bio::DB::DasI interface to a Chado database
116							Returns : a new Bio::DB::Das::Chado object
117							Args :
118
119							=over
120
121							=item -dsn [dsn string]
122
123							A full dbi dsn string for the database, optionally including host and port
124							information, like "dbi:Pg:dbname=chado;host=localhost;port=5432".
125
126							=item -user [username]
127
128							The database user name.
129
130							=item -pass [password]
131
132							The users password for the database.
133
134							=item -organism [common_name\|abbreviation\|"Genus species"]
135
136							Used to specify the organism that the features should be drawn from in
137							Chado instances that have more than one organism. The argument can be
138							the common name, the abbreviation or "Genus species". Since common name
139							and abbreviation are not guaranteed to be unique, if one of those is supplied
140							and it corresponds to more than one organism_id, the Chado adaptor will die.
141							Since the combination is guaranteed to be unique by table constraints,
142							supplying "Genus species" should always work.
143
144							=item -srcfeatureslice [1\|0] default: 1
145
146							Setting this to 1 will enable searching for features using a function and
147							a corresponding index that can significantly speed searches, as long as
148							the featureloc_slice function is present in the Chado instance (all
149							"modern" instances of Chado do have this function). Since it available
150							in nearly all Chado instances, in a future release of this adaptor,
151							the default value of -srcfeatureslice will be set to 1 (on).
152
153							=item -inferCDS [1\|0] default: 0
154
155							Given mRNA features that have exons and polypeptide features as children,
156							when inferCDS is set, the Chado adaptor will calculate the intersection
157							of the exons and polypeptide features and create CDS features that result.
158							This is generally needed when using gene and mRNA features with glyphs in
159							GBrowse that show subparts, like the gene and processed_transcript glyphs.
160							Since this is almost always required, in a future release of this adaptor,
161							the default will be switched to 1 (on).
162
163							=item -fulltext [1\|0] default: 0
164
165							This item allows full text searching of various Chado text fields,
166							including feature.name, feature.uniquename, synonym.synonym_sgml,
167							dbxref.accession, and all_feature_names.name (which fequently includes
168							featureprop.value, depending on how all_feature_names is configured). Note
169							that to use -fulltext, you must run the preparation script,
170							gmod_chado_fts_prep.pl, on the database, and in addition, it might
171							be a good idea to set up a cronjob to keep the all_feature_names
172							materialized view up to date with the materialized view tool,
173							gmod_materialized_view_tool.pl.
174
175							=item -recursivMapping [1\|0] default: 0
176
177							In the case where features are mapped to a "small" srcfeature (like
178							a contig) and then that small feature is mapped to a larger feature
179							(like a chromosome), setting -recursivMapping will allow the Chado
180							adaptor to calculate the coordinates of the feature on the larger
181							feature even though it isn't explicitly mapped to it. The Chado adaptor
182							suffers an approximately 20% performance penalty to do this mapping.
183
184							=item -allow_obsolete [1\|0] default: 0
185
186							If set to 1, allow_obsolete will tell the Chado adaptor to ignore the
187							feature.is_obsolete column when querying to find features.
188
189							=item -enable_seqscan [1\|0] default: 1
190
191							If set to zero, the -enable_seqscan will send a query planner hint to the
192							PostgreSQL server to make it more costly to do sequential scans on a table.
193							This is generally not necessary, as the query planner in Pg 8+ is smarter
194							than it used to be.
195
196							=item -do2Level [1\|0] default: 0
197
198							do2Level is a flag for specifying that two "levels" at most of features should
199							be fetch when getting child features. This flag is generally unnecessary as
200							Bio::Graphics::Glyph supports specifying on a per glyph basis what should
201							be fetch. Use of this flag is incompatible with the -recursivMapping flag.
202
203							=item -reference_class [SO type name]
204
205							Used to specify what the "base type" is. Typically, this would be chromosome
206							or contig, but setting it is only necessary in the case where features
207							are mapped to more than one srcfeature and you don't want to use the
208							one that is lowest on the graph. For example, you have polypeptides that are
209							mapped to chromosomes and motifs that are mapped to polypeptides. If you
210							want to display the motifs on the polypeptide, you need to set "polypeptide"
211							as the argument for -reference_class.
212
213							=item -tripal [1\|0] default: 0
214
215							If turned on, the tripal flag tells the adaptor that it is dealing with
216							a Chado instance that is working with Tripal, and so the query to fetch
217							features may fail with regard to analysis features. This flag attempts to
218							prevent that. It may mean that analysis features (like similarity results)
219							will be inaccessible to the adaptor, or at least scores associated with them
220							will be, depending on how they were loaded.
221
222							=back
223
224							=cut
225
226							# create new database accessor object
227							# takes all the same args as a Bio::DB::BioDB class
228							sub new {
229	0			0	1		my $proto = shift;
230	0		0				my $self = bless {}, ref($proto) \|\| $proto;
231
232	0						my %arg = @_;
233
234	0						my $dsn = $arg{-dsn};
235	0						my $username = $arg{-user};
236	0						my $password = $arg{-pass};
237	0						my $refclass = $arg{-reference_class};
238	0						my $tripal = $arg{-tripal};
239
240	0						$self->{db_args}->{dsn} = $dsn;
241	0						$self->{db_args}->{username} = $username;
242	0						$self->{db_args}->{password} = $password;
243
244	0	0					my $dbh = DBI->connect( $dsn, $username, $password )
245							or $self->throw("unable to open db handle");
246	0						$self->dbh($dbh);
247
248	0						warn "$dbh\n" if DEBUG;
249
250							# determine which cv to use for SO terms
251
252	0						$self->sofa_id(1);
253
254	0						warn "SOFA id to use: ",$self->sofa_id() if DEBUG;
255
256							# get the cvterm relationships here and save for later use
257
258	0						my $cvterm_query="select ct.cvterm_id,ct.name as name, c.name as cvname
259							from cvterm ct, cv c
260							where ct.cv_id=c.cv_id and
261							(c.name IN (
262							'relationship',
263							'relationship type','Relationship Ontology',
264							'autocreated')
265							OR c.cv_id = ?)";
266
267	0						warn "cvterm query: $cvterm_query\n" if DEBUG;
268
269	0	0					my $sth = $self->dbh->prepare($cvterm_query)
270							or warn "unable to prepare select cvterms";
271
272	0	0					$sth->execute($self->sofa_id()) or $self->throw("unable to select cvterms");
273
274							# my $cvterm_id = {}; replaced with better-named variables
275							# my $cvname = {};
276
277	0						my(%term2name,%name2term) = ({},{});
278	0						my %termcv=();
279
280	0						while (my $hashref = $sth->fetchrow_hashref) {
281	0						$term2name{ $hashref->{cvterm_id} } = $hashref->{name};
282	0						$termcv{ $hashref->{cvterm_id} } = $hashref->{cvname}; # dgg
283
284							#this addresses a bug in gmod_load_gff3 (Scott!), which creates a 'part_of'
285							#term in addition to the OBO_REL one that already exists! this will also
286							#help with names that exist in both GO and SO, like 'protein'.
287							# dgg: but this array is bad for callers of name2term() who expect scalar result
288							# mostly want only sofa terms
289
290	0	0					if(defined($name2term{ $hashref->{name} })){ #already seen this name
291
292	0	0					if(ref($name2term{ $hashref->{name} }) ne 'ARRAY'){ #already array-converted
293
294	0						$name2term{ $hashref->{name} } = [ $name2term{ $hashref->{name} } ];
295
296							}
297
298	0						push @{ $name2term{ $hashref->{name} } }, $hashref->{cvterm_id};
	0
299
300							} else {
301
302	0						$name2term{ $hashref->{name} } = $hashref->{cvterm_id};
303
304							}
305							}
306	0						$sth->finish;
307
308	0						$self->term2name(\%term2name);
309	0						$self->name2term(\%name2term, \%termcv);
310
311							#Recursive Mapping
312	0	0					$self->recursivMapping($arg{-recursivMapping} ? $arg{-recursivMapping} : 0);
313
314	0	0					$self->inferCDS($arg{-inferCDS} ? $arg{-inferCDS} : 0);
315	0	0					$self->allow_obsolete($arg{-allow_obsolete} ? $arg{-allow_obsolete} : 0);
316
317	0	0	0				if (exists($arg{-enable_seqscan}) && ! $arg{-enable_seqscan}){
318	0						$self->dbh->do("set enable_seqscan=0");
319							}
320
321	0	0					$self->srcfeatureslice(defined $arg{-srcfeatureslice} ? $arg{-srcfeatureslice} : 1);
322	0	0					$self->do2Level($arg{-do2Level} ? $arg{-do2Level} : 0);
323
324	0	0					if ($arg{-organism}) {
325	0						$self->organism_id($arg{-organism});
326							}
327
328							#determine if all_feature_names view or table exist
329							#$self->use_all_feature_names();
330
331							#determine the type_id of the ref class and cache it
332	0						$self->refclass($self->name2term($refclass));
333
334	0						$self->fulltext($arg{-fulltext});
335	0						$self->tripal($arg{-tripal});
336
337	0						return $self;
338							}
339
340							=head2 feature_summary
341
342							=over
343
344							=item Usage
345
346							$obj->feature_summary()
347
348							=item Function
349
350							This function is based on Bio::DB::SeqFeature::Store->feature_summary.
351							The text that follows comes from it's documtation:
352
353							This method is used to get coverage density information across a
354							region of interest. You provide it with a region of interest, optional
355							a list of feature types, and a count of the number of bins over which
356							you want to calculate the coverage density. An object is returned
357							corresponding to the requested region. It contains a tag called
358							"coverage" that will return an array ref of "bins" length. Each
359							element of the array describes the number of features that overlap the
360							bin at this postion.
361
362							Note that this method uses an approximate algorithm that is only
363							accurate to 500 bp, so when dealing with bins that are smaller than
364							1000 bp, you may see some shifting of counts between adjacent bins.
365
366							Although an -iterator option is provided, the method only ever returns
367							a single feature, so this is fairly useless.
368
369							=item Returns
370
371							A single feature containing summary data, or an interator containing
372							that one feature.
373
374							=item Arguments
375
376							-seq_id Sequence ID for the region
377							-start Start of region
378							-end End of region
379							-type/-types Feature type of interest or array ref of types
380							-bins Number of bins across region. Defaults to 1000.
381							-iterator Return an iterator across the region
382
383							=back
384
385							=cut
386
387							sub feature_summary {
388	0			0	1		my $self = shift;
389	0						my ($seq_name,$seq_id,$ref,$start,$stop,$end,$types,$type,$primary_tag,$bins,$iterator) =
390							$self->_rearrange(['SEQID','SEQ_ID','REF','START','STOP','END',
391							'TYPES','TYPE','PRIMARY_TAG',
392							'BINS',
393							'ITERATOR',
394							],@_);
395
396	0		0				$seq_name \|\|=$seq_id \|\|=$ref;
			0
397	0		0				$end \|\|=$end;
398	0		0				$types \|\|=$type \|\|=$primary_tag;
			0
399
400	0						warn $types if DEBUG;
401
402	0	0					my ($coverage,$tag) = $self->coverage_array(-seqid=> $seq_name,
403							-start=> $start,
404							-end => $end,
405							-type => $types,
406							-bins => $bins) or return;
407	0						my $score = 0;
408	0						for (@$coverage) { $score += $_ }
	0
409	0						$score /= @$coverage;
410
411	0						my $feature = Bio::SeqFeature::Lite->new(-seq_id => $seq_name,
412							-start => $start,
413							-end => $end,
414							-type => $tag,
415							-score => $score,
416							-attributes =>
417							{ coverage => [$coverage] });
418
419	0						my @features = ($feature);
420	0	0					return $iterator
421							? Bio::DB::Das::ChadoIterator->new(\@features)
422							: $feature;
423							}
424
425
426							=head2 coverage_array
427
428							=over
429
430							=item Usage
431
432							$obj->coverage_array()
433
434							=item Function
435
436							Calculates the coverage/density of a particular feature type
437							over a range.
438
439							=item Returns
440
441							A reference to the coverage array, or if called in an array
442							context, a two element array with the reference to the coverage
443							array first and the type that it was called with as the second
444							element.
445
446							=item Arguments
447
448							seqid
449							start
450							stop
451							type
452							bins
453
454							=back
455
456							This is based on the method of the same name in
457							Bio::DB::SeqFeature::Store::DBI::mysql
458
459							=cut
460
461							sub coverage_array {
462	0			0	1		my $self = shift;
463	0						my ($seq_name,$seq_id,$ref,$start,$end,$stop,$types,$type,$primary_tag,$bins) =
464							$self->_rearrange(['SEQID','SEQ_ID','REF','START','STOP','END',
465							'TYPES','TYPE','PRIMARY_TAG','BINS'],@_);
466
467	0		0				$seq_name \|\|= $seq_id \|\|= $ref;
			0
468	0		0				$types \|\|= $type \|\|= $primary_tag;
			0
469	0		0				$end \|\|= $stop;
470
471	0						my $summary_bin_size = 1000;
472	0		0				$bins \|\|= 1000;
473	0		0				$start \|\|= 1;
474	0	0					my $segment = $self->segment(-name =>$seq_name) or $self->throw("unknown seq_id $seq_name");
475	0		0				$end \|\|= $segment->end;
476
477	0						my $binsize = ($end-$start+1)/$bins;
478	0						my $seqid = $segment->feature_id;
479
480	0						warn "$seqid in coverage array" if DEBUG;
481
482	0	0					return [] unless $seqid;
483
484							# where each bin starts
485	0						my @his_bin_array = map {$start + $binsize * $_} (0..$bins);
	0
486	0						my @sum_bin_array = map {int(($_-1)/$summary_bin_size)} @his_bin_array;
	0
487
488	0						my $interval_stats = 'gff_interval_stats';
489
490							# pick up the type ids
491
492							#WARNING: potential bug below. This query and the loop that processes
493							#it is from Lincoln's implementation for SeqFeature::Store. The query
494							#seems to rely on getting the results back sorted even though the
495							#query doesn't explicitly sort (the ORDER BY commented out was from me)
496							#With sorting the processing takes much longer, so I'm leaving it out
497							#for now, but reimplementing might be a good idea.
498
499	0						my %bins;
500	0						my $sql = <<END;
501							SELECT bin,cum_count
502							FROM $interval_stats
503							WHERE (typeid=? OR typeid like ? ) AND bin >=? AND srcfeature_id =?
504							-- ORDER BY bin
505							LIMIT 1
506							END
507							;
508
509	0						my $sth = $self->dbh->prepare($sql);
510
511	0						my @t;
512	0	0					if (ref $types eq 'ARRAY') {
513	0						@t = @$types;
514							}
515							else {
516	0						@t = ($types);
517							}
518
519	0						warn join(" ", @t) . " types in coverage array" if DEBUG;
520
521	0						eval {
522	0						for my $typeid (@t) {
523	0						my $typestr = $self->_types_sql($typeid);
524
525	0						warn "$typestr typestr in coverage array" if DEBUG;
526
527	0						for (my $i=0;$i<@sum_bin_array;$i++) {
528
529	0						my @args = ($typestr,$typestr,$sum_bin_array[$i],$seqid);
530
531	0	0					$sth->execute(@args) or $self->throw($sth->errstr);
532	0						my ($bin,$cum_count) = $sth->fetchrow_array;
533	0						push @{$bins{$typeid}},[$bin,$cum_count];
	0
534							}
535							}
536							};
537
538
539	0	0					return unless %bins;
540
541	0						my @tags;
542							my @merged_bins;
543	0						my $firstbin = int(($start-1)/$binsize);
544	0						for my $type (keys %bins) {
545	0						push @tags, $type;
546	0						my $arry = $bins{$type};
547	0						my $last_count = $arry->[0][1];
548	0						my $last_bin = -1;
549	0						my $i = 0;
550	0						my $delta;
551	0						for my $b (@$arry) {
552	0						my ($bin,$count) = @$b;
553	0	0					$delta = $count - $last_count if $bin > $last_bin;
554	0						$merged_bins[$i++] = $delta;
555	0						$last_count = $count;
556	0						$last_bin = $bin;
557							}
558							}
559
560	0						my $report_tag = join(",",@tags);
561	0	0					return wantarray ? (\@merged_bins,$report_tag) : \@merged_bins;
562							}
563
564
565							sub _types_sql {
566	0			0			my $self = shift;
567	0						my $type = shift;
568	0						my ($primary_tag,$source_tag,$typestr);
569
570	0	0	0				if (ref $type && $type->isa('Bio::DB::GFF::Typename')) {
571	0						$primary_tag = $type->method;
572	0						$source_tag = $type->source;
573							} else {
574	0						($primary_tag,$source_tag) = split ':',$type,2;
575							}
576
577	0	0					if (defined $source_tag) {
578	0	0					if (length($primary_tag)) {
579	0						$typestr = "$primary_tag:$source_tag";
580							}
581							else {
582	0						$typestr = "%:$source_tag";
583							}
584							} else {
585	0						$typestr = "$primary_tag:%";
586							}
587
588	0						return ($typestr);
589							}
590
591							=head2 tripal
592
593							=over
594
595							=item Usage
596
597							$obj->tripal() #get existing value
598							$obj->tripal($newval) #set new value
599
600							=item Function
601
602							Flag to identfy Chado database that are working with Tripal
603
604							=item Returns
605
606							value of tripal (a scalar)
607
608							=item Arguments
609
610							new value of tripal (to set)
611
612							=back
613
614							=cut
615
616							sub tripal {
617	0			0	1		my $self = shift;
618	0	0					my $tripal = shift if defined(@_);
619	0	0					return $self->{'tripal'} = $tripal if defined($tripal);
620	0						return $self->{'tripal'};
621							}
622
623
624
625							=head2 fulltext
626
627							=over
628
629							=item Usage
630
631							$obj->fulltext() #get existing value
632							$obj->fulltext($newval) #set new value
633
634							=item Function
635
636							Flag to govern the use of full text searching queries
637
638							=item Returns
639
640							value of fulltext (a scalar)
641
642							=item Arguments
643
644							new value of fulltext (to set)
645
646							=back
647
648							=cut
649
650							sub fulltext {
651	0			0	1		my $self = shift;
652	0	0					my $fulltext = shift if defined(@_);
653	0	0					return $self->{'fulltext'} = $fulltext if defined($fulltext);
654	0						return $self->{'fulltext'};
655							}
656
657
658							=head2 refclass
659
660							=over
661
662							=item Usage
663
664							$obj->refclass() #get existing value
665							$obj->refclass($newval) #set new value
666
667							=item Function
668
669							=item Returns
670
671							value of the reference class's cvterm_id (a scalar)
672
673							=item Arguments
674
675							new value of the reference class's cvterm_id (to set)
676
677							=back
678
679							=cut
680
681							sub refclass {
682	0			0	1		my $self = shift;
683	0	0					my $refclass = shift if defined(@_);
684	0	0					return $self->{'refclass'} = $refclass if defined($refclass);
685	0						return $self->{'refclass'};
686							}
687
688
689							=head2 use_all_feature_names
690
691							Title : use_all_feature_names
692							Usage : $obj->use_all_feature_names()
693							Function: set or return flag indicating that all_feature_names view is present
694							Returns : 1 if all_feature_names present, 0 if not
695							Args : to return the flag, none; to set, 1
696
697
698							=cut
699
700							sub use_all_feature_names {
701	0			0	1		my ($self, $flag) = @_;
702
703	0	0					return $self->{use_all_feature_names} = $flag
704							if defined($flag);
705	0	0					return $self->{use_all_feature_names}
706							if defined $self->{use_all_feature_names};
707
708							#now determine if either a view or table named all_feature_names is present
709	0						my $query
710							= "SELECT relkind FROM pg_class WHERE relname = 'all_feature_names'";
711
712	0						my $exists = $self->dbh->prepare($query);
713	0	0					$exists->execute or warn "all_feature_names query failed: $!";
714
715	0						my ($kind) = $exists->fetchrow_array;
716	0	0	0				if ($kind and ($kind eq 'r' or $kind eq 'v')) {
		0	0
717	0						$self->{use_all_feature_names} = 1;
718							}
719							elsif ($kind) {
720	0						warn "all_feature_names: This option shouldn't happen--setting use_all_feature_names to zero.";
721	0						$self->{use_all_feature_names} = 0;
722							}
723							else {
724	0						$self->{use_all_feature_names} = 0;
725							}
726	0						$exists->finish;
727
728	0						return $self->{use_all_feature_names};
729							}
730
731							=head2 organism_id
732
733							Title : organism_id
734							Usage : $obj->organism_id()
735							Function: set or return the organism_id
736							Returns : the value of the id
737							Args : to return the flag, none; to set, the common name of the organism
738
739							If -organism is set when the Chado feature is instantiated, this method
740							queries the database with the common name to cache the organism_id.
741
742							=cut
743
744							sub organism_id {
745	0			0	1		my $self = shift;
746	0						my $organism_name = shift;
747
748	0	0					if (!$organism_name) {
749	0						return $self->{'organism_id'};
750							}
751
752	0						my $dbh = $self->dbh;
753
754							#if there is a space in the name, check genus species
755	0	0					if ($organism_name =~ /(\S+?)\s+(.+)/) {
756	0						my $genus = $1;
757	0						my $species = $2;
758	0						my $species_query = $dbh->prepare("SELECT organism_id FROM organism WHERE genus = ? and species =
759							?");
760	0	0					$species_query->execute($genus, $species) or die "organism genus species query failed:$!";
761
762							#don't need to check for multiple rows because of unique constraint
763	0	0					if ($species_query->rows == 1) {
764	0						my($organism_id) = $species_query->fetchrow_array;
765
766	0	0					if ($organism_id) {
767	0						return $self->{'organism_id'} = $organism_id;
768							}
769
770							}
771							}
772
773							#check common name
774	0						my $org_query = $dbh->prepare("SELECT organism_id FROM organism WHERE common_name = ?");
775
776	0	0					$org_query->execute($organism_name) or die "organism query failed:$!";
777
778							#if more than one result for common name, croak
779	0	0					if ($org_query->rows > 1) {
		0
780	0						$self->throw("The common organism name, $organism_name, is present more than once in the organism table; please use a more precice representation of the organism.");
781							}
782							elsif ($org_query->rows == 0 ) {
783							#no--don't do anything here--let it go on to check other things
784							#$self->throw("There is no organism in the organism table with a common name '$organism_name'; please check the spelling.");
785							}
786							else {
787	0						my($organism_id) = $org_query->fetchrow_array;
788
789	0	0					if ($organism_id) {
790	0						return $self->{'organism_id'} = $organism_id;
791							}
792							}
793	0						$org_query->finish;
794
795							#check abbrev
796	0						my $abbrev_query = $dbh->prepare("SELECT organism_id FROM organism WHERE abbreviation = ?");
797
798	0	0					$abbrev_query->execute($organism_name) or die "organism abbrev query failed:$!";
799
800	0	0					if ($abbrev_query->rows > 1) {
		0
801	0						$self->throw("The abbreviated organism name, $organism_name, is present more than once in the organism table; please use a more precice representation of the organism.");
802							}
803							elsif ($abbrev_query->rows == 0) {
804							#do nothing in case another check is added after this one
805							}
806							else {
807	0						my($organism_id) = $abbrev_query->fetchrow_array;
808
809	0	0					if ($organism_id) {
810	0						return $self->{'organism_id'} = $organism_id;
811							}
812							}
813
814	0						$self->throw("Tried everything to get an organism_id for '$organism_name' but failed; try 'genus species'");
815	0						return; #of course, this return will never get used
816							}
817
818
819
820							=head2 inferCDS
821
822							Title : inferCDS
823							Usage : $obj->inferCDS()
824							Function: set or return the inferCDS flag
825							Returns : the value of the inferCDS flag
826							Args : to return the flag, none; to set, 1
827
828							Often, chado databases will be populated without CDS features, since
829							they can be inferred from a union of exons and polypeptide features.
830							Setting this flag tells the adaptor to do the inferrence to get
831							those derived CDS features (at some small performance penatly).
832
833							=cut
834
835							sub inferCDS {
836	0			0	1		my $self = shift;
837
838	0						my $flag = shift;
839	0	0					return $self->{inferCDS} = $flag if defined($flag);
840	0						return $self->{inferCDS};
841							}
842
843							=head2 allow_obsolete
844
845							Title : allow_obsolete
846							Usage : $obj->allow_obsolete()
847							Function: set or return the allow_obsolete flag
848							Returns : the value of the allow_obsolete flag
849							Args : to return the flag, none; to set, 1
850
851							The chado feature table has a flag column called 'is_obsolete'.
852							Normally, these features should be ignored by GBrowse, but
853							the -allow_obsolete method is provided to allow displaying
854							obsolete features.
855
856							=cut
857
858							sub allow_obsolete {
859	0			0	1		my $self = shift;
860	0	0					my $allow_obsolete = shift if defined(@_);
861	0	0					return $self->{'allow_obsolete'} = $allow_obsolete if defined($allow_obsolete);
862	0						return $self->{'allow_obsolete'};
863							}
864
865
866							=head2 sofa_id
867
868							Title : sofa_id
869							Usage : $obj->sofa_id()
870							Function: get or return the ID to use for SO terms
871							Returns : the cv.cv_id for the SO ontology to use
872							Args : to return the id, none; to determine the id, 1
873
874							=cut
875
876							sub sofa_id {
877	0			0	1		my $self = shift;
878	0	0					return $self->{'sofa_id'} unless @_;
879
880	0						my $query = "select cv_id from cv where name in (
881							'SOFA',
882							'Sequence Ontology Feature Annotation',
883							'sofa.ontology')";
884
885	0						my $sth = $self->dbh->prepare($query);
886	0	0					$sth->execute() or $self->throw("trying to find SOFA");
887
888	0						my $data = $sth->fetchrow_hashref();
889	0						my $sofa_id = $$data{'cv_id'};
890
891	0						$sth->finish;
892	0	0					return $self->{'sofa_id'} = $sofa_id if $sofa_id;
893
894	0						$query = "select cv_id from cv where name in (
895							'Sequence Ontology',
896							'sequence',
897							'SO')";
898
899	0						$sth = $self->dbh->prepare($query);
900	0	0					$sth->execute() or $self->throw("trying to find SO");
901
902	0						$data = $sth->fetchrow_hashref();
903	0						$sofa_id = $$data{'cv_id'};
904
905	0						$sth->finish;
906	0	0					return $self->{'sofa_id'} = $sofa_id if $sofa_id;
907
908	0						$self->throw("unable to find SO or SOFA in the database!");
909							}
910
911							=head2 recursivMapping
912
913							Title : recursivMapping
914							Usage : $obj->recursivMapping($newval)
915							Function: Flag for activating the recursive mapping (desactivated by default)
916							Returns : value of recursivMapping (a scalar)
917							Args : on set, new value (a scalar or undef, optional)
918
919							Goal : When we have a clone mapped on a chromosome, the recursive mapping maps the features of the clone on the chromosome.
920
921							=cut
922
923							sub recursivMapping{
924	0			0	1		my $self = shift;
925
926	0	0					return $self->{'recursivMapping'} = shift if @_;
927	0						return $self->{'recursivMapping'};
928							}
929
930							=head2 srcfeatureslice
931
932							Title : srcfeatureslice
933							Usage : $obj->srcfeatureslice
934							Function: Flag for activating
935							Returns : value of srcfeatureslice
936							Args : on set, new value (a scalar or undef, optional)
937							Desc : Allows to use a featureslice of type featureloc_slice(srcfeat_id, int, int)
938							Important : this and recursivMapping are mutually exclusives
939
940							=cut
941
942							sub srcfeatureslice{
943	0			0	1		my $self = shift;
944	0	0					return $self->{'srcfeatureslice'} = shift if @_;
945	0						return $self->{'srcfeatureslice'};
946							}
947
948							=head2 do2Level
949
950							Title : do2Level
951							Usage : $obj->do2Level
952							Function: Flag for activating the fetching of 2levels in segment->features
953							Returns : value of do2Level
954							Args : on set, new value (a scalar or undef, optional)
955
956							=cut
957
958							sub do2Level{
959	0			0	1		my $self = shift;
960	0	0					return $self->{'do2Level'} = shift if @_;
961	0						return $self->{'do2Level'};
962							}
963
964
965							=head2 dbh
966
967							Title : dbh
968							Usage : $obj->dbh($newval)
969							Function:
970							Returns : value of dbh (a scalar)
971							Args : on set, new value (a scalar or undef, optional)
972
973
974							=cut
975
976							sub dbh {
977	0			0	1		my $self = shift;
978
979	0	0					return $self->{'dbh'} = shift if @_;
980	0	0					return $self->{'dbh'} if defined ($self->{'dbh'});
981
982							#uh oh, there isn't already a dbh object, try to create one
983	0						my $dsn = $self->{db_args}->{dsn};
984	0						my $username = $self->{db_args}->{username};
985	0						my $password = $self->{db_args}->{password};
986
987	0	0					my $dbh = DBI->connect( $dsn, $username, $password )
988							or $self->throw("unable to open db handle");
989	0						$self->{'dbh'} = $dbh;
990
991	0	0	0				if (exists($self->{-enable_seqscan}) && ! $self->{-enable_seqscan}){
992	0						$dbh->do("set enable_seqscan=0");
993							}
994
995	0						return $self->{'dbh'};
996							}
997
998							=head2 term2name
999
1000							Title : term2name
1001							Usage : $obj->term2name($newval)
1002							Function: When called with a hashref, sets cvterm.cvterm_id to cvterm.name
1003							mapping hashref; when called with an int, returns the name
1004							corresponding to that cvterm_id; called with no arguments, returns
1005							the hashref.
1006							Returns : see above
1007							Args : on set, a hashref; to retrieve a name, an int; to retrieve the
1008							hashref, none.
1009
1010							Note: should be replaced by Bio::GMOD::Util->term2name
1011
1012							=cut
1013
1014							sub term2name {
1015	0			0	1		my $self = shift;
1016	0						my $arg = shift;
1017
1018	0	0					if(ref($arg) eq 'HASH'){
		0
1019	0						return $self->{'term2name'} = $arg;
1020							} elsif($arg) {
1021	0						return $self->{'term2name'}{$arg};
1022							} else {
1023	0						return $self->{'term2name'};
1024							}
1025							}
1026
1027
1028							=head2 name2term
1029
1030							Title : name2term
1031							Usage : $obj->name2term($newval)
1032							Function: When called with a hashref, sets cvterm.name to cvterm.cvterm_id
1033							mapping hashref; when called with a string, returns the cvterm_id
1034							corresponding to that name; called with no arguments, returns
1035							the hashref.
1036							Returns : see above
1037							Args : on set, a hashref; to retrieve a cvterm_id, a string; to retrieve
1038							the hashref, none.
1039
1040							Note: Should be replaced by Bio::GMOD::Util->name2term
1041
1042							=cut
1043
1044							sub name2term {
1045	0			0	1		my $self = shift;
1046	0						my $arg = shift;
1047	0						my $cvnames = shift;
1048
1049	0	0					if(ref($cvnames) eq 'HASH'){ $self->{'termcvs'} = $cvnames; }
	0
1050	0	0					if(ref($arg) eq 'HASH'){
		0
1051	0						return $self->{'name2term'} = $arg;
1052							} elsif($arg) {
1053	0						return $self->{'name2term'}{$arg};
1054
1055							#rather than trying to guess what a caller wants, the caller will have
1056							#deal with what comes... (ie, a scalar or a hash).
1057							# my $val= $self->{'name2term'}{$arg};
1058							# if(ref($val)) {
1059							# #? use $cvnames scalar here to pick which cv?
1060							# my @val= @$val;
1061							# foreach $val (@val) {
1062							# my $cv= $self->{'termcvs'}{$val};
1063							# return $val if($cv =~ /^(SO\|sequence)/i); # want sofa_id
1064							# }
1065							# return $val[0]; #? 1st is best guess
1066							# }
1067							# return $val;
1068
1069							} else {
1070	0						return $self->{'name2term'};
1071							}
1072							}
1073
1074							=head2 segment
1075
1076							Title : segment
1077							Usage : $db->segment(@args);
1078							Function: create a segment object
1079							Returns : segment object(s)
1080							Args : see below
1081
1082							This method generates a Bio::Das::SegmentI object (see
1083							L<Bio::Das::SegmentI>). The segment can be used to find overlapping
1084							features and the raw sequence.
1085
1086							When making the segment() call, you specify the ID of a sequence
1087							landmark (e.g. an accession number, a clone or contig), and a
1088							positional range relative to the landmark. If no range is specified,
1089							then the entire region spanned by the landmark is used to generate the
1090							segment.
1091
1092							Arguments are -option=E<gt>value pairs as follows:
1093
1094							-name ID of the landmark sequence.
1095
1096							-class A namespace qualifier. It is not necessary for the
1097							database to honor namespace qualifiers, but if it
1098							does, this is where the qualifier is indicated.
1099
1100							-version Version number of the landmark. It is not necessary for
1101							the database to honor versions, but if it does, this is
1102							where the version is indicated.
1103
1104							-start Start of the segment relative to landmark. Positions
1105							follow standard 1-based sequence rules. If not specified,
1106							defaults to the beginning of the landmark.
1107
1108							-end End of the segment relative to the landmark. If not specified,
1109							defaults to the end of the landmark.
1110
1111							The return value is a list of Bio::Das::SegmentI objects. If the method
1112							is called in a scalar context and there are no more than one segments
1113							that satisfy the request, then it is allowed to return the segment.
1114							Otherwise, the method must throw a "multiple segment exception".
1115
1116							=cut
1117
1118							sub segment {
1119	0			0	1		my $self = shift;
1120	0						my ($name,$base_start,$stop,$end,$class,$version,$db_id,$feature_id,$srcfeature_id)
1121							= $self->_rearrange([qw(NAME
1122							START
1123							STOP
1124							END
1125							CLASS
1126							VERSION
1127							DB_ID
1128							FEATURE_ID
1129							SRCFEATURE_ID )],@_);
1130							# lets the Segment class handle all the lifting.
1131
1132	0		0				$end \|\|= $stop;
1133	0						return $self->_segclass->new($name,$self,$base_start,$end,$db_id,0,$feature_id,$srcfeature_id);
1134							}
1135
1136							=head2 features
1137
1138							Title : features
1139							Usage : $db->features(@args)
1140							Function: get all features, possibly filtered by type
1141							Returns : a list of Bio::SeqFeatureI objects
1142							Args : see below
1143							Status : public
1144
1145							This routine will retrieve features in the database regardless of
1146							position. It can be used to return all features, or a subset based on
1147							their type
1148
1149							Arguments are -option=E<gt>value pairs as follows:
1150
1151							-type List of feature types to return. Argument is an array
1152							of Bio::Das::FeatureTypeI objects or a set of strings
1153							that can be converted into FeatureTypeI objects.
1154
1155							-callback A callback to invoke on each feature. The subroutine
1156							will be passed each Bio::SeqFeatureI object in turn.
1157
1158							-attributes A hash reference containing attributes to match.
1159
1160							The -attributes argument is a hashref containing one or more attributes
1161							to match against:
1162
1163							-attributes => { Gene => 'abc-1',
1164							Note => 'confirmed' }
1165
1166							Attribute matching is simple exact string matching, and multiple
1167							attributes are ANDed together.
1168
1169							If one provides a callback, it will be invoked on each feature in
1170							turn. If the callback returns a false value, iteration will be
1171							interrupted. When a callback is provided, the method returns undef.
1172
1173							=cut
1174
1175							sub features {
1176	0			0	1		my $self = shift;
1177	0						my ($type,$types,$callback,$attributes,$iterator,$feature_id,$seq_id,$start,$end) =
1178							$self->_rearrange([qw(TYPE TYPES CALLBACK ATTRIBUTES ITERATOR FEATURE_ID SEQ_ID START END)],
1179							@_);
1180
1181	0		0				$type \|\|= $types; #GRRR
1182
1183	0						warn "Chado,features: $type\n" if DEBUG;
1184	0						my @features = $self->_segclass->features(-type => $type,
1185							-attributes => $attributes,
1186							-callback => $callback,
1187							-iterator => $iterator,
1188							-factory => $self,
1189							-feature_id=>$feature_id,
1190							-seq_id =>$seq_id,
1191							-start =>$start,
1192							-end =>$end,
1193							);
1194	0						return @features;
1195							}
1196
1197							sub get_seq_stream {
1198	0			0	1		my $self = shift;
1199							#warn "get_seq_stream args:@_";
1200	0						my ($type,$types,$callback,$attributes,$iterator,$feature_id,$seq_id,$start,$end) =
1201							$self->_rearrange([qw(TYPE TYPES CALLBACK ATTRIBUTES ITERATOR FEATURE_ID SEQ_ID START END)],
1202							@_);
1203
1204	0						my @features = $self->_segclass->features(-type => $type,
1205							-attributes => $attributes,
1206							-callback => $callback,
1207							-iterator => $iterator,
1208							-factory => $self,
1209							-feature_id=>$feature_id,
1210							-seq_id =>$seq_id,
1211							-start =>$start,
1212							-end =>$end,
1213							);
1214
1215	0						return Bio::DB::Das::ChadoIterator->new(\@features);
1216
1217
1218							}
1219
1220							=head2 types
1221
1222							Title : types
1223							Usage : $db->types(@args)
1224							Function: return list of feature types in database
1225							Returns : a list of Bio::Das::FeatureTypeI objects
1226							Args : see below
1227
1228							This routine returns a list of feature types known to the database. It
1229							is also possible to find out how many times each feature occurs.
1230
1231							Arguments are -option=E<gt>value pairs as follows:
1232
1233							-enumerate if true, count the features
1234
1235							The returned value will be a list of Bio::Das::FeatureTypeI objects
1236							(see L<Bio::Das::FeatureTypeI>.
1237
1238							If -enumerate is true, then the function returns a hash (not a hash
1239							reference) in which the keys are the stringified versions of
1240							Bio::Das::FeatureTypeI and the values are the number of times each
1241							feature appears in the database.
1242
1243							NOTE: This currently raises a "not-implemented" exception, as the
1244							BioSQL API does not appear to provide this functionality.
1245
1246							=cut
1247
1248							sub types {
1249	0			0	1		my $self = shift;
1250	0						my ($enumerate) = $self->_rearrange([qw(ENUMERATE)],@_);
1251	0						$self->throw_not_implemented;
1252							#if lincoln didn't need to implement it, neither do I!
1253							}
1254
1255							=head2 get_feature_by_alias, get_features_by_alias
1256
1257							Title : get_features_by_alias
1258							Usage : $db->get_feature_by_alias(@args)
1259							Function: return list of feature whose name or synonyms match
1260							Returns : a list of Bio::Das::Chado::Segment::Feature objects
1261							Args : See below
1262
1263							This method finds features matching the criteria outlined by the
1264							supplied arguments. Wildcards (*) are allowed. Valid arguments are:
1265
1266							=over
1267
1268							=item -name
1269
1270							=item -class
1271
1272							=item -ref (refrence sequence)
1273
1274							=item -start
1275
1276							=item -end
1277
1278							=back
1279
1280							=cut
1281
1282
1283							sub get_feature_by_alias {
1284	0			0	1		my $self = shift;
1285	0						my @args = @_;
1286
1287	0	0					if ( @args == 1 ) {
1288	0						@args = (-name => $args[0]);
1289							}
1290
1291	0						push @args, -operation => 'by_alias';
1292
1293	0						return $self->_by_alias_by_name(@args);
1294							}
1295
1296							*get_features_by_alias = \&get_feature_by_alias;
1297
1298							=head2 get_feature_by_name, get_features_by_name
1299
1300							Title : get_features_by_name
1301							Usage : $db->get_features_by_name(@args)
1302							Function: return list of feature whose names match
1303							Returns : a list of Bio::Das::Chado::Segment::Feature objects
1304							Args : See below
1305
1306							This method finds features matching the criteria outlined by the
1307							supplied arguments. Wildcards (*) are allowed. Valid arguments are:
1308
1309							=over
1310
1311							=item -name
1312
1313							=item -class
1314
1315							=item -ref (refrence sequence)
1316
1317							=item -start
1318
1319							=item -end
1320
1321							=back
1322
1323							=cut
1324
1325
1326							*get_features_by_name = \&get_feature_by_name;
1327
1328							sub get_feature_by_name {
1329	0			0	1		my $self = shift;
1330	0						my @args = @_;
1331
1332	0						warn "in get_feature_by_name, args:@args" if DEBUG;
1333
1334	0	0					if ( @args == 1 ) {
1335	0						@args = (-name => $args[0]);
1336							}
1337
1338	0						push @args, -operation => 'by_name';
1339
1340	0						return $self->_by_alias_by_name(@args);
1341							}
1342
1343							=head2 _by_alias_by_name
1344
1345							Title : _by_alias_by_name
1346							Usage : $db->_by_alias_by_name(@args)
1347							Function: return list of feature whose names match
1348							Returns : a list of Bio::Das::Chado::Segment::Feature objects
1349							Args : See below
1350
1351							A private method that implements the get_features_by_name and
1352							get_features_by_alias methods. It accepts the same args as
1353							those methods, plus an addtional on (-operation) which is
1354							either 'by_alias' or 'by_name' to indicate what rule it is to
1355							use for finding features.
1356
1357							=cut
1358
1359							sub _by_alias_by_name {
1360	0			0			my $self = shift;
1361
1362	0						my ($name, $class, $ref, $base_start, $stop, $operation)
1363							= $self->_rearrange([qw(NAME CLASS REF START END OPERATION)],@_);
1364
1365	0	0					if ($name =~ /^id:(\d+)/) {
1366	0						my $feature_id = $1;
1367	0						return $self->get_feature_by_feature_id($feature_id);
1368							}
1369
1370	0						my @temp_array = split /:/, $name;
1371	0	0					if (scalar @temp_array == 2) {
1372	0	0					if ($self->source2dbxref($temp_array[0]) > 0) {
1373	0						warn "assuming that the name with a colon ($name) is coming from a multiple hit search result (ie, is of the form 'source:name'";
1374	0						$name = $temp_array[1];
1375							}
1376							}
1377
1378							##I think this is where this should go...
1379							# We need to split the query on whitespaces, and replace the whitespace with &
1380							# so that we can get proper full test search on allquery terms [LP]
1381							# but it only make sense to do this for full text searching [Scott]
1382	0	0					$name = $self->_search_name_prep_spaces($name) if $self->fulltext;
1383
1384
1385	0						my $wildcard = 0;
1386	0	0					if ($name =~ /\*/) {
1387	0						$wildcard = 1;
1388	0						undef $class;
1389							}
1390
1391	0						warn "name:$name in get_feature_by_name" if DEBUG;
1392
1393							# $name = $self->_search_name_prep($name);
1394
1395							# warn "name after protecting _ and % in the string:$name\n" if DEBUG;
1396
1397	0						my (@features,$sth);
1398
1399							# get feature_id
1400							# foreach feature_id, get the feature info
1401							# then get src_feature stuff (chromosome info) and create a parent feature,
1402
1403	0						my ($select_part,$from_part,$where_part);
1404
1405	0	0					if ($class) {
1406							#warn "class: $class";
1407	0	0	0				my $type = ($class eq 'CDS' && $self->inferCDS)
1408							? $self->name2term('polypeptide')
1409							: $self->name2term($class);
1410	0	0					return unless $type;
1411
1412	0	0					if (ref $type eq 'ARRAY') {
		0
1413	0						$type = join(',',@$type);
1414							}
1415							elsif (ref $type eq 'HASH') {
1416	0						$type = join(',', map($$type{$_}, keys %$type) );
1417							}
1418	0						$from_part = " feature f ";
1419	0						$where_part.= " AND f.type_id in ( $type ) ";
1420							}
1421
1422	0	0	0				if ($self->organism_id and $operation eq 'by_alias') {
		0
1423	0	0					$where_part.= $self->use_all_feature_names()
1424							? " AND afn.organism_id =".$self->organism_id
1425							: " AND f.organism_id =".$self->organism_id;
1426							}
1427							elsif ($self->organism_id) {
1428	0						$where_part.= " AND f.organism_id =".$self->organism_id;
1429							}
1430
1431	0	0					if ( $operation eq 'by_alias') {
1432	0	0					if ($self->use_all_feature_names()) {
1433	0						$select_part = "select distinct afn.feature_id \n";
1434	0	0					$from_part = $from_part ?
1435							"$from_part join all_feature_names afn using (feature_id) "
1436							: "all_feature_names afn ";
1437
1438	0						my $alias_only_where;
1439							# There is no difference in the wildcard or non-wildcard call to
1440							# the full-text search [LP]
1441	0	0					if ($self->fulltext) {
		0
1442	0						$alias_only_where = "where afn.searchable_name @@ to_tsquery(?)";
1443							}
1444							elsif ($wildcard) {
1445	0						$alias_only_where = "where lower(afn.name) like ?";
1446							}
1447							else {
1448	0						$alias_only_where = "where lower(afn.name) = ?";
1449							}
1450
1451	0	0					$where_part = $where_part ?
1452							"$alias_only_where $where_part"
1453							: $alias_only_where;
1454
1455							}
1456							else { #need to use the synonym table
1457	0						$select_part = "select distinct fs.feature_id \n";
1458	0	0					$from_part = $from_part ?
1459							"$from_part join feature_synonym fs using (feature_id), synonym s "
1460							: "feature_synonym fs, synonym s ";
1461
1462	0						my $alias_only_where;
1463							# Again, with full-text there's no difference in wildcard/non-wildcard [LP]
1464	0	0					if ($self->fulltext) {
		0
1465	0						$alias_only_where = "where fs.synonym_id = s.synonym_id and\n"
1466							. "s.searchable_synonym_sgml @@ to_tsquery(?)";
1467							}
1468							elsif ($wildcard) {
1469	0						$alias_only_where = "where fs.synonym_id = s.synonym_id and\n"
1470							. "lower(s.synonym_sgml) like ?";
1471							}
1472							else {
1473	0						$alias_only_where = "where fs.synonym_id = s.synonym_id and\n"
1474							. "lower(s.synonym_sgml) = ?";
1475							}
1476
1477
1478	0	0					$where_part = $where_part ?
1479							"$alias_only_where $where_part"
1480							: $alias_only_where;
1481							}
1482							}
1483							else { #searching by name only
1484	0						$select_part = "select f.feature_id ";
1485	0						$from_part = " feature f ";
1486
1487	0						my $name_only_where;
1488							# Using full text search we only need create one WHERE clause, regardless of
1489							# the presence of any wildcards... [LP]
1490	0	0					if ($self->fulltext) {
		0
1491	0						$name_only_where = "where f.searchable_name @@ to_tsquery(?)";
1492							}
1493							elsif ($wildcard) {
1494	0						$name_only_where = "where lower(f.name) like ?";
1495							}
1496							else {
1497	0						$name_only_where = "where lower(f.name) = ?";
1498							}
1499
1500
1501	0	0					$where_part = $where_part ?
1502							"$name_only_where $where_part"
1503							: $name_only_where;
1504							}
1505
1506	0						my $query = $select_part . ' FROM ' . $from_part . $where_part;
1507
1508							# Added at suggestion of James Ward to strip confusing/fatal whitespace,
1509							# so we trim leading and trailing whitespace before processing query [LP]
1510	0						$query =~ s/^[ \t\r\n]+\|[ \t\r\n]$//g;
1511
1512
1513	0						warn "first get_feature_by_name query:$query" if DEBUG;
1514
1515	0						$sth = $self->dbh->prepare($query);
1516
1517	0	0					if ($wildcard) {
1518	0						$name = $self->_search_name_prep($name);
1519	0						warn "name after protecting _ and % in the string:$name\n" if DEBUG;
1520							}
1521
1522							# what the hell happened to the lower casing!!!
1523							# left over bug from making the adaptor case insensitive?
1524
1525							#$name = lc($name);
1526
1527	0	0					$sth->execute(lc($name)) or $self->throw("getting the feature_ids failed");
1528
1529							# this makes performance awful! It does a wildcard search on a view
1530							# that has several selects in it. For any reasonably sized database,
1531							# this won't work.
1532							#
1533							# if ($sth->rows < 1 and
1534							# $class ne 'chromosome' and
1535							# $class ne 'region' and
1536							# $class ne 'contig') {
1537							#
1538							# my $query;
1539							# ($name,$query) = $self->_complex_search($name,$class,$wildcard);
1540							#
1541							# warn "complex_search query:$query\n";
1542							#
1543							# $sth = $self->dbh->prepare($query);
1544							# $sth->execute($name) or $self->throw("getting the feature_ids failed");
1545							#
1546							# }
1547
1548
1549							# prepare sql queries for use in while loops
1550
1551	0						my $isth = $self->dbh->prepare("
1552							select f.feature_id, f.name, f.type_id,f.uniquename,af.significance as score,
1553							fl.fmin,fl.fmax,fl.strand,fl.phase, fl.srcfeature_id, fd.dbxref_id,
1554							f.is_obsolete,f.seqlen
1555							from feature f join featureloc fl using (feature_id)
1556							left join analysisfeature af using (feature_id)
1557							left join feature_dbxref fd using (feature_id)
1558							where
1559							f.feature_id = ? and fl.rank=0 and
1560							(fd.dbxref_id is null or fd.dbxref_id in
1561							(select dbxref_id from dbxref where db_id = ?))
1562							order by fl.srcfeature_id
1563							");
1564
1565	0						my $jsth = $self->dbh->prepare("select name from feature
1566							where feature_id = ?");
1567
1568							# getting feature info
1569	0						while (my $feature_id_ref = $sth->fetchrow_hashref) {
1570
1571	0						warn "feature_id in features method loop:".$$feature_id_ref{feature_id} if DEBUG;
1572
1573	0	0					$isth->execute($$feature_id_ref{'feature_id'},$self->gff_source_db_id)
1574							or $self->throw("getting feature info failed");
1575
1576	0	0					if ($isth->rows == 0) { #this might be a srcfeature
1577
1578	0						warn "$name might be a srcfeature" if DEBUG;
1579
1580	0						my $is_srcfeature_query = $self->dbh->prepare("
1581							select srcfeature_id from featureloc where srcfeature_id=? limit 1
1582							");
1583	0	0					$is_srcfeature_query->execute($$feature_id_ref{'feature_id'})
1584							or $self->throw("checking if feature is a srcfeature failed");
1585
1586	0						$sth->finish;
1587	0						$isth->finish;
1588	0						$jsth->finish;
1589	0	0					if ($is_srcfeature_query->rows == 1) {#yep, its a srcfeature
1590							#build a feature out of the srcfeature:
1591	0						warn "Yep, $name is a srcfeature" if DEBUG;
1592
1593	0						my @args = ($name) ;
1594	0	0					push @args, $base_start if $base_start;
1595	0	0					push @args, $stop if $stop;
1596
1597	0						warn "srcfeature args:$args[0]" if DEBUG;
1598
1599	0						my @seg = ($self->segment(@args));
1600
1601	0						$is_srcfeature_query->finish;
1602	0						return @seg;
1603							}
1604							else {
1605	0						$is_srcfeature_query->finish;
1606	0						return; #I got nothing!
1607							}
1608							}
1609
1610							#getting chromosome info
1611	0						my $old_srcfeature_id=-1;
1612	0						my $parent_segment;
1613	0						while (my $hashref = $isth->fetchrow_hashref) {
1614
1615	0	0	0				next if ($$hashref{'is_obsolete'} and !$self->allow_obsolete);
1616
1617	0	0	0				if ($self->refclass && $$hashref{type_id} == $self->refclass) {
1618							#this feature is supposed to be a reference feature
1619	0						my $f = Bio::DB::Das::Chado::Segment->new($$hashref{'name'},
1620							$self,
1621							1,$$hashref{'seqlen'},
1622							$$hashref{'uniquename'},
1623							undef,
1624							$$hashref{'feature_id'},
1625							undef);
1626	0						push @features,$f;
1627	0						next;
1628							}
1629
1630	0	0					if ($$hashref{'srcfeature_id'} != $old_srcfeature_id) {
1631	0	0					$jsth->execute($$hashref{'srcfeature_id'})
1632							or die ("getting assembly info failed");
1633	0						my $src_name = $jsth->fetchrow_hashref;
1634	0						warn "src_name:$$src_name{'name'}" if DEBUG;
1635	0						$parent_segment =
1636							Bio::DB::Das::Chado::Segment->new($$src_name{'name'},$self,undef,undef,undef,undef,$$hashref{'srcfeature_id'});
1637	0						$old_srcfeature_id=$$hashref{'srcfeature_id'};
1638							}
1639							#now build the feature
1640
1641							#Recursive Mapping
1642	0	0					if ($self->{recursivMapping}){
1643							#Fetch the recursively mapped position
1644
1645	0						my $sql = "select fl.fmin,fl.fmax,fl.strand,fl.phase
1646							from feat_remapping(?) fl
1647							where fl.rank=0";
1648	0						my $recurs_sth = $self->dbh->prepare($sql);
1649	0						$sql =~ s/\s+/ /gs ;
1650	0						$recurs_sth->execute($$feature_id_ref{'feature_id'});
1651	0						my $hashref2 = $recurs_sth->fetchrow_hashref;
1652	0						my $strand_ = $$hashref{'strand'};
1653	0						my $phase_ = $$hashref{'phase'};
1654	0						my $fmax_ = $$hashref{'fmax'};
1655	0						my $interbase_start;
1656
1657							#If unable to recursively map we assume that the feature is
1658							# already mapped on the lowest refseq
1659
1660	0	0					if ($recurs_sth->rows != 0){
1661	0						$interbase_start = $$hashref2{'fmin'};
1662	0						$strand_ = $$hashref2{'strand'};
1663	0						$phase_ = $$hashref2{'phase'};
1664	0						$fmax_ = $$hashref2{'fmax'};
1665							}else{
1666	0						$interbase_start = $$hashref{'fmin'};
1667							}
1668	0						$base_start = $interbase_start +1;
1669
1670	0		0				my $type_obj = Bio::DB::GFF::Typename->new(
1671							$self->term2name($$hashref{type_id}),
1672							$self->dbxref2source($$hashref{dbxref_id}) \|\| "");
1673
1674	0						my $feat = Bio::DB::Das::Chado::Segment::Feature->new(
1675							$self,
1676							$parent_segment,
1677							$parent_segment->seq_id,
1678							$base_start,$fmax_,
1679							$self->term2name($$hashref{'type_id'}),
1680							$$hashref{'score'},
1681							$strand_,
1682							$phase_,
1683							$$hashref{'name'},
1684							$$hashref{'uniquename'},
1685							$$hashref{'feature_id'}
1686							);
1687	0						push @features, $feat;
1688	0						$recurs_sth->finish;
1689							#END Recursive Mapping
1690							} else {
1691
1692	0	0	0				if ($class && $class eq 'CDS' && $self->inferCDS) {
			0
1693							#$hashref holds info for the polypeptide
1694	0						my $poly_min = $$hashref{'fmin'};
1695	0						my $poly_max = $$hashref{'fmax'};
1696	0						my $poly_fid = $$hashref{'feature_id'};
1697
1698							#get fid of parent transcript
1699	0						my $id_list = ref $self->term2name('derives_from') eq 'ARRAY'
1700	0	0					? "in (".join(",",@{$self->term2name('derives_from')}).")"
1701							: "= ".$self->term2name('derives_from');
1702
1703	0						my $transcript_query = $self->dbh->prepare("
1704							SELECT object_id FROM feature_relationship
1705							WHERE type_id ".$id_list
1706							." AND subject_id = $poly_fid"
1707							);
1708
1709	0						$transcript_query->execute;
1710	0						my ($trans_id) = $transcript_query->fetchrow_array;
1711
1712	0						$id_list = ref $self->term2name('part_of') eq 'ARRAY'
1713	0	0					? "in (".join(",",@{$self->term2name('part_of')}).")"
1714							: "= ".$self->term2name('part_of');
1715
1716							#now get exons that are part of the transcript
1717	0						my $exon_query = $self->dbh->prepare("
1718							SELECT f.feature_id,f.name,f.type_id,f.uniquename,
1719							af.significance as score,fl.fmin,fl.fmax,fl.strand,
1720							fl.phase, fl.srcfeature_id, fd.dbxref_id,f.is_obsolete
1721							FROM feature f join featureloc fl using (feature_id)
1722							left join analysisfeature af using (feature_id)
1723							left join feature_dbxref fd using (feature_id)
1724							WHERE
1725							f.type_id = ".$self->term2name('exon')." and f.feature_id in
1726							(select subject_id from feature_relationship where object_id = $trans_id and
1727							type_id ".$id_list." ) and
1728							fl.rank=0 and
1729							(fd.dbxref_id is null or fd.dbxref_id in
1730							(select dbxref_id from dbxref where db_id =".$self->gff_source_db_id."))
1731							");
1732
1733	0						$exon_query->execute();
1734
1735	0						while (my $exonref = $exon_query->fetchrow_hashref) {
1736	0	0					next if ($$exonref{fmax} < $poly_min);
1737	0	0					next if ($$exonref{fmin} > $poly_max);
1738	0	0	0				next if ($$exonref{is_obsolete} and !$self->allow_obsolete);
1739
1740	0						my ($start,$stop);
1741	0	0	0				if ($$exonref{fmin} <= $poly_min && $$exonref{fmax} >= $poly_max) {
1742							#the exon starts before polypeptide start
1743	0						$start = $poly_min +1;
1744							}
1745							else {
1746	0						$start = $$exonref{fmin} +1;
1747							}
1748
1749	0	0	0				if ($$exonref{fmax} >= $poly_max && $$exonref{fmin} <= $poly_min) {
1750	0						$stop = $poly_max;
1751							}
1752							else {
1753	0						$stop = $$exonref{fmax};
1754							}
1755
1756	0		0				my $type_obj = Bio::DB::GFF::Typename->new(
1757							'CDS',
1758							$self->dbxref2source($$hashref{'dbxref_id'}) \|\| '');
1759
1760
1761	0						my $feat = Bio::DB::Das::Chado::Segment::Feature->new(
1762							$self,
1763							$parent_segment,
1764							$parent_segment->seq_id,
1765							$start,$stop,
1766							$type_obj,
1767							$$hashref{'score'},
1768							$$hashref{'strand'},
1769							$$hashref{'phase'},
1770							$$hashref{'name'},
1771							$$hashref{'uniquename'},
1772							$$hashref{'feature_id'}
1773							);
1774	0						push @features, $feat;
1775							}
1776	0						$exon_query->finish;
1777	0						$transcript_query->finish;
1778							}
1779							else {
1780							#the normal case where you don't infer CDS features
1781	0						my $interbase_start = $$hashref{'fmin'};
1782	0						$base_start = $interbase_start +1;
1783
1784	0		0				my $type_obj = Bio::DB::GFF::Typename->new(
1785							$self->term2name($$hashref{'type_id'}),
1786							$self->dbxref2source($$hashref{'dbxref_id'}) \|\| '');
1787
1788	0	0					my $srcf = 1 if ($self->refclass() == $$hashref{'type_id'}) ;
1789
1790	0	0					my $feat = Bio::DB::Das::Chado::Segment::Feature->new(
		0
1791							$self,
1792							$srcf ? '' : $parent_segment,
1793							$srcf ? '' : $parent_segment->seq_id,
1794							$base_start,$$hashref{'fmax'},
1795							$type_obj,
1796							$$hashref{'score'},
1797							$$hashref{'strand'},
1798							$$hashref{'phase'},
1799							$$hashref{'name'},
1800							$$hashref{'uniquename'},
1801							$$hashref{'feature_id'}
1802							);
1803
1804	0						push @features, $feat;
1805							}
1806							}
1807							}
1808							}
1809	0						$sth->finish;
1810	0						$isth->finish;
1811	0						$jsth->finish;
1812	0						return @features;
1813							}
1814
1815							# Handle spaces in search query; we need to avoid replacing
1816							# ' & ' with ' & & & ', though... [LP]
1817							sub _search_name_prep_spaces {
1818	0			0			my $self = shift;
1819	0						my $name = shift;
1820
1821	0						$name =~ s/\s&\s/ /g; # Replace any user-defined ' & ' with spaces...
1822	0						$name =~ s/\s/ & /g; # then replace all spaces with ' & '
1823
1824	0						return $name;
1825							}
1826
1827
1828							*fetch_feature_by_name = \&get_feature_by_name;
1829
1830							sub get_feature_by_feature_id {
1831	0			0	0		my $self = shift;
1832	0						my $f_id = shift;
1833
1834	0						my @features = $self->features(-feature_id => $f_id);
1835	0						return @features;
1836							}
1837
1838							sub get_feature_by_id {
1839	0			0	1		my $self = shift;
1840	0						my $f_id = shift;
1841
1842	0						my @features = $self->features(-feature_id => $f_id);
1843	0						return $features[0];
1844							}
1845
1846							fetch = get_feature_by_primary_id = \&get_feature_by_feature_id;
1847
1848							sub _complex_search {
1849	0			0			my $self = shift;
1850	0						my $name = shift;
1851	0						my $class= shift;
1852
1853	0						warn "name before wildcard subs:$name\n" if DEBUG;
1854
1855	0	0					$name = "\%$name" unless (0 == index($name, "%"));
1856	0	0					$name = "$name%" unless (0 == index(reverse($name), "%"));
1857
1858	0						warn "name after wildcard subs:$name\n" if DEBUG;
1859
1860	0						my $select_part = "select ga.feature_id ";
1861	0						my $from_part = "from gffatts ga ";
1862	0						my $where_part = "where lower(ga.attribute) like ? ";
1863
1864	0	0					if ($class) {
1865	0						my $type = $self->name2term($class);
1866	0	0					return unless $type;
1867	0						$from_part .= ", feature f ";
1868	0						$where_part.= "and ga.feature_id = f.feature_id and "
1869							."f.type_id = $type";
1870							}
1871
1872	0	0					$where_part .= " and organism_id = ".$self->organism_id
1873							if $self->organism_id;
1874
1875	0						my $query = $select_part . $from_part . $where_part;
1876	0						return ($name, $query);
1877							}
1878
1879							sub _search_name_prep {
1880	0			0			my $self = shift;
1881	0						my $name = shift;
1882
1883	0	0					if ($self->fulltext) {
1884
1885							# For full-text search, the appropriate extension wildcard
1886							# is ':*' for prefix-matching. There are limitations to
1887							# full-text search in that we cannot find internal parts of
1888							# words, so wildcards can only come at the ends of phrases/
1889							# lexemes. Internal * are converted by tsquery into & [LP]
1890	0						$name =~ s/_/\\_/g; # escape underscores in name
1891	0						$name =~ s/(?<=\s)\*//g; # lose prefix wildcards (word start)
1892	0						$name =~ s/(?<=^)\*//g; # lose prefix wildcards (query start)
1893	0						$name =~ s/\(?=$)/:\/g; # convert trailing * (query end) into :*
1894	0						$name =~ s/\(?=\s)/:\/g; # convert trailing * (word end) into :*
1895
1896							}
1897							else {
1898	0						$name =~ s/_/\\_/g; # escape underscores in name
1899	0						$name =~ s/\%/\\%/g; # ditto for percent signs
1900
1901	0						$name =~ s/\*/%/g;
1902							}
1903
1904	0						return $name;
1905							}
1906
1907
1908							=head2 srcfeature2name
1909
1910							returns a srcfeature name given a srcfeature_id
1911
1912							=cut
1913
1914							sub srcfeature2name {
1915	0			0	1		my $self = shift;
1916	0						my $id = shift;
1917
1918	0	0					return $self->{'srcfeature_id'}->{$id} if $self->{'srcfeature_id'}->{$id};
1919
1920	0						my $sth = $self->dbh->prepare("select name from feature "
1921							."where feature_id = ?");
1922	0						$sth->execute($id);
1923
1924	0						my $hashref = $sth->fetchrow_hashref;
1925	0						$self->{'srcfeature_id'}->{$id} = $$hashref{'name'};
1926
1927	0						$sth->finish;
1928	0						return $self->{'srcfeature_id'}->{$id};
1929							}
1930
1931							=head2 gff_source_db_id
1932
1933							Title : gff_source_db_id
1934							Function: caches the chado db_id from the chado db table
1935
1936							=cut
1937
1938							sub gff_source_db_id {
1939	0			0	1		my $self = shift;
1940	0	0					return $self->{'gff_source_db_id'} if $self->{'gff_source_db_id'};
1941
1942	0						my $sth = $self->dbh->prepare("
1943							select db_id from db
1944							where name = 'GFF_source'");
1945	0						$sth->execute();
1946
1947	0						my $hashref = $sth->fetchrow_hashref;
1948	0						$self->{'gff_source_db_id'} = $$hashref{'db_id'};
1949
1950	0						$sth->finish;
1951	0						return $self->{'gff_source_db_id'};
1952							}
1953
1954							=head2 gff_source_dbxref_id
1955
1956							Gets dbxref_id for features that have a gff source associated
1957
1958							=cut
1959
1960							sub source2dbxref {
1961	0			0	0		my $self = shift;
1962	0						my $source = shift;
1963
1964							#Why was this here? Debugging?
1965							#return 'fake' unless defined($self->gff_source_db_id);
1966
1967	0	0					return $self->{'source_dbxref'}->{$source}
1968							if $self->{'source_dbxref'}->{$source};
1969
1970	0						my $sth = $self->dbh->prepare("
1971							select dbxref_id,accession from dbxref where db_id= ?"
1972							);
1973	0						$sth->execute($self->gff_source_db_id);
1974
1975	0						while (my $hashref = $sth->fetchrow_hashref) {
1976	0						warn "s2d:accession:$$hashref{accession}, dbxref_id:$$hashref{dbxref_id}\n" if DEBUG;
1977
1978	0						$self->{'source_dbxref'}->{$$hashref{accession}} = $$hashref{dbxref_id};
1979	0						$self->{'dbxref_source'}->{$$hashref{dbxref_id}} = $$hashref{accession};
1980							}
1981
1982	0						$sth->finish;
1983	0						return $self->{'source_dbxref'}->{$source};
1984							}
1985
1986							=head2 dbxref2source
1987
1988							returns the source (string) when given a dbxref_id
1989
1990							=cut
1991
1992							sub dbxref2source {
1993	0			0	1		my $self = shift;
1994	0						my $dbxref = shift;
1995
1996	0	0					return '.' unless defined($self->gff_source_db_id);
1997
1998	0						warn "d2s:dbxref:$dbxref\n" if DEBUG;
1999
2000	0	0	0				if (defined ($self->{'dbxref_source'}) && $dbxref
			0
2001							&& defined ($self->{'dbxref_source'}->{$dbxref})) {
2002	0						return $self->{'dbxref_source'}->{$dbxref};
2003							}
2004
2005	0						my $sth = $self->dbh->prepare("
2006							select dbxref_id,accession from dbxref where db_id=?"
2007							);
2008	0						$sth->execute($self->gff_source_db_id);
2009
2010	0	0					if ($sth->rows < 1) {
2011	0						$sth->finish;
2012	0						return ".";
2013							}
2014
2015	0						while (my $hashref = $sth->fetchrow_hashref) {
2016	0						warn "d2s:accession:$$hashref{accession}, dbxref_id:$$hashref{dbxref_id}\n"
2017							if DEBUG;
2018
2019	0						$self->{'source_dbxref'}->{$$hashref{accession}} = $$hashref{dbxref_id};
2020	0						$self->{'dbxref_source'}->{$$hashref{dbxref_id}} = $$hashref{accession};
2021							}
2022
2023	0						$sth->finish;
2024	0	0	0				if (defined $self->{'dbxref_source'} && $dbxref
			0
2025							&& defined $self->{'dbxref_source'}->{$dbxref}) {
2026	0						return $self->{'dbxref_source'}->{$dbxref};
2027							} else {
2028	0	0					$self->{'dbxref_source'}->{$dbxref} = "." if $dbxref;
2029	0						return ".";
2030							}
2031							}
2032
2033							=head2 source_dbxref_list
2034
2035							Title : source_dbxref_list
2036							Usage : @all_dbxref_ids = $db->source_dbxref_list()
2037							Function: Gets a list of all dbxref_ids that are used for GFF sources
2038							Returns : a comma delimited string that is a list of dbxref_ids
2039							Args : none
2040							Status : public
2041
2042							This method queries the database for all dbxref_ids that are used
2043							to store GFF source terms.
2044
2045							=cut
2046
2047							sub source_dbxref_list {
2048	0			0	1		my $self = shift;
2049	0	0					return $self->{'source_dbxref_list'} if defined $self->{'source_dbxref_list'};
2050
2051	0						my $query = "select dbxref_id from dbxref where db_id = ?";
2052	0						my $sth = $self->dbh->prepare($query);
2053	0						$sth->execute($self->gff_source_db_id);
2054
2055							#unpack it here to make it easier
2056	0						my @dbxref_list;
2057	0						while (my $row = $sth->fetchrow_arrayref) {
2058	0						push @dbxref_list, $$row[0];
2059							}
2060
2061	0						$sth->finish;
2062	0						$self->{'source_dbxref_list'} = join (",",@dbxref_list);
2063	0						return $self->{'source_dbxref_list'};
2064							}
2065
2066
2067							=head2 search_notes
2068
2069							Title : search_notes
2070							Usage : $db->search_notes($search_term,$max_results)
2071							Function: full-text search on features, ENSEMBL-style
2072							Returns : an array of [$name,$description,$score]
2073							Args : see below
2074							Status : public
2075
2076							This routine performs a full-text search on feature attributes (which
2077							attributes depend on implementation) and returns a list of
2078							[$name,$description,$score], where $name is the feature ID (accession?),
2079							$description is a human-readable description such as a locus line, and
2080							$score is the match strength.
2081
2082							=cut
2083
2084							=head2 NOT YET ACTIVE: search_notes IS IN TESTING STAGE
2085
2086							sub search_notes {
2087							my $self = shift;
2088							my ($search_string,$limit) = @_;
2089							my $limit_str;
2090							if (defined $limit) {
2091							$limit_str = " LIMIT $limit ";
2092							} else {
2093							$limit_str = "";
2094							}
2095
2096							# so here's the plan:
2097							# if there is only 1 word, do 1-3
2098							# 1. search for accessions like $string.'%'--if any are found, quit and return them
2099							# 2. search for feature.name like $string.'%'--if found, keep and continue
2100							# 3. search somewhere in analysis like $string.'%'--if found, keep and continue
2101							# if there is more than one word, don't search accessions
2102							# 4. search each word anded together like '%'.$string.'%' --if found, keep and continue
2103							# 5. search somewhere in analysis like '%'.$string.'%'
2104
2105							# $self->dbh->trace(1);
2106
2107							my @search_str = split /\s+/, $search_string;
2108							my $qsearch_term = $self->dbh->quote($search_str[0]);
2109							my $like_str = "( (dbx.accession ~* $qsearch_term OR \n"
2110							." f.name ~* $qsearch_term) ";
2111							for (my $i=1;$i<(scalar @search_str);$i++) {
2112							$qsearch_term = $self->dbh->quote($search_str[$i]);
2113							$like_str .= "and \n";
2114							$like_str .= " (dbx.accession ~* $qsearch_term OR \n"
2115							." f.name ~* $qsearch_term) ";
2116							}
2117							$like_str .= ")";
2118
2119							my $sth = $self->dbh->prepare("
2120							select dbx.accession,f.name,0
2121							from feature f, dbxref dbx, feature_dbxref fd
2122							where
2123							f.feature_id = fd.feature_id and
2124							fd.dbxref_id = dbx.dbxref_id and
2125							$like_str
2126							$limit_str
2127							");
2128							$sth->execute or throw ("couldn't execute keyword query");
2129
2130							my @results;
2131							while (my ($acc, $name, $score) = $sth->fetchrow_array) {
2132							$score = sprintf("%.2f",$score);
2133							push @results, [$acc, $name, $score];
2134							}
2135							$sth->finish;
2136							return @results;
2137							}
2138
2139							=cut
2140
2141							=head2 attributes
2142
2143							Title : attributes
2144							Usage : @attributes = $db->attributes($id,$name)
2145							Function: get the "attributes" on a particular feature
2146							Returns : an array of string
2147							Args : feature ID [, attribute name]
2148							Status : public
2149
2150							This method is intended as a "work-alike" to Bio::DB::GFF's
2151							attributes method, which has the following returns:
2152
2153							Called in list context, it returns a list. If called in a
2154							scalar context, it returns the first value of the attribute
2155							if an attribute name is provided, otherwise it returns a
2156							hash reference in which the keys are attribute names
2157							and the values are anonymous arrays containing the values.
2158
2159							=cut
2160
2161							sub attributes {
2162	0			0	1		my $self = shift;
2163	0						my ($id,$tag) = @_;
2164
2165							#get feature_id
2166
2167	0						my $query = "select feature_id from feature where uniquename = ?";
2168	0	0					$query .= " and organism_id = ".$self->organism_id if $self->organism_id;
2169
2170	0						my $sth = $self->dbh->prepare($query);
2171	0	0					$sth->execute($id) or $self->throw("failed to get feature_id in attributes");
2172	0						my $hashref = $sth->fetchrow_hashref;
2173	0						my $feature_id = $$hashref{'feature_id'};
2174
2175	0	0					if (defined $tag) {
2176	0						my $query = "SELECT attribute FROM gfffeatureatts(?) WHERE type = ?";
2177	0						$sth = $self->dbh->prepare($query);
2178	0						$sth->execute($feature_id,$tag);
2179							} else {
2180	0						my $query = "SELECT type,attribute FROM gfffeatureatts(?)";
2181	0						$sth = $self->dbh->prepare($query);
2182	0						$sth->execute($feature_id);
2183							}
2184
2185	0						my $arrayref = $sth->fetchall_arrayref;
2186
2187	0						my @array = @$arrayref;
2188	0	0	0				($sth->finish && return ()) if scalar @array == 0;
2189
2190							## dgg; ugly patch to copy polypeptide/protein residues into 'translation' attribute
2191							# need to add to gfffeatureatts ..
2192	0	0	0				if (!defined $tag \|\| $tag eq 'translation') {
2193	0						$sth = $self->dbh->prepare("select type_id from feature where feature_id = ?");
2194	0						$sth->execute($feature_id); # or $self->throw("failed to get feature_id in attributes");
2195	0						$hashref = $sth->fetchrow_hashref;
2196	0						my $type_id = $$hashref{'type_id'};
2197							## warn("DEBUG: dgg ugly prot. patch; type=$type_id for ftid=$feature_id\n");
2198
2199	0	0	0				if( $self->name2term('polypeptide')
			0
			0
			0
			0
2200							&& $type_id
2201							&& $type_id == $self->name2term('polypeptide')
2202							\|\| $self->name2term('protein')
2203							&& $type_id
2204							&& $type_id == $self->name2term('protein')
2205							) {
2206	0						$sth = $self->dbh->prepare("select residues from feature where feature_id = ?");
2207	0						$sth->execute($feature_id); # or $self->throw("failed to get feature_id in attributes");
2208	0						$hashref = $sth->fetchrow_hashref;
2209	0						my $aa = $$hashref{'residues'};
2210	0	0					if($aa) {
2211							## warn("DEBUG: dgg ugly prot. patch; aalen=",length($aa),"\n");
2212							## this wasn't working till I added in a featureprop 'translation=dummy' .. why?
2213	0	0					if($tag) { push( @array, [ $aa]); }
	0
2214	0						else { push( @array, ['translation', $aa]); }
2215							}
2216							}
2217							}
2218
2219	0						my @result;
2220	0						foreach my $lineref (@array) {
2221	0						my @la = @$lineref;
2222	0						push @result, @la;
2223							}
2224
2225	0						$sth->finish;
2226	0	0					return @result if wantarray;
2227
2228	0	0					return $result[0] if $tag;
2229
2230	0						my %result;
2231
2232	0						foreach my $lineref (@array) {
2233	0						my ($key,$value) = splice(@$lineref,0,2);
2234	0						push @{$result{$key}},$value;
	0
2235							}
2236	0						return \%result;
2237
2238							}
2239
2240
2241
2242							=head2 _segclass
2243
2244							Title : _segclass
2245							Usage : $class = $db->_segclass
2246							Function: returns the perl class that we use for segment() calls
2247							Returns : a string containing the segment class
2248							Args : none
2249							Status : reserved for subclass use
2250
2251							=cut
2252
2253							#sub default_class {return 'Sequence' }
2254							## URGI changes
2255							sub default_class {
2256
2257	0			0	0		my $self = shift;
2258
2259							#dgg
2260	0	0	0				unless( $self->{'reference_class'} \|\| @_ ) {
2261	0						$self->{'reference_class'} = $self->chado_reference_class();
2262							}
2263
2264	0	0					if(@_) {
2265	0						my $checkref = $self->check_chado_reference_class(@_);
2266	0	0					unless($checkref) {
2267	0						$self->throw("unable to find reference_class '$_[0]' feature in the database");
2268							}
2269							}
2270
2271	0	0	0				$self->{'reference_class'} = shift \|\| 'Sequence' if(@_);
2272
2273	0						return $self->{'reference_class'};
2274
2275							}
2276
2277							sub check_chado_reference_class {
2278	0			0	0		my $self = shift;
2279	0	0					if(@_) {
2280	0						my $refclass= shift;
2281	0						my $type_id = $self->name2term($refclass);
2282	0						my $query = "select feature_id from feature where type_id = ?";
2283	0						my $sth = $self->dbh->prepare($query);
2284	0	0					$sth->execute($type_id) or $self->throw("trying to find chado_reference_class");
2285	0						my $data = $sth->fetchrow_hashref();
2286	0						my $refid= $$data{'feature_id'};
2287							## warn("check_chado_reference_class: $refclass = $type_id -> $refid"); # DEBUG
2288
2289	0						$sth->finish;
2290	0						return $refid;
2291							}
2292							}
2293
2294							=head2 chado_reference_class
2295
2296							Title : chado_reference_class
2297							Usage : $obj->chado_reference_class()
2298							Function: get or return the ID to use for Gbrowse map reference class
2299							using cvtermprop table, value = MAP_REFERENCE_TYPE
2300							Returns : the cvterm.name
2301							Args : to return the id, none; to determine the id, 1
2302							See also: default_class, refclass_feature_id
2303
2304							Optionally test that user/config supplied ref class is indeed a proper
2305							chado feature type.
2306
2307							=cut
2308
2309
2310							sub chado_reference_class {
2311	0			0	1		my $self = shift;
2312	0	0					return $self->{'chado_reference_class'} if($self->{'chado_reference_class'});
2313
2314	0						my $chado_reference_class='Sequence'; # default ?
2315
2316	0						my $query = "select cvterm_id from cvtermprop where value = ?";
2317	0						my $sth = $self->dbh->prepare($query);
2318	0	0					$sth->execute(MAP_REFERENCE_TYPE) or $self->throw("trying to find chado_reference_class");
2319	0						my $data = $sth->fetchrow_hashref(); #? FIXME: could be many values *?
2320	0						my $ref_cvtermid = $$data{'cvterm_id'};
2321
2322	0						$sth->finish;
2323	0	0					if($ref_cvtermid) {
2324	0						$query = "select name from cvterm where cvterm_id = ?";
2325	0						$sth = $self->dbh->prepare($query);
2326	0	0					$sth->execute($ref_cvtermid) or $self->throw("trying to find chado_reference_class");
2327	0						$data = $sth->fetchrow_hashref();
2328	0	0					$chado_reference_class = $$data{'name'} if ($$data{'name'});
2329							# warn("chado_reference_class: $chado_reference_class = $ref_cvtermid"); # DEBUG
2330	0						$sth->finish;
2331							}
2332
2333	0						return $self->{'chado_reference_class'} = $chado_reference_class;
2334							}
2335
2336
2337							=head2 refclass_feature_id
2338
2339							Title : refclass_feature_id
2340							Usage : $self->refclass_srcfeature_id()
2341							Function: Used to store the feature_id of the reference class feature we are working on (e.g. contig, supercontig)
2342							With this feature we can filter out all the request to be sure we are extracting a feature located on
2343							the reference class feature.
2344							Returns : A scalar
2345							Args : The feature_id on setting
2346
2347							=cut
2348
2349							sub refclass_feature_id {
2350
2351	0			0	1		my $self = shift;
2352
2353	0	0					$self->{'refclass_feature_id'} = shift if(@_);
2354
2355	0						return $self->{'refclass_feature_id'};
2356
2357							}
2358
2359
2360	0			0			sub _segclass { return SEGCLASS }
2361
2362	0			0	0		sub absolute {return}
2363
2364							#implemented exactly the same as Bio::DB::SeqFeature::Store::DBI::mysql
2365							sub clone {
2366							#this is EO's implementation for the BDSFS::DBI::Pg implementation
2367							#he says Pg's clone method is flawed
2368	0			0	1		my $self = shift;
2369
2370							# my $dsn = $self->{db_args}->{dsn};
2371							# my $user = $self->{db_args}->{username};
2372							# my $pass = $self->{db_args}->{password};
2373
2374							# $self->dbh()->{InactiveDestroy} = 1;
2375							# my $new_dbh = DBI->connect($dsn,$user,$pass) or $self->throw($DBI::errstr);
2376							# $new_dbh->{InactiveDestroy} = 1;
2377							# $self->{dbh} = $new_dbh unless $self->is_temp;
2378
2379
2380							# this is the BDSFS::DBI::mysql implementation
2381	0						$self->{dbh}{InactiveDestroy} = 1;
2382	0						$self->{dbh} = $self->{dbh}->clone({})
2383							#magic from perlmonks to silence a warning:
2384							# http://www.perlmonks.org/?node_id=594175
2385							# without the empty {} you get warnings about unrecognised attribute name
2386							; # unless $self->is_temp;
2387							}
2388
2389
2390							#this sub doesn't work and just causes annoying warnings
2391							#sub DESTROY {
2392							# my $self = shift;
2393							# $self->dbh->disconnect;
2394							# return;
2395							#}
2396
2397							=head1 LEFTOVERS FROM BIO::DB::GFF NEEDED FOR DAS
2398
2399							these methods should probably be declared in an interface class
2400							that Bio::DB::GFF implements. for instance, the aggregator methods
2401							could be described in Bio::SeqFeature::AggregatorI
2402
2403							=cut
2404
2405	0			0	0		sub aggregators { return(); }
2406
2407							=head1 END LEFTOVERS
2408
2409							=cut
2410
2411
2412							package Bio::DB::Das::ChadoIterator;
2413
2414							sub new {
2415	0			0			my $package = shift;
2416	0						my $features = shift;
2417	0						return bless $features,$package;
2418							}
2419
2420							sub next_seq {
2421	0			0			my $self = shift;
2422	0	0					return unless @$self;
2423	0						my $next_feature = shift @$self;
2424	0						return $next_feature;
2425							}
2426
2427							1;
2428
2429
2430