File Coverage

Bio/SeqIO/chadoxml.pm

Criterion	Covered	Total	%
statement	60	681	8.8
branch	0	306	0.0
condition	0	206	0.0
subroutine	20	42	47.6
pod	12	12	100.0
total	92	1247	7.3

line	stmt	bran	cond	sub	pod	time	code
1							#
2							# BioPerl module for Bio::SeqIO::chadoxml
3							#
4							# Peili Zhang
5							#
6							# You may distribute this module under the same terms as perl itself
7
8							# POD documentation - main docs before the code
9
10							=head1 NAME
11
12							Bio::SeqIO::chadoxml - chadoxml sequence output stream
13
14							=head1 SYNOPSIS
15
16							It is probably best not to use this object directly, but
17							rather go through the SeqIO handler system:
18
19							$writer = Bio::SeqIO->new(-file => ">chado.xml",
20							-format => 'chadoxml');
21
22							# assume you already have Sequence or SeqFeature objects
23							$writer->write_seq($seq_obj);
24
25							#after writing all seqs
26							$writer->close_chadoxml();
27
28
29
30							=head1 DESCRIPTION
31
32							This object can transform Bio::Seq objects to chadoxml flat
33							file databases (for chadoxml DTD, see
34							http://gmod.cvs.sourceforge.net/gmod/schema/chado/dat/chado.dtd).
35
36							This is currently a write-only module.
37
38							$seqio = Bio::SeqIO->new(-file => '>outfile.xml',
39							-format => 'chadoxml'
40							-suppress_residues => 1,
41							-allow_residues => 'chromosome',
42							);
43
44							# we have a Bio::Seq object $seq which is a gene located on
45							# chromosome arm 'X', to be written out to chadoxml
46							# before converting to chadoxml, $seq object B be transformed
47							# so that all the coordinates in $seq are against the source
48							# feature to be passed into Bio::SeqIO::chadoxml->write_seq()
49							# -- chromosome arm X in the example below.
50
51							$seqio->write_seq(-seq=>$seq,
52							-genus => 'Homo',
53							-species => 'sapiens',
54							-seq_so_type=>'gene',
55							-src_feature=>'X',
56							-src_feat_type=>'chromosome_arm',
57							-nounflatten=>1,
58							-is_analysis=>'true',
59							-data_source=>'GenBank');
60
61							The chadoxml output of Bio::SeqIO::chadoxml-Ewrite_seq() method can be
62							passed to the loader utility in XORT package
63							(http://gmod.cvs.sourceforge.net/gmod/schema/XMLTools/XORT/)
64							to be loaded into chado.
65
66							This object is currently implemented to work with sequence and
67							annotation data from whole genome projects deposited in GenBank. It
68							may not be able to handle all different types of data from all
69							different sources.
70
71							In converting a Bio::Seq object into chadoxml, a top-level feature is
72							created to represent the object and all sequence features inside the
73							Bio::Seq object are treated as subfeatures of the top-level
74							feature. The Bio::SeqIO::chadoxml object calls
75							Bio::SeqFeature::Tools::Unflattener to unflatten the flat feature list
76							contained in the subject Bio::Seq object, to build gene model
77							containment hierarchy conforming to chado central dogma model: gene
78							--E mRNA --E exons and protein.
79
80							Destination of data in the subject Bio::Seq object $seq is as following:
81
82							*$seq->display_id: name of the top-level feature;
83
84							*$seq->accession_number: if defined, uniquename and
85							feature_dbxref of the top-level
86							feature if not defined,
87							$seq->display_id is used as the
88							uniquename of the top-level feature;
89
90							*$seq->molecule: transformed to SO type, used as the feature
91							type of the top-level feature if -seq_so_type
92							argument is supplied, use the supplied SO type
93							as the feature type of the top-level feature;
94
95							*$seq->species: organism of the top-level feature;
96
97							*$seq->seq: residues of the top-level feature;
98
99							*$seq->is_circular, $seq->division: feature_cvterm;
100
101							*$seq->keywords, $seq->desc, comments: featureprop;
102
103							*references: pub and feature_pub;
104							medline/pubmed ids: pub_dbxref;
105							comments: pubprop;
106
107							*feature "source" span: featureloc for top-level feature;
108
109							*feature "source" db_xref: feature_dbxref for top-level feature;
110
111							*feature "source" other tags: featureprop for top-level feature;
112
113							*subfeature 'symbol' or 'label' tag: feature uniquename, if
114							none of these is present, the chadoxml object
115							generates feature uniquenames as:
116							--
117							(e.g. foo-mRNA--1000..3000);
118
119							*gene model: feature_relationship built based on the
120							containment hierarchy;
121
122							*feature span: featureloc;
123
124							*feature accession numbers: feature_dbxref;
125
126							*feature tags (except db_xref, symbol and gene): featureprop;
127
128							Things to watch out for:
129
130							*chado schema change: this version works with the chado
131							version tagged chado_1_01 in GMOD CVS.
132
133							*feature uniquenames: especially important if using XORT
134							loader to do incremental load into
135							chado. may need pre-processing of the
136							source data to put the correct
137							uniquenames in place.
138
139							*pub uniquenames: chadoxml->write_seq() has the FlyBase policy
140							on pub uniquenames hard-coded, it assigns
141							pub uniquenames in the following way: for
142							journals and books, use ISBN number; for
143							published papers, use MEDLINE ID; for
144							everything else, use FlyBase unique
145							identifier FBrf#. need to modify the code to
146							implement your policy. look for the comments
147							in the code.
148
149							*for pubs possibly existing in chado but with no knowledge of
150							its uniquename:put "op" as "match", then need to run the
151							output chadoxml through a special filter that
152							talks to chado database and tries to find the
153							pub by matching with the provided information
154							instead of looking up by the unique key. after
155							matching, the filter also resets the "match"
156							operation to either "force" (default), or
157							"lookup", or "insert", or "update". the
158							"match" operation is for a special FlyBase use
159							case. please modify to work according to your
160							rules.
161
162							*chado initialization for loading:
163
164							cv & cvterm: in the output chadoxml, all cv's and
165							cvterm's are lookup only. Therefore,
166							before using XORT loader to load the
167							output into chado, chado must be
168							pre-loaded with all necessary CVs and
169							CVterms, including "SO" , "property
170							type", "relationship type", "pub type",
171							"pubprop type", "pub relationship type",
172							"sequence topology", "GenBank feature
173							qualifier", "GenBank division". A pub by
174							the uniquename 'nullpub' of type 'null
175							pub' needs to be inserted.
176
177							=head1 FEEDBACK
178
179							=head2 Mailing Lists
180
181							User feedback is an integral part of the evolution of this and other
182							Bioperl modules. Send your comments and suggestions preferably to one
183							of the Bioperl mailing lists. Your participation is much appreciated.
184
185							bioperl-l@bioperl.org - General discussion
186							http://bioperl.org/wiki/Mailing_lists - About the mailing lists
187
188							=head2 Support
189
190							Please direct usage questions or support issues to the mailing list:
191
192							I
193
194							rather than to the module maintainer directly. Many experienced and
195							reponsive experts will be able look at the problem and quickly
196							address it. Please include a thorough description of the problem
197							with code and data examples if at all possible.
198
199							=head2 Reporting Bugs
200
201							Report bugs to the Bioperl bug tracking system to help us keep track
202							the bugs and their resolution.
203							Bug reports can be submitted via the web:
204
205							https://github.com/bioperl/bioperl-live/issues
206
207							=head1 AUTHOR - Peili Zhang
208
209							Email peili@morgan.harvard.edu
210
211							=head1 APPENDIX
212
213							The rest of the documentation details each of the object
214							methods. Internal methods are usually preceded with a _
215
216							=cut
217
218							# Let the code begin...
219
220							package Bio::SeqIO::chadoxml;
221	2			2		594	use strict;
	2					12
	2					47
222	2			2		529	use English;
	2					2208
	2					8
223
224	2			2		705	use Carp;
	2					10
	2					79
225	2			2		9	use Data::Dumper;
	2					3
	2					61
226	2			2		8	use XML::Writer;
	2					2
	2					39
227	2			2		531	use IO::File;
	2					1469
	2					176
228	2			2		12	use IO::Handle;
	2					3
	2					51
229	2			2		718	use Bio::Seq;
	2					6
	2					88
230	2			2		807	use Bio::Seq::RichSeq;
	2					4
	2					95
231	2			2		476	use Bio::SeqIO::FTHelper;
	2					4
	2					52
232	2			2		620	use Bio::Species;
	2					5
	2					50
233	2			2		12	use Bio::Seq::SeqFactory;
	2					3
	2					37
234	2			2		500	use Bio::Factory::SequenceStreamI;
	2					4
	2					39
235	2			2		10	use Bio::SeqFeature::Generic;
	2					3
	2					35
236	2			2		9	use Bio::Annotation::Collection;
	2					3
	2					31
237	2			2		463	use Bio::Annotation::Comment;
	2					4
	2					53
238	2			2		485	use Bio::Annotation::Reference;
	2					4
	2					50
239	2			2		11	use Bio::Annotation::DBLink;
	2					4
	2					32
240	2			2		987	use Bio::SeqFeature::Tools::Unflattener;
	2					5
	2					319
241
242							#global variables
243							undef(my %finaldatahash); #data from Bio::Seq object stored in a hash
244							undef(my %datahash); #data from Bio::Seq object stored in a hash
245
246							my $chadotables = 'feature featureprop feature_relationship featureloc feature_cvterm cvterm cv feature_pub pub pub_dbxref pub_author author pub_relationship pubprop feature_dbxref dbxref db synonym feature_synonym';
247
248							my %fkey = (
249							"cvterm.cv_id" => "cv",
250							"cvterm.dbxref_id" => "dbxref",
251							"dbxref.db_id" => "db",
252							"feature.type_id" => "cvterm",
253							"feature.organism_id" => "organism",
254							"feature.dbxref_id" => "dbxref",
255							"featureprop.type_id" => "cvterm",
256							"feature_pub.pub_id" => "pub",
257							"feature_cvterm.cvterm_id" => "cvterm",
258							"feature_cvterm.pub_id" => "pub",
259							"feature_cvterm.feature_id" => "feature",
260							"feature_dbxref.dbxref_id" => "dbxref",
261							"feature_relationship.object_id" => "feature",
262							"feature_relationship.subject_id" => "feature",
263							"feature_relationship.type_id" => "cvterm",
264							"featureloc.srcfeature_id" => "feature",
265							"pub.type_id" => "cvterm",
266							"pub_dbxref.dbxref_id" => "dbxref",
267							"pub_author.author_id" => "author",
268							"pub_relationship.obj_pub_id" => "pub",
269							"pub_relationship.subj_pub_id" => "pub",
270							"pub_relationship.type_id" => "cvterm",
271							"pubprop.type_id" => "cvterm",
272							"feature_synonym.feature_id" => "feature",
273							"feature_synonym.synonym_id" => "synonym",
274							"feature_synonym.pub_id" => "pub",
275							"synonym.type_id" => "cvterm",
276							);
277
278							my %cv_name = (
279							'relationship' => 'relationship',
280							'sequence' => 'sequence',
281							'feature_property' => 'feature_property',
282							);
283
284							my %feattype_args2so = (
285							"aberr" => "aberration_junction",
286							# "conflict" => "sequence_difference",
287							# "polyA_signal" => "polyA_signal_sequence",
288							"variation" => "sequence_variant",
289							"mutation1" => "point_mutation", #for single-base mutation
290							"mutation2" => "sequence_variant", #for multi-base mutation
291							"rescue" => "rescue_fragment",
292							# "rfrag" => "restriction_fragment",
293							"protein_bind" => "protein_binding_site",
294							"misc_feature" => "region",
295							# "prim_transcript" => "primary_transcript",
296							"CDS" => "polypeptide",
297							"reg_element" => "regulatory_region",
298							"seq_variant" => "sequence_variant",
299							"mat_peptide" => "mature_peptide",
300							"sig_peptide" => "signal_peptide",
301							);
302
303							undef(my %organism);
304
305	2			2		16	use base qw(Bio::SeqIO);
	2					4
	2					741
306
307							sub _initialize {
308
309	0			0			my($self,%args) = @_;
310
311	0						$self->SUPER::_initialize(%args);
312	0	0					unless( defined $self->sequence_factory ) {
313	0						$self->sequence_factory(Bio::Seq::SeqFactory->new
314							(-verbose => $self->verbose(),
315							-type => 'Bio::Seq::RichSeq'));
316							}
317							#optional arguments that can be passed in
318							$self->suppress_residues($args{'-suppress_residues'})
319	0	0					if defined $args{'-suppress_residues'};
320
321							$self->allow_residues($args{'-allow_residues'})
322	0	0					if defined $args{'-allow_residues'};
323	0						return;
324							}
325
326							=head2 write_seq
327
328							Title : write_seq
329							Usage : $stream->write_seq(-seq=>$seq, -seq_so_type=>$seqSOtype,
330							-src_feature=>$srcfeature,
331							-src_feat_type=>$srcfeattype,
332							-nounflatten=>0 or 1,
333							-is_analysis=>'true' or 'false',
334							-data_source=>$datasource)
335							Function: writes the $seq object (must be seq) into chadoxml.
336							Returns : 1 for success and 0 for error
337							Args : A Bio::Seq object $seq, optional $seqSOtype, $srcfeature,
338							$srcfeattype, $nounflatten, $is_analysis and $data_source.
339
340							When $srcfeature (a string, the uniquename of the source feature) is given, the
341							location and strand information of the top-level feature against the source
342							feature will be derived from the sequence feature called 'source' of the $seq
343							object, a featureloc record is generated for the top -level feature on
344							$srcfeature. when $srcfeature is given, $srcfeattype must also be present. All
345							feature coordinates in $seq should be against $srcfeature. $seqSOtype is the
346							optional SO term to use as the type of the top-level feature. For example, a
347							GenBank data file for a Drosophila melanogaster genome scaffold has the molecule
348							type of "DNA", when converting to chadoxml, a $seqSOtype argument of
349							"golden_path_region" can be supplied to save the scaffold as a feature of type
350							"golden_path_region" in chadoxml, instead of "DNA". a feature with primary tag
351							of 'source' must be present in the sequence feature list of $seq, to describe the
352							whole sequence record.
353
354							In the current implementation:
355
356							=over 3
357
358							=item *
359
360							non-mRNA records
361
362							A top-level feature of type $seq-Ealphabet is generated for the whole GenBank
363							record, features listed are unflattened for DNA records to build gene model
364							feature graph, and for the other types of records all features in $seq are
365							treated as subfeatures of the top-level feature.
366
367							=item *
368
369							mRNA records
370
371							If a 'gene' feature is present, it B have a /symbol or /label tag to
372							contain the uniquename of the gene. a top-level feature of type 'gene' is
373							generated. the mRNA is written as a subfeature of the top-level gene feature,
374							and the other sequence features listed in $seq are treated as subfeatures of the
375							mRNA feature.
376
377							=back
378
379							=cut
380
381							sub write_seq {
382	0			0	1		my $usage = <
383							Bio::SeqIO::chadoxml->write_seq()
384							Usage : \$stream->write_seq(-seq=>\$seq,
385							-seq_so_type=>\$SOtype,
386							-src_feature=>\$srcfeature,
387							-src_feat_type=>\$srcfeattype,
388							-nounflatten=>0 or 1,
389							-is_analysis=>'true' or 'false',
390							-data_source=>\$datasource)
391							Args : \$seq : a Bio::Seq object
392							\$SOtype : the SO term to use as the feature type of
393							the \$seq record, optional
394							\$srcfeature : unique name of the source feature, a string
395							containing at least one alphabetical letter
396							(a-z, A-Z), optional
397							\$srcfeattype : feature type of \$srcfeature. one of SO terms.
398							optional
399							when \$srcfeature is given, \$srcfeattype becomes mandatory,
400							\$datasource : source of the sequence annotation data,
401							e.g. 'GenBank' or 'GFF'.
402							EOUSAGE
403
404	0						my ($self,@args) = @_;
405
406	0						my ($seq, $seq_so_type, $srcfeature, $srcfeattype, $nounflatten, $isanalysis, $datasource, $genus, $species) =
407							$self->_rearrange([qw(SEQ
408							SEQ_SO_TYPE
409							SRC_FEATURE
410							SRC_FEAT_TYPE
411							NOUNFLATTEN
412							IS_ANALYSIS
413							DATA_SOURCE
414							GENUS
415							SPECIES
416							)],
417							@args);
418							#print "$seq_so_type, $srcfeature, $srcfeattype\n";
419
420	0	0					if( !defined $seq ) {
421	0						$self->throw("Attempting to write with no seq!");
422							}
423
424	0	0	0				if( ! ref $seq \|\| ! $seq->isa('Bio::Seq::RichSeqI') ) {
425							## FIXME $self->warn(" $seq is not a RichSeqI compliant module. Attempting to dump, but may fail!");
426							}
427
428							# try to get the srcfeature from the seqFeature object
429							# for this to work, the user has to pass in the srcfeature type
430	0	0					if (!$srcfeature) {
431	0	0					if ($seq->can('seq_id')) {
432	0	0					$srcfeature=$seq->seq_id if ($seq->seq_id ne $seq->display_name);
433							}
434							}
435
436							#$srcfeature, when provided, should contain at least one alphabetical letter
437	0	0					if (defined $srcfeature)
438							{
439	0	0					if ($srcfeature =~ /[a-zA-Z]/)
440							{
441	0						chomp($srcfeature);
442							} else {
443	0						$self->throw( $usage );
444							}
445
446							#check for mandatory $srcfeattype
447	0	0					if (! defined $srcfeattype)
448							{
449	0						$self->throw( $usage );
450							#$srcfeattype must be a string of non-whitespace characters
451							} else {
452	0	0					if ($srcfeattype =~ /\S+/) {
453	0						chomp($srcfeattype);
454							} else {
455	0						$self->throw( $usage );
456							}
457							}
458							}
459
460							# variables local to write_seq()
461	0						my $div = undef;
462	0						my $hkey = undef;
463	0						undef(my @top_featureprops);
464	0						undef(my @featuresyns);
465	0						undef(my @top_featurecvterms);
466	0	0					my $name = $seq->display_id if $seq->can('display_id');
467	0	0					$name = $seq->display_name if $seq->can('display_name');
468	0						undef(my @feature_cvterms);
469	0						undef(my %sthash);
470	0						undef(my %dvhash);
471	0						undef(my %h1);
472	0						undef(my %h2);
473	0						my $temp = undef;
474	0						my $ann = undef;
475	0						undef(my @references);
476	0						undef(my @feature_pubs);
477	0						my $ref = undef;
478	0						my $location = undef;
479	0						my $fbrf = undef;
480	0						my $journal = undef;
481	0						my $issue = undef;
482	0						my $volume = undef;
483	0						my $volumeissue = undef;
484	0						my $pages = undef;
485	0						my $year = undef;
486	0						my $pubtype = undef;
487							# my $miniref= undef;
488	0						my $uniquename = undef;
489	0						my $refhash = undef;
490	0						my $feat = undef;
491	0						my $tag = undef;
492	0						my $tag_cv = undef;
493	0						my $ftype = undef;
494	0						my $subfeatcnt = undef;
495	0						undef(my @top_featrels);
496	0						undef (my %srcfhash);
497
498	0						local($^W) = 0; # suppressing warnings about uninitialized fields.
499
500	0	0	0				if (!$name && $seq->can('attributes') ) {
501	0						($name) = $seq->attributes('Alias');
502							}
503
504	0	0	0				if ($seq->can('accession_number') && defined $seq->accession_number && $seq->accession_number ne 'unknown') {
		0	0
		0	0
			0
505	0						$uniquename = $seq->accession_number;
506							} elsif ($seq->can('accession') && defined $seq->accession && $seq->accession ne 'unknown') {
507	0						$uniquename = $seq->accession;
508							} elsif ($seq->can('attributes')) {
509	0						($uniquename) = $seq->attributes('load_id');
510							} else {
511	0						$uniquename = $name;
512							}
513	0						my $len = $seq->length();
514	0	0					if ($len == 0) {
515	0						$len = undef;
516							}
517
518	0						undef(my $gb_type);
519	0	0	0				if (!$seq->can('molecule') \|\| ! defined ($gb_type = $seq->molecule()) ) {
520	0	0					$gb_type = $seq->can('alphabet') ? $seq->alphabet : 'DNA';
521							}
522	0	0					$gb_type = 'DNA' if $ftype eq 'dna';
523	0	0					$gb_type = 'RNA' if $ftype eq 'rna';
524
525	0	0					if(length $seq_so_type > 0) {
526	0	0					if (defined $seq_so_type) {
		0
527	0						$ftype = $seq_so_type;
528							}
529							elsif ($seq->type) {
530	0	0					$ftype = ($seq->type =~ /(.*):/)
531							? $1
532							: $seq->type;
533							}
534							else {
535	0						$ftype = $gb_type;
536							}
537							}
538							else {
539	0						$ftype = $gb_type;
540							}
541
542	0						my %ftype_hash = $self->return_ftype_hash($ftype);
543
544	0	0					if ($species) {
545	0						%organism = ("genus"=>$genus, "species" => $species);
546							}
547							else {
548	0						my $spec = $seq->species();
549	0	0					if (!defined $spec) {
550	0						$self->throw("$seq does not know what organism it is from, which is required by chado. cannot proceed!\n");
551							} else {
552	0						%organism = ("genus"=>$spec->genus(), "species" => $spec->species());
553							}
554							}
555
556	0						my $residues;
557	0	0	0				if (!$self->suppress_residues \|\|
			0
558							($self->suppress_residues && $self->allow_residues eq $ftype)) {
559	0	0					$residues = $seq->seq->isa('Bio::PrimarySeq')
560							? $seq->seq->seq
561							: $seq->seq;
562							}
563							else {
564	0						$residues = '';
565							}
566
567							#set is_analysis flag for gene model features
568	0						undef(my $isanal);
569	0	0	0				if ($ftype eq 'gene' \|\| $ftype eq 'mRNA' \|\| $ftype eq 'exon' \|\| $ftype eq 'protein' \|\| $ftype eq 'polypeptide') {
			0
			0
			0
570	0						$isanal = $isanalysis;
571	0	0					$isanal = 'false' if !defined $isanal;
572							}
573
574							%datahash = (
575	0		0				"name" => $name,
576							"uniquename" => $uniquename,
577							"seqlen" => $len,
578							"residues" => $residues,
579							"type_id" => \%ftype_hash,
580							"organism_id" => \%organism,
581							"is_analysis" => $isanal \|\| 'false',
582							);
583
584	0	0					if (defined $srcfeature) {
585	0						%srcfhash = $self->_srcf_hash($srcfeature,
586							$srcfeattype,
587							\%organism);
588
589	0						my ($phase,$strand);
590	0	0					if ($seq->can('phase')) {
591	0						$phase = $seq->phase;
592							}
593
594	0	0					if ($seq->can('strand')) {
595	0						$strand = $seq->strand;
596							}
597	0						my %fl = (
598							"srcfeature_id" => \%srcfhash,
599							"fmin" => $seq->start - 1,
600							"fmax" => $seq->end,
601							"strand" => $strand,
602							"phase" => $phase,
603							);
604
605	0						$datahash{'featureloc'} = \%fl;
606
607							}
608
609
610							#if $srcfeature is not given, use the Bio::Seq object itself as the srcfeature for featureloc's
611	0	0					if (!defined $srcfeature) {
612	0						$srcfeature = $uniquename;
613	0						$srcfeattype = $ftype;
614							}
615
616							#default data source is 'GenBank'
617	0	0					if (!defined $datasource) {
618	0						$datasource = 'GenBank';
619							}
620
621	0	0					if ($datasource =~ /GenBank/i) {
622							#sequence topology as feature_cvterm
623	0	0	0				if ($seq->can('is_circular') && $seq->is_circular) {
624	0						%sthash = (
625							"cvterm_id" => {'name' => 'circular',
626							'cv_id' => {
627							'name' => 'sequence topology',
628							},
629							},
630							"pub_id" => {'uniquename' => 'nullpub',
631							'type_id' => {
632							'name' => 'null pub',
633							'cv_id' => {
634							'name'=> 'pub type',
635							},
636							},
637							},
638							);
639							} else {
640	0						%sthash = (
641							"cvterm_id" => { 'name' => 'linear',
642							'cv_id' => {
643							'name' => 'sequence topology',
644							}
645							},
646							"pub_id" => {'uniquename' => 'nullpub',
647							'type_id' => {
648							'name' => 'null pub',
649							'cv_id' => {
650							'name'=> 'pub type',
651							},
652							},
653							},
654							);
655							}
656	0						push(@feature_cvterms, \%sthash);
657
658							#division as feature_cvterm
659	0	0	0				if ($seq->can('division') && defined $seq->division()) {
660	0						$div = $seq->division();
661	0						%dvhash = (
662							"cvterm_id" => {'name' => $div,
663							'cv_id' => {
664							'name' => 'GenBank division'}},
665							"pub_id" => {'uniquename' => 'nullpub',
666							'type_id' => {
667							'name' => 'null pub',
668							'cv_id' => {
669							'name'=> 'pub type'},
670							}},
671							);
672	0						push(@feature_cvterms, \%dvhash);
673							}
674
675	0						$datahash{'feature_cvterm'} = \@feature_cvterms;
676							} # closes if GenBank
677
678							#featureprop's
679							#DEFINITION
680	0	0	0				if ($seq->can('desc') && defined $seq->desc()) {
681	0						$temp = $seq->desc();
682
683							my %prophash = (
684							"type_id" => {'name' => 'description',
685							'cv_id' => {
686							'name' =>
687	0						$cv_name{'feature_property'}
688							},
689							},
690							"value" => $temp,
691							);
692
693	0						push(@top_featureprops, \%prophash);
694							}
695
696							#KEYWORDS
697	0	0					if ($seq->can('keywords')) {
698	0						$temp = $seq->keywords();
699
700	0	0	0				if (defined $temp && $temp ne '.' && $temp ne '') {
			0
701							my %prophash = (
702							"type_id" => {'name' => 'keywords',
703							'cv_id' => {
704							'name' =>
705	0						$cv_name{'feature_property'}
706							}
707							},
708							"value" => $temp,
709							);
710
711	0						push(@top_featureprops, \%prophash);
712							}
713							}
714
715							#COMMENT
716	0	0					if ($seq->can('annotation')) {
717	0						$ann = $seq->annotation();
718	0						foreach my $comment ($ann->get_Annotations('comment')) {
719	0						$temp = $comment->as_text();
720							#print "fcomment: $temp\n";
721							my %prophash = (
722							"type_id" => {'name' => 'comment',
723							'cv_id' => {
724							'name' =>
725	0						$cv_name{'feature_property'}
726							}
727							},
728							"value" => $temp,
729							);
730
731	0						push(@top_featureprops, \%prophash);
732							}
733							}
734
735	0						my @top_dbxrefs = ();
736							#feature object from Bio::DB::SeqFeature::Store
737	0	0					if ($seq->can('attributes')) {
738	0						my %attributes = $seq->attributes;
739	0						for my $key (keys %attributes) {
740	0	0					next if ($key eq 'parent_id');
741	0	0					next if ($key eq 'load_id');
742
743	0	0	0				if ($key eq 'Alias') {
		0
		0
		0
744	0						@featuresyns = $self->handle_Alias_tag($seq,@featuresyns);
745							}
746
747							###FIXME deal with Dbxref, Ontology_term,source,
748							elsif ($key eq 'Ontology_term') {
749	0						@top_featurecvterms = $self->handle_Ontology_tag($seq,@top_featurecvterms);
750							}
751
752							elsif ($key eq 'dbxref' or $key eq 'Dbxref') {
753	0						@top_dbxrefs = $self->handle_dbxref($seq, $key, @top_dbxrefs);
754							}
755
756							elsif ($key =~ /^[a-z]/) {
757							@top_featureprops
758	0						= $self->handle_unreserved_tags($seq,$key,@top_featureprops);
759							}
760							}
761							}
762	0						$datahash{'feature_synonym'} = \@featuresyns;
763
764	0	0					if ($seq->can('source')) {
765	0						@top_dbxrefs = $self->handle_source($seq,@top_dbxrefs);
766							}
767
768							#accession and version as feature_dbxref
769	0	0	0				if ($seq->can('accession_number') && defined $seq->accession_number && $seq->accession_number ne 'unknown') {
			0
770	0						my $db = $self->_guess_acc_db($seq, $seq->accession_number);
771	0						my %acchash = (
772							"db_id" => {'name' => $db},
773							"accession" => $seq->accession_number,
774							"version" => $seq->seq_version,
775							);
776	0						my %fdbx = ('dbxref_id' => \%acchash);
777	0						push(@top_dbxrefs, \%fdbx);
778							}
779
780	0	0	0				if( $seq->isa('Bio::Seq::RichSeqI') && defined $seq->get_secondary_accessions() ) {
781	0						my @secacc = $seq->get_secondary_accessions();
782	0						my $acc;
783	0						foreach $acc (@secacc) {
784	0						my %acchash = (
785							"db_id" => {'name' => 'GB'},
786							"accession" => $acc,
787							);
788	0						my %fdbx = ('dbxref_id' => \%acchash);
789	0						push(@top_dbxrefs, \%fdbx);
790							}
791							}
792
793							#GI number
794	0	0	0				if( $seq->isa('Bio::Seq::RichSeqI') && defined ($seq->pid)) {
795	0						my $id = $seq->pid;
796							#print "reftype: ", ref($id), "\n";
797
798							#if (ref($id) eq 'HASH') {
799	0						my %acchash = (
800							"db_id" => {'name' => 'GI'},
801							"accession" => $id,
802							);
803	0						my %fdbx = ('dbxref_id' => \%acchash);
804	0						push (@top_dbxrefs, \%fdbx);
805							}
806
807							#REFERENCES as feature_pub
808	0	0					if (defined $ann) {
809							#get the references
810	0						@references = $ann->get_Annotations('reference');
811	0						foreach $ref (@references) {
812	0						undef(my %pubhash);
813	0						$refhash = $ref->hash_tree();
814	0		0				$location = $ref->location \|\| $refhash->{'location'};
815							#print "location: $location\n";
816
817							#get FBrf#, special for FlyBase SEAN loading
818	0	0					if (index($location, ' ==') >= 0) {
819	0						$location =~ /\s==/;
820							#print "match: $MATCH\n";
821							#print "prematch: $PREMATCH\n";
822							#print "postmatch: $POSTMATCH\n";
823	0						$fbrf = $PREMATCH;
824	0						$location = $POSTMATCH;
825	0						$location =~ s/^\s//;
826							}
827
828							#print "location: $location\n";
829							#unpublished reference
830	0	0					if ($location =~ /Unpublished/) {
		0
		0
831	0						$pubtype = 'unpublished';
832							%pubhash = (
833	0		0				"title" => $ref->title \|\| $refhash->{'title'},
834							#"miniref" => substr($location, 0, 255),
835							#"uniquename" => $fbrf,
836							"type_id" => {'name' => $pubtype, 'cv_id' => {'name' =>'pub type'}}
837							);
838							}
839							#submitted
840							elsif ($location =~ /Submitted/) {
841	0						$pubtype = 'submitted';
842
843							%pubhash = (
844	0		0				"title" => $ref->title \|\| $refhash->{'title'},
845							#"miniref" => substr($location, 0, 255),
846							#"uniquename" => $fbrf,
847							"type_id" => {'name' => $pubtype, 'cv_id' => {'name' =>'pub type'}}
848							);
849
850	0						undef(my $pyear);
851	0						$pyear = $self->_getSubmitYear($location);
852	0	0					if (defined $pyear) {
853	0						$pubhash{'pyear'} = $pyear;
854							}
855							}
856
857							#published journal paper
858							elsif ($location =~ /\D+\s\d+\s$(\d+\|\d+-\d+)$,\s(\d+-\d+\|\d+--\d+)\s$\d\d\d\d$$/) {
859	0						$pubtype = 'paper';
860
861							#parse location to get journal, volume, issue, pages & year
862	0						$location =~ /$\d\d\d\d$$/;
863
864	0						$year = $MATCH;
865	0						my $stuff = $PREMATCH;
866	0						$year =~ s/\(//; #remove the leading parenthesis
867	0						$year =~ s/\)//; #remove the trailing parenthesis
868
869	0						$stuff =~ /,\s(\d+-\d+\|\d+--\d+)\s$/;
870
871	0						$pages = $MATCH;
872	0						$stuff = $PREMATCH;
873	0						$pages =~ s/^, //; #remove the leading comma and space
874	0						$pages =~ s/ $//; #remove the last space
875
876	0						$stuff =~ /\s\d+\s$(\d+\|\d+-\d+)$$/;
877
878	0						$volumeissue = $MATCH;
879	0						$journal = $PREMATCH;
880	0						$volumeissue =~ s/^ //; #remove the leading space
881	0						$volumeissue =~ /$(\d+\|\d+-\d+)$$/;
882	0						$issue = $MATCH;
883	0						$volume = $PREMATCH;
884	0						$issue =~ s/^\(//; #remove the leading parentheses
885	0						$issue =~ s/\)$//; #remove the last parentheses
886	0						$volume =~ s/^\s//; #remove the leading space
887	0						$volume =~ s/\s$//; #remove the last space
888
889							%pubhash = (
890	0		0				"title" => $ref->title \|\| $refhash->{'title'},
891							"volume" => $volume,
892							"issue" => $issue,
893							"pyear" => $year,
894							"pages" => $pages,
895							#"miniref" => substr($location, 0, 255),
896							#"miniref" => ' ',
897							#"uniquename" => $fbrf,
898							"type_id" => {'name' => $pubtype, 'cv_id' => {'name' =>'pub type'}},
899							"pub_relationship" => {
900							'obj_pub_id' => {
901							'uniquename' => $journal,
902							'title' => $journal,
903							#'miniref' => substr($journal, 0, 255),
904							'type_id' =>{'name' => 'journal',
905							'cv_id' =>
906							{'name' => 'pub type'
907							},
908							},
909							#'pubprop' =>{'value'=> $journal,
910							# 'type_id'=>{'name' => 'abbreviation', 'cv_id' => {'name' => 'pubprop type'}},
911							# },
912							},
913							'type_id' => {
914							'name' => 'published_in',
915							'cv_id' => {
916							'name' => 'pub relationship type'},
917							},
918							},
919							);
920							}
921
922							#other references
923							else {
924	0						$pubtype = 'other';
925							%pubhash = (
926	0		0				"title" => $ref->title \|\| $refhash->{'title'},
927							#"miniref" => $fbrf,
928							"type_id" => {
929							'name' => $pubtype,
930							'cv_id' => {'name' =>'pub type'}
931							}
932							);
933							}
934
935							#pub_author
936	0						my $autref = $self->_getRefAuthors($ref);
937	0	0					if (defined $autref) {
938	0						$pubhash{'pub_author'} = $autref;
939							}
940							# if no author and is type 'submitted' and has submitter address, use the first 100 characters of submitter address as the author lastname.
941							else {
942	0	0					if ($pubtype eq 'submitted') {
943	0						my $autref = $self->_getSubmitAddr($ref);
944	0	0					if (defined $autref) {
945	0						$pubhash{'pub_author'} = $autref;
946							}
947							}
948							}
949
950							#$ref->comment as pubprop
951							#print "ref comment: ", $ref->comment, "\n";
952							#print "ref comment: ", $refhash->{'comment'}, "\n";
953	0	0	0				if (defined $ref->comment \|\| defined $refhash->{'comment'}) {
954	0		0				my $comnt = $ref->comment \|\| $refhash->{'comment'};
955							#print "remark: ", $comnt, "\n";
956	0						$pubhash{'pubprop'} = {
957							"type_id" => {'name' => 'comment', 'cv_id' => {'name' => 'pubprop type'}},
958							"value" => $comnt,
959							};
960							}
961
962							#pub_dbxref
963	0						undef(my @pub_dbxrefs);
964	0	0					if (defined $fbrf) {
965	0						push(@pub_dbxrefs, {dbxref_id => {accession => $fbrf, db_id => {'name' => 'FlyBase'}}});
966							}
967	0	0					if (defined ($temp = $ref->medline)) {
968	0						push(@pub_dbxrefs, {dbxref_id => {accession => $temp, db_id => {'name' => 'MEDLINE'}}});
969							#use medline # as the pub's uniquename
970	0						$pubhash{'uniquename'} = $temp;
971							}
972	0	0					if (defined ($temp = $ref->pubmed)) {
973	0						push(@pub_dbxrefs, {dbxref_id => {accession => $temp, db_id => {'name' => 'PUBMED'}}});
974							}
975	0						$pubhash{'pub_dbxref'} = \@pub_dbxrefs;
976
977							#if the pub uniquename is not defined or blank, put its FBrf# as its uniquename
978							#this is unique to FlyBase
979							#USERS OF THIS MODULE: PLEASE MODIFY HERE TO IMPLEMENT YOUR POLICY
980							# ON PUB UNIQUENAME!!!
981	0	0	0				if (!defined $pubhash{'uniquename'} \|\| $pubhash{'uniquename'} eq '') {
982	0	0					if (defined $fbrf) {
983	0						$pubhash{'uniquename'} = $fbrf;
984							}
985							#else {
986							# $pubhash{'uniquename'} = $self->_CreatePubUname($ref);
987							#}
988							}
989
990							#add to collection of references
991							#if the pub covers the entire sequence of the top-level feature, add it to feature_pubs
992	0	0	0				if (($ref->start == 1 && $ref->end == $len) \|\| (!defined $ref->start && !defined $ref->end)) {
			0
			0
993	0						push(@feature_pubs, {"pub_id" => \%pubhash});
994							}
995							#the pub is about a sub-sequence of the top-level feature
996							#create a feature for the sub-sequence and add pub as its feature_pub
997							#featureloc of this sub-sequence is against the top-level feature, in interbase coordinates.
998							else {
999							my %parf = (
1000							'uniquename' => $uniquename . ':' . $ref->start . "\.\." . $ref->end,
1001							'organism_id' =>\%organism,
1002	0						'type_id' =>{'name' =>'region', 'cv_id' => {'name' => $cv_name{'sequence'} }},
1003							);
1004	0						my %parfsrcf = (
1005							'uniquename' => $uniquename,
1006							'organism_id' =>\%organism,
1007							);
1008	0						my %parfloc = (
1009							'srcfeature_id' => \%parfsrcf,
1010							'fmin' => $ref->start - 1,
1011							'fmax' => $ref->end,
1012							);
1013	0						$parf{'featureloc'} = \%parfloc;
1014	0						$parf{'feature_pub'} = {'pub_id' => \%pubhash};
1015							my %ffr = (
1016							'subject_id' => \%parf,
1017	0						'type_id' => { 'name' => 'partof', 'cv_id' => { 'name' => $cv_name{'relationship'}}},
1018							);
1019	0						push(@top_featrels, \%ffr);
1020							}
1021							}
1022	0						$datahash{'feature_pub'} = \@feature_pubs;
1023							}
1024
1025							##construct srcfeature hash for use in featureloc
1026	0	0					if (defined $srcfeature) {
1027	0						%srcfhash = $self->_srcf_hash($srcfeature,
1028							$srcfeattype,
1029							\%organism);
1030							# my %fr = (
1031							# "object_id" => \%srcfhash,
1032							# "type_id" => { 'name' => 'partof', 'cv_id' => { 'name' => 'relationship type'}},
1033							# );
1034
1035							# push (@top_featrels, \%fr);
1036							}
1037
1038							#unflatten the seq features in $seq if $seq is a gene or a DNA sequence
1039	0	0	0				if (($gb_type eq 'gene' \|\| $gb_type eq 'DNA') &&
			0
1040							!$nounflatten) {
1041	0						my $u = Bio::SeqFeature::Tools::Unflattener->new;
1042	0						$u->unflatten_seq(-seq=>$seq, -use_magic=>1);
1043							}
1044
1045	0						my @top_sfs = $seq->get_SeqFeatures;
1046							#print $#top_sfs, "\n";
1047
1048							#SUBFEATURES
1049
1050	0	0					if ($datasource =~ /GenBank/i) {
		0
1051	0						$tag_cv = 'GenBank feature qualifier';
1052							} elsif ($datasource =~ /GFF/i) {
1053	0						$tag_cv = 'feature_property';
1054							} else {
1055	0						$tag_cv = $cv_name{'feature_property'};
1056							}
1057
1058	0						my $si = 0;
1059	0						foreach $feat (@top_sfs) {
1060							#$feat = $top_sfs[$si];
1061							#print "si: $si\n";
1062	0						my $prim_tag = $feat->primary_tag;
1063							#print $prim_tag, "\n";
1064
1065							# get all qualifiers of the 'source' feature, load these as top_featureprops of the top level feature
1066	0	0					if ($prim_tag eq 'source') {
1067	0						foreach $tag ($feat->all_tags()) {
1068							#db_xref
1069	0	0	0				if ($tag eq 'db_xref'
		0	0
		0
1070							or $tag eq 'Dbxref'
1071							or $tag eq 'dbxref') {
1072	0						my @t1 = $feat->each_tag_value($tag);
1073	0						foreach $temp (@t1) {
1074	0						$temp =~ /([^:]?):(.)/;
1075	0						my $db = $1;
1076	0						my $xref = $2;
1077							#PRE/POST very inefficent
1078							#my $db = $PREMATCH;
1079							#my $xref = $POSTMATCH;
1080	0						my %acchash = (
1081							"db_id" => {'name' => $db},
1082							"accession" => $xref,
1083							);
1084	0						my %fdbx = ('dbxref_id' => \%acchash);
1085	0						push (@top_dbxrefs, \%fdbx);
1086							}
1087							#Ontology_term
1088							} elsif ($tag eq 'Ontology_term') {
1089	0						my @t1 = $feat->each_tag_value($tag);
1090	0						foreach $temp (@t1) {
1091							###FIXME
1092							}
1093							#other tags as featureprop
1094							} elsif ($tag ne 'gene') {
1095	0						my %prophash = undef;
1096	0						%prophash = (
1097							"type_id" => {'name' => $tag, 'cv_id' => {'name' => $tag_cv}},
1098							"value" => join(' ',$feat->each_tag_value($tag)),
1099							);
1100	0						push(@top_featureprops, \%prophash);
1101							}
1102							}
1103
1104	0	0					if ($feat->can('source')) {
1105	0						my $source = $feat->source();
1106	0						@top_dbxrefs = $self->handle_source($feat, @top_dbxrefs);
1107							}
1108
1109							#featureloc for the top-level feature
1110	0						my $fmin = undef;
1111	0						my $fmax = undef;
1112	0						my $strand = undef;
1113	0						my $phase = undef;
1114	0						my %fl = undef;
1115
1116	0						$fmin = $feat->start - 1;
1117	0						$fmax = $feat->end;
1118	0						$strand = $feat->strand;
1119
1120	0	0					if ($feat->can('phase')) {
1121	0						$phase = $feat->phase;
1122							}
1123
1124							%fl = (
1125	0						"srcfeature_id" => \%srcfhash,
1126							"fmin" => $fmin,
1127							"fmax" => $fmax,
1128							"strand" => $strand,
1129							"phase" => $phase,
1130							);
1131
1132	0						$datahash{'featureloc'} = \%fl;
1133
1134							#delete 'source' feature from @top_sfs
1135	0						splice(@top_sfs, $si, 1);
1136							}
1137	0						$si ++;
1138							#close loop over top_sfs
1139							}
1140
1141							#the top-level features other than 'source'
1142	0						foreach $feat (@top_sfs) {
1143							#print $feat->primary_tag, "\n";
1144
1145	0						my $r = $self->_subfeat2featrelhash($name, $ftype, $feat, \%srcfhash, $tag_cv, $isanalysis);
1146
1147	0	0	0				if (!($ftype eq 'mRNA' && $feat->primary_tag eq 'gene')) {
1148	0						my %fr = %$r;
1149	0						push(@top_featrels, \%fr);
1150							} else {
1151	0						%finaldatahash = %$r;
1152							}
1153							}
1154
1155	0	0					if (@top_dbxrefs) {
1156	0						$datahash{'feature_dbxref'} = \@top_dbxrefs;
1157							}
1158
1159	0	0					if (@top_featureprops) {
1160	0						$datahash{'featureprop'} = \@top_featureprops;
1161							}
1162
1163	0	0					if (@top_featrels) {
1164	0						$datahash{'feature_relationship'} = \@top_featrels;
1165							}
1166
1167	0	0					if (@top_featurecvterms) {
1168	0						$datahash{'feature_cvterm'} = \@top_featurecvterms;
1169							}
1170
1171	0	0	0				if ($ftype eq 'mRNA' && %finaldatahash) {
1172							$finaldatahash{'feature_relationship'} = {
1173							'subject_id' => \%datahash,
1174	0						'type_id' => { 'name' => 'partof', 'cv_id' => { 'name' => $cv_name{'relationship'} }},
1175							};
1176							} else {
1177	0						%finaldatahash = %datahash;
1178							}
1179
1180	0						my $mainTag = 'feature';
1181	0						$self->_hash2xml(undef, $mainTag, \%finaldatahash);
1182
1183	0						return 1;
1184							}
1185
1186							sub _hash2xml {
1187	0			0			my $self = shift;
1188	0						my $isMatch = undef;
1189	0						$isMatch = shift;
1190	0						my $ult = shift;
1191	0						my $ref = shift;
1192	0						my %mh = %$ref;
1193	0						my $key;
1194							my $v;
1195	0						my $sh;
1196	0						my $xx;
1197	0						my $yy;
1198	0						my $nt;
1199	0						my $ntref;
1200	0						my $output;
1201	0	0					my $root = shift if (@_);
1202							#print "ult: $ult\n";
1203	0	0					if (!defined $self->{'writer'}) {
1204	0						$root = 1;
1205	0						$self->_create_writer();
1206							}
1207	0						my $temp;
1208	0						my %subh = undef;
1209
1210							#start opeing tag
1211							#if pub record of type 'journal', form the 'ref' attribute for special pub lookup
1212							#requires that the journal name itself is also stored as a pubprop record for the journal with value equal
1213							#to the journal name and type of 'abbreviation'.
1214	0	0	0				if ($ult eq 'pub' && $mh{'type_id'}->{'name'} eq 'journal') {
		0	0
		0	0
		0
1215	0						$self->{'writer'}->startTag($ult, 'ref' => $mh{'title'} . ':journal:abbreviation');
1216							}
1217
1218							#special pub match if pub uniquename not known
1219							elsif ($ult eq 'pub' && !defined $mh{'uniquename'}) {
1220	0						$self->{'writer'}->startTag($ult, 'op' => 'match');
1221							#set the match flag, all the sub tags should also have "op"="match"
1222	0						$isMatch = 1;
1223							}
1224
1225							#if cvterm or cv, lookup only
1226							elsif (($ult eq 'cvterm') \|\| ($ult eq 'cv')) {
1227	0						$self->{'writer'}->startTag($ult, 'op' => 'lookup');
1228							}
1229
1230							#if nested tables of match table, match too
1231							elsif ($isMatch) {
1232	0						$self->{'writer'}->startTag($ult, 'op' => 'match');
1233							}
1234
1235							else {
1236	0						$self->{'writer'}->startTag($ult);
1237							}
1238
1239							#first loop to produce xml for all the table columns
1240	0						foreach $key (keys %mh)
1241							{
1242							#print "key: $key\n";
1243	0						$xx = ' ' . $key;
1244	0						$yy = $key . ' ';
1245	0	0	0				if (index($chadotables, $xx) < 0 && index($chadotables, $yy) < 0)
1246							{
1247	0	0					if ($isMatch) {
1248	0						$self->{'writer'}->startTag($key, 'op' => 'match');
1249							} else {
1250	0						$self->{'writer'}->startTag($key);
1251							}
1252
1253	0						my $x = $ult . '.' . $key;
1254							#the column is a foreign key
1255	0	0					if (defined $fkey{$x})
1256							{
1257	0						$nt = $fkey{$x};
1258	0						$sh = $mh{$key};
1259	0						$self->_hash2xml($isMatch, $nt, $sh, 0);
1260							} else
1261							{
1262							#print "$key: $mh{$key}\n";
1263	0						$self->{'writer'}->characters($mh{$key});
1264							}
1265	0						$self->{'writer'}->endTag($key);
1266							}
1267							}
1268
1269							#second loop to produce xml for all the nested tables
1270	0						foreach $key (keys %mh)
1271							{
1272							#print "key: $key\n";
1273	0						$xx = ' ' . $key;
1274	0						$yy = $key . ' ';
1275							#a nested table
1276	0	0	0				if (index($chadotables, $xx) > 0 \|\| index($chadotables, $yy) > 0)
1277							{
1278							#$writer->startTag($key);
1279	0						$ntref = $mh{$key};
1280							#print "$key: ", ref($ntref), "\n";
1281	0	0					if (ref($ntref) =~ 'HASH') {
		0
1282	0						$self->_hash2xml($isMatch, $key, $ntref, 0);
1283							} elsif (ref($ntref) =~ 'ARRAY') {
1284							#print "array dim: ", $#$ntref, "\n";
1285	0						foreach $ref (@$ntref) {
1286							#print "\n";
1287	0						$self->_hash2xml($isMatch, $key, $ref, 0);
1288							}
1289							}
1290							#$writer->endTag($key);
1291							}
1292							}
1293
1294							#end tag
1295	0						$self->{'writer'}->endTag($ult);
1296
1297							#if ($root == 1) {
1298							# $self->{'writer'}->endTag('chado');
1299							# }
1300							}
1301
1302							sub _guess_acc_db {
1303	0			0			my $self = shift;
1304	0						my $seq = shift;
1305	0						my $acc = shift;
1306							#print "acc: $acc\n";
1307
1308	0	0	0				if ($acc =~ /^NM_\d{6}/ \|\| $acc =~ /^NP_\d{6}/ \|\| $acc =~ /^NT_\d{6}/ \|\| $acc =~ /^NC_\d{6}/) {
		0	0
		0	0
		0	0
		0	0
		0	0
		0	0
			0
			0
1309	0						return "RefSeq";
1310							} elsif ($acc =~ /^XM_\d{6}/ \|\| $acc =~ /^XP_\d{6}/ \|\| $acc =~ /^XR_\d{6}/) {
1311	0						return "RefSeq";
1312							} elsif ($acc =~ /^[a-zA-Z]{1,2}\d{5,6}/) {
1313	0						return "GB";
1314							} elsif ($seq->molecule() eq 'protein' && $acc =~ /^[a-zA-z]\d{5}/) {
1315	0						return "PIR";
1316							} elsif ($seq->molecule() eq 'protein' && $acc =~ /^\d{6,7}[a-zA-Z]/) {
1317	0						return "PRF";
1318							} elsif ($acc =~ /\d+/ && $acc !~ /[a-zA-Z]/) {
1319	0						return "LocusID";
1320							} elsif ($acc =~ /^CG\d+/ \|\| $acc =~ /^FB[a-z][a-z]\d+/) {
1321	0						return "FlyBase";
1322							} else {
1323	0						return "unknown";
1324							}
1325							}
1326
1327							sub _subfeat2featrelhash {
1328	0			0			my $self = shift;
1329	0						my $genename = shift;
1330	0						my $seqtype = shift;
1331	0						my $feat = shift;
1332	0						my $r = shift;
1333	0						my %srcf = %$r; #srcfeature hash for featureloc.srcfeature_id
1334	0						my $tag_cv = shift;
1335	0						my $isanalysis = shift;
1336
1337	0						my $prim_tag = $feat->primary_tag;
1338
1339	0						my $sfunique = undef; #subfeature uniquename
1340	0						my $sfname = undef; #subfeature name
1341	0						my $sftype = undef; #subfeature type
1342
1343	0	0					if ($feat->has_tag('symbol')) {
		0
1344	0						($sfunique) = $feat->each_tag_value("symbol");
1345							} elsif ($feat->has_tag('label')) {
1346	0						($sfunique) = $feat->each_tag_value("label");
1347							} else {
1348							#$self->throw("$prim_tag at " . $feat->start . "\.\." . $feat->end . " does not have symbol or label! To convert into chadoxml, a seq feature must have a /symbol or /label tag holding its unique name.");
1349							#generate feature unique name as --
1350	0						$sfunique = $self->_genFeatUniqueName($genename, $feat);
1351							}
1352
1353	0	0					if ($feat->has_tag('Name')) {
1354	0						($sfname) = $feat->each_tag_value("Name");
1355							}
1356
1357							#feature type translation
1358	0	0					if (defined $feattype_args2so{$prim_tag}) {
1359	0						$sftype = $feattype_args2so{$prim_tag};
1360							} else {
1361	0						$sftype = $prim_tag;
1362							}
1363
1364	0	0					if ($prim_tag eq 'mutation') {
1365	0	0					if ($feat->start == $feat->end) {
1366	0						$sftype = $feattype_args2so{'mutation1'};
1367							} else {
1368	0						$sftype = $feattype_args2so{'mutation2'};
1369							}
1370							}
1371
1372							#set is_analysis flag for gene model features
1373	0						undef(my $isanal);
1374	0	0	0				if ($sftype eq 'gene' \|\| $sftype eq 'mRNA' \|\| $sftype eq 'exon' \|\| $sftype eq 'protein' \|\| $sftype eq 'polypeptide') {
			0
			0
			0
1375	0						$isanal = $isanalysis;
1376							}
1377
1378							my %sfhash = (
1379							"name" => $sfname,
1380							"uniquename" => $sfunique,
1381							"organism_id" => \%organism,
1382	0		0				"type_id" => { 'name' => $sftype, 'cv_id' => { 'name' => $cv_name{'sequence'} }},
1383							"is_analysis" => $isanal \|\| 'false',
1384							);
1385
1386							#make a copy of %sfhash for passing to this method when recursively called
1387							#my %srcfeat = (
1388							# "name" => $sfname,
1389							# "uniquename" => $sfunique,
1390							# "organism_id" => \%organism,
1391							# "type_id" => { 'name' => $sftype, 'cv_id' => { 'name' => 'SO'}},
1392							# );
1393
1394							#featureloc for subfeatures
1395	0						undef(my $sfmin);
1396	0						undef(my $sfmax);
1397	0						undef(my $is_sfmin_partial);
1398	0						undef(my $is_sfmax_partial);
1399	0						undef(my $sfstrand);
1400	0						undef(my $sfphase);
1401	0						$sfmin = $feat->start - 1;
1402	0						$sfmax = $feat->end;
1403	0						$sfstrand = $feat->strand();
1404
1405	0	0					if ($feat->can('phase')) {
1406	0						$sfphase = $feat->phase;
1407							}
1408
1409							#if the gene feature in an mRNA record, cannot use its coordinates, omit featureloc
1410	0	0	0				if ($seqtype eq 'mRNA' && $sftype eq 'gene') {
1411							} else {
1412	0	0					if ($feat->location->isa('Bio::Location::FuzzyLocationI')) {
1413	0	0					if ($feat->location->start_pos_type() ne 'EXACT') {
1414	0						$is_sfmin_partial = 'true';
1415							}
1416	0	0					if ($feat->location->end_pos_type() ne 'EXACT') {
1417	0						$is_sfmax_partial = 'true';
1418							}
1419							}
1420
1421	0		0				my %sfl = (
			0
1422							"srcfeature_id" => \%srcf,
1423							"fmin" => $sfmin,
1424							"is_fmin_partial" => $is_sfmin_partial \|\| 'false',
1425							"fmax" => $sfmax,
1426							"is_fmax_partial" => $is_sfmax_partial \|\| 'false',
1427							"strand" => $sfstrand,
1428							"phase" => $sfphase,
1429							);
1430
1431	0						$sfhash{'featureloc'} = \%sfl;
1432							}
1433
1434
1435							#subfeature tags
1436	0						undef(my @sfdbxrefs); #subfeature dbxrefs
1437	0						undef(my @sub_featureprops); #subfeature props
1438	0						undef(my @sub_featuresyns); #subfeature synonyms
1439	0						undef(my @sub_featurecvterms); #subfeature cvterms
1440	0						foreach my $tag ($feat->all_tags()) {
1441							#feature_dbxref for features
1442	0	0	0				if ($tag eq 'db_xref' or $tag eq 'dbxref' or $tag eq 'Dbxref') {
		0	0
		0	0
		0	0
			0
1443	0						my @t1 = $feat->each_tag_value($tag);
1444							#print "# of dbxref: @t1\n";
1445	0						for my $temp (@t1) {
1446	0						$temp =~ /:/;
1447	0						my $db = $PREMATCH;
1448	0						my $xref = $POSTMATCH;
1449							#print "db: $db; xref: $xref\n";
1450	0						my %acchash = (
1451							"db_id" => {'name' => $db},
1452							"accession" => $xref,
1453							);
1454	0						my %sfdbx = ('dbxref_id' => \%acchash);
1455	0						push (@sfdbxrefs, \%sfdbx);
1456							}
1457							#Alias tags
1458							} elsif ($tag eq 'Alias') {
1459	0						@sub_featuresyns = $self->handle_Alias_tag($feat, @sub_featuresyns);
1460							} elsif ($tag eq 'Ontology_term') {
1461	0						@sub_featurecvterms = $self->handle_Ontology_tag($feat, @sub_featurecvterms);
1462							#featureprop for features, excluding GFF Name & Parent tags
1463							} elsif ($tag ne 'gene' && $tag ne 'symbol' && $tag ne 'Name' && $tag ne 'Parent') {
1464	0	0					next if ($tag eq 'parent_id');
1465	0	0					next if ($tag eq 'load_id');
1466	0						foreach my $val ($feat->each_tag_value($tag)) {
1467	0						my %prophash = undef;
1468	0						%prophash = (
1469							"type_id" => {'name' => $tag, 'cv_id' => {'name' => $tag_cv}},
1470							"value" => $val,
1471							);
1472	0						push(@sub_featureprops, \%prophash);
1473							}
1474							}
1475							}
1476
1477	0	0					if ($feat->can('source')) {
1478	0						@sfdbxrefs = $self->handle_source($feat,@sfdbxrefs);
1479							}
1480
1481	0	0					if (@sub_featureprops) {
1482	0						$sfhash{'featureprop'} = \@sub_featureprops;
1483							}
1484	0	0					if (@sfdbxrefs) {
1485	0						$sfhash{'feature_dbxref'} = \@sfdbxrefs;
1486							}
1487	0	0					if (@sub_featuresyns) {
1488	0						$sfhash{'feature_synonym'} = \@sub_featuresyns;
1489							}
1490	0	0					if (@sub_featurecvterms) {
1491	0						$sfhash{'feature_cvterm'} = \@sub_featurecvterms;
1492							}
1493
1494	0						undef(my @ssfeatrel);
1495	0	0					if ($feat->has_tag('locus_tag')) {
		0
1496	0						($genename)= $feat->each_tag_value('locus_tag');
1497							} elsif ($feat->has_tag('gene')) {
1498	0						($genename)= $feat->each_tag_value('gene');
1499							}
1500
1501	0						foreach my $sf ($feat->get_SeqFeatures()) {
1502							#print $sf->primary_tag, "\n";
1503	0						my $rref = $self->_subfeat2featrelhash($genename, $sftype, $sf, \%srcf, $tag_cv, $isanalysis);
1504	0	0					if (defined $rref) {
1505	0						push(@ssfeatrel, $rref);
1506							}
1507							}
1508
1509	0	0					if (@ssfeatrel) {
1510	0						$sfhash{'feature_relationship'} = \@ssfeatrel;
1511							}
1512
1513							#subj-obj relationship type
1514	0						undef(my $reltypename);
1515	0						$reltypename = return_reltypename($sftype);
1516
1517							my %fr = (
1518							"subject_id" => \%sfhash,
1519							"type_id" => { 'name' => $reltypename,
1520	0						'cv_id' => { 'name' => $cv_name{'relationship'} }},
1521							);
1522
1523	0	0	0				if ($seqtype eq 'mRNA' && $sftype eq 'gene') {
1524	0						return \%sfhash;
1525							} else {
1526	0						return \%fr;
1527							}
1528
1529							}
1530
1531							#generate uniquename for feature as: -- (foo-mRNA-10..1000)
1532							sub _genFeatUniqueName {
1533	0			0			my $self = shift;
1534	0						my $genename = shift;
1535	0						my $feat = shift;
1536	0						undef(my $uniquename);
1537	0						my $ftype = $feat->primary_tag;
1538	0						my $start = $feat->start;
1539	0						my $end = $feat->end;
1540
1541	0	0					if ($feat->has_tag('locus_tag')) {
		0
1542	0						($genename) = $feat->each_tag_value("locus_tag");
1543							} elsif ($feat->has_tag('gene')) {
1544	0						($genename) = $feat->each_tag_value("gene");
1545							}
1546
1547	0						$uniquename = $genename . '-' . $ftype . '-' . $start . "\.\." . $end;
1548
1549	0						return $uniquename;
1550							}
1551
1552							#create uniquename for pubs with no medline id and no FBrf#
1553							#use ", , " as the uniquename (same as miniref)
1554							# is if one author,
1555							# or and if two,
1556							# or et al. if more
1557							#sub _CreatePubUname {
1558							# my $self = shift;
1559							# my $pub = shift;
1560							# undef(my $pubuname);
1561							#
1562							# return $pubuname;
1563							#}
1564
1565							#get authors of a reference
1566							#returns ref to the array of author hashes
1567							sub _getRefAuthors {
1568	0			0			my $self = shift;
1569	0						my $ref = shift;
1570
1571	0						my $temp = $ref->authors;
1572	0						undef(my @authors);
1573	0						undef(my @aut);
1574
1575							#there are authors
1576	0	0					if ($temp ne '.') {
1577	0	0					if (index($temp, ' and ') > 0) {
1578	0						$temp =~ / and /;
1579	0						my $lastauthor = $POSTMATCH;
1580	0						@authors = split(/\, /, $PREMATCH);
1581	0						push (@authors, $lastauthor);
1582							} else {
1583	0						@authors = split(/\, /, $temp);
1584							}
1585
1586	0						my $a;
1587	0						my $i = 0;
1588	0						foreach $a (@authors) {
1589	0						$i ++;
1590							#parse the author lastname and givennames
1591	0						undef(my $last);
1592	0						undef(my $given);
1593	0	0					if (index($a, ',') > 0) { #genbank format, last,f.m.
		0
1594	0						($last, $given) = split(/\,/, $a);
1595							} elsif (index($a, ' ') > 0) { #embl format, last f.m.
1596	0						($last, $given) = split(/ /, $a);
1597							}
1598	0						my %au = (
1599							'surname' => $last,
1600							'givennames' => $given,
1601							);
1602	0						push(@aut, {author_id => \%au, arank => $i});
1603							}
1604
1605	0						return \@aut;
1606							}
1607
1608							#no authors, Bio::SeqIO::genbank doesn't pick up 'CONSRTM' line.
1609							else {
1610	0						return;
1611							}
1612
1613							}
1614
1615							#extract submission year from the citation of the submitted reference
1616							#genbank format for the submitted citation: JOURNAL Submitted (DD-MON-YYYY) submitter address
1617							sub _getSubmitYear {
1618	0			0			my $self = shift;
1619	0						my $citation = shift;
1620
1621	0	0					if ($citation !~ /Submitted/) {
1622	0						$self->warn("not citation for a submitted reference. cannot extract submission year.");
1623	0						return;
1624							} else {
1625	0						$citation =~ /Submitted $\d\d-[a-zA-Z]{3}-\d{4}$/;
1626	0						my $a = $MATCH;
1627	0						$a =~ /\d{4}/;
1628	0						my $year = $MATCH;
1629
1630	0						return $year;
1631							}
1632							}
1633
1634							sub _getSubmitAddr {
1635	0			0			my $self = shift;
1636	0						my $ref = shift;
1637	0						undef(my %author);
1638
1639	0						my $citation = $ref->location;
1640	0	0					if ($citation !~ /Submitted/) {
1641	0						$self->warn("not citation for a submitted reference. cannot extract submission year.");
1642	0						return;
1643							} else {
1644	0						$citation =~ /Submitted $\d\d-[a-zA-Z]{3}-\d{4}$/;
1645	0						my $a = $POSTMATCH;
1646	0	0					if (defined $a) {
1647	0						$a =~ s/^\s//;
1648	0						%author = (
1649							'author_id' => {'surname' => substr($a, 0, 100)},
1650							);
1651	0						return \%author;
1652							} else {
1653	0						return;
1654							}
1655							}
1656							}
1657
1658							=head2 suppress_residues
1659
1660							Title : suppress_residues
1661							Usage : $obj->suppress_residues() #get existing value
1662							$obj->suppress_residues($newval) #set new value
1663							Function : Keep track of the flag to suppress printing of residues in the
1664							chadoxml file. The default it to allow all residues to go into the
1665							file.
1666							Returns : value of suppress_residues (a scalar)
1667							Args : new value of suppress_residues (to set)
1668
1669							=cut
1670
1671							sub suppress_residues {
1672	0			0	1		my $self = shift;
1673	0	0					my $suppress_residues = shift if @_;
1674	0	0					return $self->{'suppress_residues'} = $suppress_residues if defined($suppress_residues);
1675	0						return $self->{'suppress_residues'};
1676							}
1677
1678							=head2 allow_residues
1679
1680							Title : allow_residues
1681							Usage : $obj->allow_residues() #get existing value
1682							$obj->allow_residues($feature_type) #set new value
1683							Function : Track the allow_residues type. This can be used in conjunction
1684							with the suppress_residues flag to only allow residues from a
1685							specific feature type to be printed in the xml file, for example,
1686							only printing chromosome residues. When suppress_residues is set to
1687							true, then only chromosome features would would go into the xml
1688							file. If suppress_residues is not set, this function has no effect
1689							(since the default is to put all residues in the xml file).
1690							Returns : value of allow_residues (string that corresponds to a feature type)
1691							Args : new value of allow_residues (to set)
1692							Status :
1693
1694							=cut
1695
1696							sub allow_residues {
1697	0			0	1		my $self = shift;
1698	0	0					my $allow_residues = shift if @_;
1699	0	0					return $self->{'allow_residues'} = $allow_residues if defined($allow_residues);
1700	0						return $self->{'allow_residues'};
1701							}
1702
1703							=head2 return_ftype_hash
1704
1705							Title : return_ftype_hash
1706							Usage : $obj->return_ftype_hash()
1707							Function : A simple hash where returning it has be factored out of the main
1708							code to allow subclasses to override it.
1709							Returns : A hash that indicates what the name of the SO term is and what
1710							the name of the Sequence Ontology is in the cv table.
1711							Args : The string that represents the SO term.
1712							Status :
1713
1714							=cut
1715
1716							sub return_ftype_hash {
1717	0			0	1		my $self = shift;
1718	0						my $ftype = shift;
1719							my %ftype_hash = ( "name" => $ftype,
1720	0						"cv_id" => {"name" => $cv_name{'sequence'} });
1721	0						return %ftype_hash;
1722							}
1723
1724							=head2 return_reltypename
1725
1726							Title : return_reltypename
1727							Usage : $obj->return_reltypename
1728							Function : Return the appropriate relationship type name depending on the
1729							feature type (typically part_of, but derives_from for polypeptide).
1730							Returns : A relationship type name.
1731							Args : A SO type name.
1732							Status :
1733
1734							=cut
1735
1736							sub return_reltypename {
1737	0			0	1		my $self = shift;
1738	0						my $sftype = shift;
1739
1740	0						my $reltypename;
1741	0	0	0				if ($sftype eq 'protein' \|\| $sftype eq 'polypeptide') {
1742	0						$reltypename = 'derives_from';
1743							} else {
1744	0						$reltypename = 'part_of';
1745							}
1746
1747	0						return $reltypename;
1748							}
1749
1750							=head2 next_seq
1751
1752							Title : next_seq
1753							Usage : $obj->next_seq
1754							Function :
1755							Returns :
1756							Args :
1757							Status : Not implemented (write only adaptor)
1758
1759							=cut
1760
1761							sub next_seq {
1762	0			0	1		my ($self, %argv) = @_;
1763
1764	0						$self->throw('next_seq is not implemented; this is a write-only adapter.');
1765
1766							}
1767
1768							=head2 _create_writer
1769
1770							Title : _create_writer
1771							Usage : $obj->_create_writer
1772							Function : Creates XML::Writer object and writes start tag
1773							Returns : Nothing, though the writer persists as part of the chadoxml object
1774							Args : None
1775							Status :
1776
1777							=cut
1778
1779							sub _create_writer {
1780	0			0			my $self = shift;
1781
1782	0						$self->{'writer'} = XML::Writer->new(OUTPUT => $self->_fh,
1783							DATA_MODE => 1,
1784							DATA_INDENT => 3);
1785
1786							#print header
1787	0						$self->{'writer'}->xmlDecl("UTF-8");
1788	0						$self->{'writer'}->comment("created by Peili Zhang, Flybase, Harvard University\n".
1789							"and Scott Cain, GMOD, Cold Spring Harbor Laboratory");
1790
1791							#start chadoxml
1792	0						$self->{'writer'}->startTag('chado');
1793
1794	0						return;
1795							}
1796
1797							=head2 close_chadoxml
1798
1799							Title : close_chadoxml
1800							Usage : $obj->close_chadoxml
1801							Function : Writes the closing xml tag
1802							Returns : None
1803							Args : None
1804							Status :
1805
1806							=cut
1807
1808							sub close_chadoxml {
1809	0			0	1		my $self = shift;
1810
1811	0						$self->{'writer'}->endTag('chado');
1812	0						return;
1813							}
1814
1815							=head2 handle_unreserved_tags
1816
1817							Title : handle_unreserved_tags
1818							Usage : $obj->handle_unreserved_tags
1819							Function : Converts tag value pairs to xml-ready hashrefs
1820							Returns : The array containing the hashrefs
1821							Args : In order: the Seq or SeqFeature object, the key, and the hasharray
1822							Status :
1823
1824							=cut
1825
1826							sub handle_unreserved_tags {
1827	0			0	1		my $self = shift;
1828	0						my $seq = shift;
1829	0						my $key = shift;
1830	0						my @arr = @_;
1831
1832	0						my @values = $seq->attributes($key);
1833	0						for my $value (@values) {
1834							my %prophash = (
1835							"type_id" => {'name' => $key,
1836	0						'cv_id' => { 'name' => $cv_name{'feature_property'} }
1837							},
1838							"value" => $value,
1839							);
1840	0						push(@arr, \%prophash);
1841							}
1842
1843	0						return @arr;
1844							}
1845
1846							=head2 handle_Alias_tag
1847
1848							Title : handle_Alias_tag
1849							Usage : $obj->handle_Alias_tag
1850							Function : Convert Alias values to synonym hash refs
1851							Returns : An array of synonym hash tags
1852							Args : The seq or seqFeature object and the synonym hash array
1853							Status :
1854
1855							=cut
1856
1857							sub handle_Alias_tag {
1858	0			0	1		my $self = shift;
1859	0						my $seq = shift;
1860	0						my @arr = @_;
1861
1862	0						my @Aliases = $seq->attributes('Alias');
1863	0						for my $Alias (@Aliases) {
1864	0						my %synhash = (
1865							"type_id" => { 'name' => 'exact',
1866							'cv_id' => { 'name' => 'synonym_type' } },
1867							"name" => $Alias,
1868							"synonym_sgml" => $Alias,
1869							);
1870	0						push(@arr, {'synonym_id' => \%synhash,
1871							'pub_id' => {'uniquename' => 'null',
1872							'type_id' => { 'name' => 'null',
1873							'cv_id' => {
1874							'name' => 'null',
1875							},
1876							},
1877							},
1878							});
1879							}
1880
1881	0						return @arr;
1882							}
1883
1884							=head2 handle_Ontology_tag
1885
1886							Title : handle_Ontology_tag
1887							Usage : $obj->handle_Ontology_tag
1888							Function : Convert Ontology_term values to ontology term hash refs
1889							Returns : An array of ontology term hash refs
1890							Args : The seq or seqFeature object and the ontology term array
1891							Status :
1892
1893							=cut
1894
1895							sub handle_Ontology_tag {
1896	0			0	1		my $self = shift;
1897	0						my $seq = shift;
1898	0						my @arr = @_;
1899
1900	0						my @terms = $seq->attributes('Ontology_term');
1901	0						for my $term (@terms) {
1902	0						my $hashref;
1903	0	0					if ($term =~ /(\S+):(\S+)/) {
1904	0						my $db = $1;
1905	0						my $acc = $2;
1906	0						$hashref = {
1907							'cvterm_id' => {
1908							'dbxref_id' => {
1909							'db_id' => { 'name' => $db },
1910							'accession' => $acc
1911							},
1912							},
1913							};
1914							}
1915	0						push(@arr, {cvterm_id => $hashref});
1916							}
1917
1918	0						return @arr;
1919							}
1920
1921							=head2 handle_dbxref
1922
1923							Title : handle_dbxref
1924							Usage : $obj->handle_dbxref
1925							Function : Convert Dbxref values to dbxref hashref
1926							Returns : An array of dbxref hashrefs
1927							Args : A seq or seqFeature object and the dbxref array
1928							Status :
1929
1930							=cut
1931
1932							sub handle_dbxref {
1933	0			0	1		my $self = shift;
1934	0						my $seq = shift;
1935	0						my $tag = shift;
1936	0						my @arr = @_;
1937
1938	0						my @terms = $seq->attributes($tag);
1939	0						for my $term (@terms) {
1940	0						my $hashref;
1941	0	0					if ($term =~ /(\S+):(\S+)/) {
1942	0						my $db = $1;
1943	0						my $acc= $2;
1944	0						my $version = 1;
1945	0	0					if ($acc =~ /(\S+)\.(\S+)/) {
1946	0						$acc = $1;
1947	0						$version = $2;
1948							}
1949							$hashref = {
1950	0						'dbxref_id' => {
1951							'db_id' => { 'name' => $db },
1952							'accession' => $acc,
1953							'version' => $version,
1954							},
1955							};
1956							}
1957							else {
1958	0						$self->throw("I don't know how to handle a dbxref like $term");
1959							}
1960	0						push(@arr, {'dbxref_id' => $hashref});
1961							}
1962	0						return @arr;
1963							}
1964
1965							=head2 handle_source
1966
1967							Title : handle_source
1968							Usage : $obj->handle_source
1969							Function :
1970							Returns :
1971							Args :
1972							Status :
1973
1974							=cut
1975
1976							sub handle_source {
1977	0			0	1		my $self = shift;
1978	0						my $seq = shift;
1979	0						my @arr = @_;
1980
1981	0						my $source = $seq->source();
1982	0	0					return @arr unless $source;
1983
1984	0						my $hashref = {
1985							'dbxref_id' => {
1986							'db_id' => {'name' => 'GFF_source'},
1987							'accession' => $source,
1988							}
1989							};
1990
1991	0						push(@arr, {'dbxref_id' => $hashref});
1992	0						return @arr;
1993							}
1994
1995							=head2 _srcf_hash
1996
1997							Title : _srcf_hash
1998							Usage : $obj->_srcf_hash
1999							Function : Creates the srcfeature hash for use in featureloc hashes
2000							Returns : The srcfeature hash
2001							Args : The srcfeature name, the srcfeature type and a reference to the
2002							organism hash.
2003							Status :
2004
2005							=cut
2006
2007							sub _srcf_hash {
2008	0			0			my $self = shift;
2009	0						my $srcf = shift;
2010	0						my $stype= shift;
2011	0						my $orgref = shift;
2012
2013							my %hash = ('uniquename' => $srcf,
2014							'organism_id' => $orgref,
2015							'type_id' => {'name' => $stype,
2016							'cv_id' =>
2017	0						{'name' => $cv_name{'sequence'} }},
2018							);
2019
2020	0						return %hash;
2021							}
2022
2023
2024							1;