File Coverage

Bio/SeqIO/chadoxml.pm
Criterion Covered Total %
statement 60 681 8.8
branch 0 306 0.0
condition 0 206 0.0
subroutine 20 42 47.6
pod 12 12 100.0
total 92 1247 7.3


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::SeqIO::chadoxml
3             #
4             # Peili Zhang
5             #
6             # You may distribute this module under the same terms as perl itself
7              
8             # POD documentation - main docs before the code
9              
10             =head1 NAME
11              
12             Bio::SeqIO::chadoxml - chadoxml sequence output stream
13              
14             =head1 SYNOPSIS
15              
16             It is probably best not to use this object directly, but
17             rather go through the SeqIO handler system:
18              
19             $writer = Bio::SeqIO->new(-file => ">chado.xml",
20             -format => 'chadoxml');
21              
22             # assume you already have Sequence or SeqFeature objects
23             $writer->write_seq($seq_obj);
24              
25             #after writing all seqs
26             $writer->close_chadoxml();
27              
28              
29              
30             =head1 DESCRIPTION
31              
32             This object can transform Bio::Seq objects to chadoxml flat
33             file databases (for chadoxml DTD, see
34             http://gmod.cvs.sourceforge.net/gmod/schema/chado/dat/chado.dtd).
35              
36             This is currently a write-only module.
37              
38             $seqio = Bio::SeqIO->new(-file => '>outfile.xml',
39             -format => 'chadoxml'
40             -suppress_residues => 1,
41             -allow_residues => 'chromosome',
42             );
43              
44             # we have a Bio::Seq object $seq which is a gene located on
45             # chromosome arm 'X', to be written out to chadoxml
46             # before converting to chadoxml, $seq object B be transformed
47             # so that all the coordinates in $seq are against the source
48             # feature to be passed into Bio::SeqIO::chadoxml->write_seq()
49             # -- chromosome arm X in the example below.
50              
51             $seqio->write_seq(-seq=>$seq,
52             -genus => 'Homo',
53             -species => 'sapiens',
54             -seq_so_type=>'gene',
55             -src_feature=>'X',
56             -src_feat_type=>'chromosome_arm',
57             -nounflatten=>1,
58             -is_analysis=>'true',
59             -data_source=>'GenBank');
60              
61             The chadoxml output of Bio::SeqIO::chadoxml-Ewrite_seq() method can be
62             passed to the loader utility in XORT package
63             (http://gmod.cvs.sourceforge.net/gmod/schema/XMLTools/XORT/)
64             to be loaded into chado.
65              
66             This object is currently implemented to work with sequence and
67             annotation data from whole genome projects deposited in GenBank. It
68             may not be able to handle all different types of data from all
69             different sources.
70              
71             In converting a Bio::Seq object into chadoxml, a top-level feature is
72             created to represent the object and all sequence features inside the
73             Bio::Seq object are treated as subfeatures of the top-level
74             feature. The Bio::SeqIO::chadoxml object calls
75             Bio::SeqFeature::Tools::Unflattener to unflatten the flat feature list
76             contained in the subject Bio::Seq object, to build gene model
77             containment hierarchy conforming to chado central dogma model: gene
78             --E mRNA --E exons and protein.
79              
80             Destination of data in the subject Bio::Seq object $seq is as following:
81              
82             *$seq->display_id: name of the top-level feature;
83              
84             *$seq->accession_number: if defined, uniquename and
85             feature_dbxref of the top-level
86             feature if not defined,
87             $seq->display_id is used as the
88             uniquename of the top-level feature;
89              
90             *$seq->molecule: transformed to SO type, used as the feature
91             type of the top-level feature if -seq_so_type
92             argument is supplied, use the supplied SO type
93             as the feature type of the top-level feature;
94              
95             *$seq->species: organism of the top-level feature;
96              
97             *$seq->seq: residues of the top-level feature;
98              
99             *$seq->is_circular, $seq->division: feature_cvterm;
100              
101             *$seq->keywords, $seq->desc, comments: featureprop;
102              
103             *references: pub and feature_pub;
104             medline/pubmed ids: pub_dbxref;
105             comments: pubprop;
106              
107             *feature "source" span: featureloc for top-level feature;
108              
109             *feature "source" db_xref: feature_dbxref for top-level feature;
110              
111             *feature "source" other tags: featureprop for top-level feature;
112              
113             *subfeature 'symbol' or 'label' tag: feature uniquename, if
114             none of these is present, the chadoxml object
115             generates feature uniquenames as:
116             --
117             (e.g. foo-mRNA--1000..3000);
118              
119             *gene model: feature_relationship built based on the
120             containment hierarchy;
121              
122             *feature span: featureloc;
123              
124             *feature accession numbers: feature_dbxref;
125              
126             *feature tags (except db_xref, symbol and gene): featureprop;
127              
128             Things to watch out for:
129              
130             *chado schema change: this version works with the chado
131             version tagged chado_1_01 in GMOD CVS.
132              
133             *feature uniquenames: especially important if using XORT
134             loader to do incremental load into
135             chado. may need pre-processing of the
136             source data to put the correct
137             uniquenames in place.
138              
139             *pub uniquenames: chadoxml->write_seq() has the FlyBase policy
140             on pub uniquenames hard-coded, it assigns
141             pub uniquenames in the following way: for
142             journals and books, use ISBN number; for
143             published papers, use MEDLINE ID; for
144             everything else, use FlyBase unique
145             identifier FBrf#. need to modify the code to
146             implement your policy. look for the comments
147             in the code.
148              
149             *for pubs possibly existing in chado but with no knowledge of
150             its uniquename:put "op" as "match", then need to run the
151             output chadoxml through a special filter that
152             talks to chado database and tries to find the
153             pub by matching with the provided information
154             instead of looking up by the unique key. after
155             matching, the filter also resets the "match"
156             operation to either "force" (default), or
157             "lookup", or "insert", or "update". the
158             "match" operation is for a special FlyBase use
159             case. please modify to work according to your
160             rules.
161              
162             *chado initialization for loading:
163              
164             cv & cvterm: in the output chadoxml, all cv's and
165             cvterm's are lookup only. Therefore,
166             before using XORT loader to load the
167             output into chado, chado must be
168             pre-loaded with all necessary CVs and
169             CVterms, including "SO" , "property
170             type", "relationship type", "pub type",
171             "pubprop type", "pub relationship type",
172             "sequence topology", "GenBank feature
173             qualifier", "GenBank division". A pub by
174             the uniquename 'nullpub' of type 'null
175             pub' needs to be inserted.
176              
177             =head1 FEEDBACK
178              
179             =head2 Mailing Lists
180              
181             User feedback is an integral part of the evolution of this and other
182             Bioperl modules. Send your comments and suggestions preferably to one
183             of the Bioperl mailing lists. Your participation is much appreciated.
184              
185             bioperl-l@bioperl.org - General discussion
186             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
187              
188             =head2 Support
189              
190             Please direct usage questions or support issues to the mailing list:
191              
192             I
193              
194             rather than to the module maintainer directly. Many experienced and
195             reponsive experts will be able look at the problem and quickly
196             address it. Please include a thorough description of the problem
197             with code and data examples if at all possible.
198              
199             =head2 Reporting Bugs
200              
201             Report bugs to the Bioperl bug tracking system to help us keep track
202             the bugs and their resolution.
203             Bug reports can be submitted via the web:
204              
205             https://github.com/bioperl/bioperl-live/issues
206              
207             =head1 AUTHOR - Peili Zhang
208              
209             Email peili@morgan.harvard.edu
210              
211             =head1 APPENDIX
212              
213             The rest of the documentation details each of the object
214             methods. Internal methods are usually preceded with a _
215              
216             =cut
217              
218             # Let the code begin...
219              
220             package Bio::SeqIO::chadoxml;
221 2     2   594 use strict;
  2         12  
  2         47  
222 2     2   529 use English;
  2         2208  
  2         8  
223              
224 2     2   705 use Carp;
  2         10  
  2         79  
225 2     2   9 use Data::Dumper;
  2         3  
  2         61  
226 2     2   8 use XML::Writer;
  2         2  
  2         39  
227 2     2   531 use IO::File;
  2         1469  
  2         176  
228 2     2   12 use IO::Handle;
  2         3  
  2         51  
229 2     2   718 use Bio::Seq;
  2         6  
  2         88  
230 2     2   807 use Bio::Seq::RichSeq;
  2         4  
  2         95  
231 2     2   476 use Bio::SeqIO::FTHelper;
  2         4  
  2         52  
232 2     2   620 use Bio::Species;
  2         5  
  2         50  
233 2     2   12 use Bio::Seq::SeqFactory;
  2         3  
  2         37  
234 2     2   500 use Bio::Factory::SequenceStreamI;
  2         4  
  2         39  
235 2     2   10 use Bio::SeqFeature::Generic;
  2         3  
  2         35  
236 2     2   9 use Bio::Annotation::Collection;
  2         3  
  2         31  
237 2     2   463 use Bio::Annotation::Comment;
  2         4  
  2         53  
238 2     2   485 use Bio::Annotation::Reference;
  2         4  
  2         50  
239 2     2   11 use Bio::Annotation::DBLink;
  2         4  
  2         32  
240 2     2   987 use Bio::SeqFeature::Tools::Unflattener;
  2         5  
  2         319  
241              
242             #global variables
243             undef(my %finaldatahash); #data from Bio::Seq object stored in a hash
244             undef(my %datahash); #data from Bio::Seq object stored in a hash
245              
246             my $chadotables = 'feature featureprop feature_relationship featureloc feature_cvterm cvterm cv feature_pub pub pub_dbxref pub_author author pub_relationship pubprop feature_dbxref dbxref db synonym feature_synonym';
247              
248             my %fkey = (
249             "cvterm.cv_id" => "cv",
250             "cvterm.dbxref_id" => "dbxref",
251             "dbxref.db_id" => "db",
252             "feature.type_id" => "cvterm",
253             "feature.organism_id" => "organism",
254             "feature.dbxref_id" => "dbxref",
255             "featureprop.type_id" => "cvterm",
256             "feature_pub.pub_id" => "pub",
257             "feature_cvterm.cvterm_id" => "cvterm",
258             "feature_cvterm.pub_id" => "pub",
259             "feature_cvterm.feature_id" => "feature",
260             "feature_dbxref.dbxref_id" => "dbxref",
261             "feature_relationship.object_id" => "feature",
262             "feature_relationship.subject_id" => "feature",
263             "feature_relationship.type_id" => "cvterm",
264             "featureloc.srcfeature_id" => "feature",
265             "pub.type_id" => "cvterm",
266             "pub_dbxref.dbxref_id" => "dbxref",
267             "pub_author.author_id" => "author",
268             "pub_relationship.obj_pub_id" => "pub",
269             "pub_relationship.subj_pub_id" => "pub",
270             "pub_relationship.type_id" => "cvterm",
271             "pubprop.type_id" => "cvterm",
272             "feature_synonym.feature_id" => "feature",
273             "feature_synonym.synonym_id" => "synonym",
274             "feature_synonym.pub_id" => "pub",
275             "synonym.type_id" => "cvterm",
276             );
277              
278             my %cv_name = (
279             'relationship' => 'relationship',
280             'sequence' => 'sequence',
281             'feature_property' => 'feature_property',
282             );
283              
284             my %feattype_args2so = (
285             "aberr" => "aberration_junction",
286             # "conflict" => "sequence_difference",
287             # "polyA_signal" => "polyA_signal_sequence",
288             "variation" => "sequence_variant",
289             "mutation1" => "point_mutation", #for single-base mutation
290             "mutation2" => "sequence_variant", #for multi-base mutation
291             "rescue" => "rescue_fragment",
292             # "rfrag" => "restriction_fragment",
293             "protein_bind" => "protein_binding_site",
294             "misc_feature" => "region",
295             # "prim_transcript" => "primary_transcript",
296             "CDS" => "polypeptide",
297             "reg_element" => "regulatory_region",
298             "seq_variant" => "sequence_variant",
299             "mat_peptide" => "mature_peptide",
300             "sig_peptide" => "signal_peptide",
301             );
302              
303             undef(my %organism);
304              
305 2     2   16 use base qw(Bio::SeqIO);
  2         4  
  2         741  
306              
307             sub _initialize {
308              
309 0     0     my($self,%args) = @_;
310              
311 0           $self->SUPER::_initialize(%args);
312 0 0         unless( defined $self->sequence_factory ) {
313 0           $self->sequence_factory(Bio::Seq::SeqFactory->new
314             (-verbose => $self->verbose(),
315             -type => 'Bio::Seq::RichSeq'));
316             }
317             #optional arguments that can be passed in
318             $self->suppress_residues($args{'-suppress_residues'})
319 0 0         if defined $args{'-suppress_residues'};
320              
321             $self->allow_residues($args{'-allow_residues'})
322 0 0         if defined $args{'-allow_residues'};
323 0           return;
324             }
325              
326             =head2 write_seq
327              
328             Title : write_seq
329             Usage : $stream->write_seq(-seq=>$seq, -seq_so_type=>$seqSOtype,
330             -src_feature=>$srcfeature,
331             -src_feat_type=>$srcfeattype,
332             -nounflatten=>0 or 1,
333             -is_analysis=>'true' or 'false',
334             -data_source=>$datasource)
335             Function: writes the $seq object (must be seq) into chadoxml.
336             Returns : 1 for success and 0 for error
337             Args : A Bio::Seq object $seq, optional $seqSOtype, $srcfeature,
338             $srcfeattype, $nounflatten, $is_analysis and $data_source.
339              
340             When $srcfeature (a string, the uniquename of the source feature) is given, the
341             location and strand information of the top-level feature against the source
342             feature will be derived from the sequence feature called 'source' of the $seq
343             object, a featureloc record is generated for the top -level feature on
344             $srcfeature. when $srcfeature is given, $srcfeattype must also be present. All
345             feature coordinates in $seq should be against $srcfeature. $seqSOtype is the
346             optional SO term to use as the type of the top-level feature. For example, a
347             GenBank data file for a Drosophila melanogaster genome scaffold has the molecule
348             type of "DNA", when converting to chadoxml, a $seqSOtype argument of
349             "golden_path_region" can be supplied to save the scaffold as a feature of type
350             "golden_path_region" in chadoxml, instead of "DNA". a feature with primary tag
351             of 'source' must be present in the sequence feature list of $seq, to describe the
352             whole sequence record.
353              
354             In the current implementation:
355              
356             =over 3
357              
358             =item *
359              
360             non-mRNA records
361              
362             A top-level feature of type $seq-Ealphabet is generated for the whole GenBank
363             record, features listed are unflattened for DNA records to build gene model
364             feature graph, and for the other types of records all features in $seq are
365             treated as subfeatures of the top-level feature.
366              
367             =item *
368              
369             mRNA records
370              
371             If a 'gene' feature is present, it B have a /symbol or /label tag to
372             contain the uniquename of the gene. a top-level feature of type 'gene' is
373             generated. the mRNA is written as a subfeature of the top-level gene feature,
374             and the other sequence features listed in $seq are treated as subfeatures of the
375             mRNA feature.
376              
377             =back
378              
379             =cut
380              
381             sub write_seq {
382 0     0 1   my $usage = <
383             Bio::SeqIO::chadoxml->write_seq()
384             Usage : \$stream->write_seq(-seq=>\$seq,
385             -seq_so_type=>\$SOtype,
386             -src_feature=>\$srcfeature,
387             -src_feat_type=>\$srcfeattype,
388             -nounflatten=>0 or 1,
389             -is_analysis=>'true' or 'false',
390             -data_source=>\$datasource)
391             Args : \$seq : a Bio::Seq object
392             \$SOtype : the SO term to use as the feature type of
393             the \$seq record, optional
394             \$srcfeature : unique name of the source feature, a string
395             containing at least one alphabetical letter
396             (a-z, A-Z), optional
397             \$srcfeattype : feature type of \$srcfeature. one of SO terms.
398             optional
399             when \$srcfeature is given, \$srcfeattype becomes mandatory,
400             \$datasource : source of the sequence annotation data,
401             e.g. 'GenBank' or 'GFF'.
402             EOUSAGE
403              
404 0           my ($self,@args) = @_;
405              
406 0           my ($seq, $seq_so_type, $srcfeature, $srcfeattype, $nounflatten, $isanalysis, $datasource, $genus, $species) =
407             $self->_rearrange([qw(SEQ
408             SEQ_SO_TYPE
409             SRC_FEATURE
410             SRC_FEAT_TYPE
411             NOUNFLATTEN
412             IS_ANALYSIS
413             DATA_SOURCE
414             GENUS
415             SPECIES
416             )],
417             @args);
418             #print "$seq_so_type, $srcfeature, $srcfeattype\n";
419              
420 0 0         if( !defined $seq ) {
421 0           $self->throw("Attempting to write with no seq!");
422             }
423              
424 0 0 0       if( ! ref $seq || ! $seq->isa('Bio::Seq::RichSeqI') ) {
425             ## FIXME $self->warn(" $seq is not a RichSeqI compliant module. Attempting to dump, but may fail!");
426             }
427              
428             # try to get the srcfeature from the seqFeature object
429             # for this to work, the user has to pass in the srcfeature type
430 0 0         if (!$srcfeature) {
431 0 0         if ($seq->can('seq_id')) {
432 0 0         $srcfeature=$seq->seq_id if ($seq->seq_id ne $seq->display_name);
433             }
434             }
435              
436             #$srcfeature, when provided, should contain at least one alphabetical letter
437 0 0         if (defined $srcfeature)
438             {
439 0 0         if ($srcfeature =~ /[a-zA-Z]/)
440             {
441 0           chomp($srcfeature);
442             } else {
443 0           $self->throw( $usage );
444             }
445              
446             #check for mandatory $srcfeattype
447 0 0         if (! defined $srcfeattype)
448             {
449 0           $self->throw( $usage );
450             #$srcfeattype must be a string of non-whitespace characters
451             } else {
452 0 0         if ($srcfeattype =~ /\S+/) {
453 0           chomp($srcfeattype);
454             } else {
455 0           $self->throw( $usage );
456             }
457             }
458             }
459              
460             # variables local to write_seq()
461 0           my $div = undef;
462 0           my $hkey = undef;
463 0           undef(my @top_featureprops);
464 0           undef(my @featuresyns);
465 0           undef(my @top_featurecvterms);
466 0 0         my $name = $seq->display_id if $seq->can('display_id');
467 0 0         $name = $seq->display_name if $seq->can('display_name');
468 0           undef(my @feature_cvterms);
469 0           undef(my %sthash);
470 0           undef(my %dvhash);
471 0           undef(my %h1);
472 0           undef(my %h2);
473 0           my $temp = undef;
474 0           my $ann = undef;
475 0           undef(my @references);
476 0           undef(my @feature_pubs);
477 0           my $ref = undef;
478 0           my $location = undef;
479 0           my $fbrf = undef;
480 0           my $journal = undef;
481 0           my $issue = undef;
482 0           my $volume = undef;
483 0           my $volumeissue = undef;
484 0           my $pages = undef;
485 0           my $year = undef;
486 0           my $pubtype = undef;
487             # my $miniref= undef;
488 0           my $uniquename = undef;
489 0           my $refhash = undef;
490 0           my $feat = undef;
491 0           my $tag = undef;
492 0           my $tag_cv = undef;
493 0           my $ftype = undef;
494 0           my $subfeatcnt = undef;
495 0           undef(my @top_featrels);
496 0           undef (my %srcfhash);
497              
498 0           local($^W) = 0; # suppressing warnings about uninitialized fields.
499              
500 0 0 0       if (!$name && $seq->can('attributes') ) {
501 0           ($name) = $seq->attributes('Alias');
502             }
503              
504 0 0 0       if ($seq->can('accession_number') && defined $seq->accession_number && $seq->accession_number ne 'unknown') {
    0 0        
    0 0        
      0        
505 0           $uniquename = $seq->accession_number;
506             } elsif ($seq->can('accession') && defined $seq->accession && $seq->accession ne 'unknown') {
507 0           $uniquename = $seq->accession;
508             } elsif ($seq->can('attributes')) {
509 0           ($uniquename) = $seq->attributes('load_id');
510             } else {
511 0           $uniquename = $name;
512             }
513 0           my $len = $seq->length();
514 0 0         if ($len == 0) {
515 0           $len = undef;
516             }
517              
518 0           undef(my $gb_type);
519 0 0 0       if (!$seq->can('molecule') || ! defined ($gb_type = $seq->molecule()) ) {
520 0 0         $gb_type = $seq->can('alphabet') ? $seq->alphabet : 'DNA';
521             }
522 0 0         $gb_type = 'DNA' if $ftype eq 'dna';
523 0 0         $gb_type = 'RNA' if $ftype eq 'rna';
524              
525 0 0         if(length $seq_so_type > 0) {
526 0 0         if (defined $seq_so_type) {
    0          
527 0           $ftype = $seq_so_type;
528             }
529             elsif ($seq->type) {
530 0 0         $ftype = ($seq->type =~ /(.*):/)
531             ? $1
532             : $seq->type;
533             }
534             else {
535 0           $ftype = $gb_type;
536             }
537             }
538             else {
539 0           $ftype = $gb_type;
540             }
541              
542 0           my %ftype_hash = $self->return_ftype_hash($ftype);
543              
544 0 0         if ($species) {
545 0           %organism = ("genus"=>$genus, "species" => $species);
546             }
547             else {
548 0           my $spec = $seq->species();
549 0 0         if (!defined $spec) {
550 0           $self->throw("$seq does not know what organism it is from, which is required by chado. cannot proceed!\n");
551             } else {
552 0           %organism = ("genus"=>$spec->genus(), "species" => $spec->species());
553             }
554             }
555              
556 0           my $residues;
557 0 0 0       if (!$self->suppress_residues ||
      0        
558             ($self->suppress_residues && $self->allow_residues eq $ftype)) {
559 0 0         $residues = $seq->seq->isa('Bio::PrimarySeq')
560             ? $seq->seq->seq
561             : $seq->seq;
562             }
563             else {
564 0           $residues = '';
565             }
566              
567             #set is_analysis flag for gene model features
568 0           undef(my $isanal);
569 0 0 0       if ($ftype eq 'gene' || $ftype eq 'mRNA' || $ftype eq 'exon' || $ftype eq 'protein' || $ftype eq 'polypeptide') {
      0        
      0        
      0        
570 0           $isanal = $isanalysis;
571 0 0         $isanal = 'false' if !defined $isanal;
572             }
573              
574             %datahash = (
575 0   0       "name" => $name,
576             "uniquename" => $uniquename,
577             "seqlen" => $len,
578             "residues" => $residues,
579             "type_id" => \%ftype_hash,
580             "organism_id" => \%organism,
581             "is_analysis" => $isanal || 'false',
582             );
583              
584 0 0         if (defined $srcfeature) {
585 0           %srcfhash = $self->_srcf_hash($srcfeature,
586             $srcfeattype,
587             \%organism);
588              
589 0           my ($phase,$strand);
590 0 0         if ($seq->can('phase')) {
591 0           $phase = $seq->phase;
592             }
593              
594 0 0         if ($seq->can('strand')) {
595 0           $strand = $seq->strand;
596             }
597 0           my %fl = (
598             "srcfeature_id" => \%srcfhash,
599             "fmin" => $seq->start - 1,
600             "fmax" => $seq->end,
601             "strand" => $strand,
602             "phase" => $phase,
603             );
604              
605 0           $datahash{'featureloc'} = \%fl;
606              
607             }
608              
609              
610             #if $srcfeature is not given, use the Bio::Seq object itself as the srcfeature for featureloc's
611 0 0         if (!defined $srcfeature) {
612 0           $srcfeature = $uniquename;
613 0           $srcfeattype = $ftype;
614             }
615              
616             #default data source is 'GenBank'
617 0 0         if (!defined $datasource) {
618 0           $datasource = 'GenBank';
619             }
620              
621 0 0         if ($datasource =~ /GenBank/i) {
622             #sequence topology as feature_cvterm
623 0 0 0       if ($seq->can('is_circular') && $seq->is_circular) {
624 0           %sthash = (
625             "cvterm_id" => {'name' => 'circular',
626             'cv_id' => {
627             'name' => 'sequence topology',
628             },
629             },
630             "pub_id" => {'uniquename' => 'nullpub',
631             'type_id' => {
632             'name' => 'null pub',
633             'cv_id' => {
634             'name'=> 'pub type',
635             },
636             },
637             },
638             );
639             } else {
640 0           %sthash = (
641             "cvterm_id" => { 'name' => 'linear',
642             'cv_id' => {
643             'name' => 'sequence topology',
644             }
645             },
646             "pub_id" => {'uniquename' => 'nullpub',
647             'type_id' => {
648             'name' => 'null pub',
649             'cv_id' => {
650             'name'=> 'pub type',
651             },
652             },
653             },
654             );
655             }
656 0           push(@feature_cvterms, \%sthash);
657              
658             #division as feature_cvterm
659 0 0 0       if ($seq->can('division') && defined $seq->division()) {
660 0           $div = $seq->division();
661 0           %dvhash = (
662             "cvterm_id" => {'name' => $div,
663             'cv_id' => {
664             'name' => 'GenBank division'}},
665             "pub_id" => {'uniquename' => 'nullpub',
666             'type_id' => {
667             'name' => 'null pub',
668             'cv_id' => {
669             'name'=> 'pub type'},
670             }},
671             );
672 0           push(@feature_cvterms, \%dvhash);
673             }
674              
675 0           $datahash{'feature_cvterm'} = \@feature_cvterms;
676             } # closes if GenBank
677              
678             #featureprop's
679             #DEFINITION
680 0 0 0       if ($seq->can('desc') && defined $seq->desc()) {
681 0           $temp = $seq->desc();
682              
683             my %prophash = (
684             "type_id" => {'name' => 'description',
685             'cv_id' => {
686             'name' =>
687 0           $cv_name{'feature_property'}
688             },
689             },
690             "value" => $temp,
691             );
692              
693 0           push(@top_featureprops, \%prophash);
694             }
695              
696             #KEYWORDS
697 0 0         if ($seq->can('keywords')) {
698 0           $temp = $seq->keywords();
699              
700 0 0 0       if (defined $temp && $temp ne '.' && $temp ne '') {
      0        
701             my %prophash = (
702             "type_id" => {'name' => 'keywords',
703             'cv_id' => {
704             'name' =>
705 0           $cv_name{'feature_property'}
706             }
707             },
708             "value" => $temp,
709             );
710              
711 0           push(@top_featureprops, \%prophash);
712             }
713             }
714              
715             #COMMENT
716 0 0         if ($seq->can('annotation')) {
717 0           $ann = $seq->annotation();
718 0           foreach my $comment ($ann->get_Annotations('comment')) {
719 0           $temp = $comment->as_text();
720             #print "fcomment: $temp\n";
721             my %prophash = (
722             "type_id" => {'name' => 'comment',
723             'cv_id' => {
724             'name' =>
725 0           $cv_name{'feature_property'}
726             }
727             },
728             "value" => $temp,
729             );
730              
731 0           push(@top_featureprops, \%prophash);
732             }
733             }
734              
735 0           my @top_dbxrefs = ();
736             #feature object from Bio::DB::SeqFeature::Store
737 0 0         if ($seq->can('attributes')) {
738 0           my %attributes = $seq->attributes;
739 0           for my $key (keys %attributes) {
740 0 0         next if ($key eq 'parent_id');
741 0 0         next if ($key eq 'load_id');
742              
743 0 0 0       if ($key eq 'Alias') {
    0          
    0          
    0          
744 0           @featuresyns = $self->handle_Alias_tag($seq,@featuresyns);
745             }
746              
747             ###FIXME deal with Dbxref, Ontology_term,source,
748             elsif ($key eq 'Ontology_term') {
749 0           @top_featurecvterms = $self->handle_Ontology_tag($seq,@top_featurecvterms);
750             }
751              
752             elsif ($key eq 'dbxref' or $key eq 'Dbxref') {
753 0           @top_dbxrefs = $self->handle_dbxref($seq, $key, @top_dbxrefs);
754             }
755              
756             elsif ($key =~ /^[a-z]/) {
757             @top_featureprops
758 0           = $self->handle_unreserved_tags($seq,$key,@top_featureprops);
759             }
760             }
761             }
762 0           $datahash{'feature_synonym'} = \@featuresyns;
763              
764 0 0         if ($seq->can('source')) {
765 0           @top_dbxrefs = $self->handle_source($seq,@top_dbxrefs);
766             }
767              
768             #accession and version as feature_dbxref
769 0 0 0       if ($seq->can('accession_number') && defined $seq->accession_number && $seq->accession_number ne 'unknown') {
      0        
770 0           my $db = $self->_guess_acc_db($seq, $seq->accession_number);
771 0           my %acchash = (
772             "db_id" => {'name' => $db},
773             "accession" => $seq->accession_number,
774             "version" => $seq->seq_version,
775             );
776 0           my %fdbx = ('dbxref_id' => \%acchash);
777 0           push(@top_dbxrefs, \%fdbx);
778             }
779              
780 0 0 0       if( $seq->isa('Bio::Seq::RichSeqI') && defined $seq->get_secondary_accessions() ) {
781 0           my @secacc = $seq->get_secondary_accessions();
782 0           my $acc;
783 0           foreach $acc (@secacc) {
784 0           my %acchash = (
785             "db_id" => {'name' => 'GB'},
786             "accession" => $acc,
787             );
788 0           my %fdbx = ('dbxref_id' => \%acchash);
789 0           push(@top_dbxrefs, \%fdbx);
790             }
791             }
792              
793             #GI number
794 0 0 0       if( $seq->isa('Bio::Seq::RichSeqI') && defined ($seq->pid)) {
795 0           my $id = $seq->pid;
796             #print "reftype: ", ref($id), "\n";
797              
798             #if (ref($id) eq 'HASH') {
799 0           my %acchash = (
800             "db_id" => {'name' => 'GI'},
801             "accession" => $id,
802             );
803 0           my %fdbx = ('dbxref_id' => \%acchash);
804 0           push (@top_dbxrefs, \%fdbx);
805             }
806              
807             #REFERENCES as feature_pub
808 0 0         if (defined $ann) {
809             #get the references
810 0           @references = $ann->get_Annotations('reference');
811 0           foreach $ref (@references) {
812 0           undef(my %pubhash);
813 0           $refhash = $ref->hash_tree();
814 0   0       $location = $ref->location || $refhash->{'location'};
815             #print "location: $location\n";
816              
817             #get FBrf#, special for FlyBase SEAN loading
818 0 0         if (index($location, ' ==') >= 0) {
819 0           $location =~ /\s==/;
820             #print "match: $MATCH\n";
821             #print "prematch: $PREMATCH\n";
822             #print "postmatch: $POSTMATCH\n";
823 0           $fbrf = $PREMATCH;
824 0           $location = $POSTMATCH;
825 0           $location =~ s/^\s//;
826             }
827              
828             #print "location: $location\n";
829             #unpublished reference
830 0 0         if ($location =~ /Unpublished/) {
    0          
    0          
831 0           $pubtype = 'unpublished';
832             %pubhash = (
833 0   0       "title" => $ref->title || $refhash->{'title'},
834             #"miniref" => substr($location, 0, 255),
835             #"uniquename" => $fbrf,
836             "type_id" => {'name' => $pubtype, 'cv_id' => {'name' =>'pub type'}}
837             );
838             }
839             #submitted
840             elsif ($location =~ /Submitted/) {
841 0           $pubtype = 'submitted';
842              
843             %pubhash = (
844 0   0       "title" => $ref->title || $refhash->{'title'},
845             #"miniref" => substr($location, 0, 255),
846             #"uniquename" => $fbrf,
847             "type_id" => {'name' => $pubtype, 'cv_id' => {'name' =>'pub type'}}
848             );
849              
850 0           undef(my $pyear);
851 0           $pyear = $self->_getSubmitYear($location);
852 0 0         if (defined $pyear) {
853 0           $pubhash{'pyear'} = $pyear;
854             }
855             }
856              
857             #published journal paper
858             elsif ($location =~ /\D+\s\d+\s\((\d+|\d+-\d+)\),\s(\d+-\d+|\d+--\d+)\s\(\d\d\d\d\)$/) {
859 0           $pubtype = 'paper';
860              
861             #parse location to get journal, volume, issue, pages & year
862 0           $location =~ /\(\d\d\d\d\)$/;
863              
864 0           $year = $MATCH;
865 0           my $stuff = $PREMATCH;
866 0           $year =~ s/\(//; #remove the leading parenthesis
867 0           $year =~ s/\)//; #remove the trailing parenthesis
868              
869 0           $stuff =~ /,\s(\d+-\d+|\d+--\d+)\s$/;
870              
871 0           $pages = $MATCH;
872 0           $stuff = $PREMATCH;
873 0           $pages =~ s/^, //; #remove the leading comma and space
874 0           $pages =~ s/ $//; #remove the last space
875              
876 0           $stuff =~ /\s\d+\s\((\d+|\d+-\d+)\)$/;
877              
878 0           $volumeissue = $MATCH;
879 0           $journal = $PREMATCH;
880 0           $volumeissue =~ s/^ //; #remove the leading space
881 0           $volumeissue =~ /\((\d+|\d+-\d+)\)$/;
882 0           $issue = $MATCH;
883 0           $volume = $PREMATCH;
884 0           $issue =~ s/^\(//; #remove the leading parentheses
885 0           $issue =~ s/\)$//; #remove the last parentheses
886 0           $volume =~ s/^\s//; #remove the leading space
887 0           $volume =~ s/\s$//; #remove the last space
888              
889             %pubhash = (
890 0   0       "title" => $ref->title || $refhash->{'title'},
891             "volume" => $volume,
892             "issue" => $issue,
893             "pyear" => $year,
894             "pages" => $pages,
895             #"miniref" => substr($location, 0, 255),
896             #"miniref" => ' ',
897             #"uniquename" => $fbrf,
898             "type_id" => {'name' => $pubtype, 'cv_id' => {'name' =>'pub type'}},
899             "pub_relationship" => {
900             'obj_pub_id' => {
901             'uniquename' => $journal,
902             'title' => $journal,
903             #'miniref' => substr($journal, 0, 255),
904             'type_id' =>{'name' => 'journal',
905             'cv_id' =>
906             {'name' => 'pub type'
907             },
908             },
909             #'pubprop' =>{'value'=> $journal,
910             # 'type_id'=>{'name' => 'abbreviation', 'cv_id' => {'name' => 'pubprop type'}},
911             # },
912             },
913             'type_id' => {
914             'name' => 'published_in',
915             'cv_id' => {
916             'name' => 'pub relationship type'},
917             },
918             },
919             );
920             }
921              
922             #other references
923             else {
924 0           $pubtype = 'other';
925             %pubhash = (
926 0   0       "title" => $ref->title || $refhash->{'title'},
927             #"miniref" => $fbrf,
928             "type_id" => {
929             'name' => $pubtype,
930             'cv_id' => {'name' =>'pub type'}
931             }
932             );
933             }
934              
935             #pub_author
936 0           my $autref = $self->_getRefAuthors($ref);
937 0 0         if (defined $autref) {
938 0           $pubhash{'pub_author'} = $autref;
939             }
940             # if no author and is type 'submitted' and has submitter address, use the first 100 characters of submitter address as the author lastname.
941             else {
942 0 0         if ($pubtype eq 'submitted') {
943 0           my $autref = $self->_getSubmitAddr($ref);
944 0 0         if (defined $autref) {
945 0           $pubhash{'pub_author'} = $autref;
946             }
947             }
948             }
949              
950             #$ref->comment as pubprop
951             #print "ref comment: ", $ref->comment, "\n";
952             #print "ref comment: ", $refhash->{'comment'}, "\n";
953 0 0 0       if (defined $ref->comment || defined $refhash->{'comment'}) {
954 0   0       my $comnt = $ref->comment || $refhash->{'comment'};
955             #print "remark: ", $comnt, "\n";
956 0           $pubhash{'pubprop'} = {
957             "type_id" => {'name' => 'comment', 'cv_id' => {'name' => 'pubprop type'}},
958             "value" => $comnt,
959             };
960             }
961              
962             #pub_dbxref
963 0           undef(my @pub_dbxrefs);
964 0 0         if (defined $fbrf) {
965 0           push(@pub_dbxrefs, {dbxref_id => {accession => $fbrf, db_id => {'name' => 'FlyBase'}}});
966             }
967 0 0         if (defined ($temp = $ref->medline)) {
968 0           push(@pub_dbxrefs, {dbxref_id => {accession => $temp, db_id => {'name' => 'MEDLINE'}}});
969             #use medline # as the pub's uniquename
970 0           $pubhash{'uniquename'} = $temp;
971             }
972 0 0         if (defined ($temp = $ref->pubmed)) {
973 0           push(@pub_dbxrefs, {dbxref_id => {accession => $temp, db_id => {'name' => 'PUBMED'}}});
974             }
975 0           $pubhash{'pub_dbxref'} = \@pub_dbxrefs;
976              
977             #if the pub uniquename is not defined or blank, put its FBrf# as its uniquename
978             #this is unique to FlyBase
979             #USERS OF THIS MODULE: PLEASE MODIFY HERE TO IMPLEMENT YOUR POLICY
980             # ON PUB UNIQUENAME!!!
981 0 0 0       if (!defined $pubhash{'uniquename'} || $pubhash{'uniquename'} eq '') {
982 0 0         if (defined $fbrf) {
983 0           $pubhash{'uniquename'} = $fbrf;
984             }
985             #else {
986             # $pubhash{'uniquename'} = $self->_CreatePubUname($ref);
987             #}
988             }
989              
990             #add to collection of references
991             #if the pub covers the entire sequence of the top-level feature, add it to feature_pubs
992 0 0 0       if (($ref->start == 1 && $ref->end == $len) || (!defined $ref->start && !defined $ref->end)) {
      0        
      0        
993 0           push(@feature_pubs, {"pub_id" => \%pubhash});
994             }
995             #the pub is about a sub-sequence of the top-level feature
996             #create a feature for the sub-sequence and add pub as its feature_pub
997             #featureloc of this sub-sequence is against the top-level feature, in interbase coordinates.
998             else {
999             my %parf = (
1000             'uniquename' => $uniquename . ':' . $ref->start . "\.\." . $ref->end,
1001             'organism_id' =>\%organism,
1002 0           'type_id' =>{'name' =>'region', 'cv_id' => {'name' => $cv_name{'sequence'} }},
1003             );
1004 0           my %parfsrcf = (
1005             'uniquename' => $uniquename,
1006             'organism_id' =>\%organism,
1007             );
1008 0           my %parfloc = (
1009             'srcfeature_id' => \%parfsrcf,
1010             'fmin' => $ref->start - 1,
1011             'fmax' => $ref->end,
1012             );
1013 0           $parf{'featureloc'} = \%parfloc;
1014 0           $parf{'feature_pub'} = {'pub_id' => \%pubhash};
1015             my %ffr = (
1016             'subject_id' => \%parf,
1017 0           'type_id' => { 'name' => 'partof', 'cv_id' => { 'name' => $cv_name{'relationship'}}},
1018             );
1019 0           push(@top_featrels, \%ffr);
1020             }
1021             }
1022 0           $datahash{'feature_pub'} = \@feature_pubs;
1023             }
1024              
1025             ##construct srcfeature hash for use in featureloc
1026 0 0         if (defined $srcfeature) {
1027 0           %srcfhash = $self->_srcf_hash($srcfeature,
1028             $srcfeattype,
1029             \%organism);
1030             # my %fr = (
1031             # "object_id" => \%srcfhash,
1032             # "type_id" => { 'name' => 'partof', 'cv_id' => { 'name' => 'relationship type'}},
1033             # );
1034              
1035             # push (@top_featrels, \%fr);
1036             }
1037              
1038             #unflatten the seq features in $seq if $seq is a gene or a DNA sequence
1039 0 0 0       if (($gb_type eq 'gene' || $gb_type eq 'DNA') &&
      0        
1040             !$nounflatten) {
1041 0           my $u = Bio::SeqFeature::Tools::Unflattener->new;
1042 0           $u->unflatten_seq(-seq=>$seq, -use_magic=>1);
1043             }
1044              
1045 0           my @top_sfs = $seq->get_SeqFeatures;
1046             #print $#top_sfs, "\n";
1047              
1048             #SUBFEATURES
1049              
1050 0 0         if ($datasource =~ /GenBank/i) {
    0          
1051 0           $tag_cv = 'GenBank feature qualifier';
1052             } elsif ($datasource =~ /GFF/i) {
1053 0           $tag_cv = 'feature_property';
1054             } else {
1055 0           $tag_cv = $cv_name{'feature_property'};
1056             }
1057              
1058 0           my $si = 0;
1059 0           foreach $feat (@top_sfs) {
1060             #$feat = $top_sfs[$si];
1061             #print "si: $si\n";
1062 0           my $prim_tag = $feat->primary_tag;
1063             #print $prim_tag, "\n";
1064              
1065             # get all qualifiers of the 'source' feature, load these as top_featureprops of the top level feature
1066 0 0         if ($prim_tag eq 'source') {
1067 0           foreach $tag ($feat->all_tags()) {
1068             #db_xref
1069 0 0 0       if ($tag eq 'db_xref'
    0 0        
    0          
1070             or $tag eq 'Dbxref'
1071             or $tag eq 'dbxref') {
1072 0           my @t1 = $feat->each_tag_value($tag);
1073 0           foreach $temp (@t1) {
1074 0           $temp =~ /([^:]*?):(.*)/;
1075 0           my $db = $1;
1076 0           my $xref = $2;
1077             #PRE/POST very inefficent
1078             #my $db = $PREMATCH;
1079             #my $xref = $POSTMATCH;
1080 0           my %acchash = (
1081             "db_id" => {'name' => $db},
1082             "accession" => $xref,
1083             );
1084 0           my %fdbx = ('dbxref_id' => \%acchash);
1085 0           push (@top_dbxrefs, \%fdbx);
1086             }
1087             #Ontology_term
1088             } elsif ($tag eq 'Ontology_term') {
1089 0           my @t1 = $feat->each_tag_value($tag);
1090 0           foreach $temp (@t1) {
1091             ###FIXME
1092             }
1093             #other tags as featureprop
1094             } elsif ($tag ne 'gene') {
1095 0           my %prophash = undef;
1096 0           %prophash = (
1097             "type_id" => {'name' => $tag, 'cv_id' => {'name' => $tag_cv}},
1098             "value" => join(' ',$feat->each_tag_value($tag)),
1099             );
1100 0           push(@top_featureprops, \%prophash);
1101             }
1102             }
1103              
1104 0 0         if ($feat->can('source')) {
1105 0           my $source = $feat->source();
1106 0           @top_dbxrefs = $self->handle_source($feat, @top_dbxrefs);
1107             }
1108              
1109             #featureloc for the top-level feature
1110 0           my $fmin = undef;
1111 0           my $fmax = undef;
1112 0           my $strand = undef;
1113 0           my $phase = undef;
1114 0           my %fl = undef;
1115              
1116 0           $fmin = $feat->start - 1;
1117 0           $fmax = $feat->end;
1118 0           $strand = $feat->strand;
1119              
1120 0 0         if ($feat->can('phase')) {
1121 0           $phase = $feat->phase;
1122             }
1123              
1124             %fl = (
1125 0           "srcfeature_id" => \%srcfhash,
1126             "fmin" => $fmin,
1127             "fmax" => $fmax,
1128             "strand" => $strand,
1129             "phase" => $phase,
1130             );
1131              
1132 0           $datahash{'featureloc'} = \%fl;
1133              
1134             #delete 'source' feature from @top_sfs
1135 0           splice(@top_sfs, $si, 1);
1136             }
1137 0           $si ++;
1138             #close loop over top_sfs
1139             }
1140              
1141             #the top-level features other than 'source'
1142 0           foreach $feat (@top_sfs) {
1143             #print $feat->primary_tag, "\n";
1144              
1145 0           my $r = $self->_subfeat2featrelhash($name, $ftype, $feat, \%srcfhash, $tag_cv, $isanalysis);
1146              
1147 0 0 0       if (!($ftype eq 'mRNA' && $feat->primary_tag eq 'gene')) {
1148 0           my %fr = %$r;
1149 0           push(@top_featrels, \%fr);
1150             } else {
1151 0           %finaldatahash = %$r;
1152             }
1153             }
1154              
1155 0 0         if (@top_dbxrefs) {
1156 0           $datahash{'feature_dbxref'} = \@top_dbxrefs;
1157             }
1158              
1159 0 0         if (@top_featureprops) {
1160 0           $datahash{'featureprop'} = \@top_featureprops;
1161             }
1162              
1163 0 0         if (@top_featrels) {
1164 0           $datahash{'feature_relationship'} = \@top_featrels;
1165             }
1166              
1167 0 0         if (@top_featurecvterms) {
1168 0           $datahash{'feature_cvterm'} = \@top_featurecvterms;
1169             }
1170              
1171 0 0 0       if ($ftype eq 'mRNA' && %finaldatahash) {
1172             $finaldatahash{'feature_relationship'} = {
1173             'subject_id' => \%datahash,
1174 0           'type_id' => { 'name' => 'partof', 'cv_id' => { 'name' => $cv_name{'relationship'} }},
1175             };
1176             } else {
1177 0           %finaldatahash = %datahash;
1178             }
1179              
1180 0           my $mainTag = 'feature';
1181 0           $self->_hash2xml(undef, $mainTag, \%finaldatahash);
1182              
1183 0           return 1;
1184             }
1185              
1186             sub _hash2xml {
1187 0     0     my $self = shift;
1188 0           my $isMatch = undef;
1189 0           $isMatch = shift;
1190 0           my $ult = shift;
1191 0           my $ref = shift;
1192 0           my %mh = %$ref;
1193 0           my $key;
1194             my $v;
1195 0           my $sh;
1196 0           my $xx;
1197 0           my $yy;
1198 0           my $nt;
1199 0           my $ntref;
1200 0           my $output;
1201 0 0         my $root = shift if (@_);
1202             #print "ult: $ult\n";
1203 0 0         if (!defined $self->{'writer'}) {
1204 0           $root = 1;
1205 0           $self->_create_writer();
1206             }
1207 0           my $temp;
1208 0           my %subh = undef;
1209              
1210             #start opeing tag
1211             #if pub record of type 'journal', form the 'ref' attribute for special pub lookup
1212             #requires that the journal name itself is also stored as a pubprop record for the journal with value equal
1213             #to the journal name and type of 'abbreviation'.
1214 0 0 0       if ($ult eq 'pub' && $mh{'type_id'}->{'name'} eq 'journal') {
    0 0        
    0 0        
    0          
1215 0           $self->{'writer'}->startTag($ult, 'ref' => $mh{'title'} . ':journal:abbreviation');
1216             }
1217              
1218             #special pub match if pub uniquename not known
1219             elsif ($ult eq 'pub' && !defined $mh{'uniquename'}) {
1220 0           $self->{'writer'}->startTag($ult, 'op' => 'match');
1221             #set the match flag, all the sub tags should also have "op"="match"
1222 0           $isMatch = 1;
1223             }
1224              
1225             #if cvterm or cv, lookup only
1226             elsif (($ult eq 'cvterm') || ($ult eq 'cv')) {
1227 0           $self->{'writer'}->startTag($ult, 'op' => 'lookup');
1228             }
1229              
1230             #if nested tables of match table, match too
1231             elsif ($isMatch) {
1232 0           $self->{'writer'}->startTag($ult, 'op' => 'match');
1233             }
1234              
1235             else {
1236 0           $self->{'writer'}->startTag($ult);
1237             }
1238              
1239             #first loop to produce xml for all the table columns
1240 0           foreach $key (keys %mh)
1241             {
1242             #print "key: $key\n";
1243 0           $xx = ' ' . $key;
1244 0           $yy = $key . ' ';
1245 0 0 0       if (index($chadotables, $xx) < 0 && index($chadotables, $yy) < 0)
1246             {
1247 0 0         if ($isMatch) {
1248 0           $self->{'writer'}->startTag($key, 'op' => 'match');
1249             } else {
1250 0           $self->{'writer'}->startTag($key);
1251             }
1252              
1253 0           my $x = $ult . '.' . $key;
1254             #the column is a foreign key
1255 0 0         if (defined $fkey{$x})
1256             {
1257 0           $nt = $fkey{$x};
1258 0           $sh = $mh{$key};
1259 0           $self->_hash2xml($isMatch, $nt, $sh, 0);
1260             } else
1261             {
1262             #print "$key: $mh{$key}\n";
1263 0           $self->{'writer'}->characters($mh{$key});
1264             }
1265 0           $self->{'writer'}->endTag($key);
1266             }
1267             }
1268              
1269             #second loop to produce xml for all the nested tables
1270 0           foreach $key (keys %mh)
1271             {
1272             #print "key: $key\n";
1273 0           $xx = ' ' . $key;
1274 0           $yy = $key . ' ';
1275             #a nested table
1276 0 0 0       if (index($chadotables, $xx) > 0 || index($chadotables, $yy) > 0)
1277             {
1278             #$writer->startTag($key);
1279 0           $ntref = $mh{$key};
1280             #print "$key: ", ref($ntref), "\n";
1281 0 0         if (ref($ntref) =~ 'HASH') {
    0          
1282 0           $self->_hash2xml($isMatch, $key, $ntref, 0);
1283             } elsif (ref($ntref) =~ 'ARRAY') {
1284             #print "array dim: ", $#$ntref, "\n";
1285 0           foreach $ref (@$ntref) {
1286             #print "\n";
1287 0           $self->_hash2xml($isMatch, $key, $ref, 0);
1288             }
1289             }
1290             #$writer->endTag($key);
1291             }
1292             }
1293              
1294             #end tag
1295 0           $self->{'writer'}->endTag($ult);
1296              
1297             #if ($root == 1) {
1298             # $self->{'writer'}->endTag('chado');
1299             # }
1300             }
1301              
1302             sub _guess_acc_db {
1303 0     0     my $self = shift;
1304 0           my $seq = shift;
1305 0           my $acc = shift;
1306             #print "acc: $acc\n";
1307              
1308 0 0 0       if ($acc =~ /^NM_\d{6}/ || $acc =~ /^NP_\d{6}/ || $acc =~ /^NT_\d{6}/ || $acc =~ /^NC_\d{6}/) {
    0 0        
    0 0        
    0 0        
    0 0        
    0 0        
    0 0        
      0        
      0        
1309 0           return "RefSeq";
1310             } elsif ($acc =~ /^XM_\d{6}/ || $acc =~ /^XP_\d{6}/ || $acc =~ /^XR_\d{6}/) {
1311 0           return "RefSeq";
1312             } elsif ($acc =~ /^[a-zA-Z]{1,2}\d{5,6}/) {
1313 0           return "GB";
1314             } elsif ($seq->molecule() eq 'protein' && $acc =~ /^[a-zA-z]\d{5}/) {
1315 0           return "PIR";
1316             } elsif ($seq->molecule() eq 'protein' && $acc =~ /^\d{6,7}[a-zA-Z]/) {
1317 0           return "PRF";
1318             } elsif ($acc =~ /\d+/ && $acc !~ /[a-zA-Z]/) {
1319 0           return "LocusID";
1320             } elsif ($acc =~ /^CG\d+/ || $acc =~ /^FB[a-z][a-z]\d+/) {
1321 0           return "FlyBase";
1322             } else {
1323 0           return "unknown";
1324             }
1325             }
1326              
1327             sub _subfeat2featrelhash {
1328 0     0     my $self = shift;
1329 0           my $genename = shift;
1330 0           my $seqtype = shift;
1331 0           my $feat = shift;
1332 0           my $r = shift;
1333 0           my %srcf = %$r; #srcfeature hash for featureloc.srcfeature_id
1334 0           my $tag_cv = shift;
1335 0           my $isanalysis = shift;
1336              
1337 0           my $prim_tag = $feat->primary_tag;
1338              
1339 0           my $sfunique = undef; #subfeature uniquename
1340 0           my $sfname = undef; #subfeature name
1341 0           my $sftype = undef; #subfeature type
1342              
1343 0 0         if ($feat->has_tag('symbol')) {
    0          
1344 0           ($sfunique) = $feat->each_tag_value("symbol");
1345             } elsif ($feat->has_tag('label')) {
1346 0           ($sfunique) = $feat->each_tag_value("label");
1347             } else {
1348             #$self->throw("$prim_tag at " . $feat->start . "\.\." . $feat->end . " does not have symbol or label! To convert into chadoxml, a seq feature must have a /symbol or /label tag holding its unique name.");
1349             #generate feature unique name as --
1350 0           $sfunique = $self->_genFeatUniqueName($genename, $feat);
1351             }
1352              
1353 0 0         if ($feat->has_tag('Name')) {
1354 0           ($sfname) = $feat->each_tag_value("Name");
1355             }
1356              
1357             #feature type translation
1358 0 0         if (defined $feattype_args2so{$prim_tag}) {
1359 0           $sftype = $feattype_args2so{$prim_tag};
1360             } else {
1361 0           $sftype = $prim_tag;
1362             }
1363              
1364 0 0         if ($prim_tag eq 'mutation') {
1365 0 0         if ($feat->start == $feat->end) {
1366 0           $sftype = $feattype_args2so{'mutation1'};
1367             } else {
1368 0           $sftype = $feattype_args2so{'mutation2'};
1369             }
1370             }
1371              
1372             #set is_analysis flag for gene model features
1373 0           undef(my $isanal);
1374 0 0 0       if ($sftype eq 'gene' || $sftype eq 'mRNA' || $sftype eq 'exon' || $sftype eq 'protein' || $sftype eq 'polypeptide') {
      0        
      0        
      0        
1375 0           $isanal = $isanalysis;
1376             }
1377              
1378             my %sfhash = (
1379             "name" => $sfname,
1380             "uniquename" => $sfunique,
1381             "organism_id" => \%organism,
1382 0   0       "type_id" => { 'name' => $sftype, 'cv_id' => { 'name' => $cv_name{'sequence'} }},
1383             "is_analysis" => $isanal || 'false',
1384             );
1385              
1386             #make a copy of %sfhash for passing to this method when recursively called
1387             #my %srcfeat = (
1388             # "name" => $sfname,
1389             # "uniquename" => $sfunique,
1390             # "organism_id" => \%organism,
1391             # "type_id" => { 'name' => $sftype, 'cv_id' => { 'name' => 'SO'}},
1392             # );
1393              
1394             #featureloc for subfeatures
1395 0           undef(my $sfmin);
1396 0           undef(my $sfmax);
1397 0           undef(my $is_sfmin_partial);
1398 0           undef(my $is_sfmax_partial);
1399 0           undef(my $sfstrand);
1400 0           undef(my $sfphase);
1401 0           $sfmin = $feat->start - 1;
1402 0           $sfmax = $feat->end;
1403 0           $sfstrand = $feat->strand();
1404              
1405 0 0         if ($feat->can('phase')) {
1406 0           $sfphase = $feat->phase;
1407             }
1408              
1409             #if the gene feature in an mRNA record, cannot use its coordinates, omit featureloc
1410 0 0 0       if ($seqtype eq 'mRNA' && $sftype eq 'gene') {
1411             } else {
1412 0 0         if ($feat->location->isa('Bio::Location::FuzzyLocationI')) {
1413 0 0         if ($feat->location->start_pos_type() ne 'EXACT') {
1414 0           $is_sfmin_partial = 'true';
1415             }
1416 0 0         if ($feat->location->end_pos_type() ne 'EXACT') {
1417 0           $is_sfmax_partial = 'true';
1418             }
1419             }
1420              
1421 0   0       my %sfl = (
      0        
1422             "srcfeature_id" => \%srcf,
1423             "fmin" => $sfmin,
1424             "is_fmin_partial" => $is_sfmin_partial || 'false',
1425             "fmax" => $sfmax,
1426             "is_fmax_partial" => $is_sfmax_partial || 'false',
1427             "strand" => $sfstrand,
1428             "phase" => $sfphase,
1429             );
1430              
1431 0           $sfhash{'featureloc'} = \%sfl;
1432             }
1433              
1434              
1435             #subfeature tags
1436 0           undef(my @sfdbxrefs); #subfeature dbxrefs
1437 0           undef(my @sub_featureprops); #subfeature props
1438 0           undef(my @sub_featuresyns); #subfeature synonyms
1439 0           undef(my @sub_featurecvterms); #subfeature cvterms
1440 0           foreach my $tag ($feat->all_tags()) {
1441             #feature_dbxref for features
1442 0 0 0       if ($tag eq 'db_xref' or $tag eq 'dbxref' or $tag eq 'Dbxref') {
    0 0        
    0 0        
    0 0        
      0        
1443 0           my @t1 = $feat->each_tag_value($tag);
1444             #print "# of dbxref: @t1\n";
1445 0           for my $temp (@t1) {
1446 0           $temp =~ /:/;
1447 0           my $db = $PREMATCH;
1448 0           my $xref = $POSTMATCH;
1449             #print "db: $db; xref: $xref\n";
1450 0           my %acchash = (
1451             "db_id" => {'name' => $db},
1452             "accession" => $xref,
1453             );
1454 0           my %sfdbx = ('dbxref_id' => \%acchash);
1455 0           push (@sfdbxrefs, \%sfdbx);
1456             }
1457             #Alias tags
1458             } elsif ($tag eq 'Alias') {
1459 0           @sub_featuresyns = $self->handle_Alias_tag($feat, @sub_featuresyns);
1460             } elsif ($tag eq 'Ontology_term') {
1461 0           @sub_featurecvterms = $self->handle_Ontology_tag($feat, @sub_featurecvterms);
1462             #featureprop for features, excluding GFF Name & Parent tags
1463             } elsif ($tag ne 'gene' && $tag ne 'symbol' && $tag ne 'Name' && $tag ne 'Parent') {
1464 0 0         next if ($tag eq 'parent_id');
1465 0 0         next if ($tag eq 'load_id');
1466 0           foreach my $val ($feat->each_tag_value($tag)) {
1467 0           my %prophash = undef;
1468 0           %prophash = (
1469             "type_id" => {'name' => $tag, 'cv_id' => {'name' => $tag_cv}},
1470             "value" => $val,
1471             );
1472 0           push(@sub_featureprops, \%prophash);
1473             }
1474             }
1475             }
1476              
1477 0 0         if ($feat->can('source')) {
1478 0           @sfdbxrefs = $self->handle_source($feat,@sfdbxrefs);
1479             }
1480              
1481 0 0         if (@sub_featureprops) {
1482 0           $sfhash{'featureprop'} = \@sub_featureprops;
1483             }
1484 0 0         if (@sfdbxrefs) {
1485 0           $sfhash{'feature_dbxref'} = \@sfdbxrefs;
1486             }
1487 0 0         if (@sub_featuresyns) {
1488 0           $sfhash{'feature_synonym'} = \@sub_featuresyns;
1489             }
1490 0 0         if (@sub_featurecvterms) {
1491 0           $sfhash{'feature_cvterm'} = \@sub_featurecvterms;
1492             }
1493              
1494 0           undef(my @ssfeatrel);
1495 0 0         if ($feat->has_tag('locus_tag')) {
    0          
1496 0           ($genename)= $feat->each_tag_value('locus_tag');
1497             } elsif ($feat->has_tag('gene')) {
1498 0           ($genename)= $feat->each_tag_value('gene');
1499             }
1500              
1501 0           foreach my $sf ($feat->get_SeqFeatures()) {
1502             #print $sf->primary_tag, "\n";
1503 0           my $rref = $self->_subfeat2featrelhash($genename, $sftype, $sf, \%srcf, $tag_cv, $isanalysis);
1504 0 0         if (defined $rref) {
1505 0           push(@ssfeatrel, $rref);
1506             }
1507             }
1508              
1509 0 0         if (@ssfeatrel) {
1510 0           $sfhash{'feature_relationship'} = \@ssfeatrel;
1511             }
1512              
1513             #subj-obj relationship type
1514 0           undef(my $reltypename);
1515 0           $reltypename = return_reltypename($sftype);
1516              
1517             my %fr = (
1518             "subject_id" => \%sfhash,
1519             "type_id" => { 'name' => $reltypename,
1520 0           'cv_id' => { 'name' => $cv_name{'relationship'} }},
1521             );
1522              
1523 0 0 0       if ($seqtype eq 'mRNA' && $sftype eq 'gene') {
1524 0           return \%sfhash;
1525             } else {
1526 0           return \%fr;
1527             }
1528              
1529             }
1530              
1531             #generate uniquename for feature as: -- (foo-mRNA-10..1000)
1532             sub _genFeatUniqueName {
1533 0     0     my $self = shift;
1534 0           my $genename = shift;
1535 0           my $feat = shift;
1536 0           undef(my $uniquename);
1537 0           my $ftype = $feat->primary_tag;
1538 0           my $start = $feat->start;
1539 0           my $end = $feat->end;
1540              
1541 0 0         if ($feat->has_tag('locus_tag')) {
    0          
1542 0           ($genename) = $feat->each_tag_value("locus_tag");
1543             } elsif ($feat->has_tag('gene')) {
1544 0           ($genename) = $feat->each_tag_value("gene");
1545             }
1546              
1547 0           $uniquename = $genename . '-' . $ftype . '-' . $start . "\.\." . $end;
1548              
1549 0           return $uniquename;
1550             }
1551              
1552             #create uniquename for pubs with no medline id and no FBrf#
1553             #use ", , " as the uniquename (same as miniref)
1554             # is if one author,
1555             # or and if two,
1556             # or et al. if more
1557             #sub _CreatePubUname {
1558             # my $self = shift;
1559             # my $pub = shift;
1560             # undef(my $pubuname);
1561             #
1562             # return $pubuname;
1563             #}
1564              
1565             #get authors of a reference
1566             #returns ref to the array of author hashes
1567             sub _getRefAuthors {
1568 0     0     my $self = shift;
1569 0           my $ref = shift;
1570              
1571 0           my $temp = $ref->authors;
1572 0           undef(my @authors);
1573 0           undef(my @aut);
1574              
1575             #there are authors
1576 0 0         if ($temp ne '.') {
1577 0 0         if (index($temp, ' and ') > 0) {
1578 0           $temp =~ / and /;
1579 0           my $lastauthor = $POSTMATCH;
1580 0           @authors = split(/\, /, $PREMATCH);
1581 0           push (@authors, $lastauthor);
1582             } else {
1583 0           @authors = split(/\, /, $temp);
1584             }
1585              
1586 0           my $a;
1587 0           my $i = 0;
1588 0           foreach $a (@authors) {
1589 0           $i ++;
1590             #parse the author lastname and givennames
1591 0           undef(my $last);
1592 0           undef(my $given);
1593 0 0         if (index($a, ',') > 0) { #genbank format, last,f.m.
    0          
1594 0           ($last, $given) = split(/\,/, $a);
1595             } elsif (index($a, ' ') > 0) { #embl format, last f.m.
1596 0           ($last, $given) = split(/ /, $a);
1597             }
1598 0           my %au = (
1599             'surname' => $last,
1600             'givennames' => $given,
1601             );
1602 0           push(@aut, {author_id => \%au, arank => $i});
1603             }
1604              
1605 0           return \@aut;
1606             }
1607              
1608             #no authors, Bio::SeqIO::genbank doesn't pick up 'CONSRTM' line.
1609             else {
1610 0           return;
1611             }
1612              
1613             }
1614              
1615             #extract submission year from the citation of the submitted reference
1616             #genbank format for the submitted citation: JOURNAL Submitted (DD-MON-YYYY) submitter address
1617             sub _getSubmitYear {
1618 0     0     my $self = shift;
1619 0           my $citation = shift;
1620              
1621 0 0         if ($citation !~ /Submitted/) {
1622 0           $self->warn("not citation for a submitted reference. cannot extract submission year.");
1623 0           return;
1624             } else {
1625 0           $citation =~ /Submitted \(\d\d-[a-zA-Z]{3}-\d{4}\)/;
1626 0           my $a = $MATCH;
1627 0           $a =~ /\d{4}/;
1628 0           my $year = $MATCH;
1629              
1630 0           return $year;
1631             }
1632             }
1633              
1634             sub _getSubmitAddr {
1635 0     0     my $self = shift;
1636 0           my $ref = shift;
1637 0           undef(my %author);
1638              
1639 0           my $citation = $ref->location;
1640 0 0         if ($citation !~ /Submitted/) {
1641 0           $self->warn("not citation for a submitted reference. cannot extract submission year.");
1642 0           return;
1643             } else {
1644 0           $citation =~ /Submitted \(\d\d-[a-zA-Z]{3}-\d{4}\)/;
1645 0           my $a = $POSTMATCH;
1646 0 0         if (defined $a) {
1647 0           $a =~ s/^\s//;
1648 0           %author = (
1649             'author_id' => {'surname' => substr($a, 0, 100)},
1650             );
1651 0           return \%author;
1652             } else {
1653 0           return;
1654             }
1655             }
1656             }
1657              
1658             =head2 suppress_residues
1659              
1660             Title : suppress_residues
1661             Usage : $obj->suppress_residues() #get existing value
1662             $obj->suppress_residues($newval) #set new value
1663             Function : Keep track of the flag to suppress printing of residues in the
1664             chadoxml file. The default it to allow all residues to go into the
1665             file.
1666             Returns : value of suppress_residues (a scalar)
1667             Args : new value of suppress_residues (to set)
1668              
1669             =cut
1670              
1671             sub suppress_residues {
1672 0     0 1   my $self = shift;
1673 0 0         my $suppress_residues = shift if @_;
1674 0 0         return $self->{'suppress_residues'} = $suppress_residues if defined($suppress_residues);
1675 0           return $self->{'suppress_residues'};
1676             }
1677              
1678             =head2 allow_residues
1679              
1680             Title : allow_residues
1681             Usage : $obj->allow_residues() #get existing value
1682             $obj->allow_residues($feature_type) #set new value
1683             Function : Track the allow_residues type. This can be used in conjunction
1684             with the suppress_residues flag to only allow residues from a
1685             specific feature type to be printed in the xml file, for example,
1686             only printing chromosome residues. When suppress_residues is set to
1687             true, then only chromosome features would would go into the xml
1688             file. If suppress_residues is not set, this function has no effect
1689             (since the default is to put all residues in the xml file).
1690             Returns : value of allow_residues (string that corresponds to a feature type)
1691             Args : new value of allow_residues (to set)
1692             Status :
1693              
1694             =cut
1695              
1696             sub allow_residues {
1697 0     0 1   my $self = shift;
1698 0 0         my $allow_residues = shift if @_;
1699 0 0         return $self->{'allow_residues'} = $allow_residues if defined($allow_residues);
1700 0           return $self->{'allow_residues'};
1701             }
1702              
1703             =head2 return_ftype_hash
1704              
1705             Title : return_ftype_hash
1706             Usage : $obj->return_ftype_hash()
1707             Function : A simple hash where returning it has be factored out of the main
1708             code to allow subclasses to override it.
1709             Returns : A hash that indicates what the name of the SO term is and what
1710             the name of the Sequence Ontology is in the cv table.
1711             Args : The string that represents the SO term.
1712             Status :
1713              
1714             =cut
1715              
1716             sub return_ftype_hash {
1717 0     0 1   my $self = shift;
1718 0           my $ftype = shift;
1719             my %ftype_hash = ( "name" => $ftype,
1720 0           "cv_id" => {"name" => $cv_name{'sequence'} });
1721 0           return %ftype_hash;
1722             }
1723              
1724             =head2 return_reltypename
1725              
1726             Title : return_reltypename
1727             Usage : $obj->return_reltypename
1728             Function : Return the appropriate relationship type name depending on the
1729             feature type (typically part_of, but derives_from for polypeptide).
1730             Returns : A relationship type name.
1731             Args : A SO type name.
1732             Status :
1733              
1734             =cut
1735              
1736             sub return_reltypename {
1737 0     0 1   my $self = shift;
1738 0           my $sftype = shift;
1739              
1740 0           my $reltypename;
1741 0 0 0       if ($sftype eq 'protein' || $sftype eq 'polypeptide') {
1742 0           $reltypename = 'derives_from';
1743             } else {
1744 0           $reltypename = 'part_of';
1745             }
1746              
1747 0           return $reltypename;
1748             }
1749              
1750             =head2 next_seq
1751              
1752             Title : next_seq
1753             Usage : $obj->next_seq
1754             Function :
1755             Returns :
1756             Args :
1757             Status : Not implemented (write only adaptor)
1758              
1759             =cut
1760              
1761             sub next_seq {
1762 0     0 1   my ($self, %argv) = @_;
1763              
1764 0           $self->throw('next_seq is not implemented; this is a write-only adapter.');
1765              
1766             }
1767              
1768             =head2 _create_writer
1769              
1770             Title : _create_writer
1771             Usage : $obj->_create_writer
1772             Function : Creates XML::Writer object and writes start tag
1773             Returns : Nothing, though the writer persists as part of the chadoxml object
1774             Args : None
1775             Status :
1776              
1777             =cut
1778              
1779             sub _create_writer {
1780 0     0     my $self = shift;
1781              
1782 0           $self->{'writer'} = XML::Writer->new(OUTPUT => $self->_fh,
1783             DATA_MODE => 1,
1784             DATA_INDENT => 3);
1785              
1786             #print header
1787 0           $self->{'writer'}->xmlDecl("UTF-8");
1788 0           $self->{'writer'}->comment("created by Peili Zhang, Flybase, Harvard University\n".
1789             "and Scott Cain, GMOD, Cold Spring Harbor Laboratory");
1790              
1791             #start chadoxml
1792 0           $self->{'writer'}->startTag('chado');
1793              
1794 0           return;
1795             }
1796              
1797             =head2 close_chadoxml
1798              
1799             Title : close_chadoxml
1800             Usage : $obj->close_chadoxml
1801             Function : Writes the closing xml tag
1802             Returns : None
1803             Args : None
1804             Status :
1805              
1806             =cut
1807              
1808             sub close_chadoxml {
1809 0     0 1   my $self = shift;
1810              
1811 0           $self->{'writer'}->endTag('chado');
1812 0           return;
1813             }
1814              
1815             =head2 handle_unreserved_tags
1816              
1817             Title : handle_unreserved_tags
1818             Usage : $obj->handle_unreserved_tags
1819             Function : Converts tag value pairs to xml-ready hashrefs
1820             Returns : The array containing the hashrefs
1821             Args : In order: the Seq or SeqFeature object, the key, and the hasharray
1822             Status :
1823              
1824             =cut
1825              
1826             sub handle_unreserved_tags {
1827 0     0 1   my $self = shift;
1828 0           my $seq = shift;
1829 0           my $key = shift;
1830 0           my @arr = @_;
1831              
1832 0           my @values = $seq->attributes($key);
1833 0           for my $value (@values) {
1834             my %prophash = (
1835             "type_id" => {'name' => $key,
1836 0           'cv_id' => { 'name' => $cv_name{'feature_property'} }
1837             },
1838             "value" => $value,
1839             );
1840 0           push(@arr, \%prophash);
1841             }
1842              
1843 0           return @arr;
1844             }
1845              
1846             =head2 handle_Alias_tag
1847              
1848             Title : handle_Alias_tag
1849             Usage : $obj->handle_Alias_tag
1850             Function : Convert Alias values to synonym hash refs
1851             Returns : An array of synonym hash tags
1852             Args : The seq or seqFeature object and the synonym hash array
1853             Status :
1854              
1855             =cut
1856              
1857             sub handle_Alias_tag {
1858 0     0 1   my $self = shift;
1859 0           my $seq = shift;
1860 0           my @arr = @_;
1861              
1862 0           my @Aliases = $seq->attributes('Alias');
1863 0           for my $Alias (@Aliases) {
1864 0           my %synhash = (
1865             "type_id" => { 'name' => 'exact',
1866             'cv_id' => { 'name' => 'synonym_type' } },
1867             "name" => $Alias,
1868             "synonym_sgml" => $Alias,
1869             );
1870 0           push(@arr, {'synonym_id' => \%synhash,
1871             'pub_id' => {'uniquename' => 'null',
1872             'type_id' => { 'name' => 'null',
1873             'cv_id' => {
1874             'name' => 'null',
1875             },
1876             },
1877             },
1878             });
1879             }
1880              
1881 0           return @arr;
1882             }
1883              
1884             =head2 handle_Ontology_tag
1885              
1886             Title : handle_Ontology_tag
1887             Usage : $obj->handle_Ontology_tag
1888             Function : Convert Ontology_term values to ontology term hash refs
1889             Returns : An array of ontology term hash refs
1890             Args : The seq or seqFeature object and the ontology term array
1891             Status :
1892              
1893             =cut
1894              
1895             sub handle_Ontology_tag {
1896 0     0 1   my $self = shift;
1897 0           my $seq = shift;
1898 0           my @arr = @_;
1899              
1900 0           my @terms = $seq->attributes('Ontology_term');
1901 0           for my $term (@terms) {
1902 0           my $hashref;
1903 0 0         if ($term =~ /(\S+):(\S+)/) {
1904 0           my $db = $1;
1905 0           my $acc = $2;
1906 0           $hashref = {
1907             'cvterm_id' => {
1908             'dbxref_id' => {
1909             'db_id' => { 'name' => $db },
1910             'accession' => $acc
1911             },
1912             },
1913             };
1914             }
1915 0           push(@arr, {cvterm_id => $hashref});
1916             }
1917              
1918 0           return @arr;
1919             }
1920              
1921             =head2 handle_dbxref
1922              
1923             Title : handle_dbxref
1924             Usage : $obj->handle_dbxref
1925             Function : Convert Dbxref values to dbxref hashref
1926             Returns : An array of dbxref hashrefs
1927             Args : A seq or seqFeature object and the dbxref array
1928             Status :
1929              
1930             =cut
1931              
1932             sub handle_dbxref {
1933 0     0 1   my $self = shift;
1934 0           my $seq = shift;
1935 0           my $tag = shift;
1936 0           my @arr = @_;
1937              
1938 0           my @terms = $seq->attributes($tag);
1939 0           for my $term (@terms) {
1940 0           my $hashref;
1941 0 0         if ($term =~ /(\S+):(\S+)/) {
1942 0           my $db = $1;
1943 0           my $acc= $2;
1944 0           my $version = 1;
1945 0 0         if ($acc =~ /(\S+)\.(\S+)/) {
1946 0           $acc = $1;
1947 0           $version = $2;
1948             }
1949             $hashref = {
1950 0           'dbxref_id' => {
1951             'db_id' => { 'name' => $db },
1952             'accession' => $acc,
1953             'version' => $version,
1954             },
1955             };
1956             }
1957             else {
1958 0           $self->throw("I don't know how to handle a dbxref like $term");
1959             }
1960 0           push(@arr, {'dbxref_id' => $hashref});
1961             }
1962 0           return @arr;
1963             }
1964              
1965             =head2 handle_source
1966              
1967             Title : handle_source
1968             Usage : $obj->handle_source
1969             Function :
1970             Returns :
1971             Args :
1972             Status :
1973              
1974             =cut
1975              
1976             sub handle_source {
1977 0     0 1   my $self = shift;
1978 0           my $seq = shift;
1979 0           my @arr = @_;
1980              
1981 0           my $source = $seq->source();
1982 0 0         return @arr unless $source;
1983              
1984 0           my $hashref = {
1985             'dbxref_id' => {
1986             'db_id' => {'name' => 'GFF_source'},
1987             'accession' => $source,
1988             }
1989             };
1990              
1991 0           push(@arr, {'dbxref_id' => $hashref});
1992 0           return @arr;
1993             }
1994              
1995             =head2 _srcf_hash
1996              
1997             Title : _srcf_hash
1998             Usage : $obj->_srcf_hash
1999             Function : Creates the srcfeature hash for use in featureloc hashes
2000             Returns : The srcfeature hash
2001             Args : The srcfeature name, the srcfeature type and a reference to the
2002             organism hash.
2003             Status :
2004              
2005             =cut
2006              
2007             sub _srcf_hash {
2008 0     0     my $self = shift;
2009 0           my $srcf = shift;
2010 0           my $stype= shift;
2011 0           my $orgref = shift;
2012              
2013             my %hash = ('uniquename' => $srcf,
2014             'organism_id' => $orgref,
2015             'type_id' => {'name' => $stype,
2016             'cv_id' =>
2017 0           {'name' => $cv_name{'sequence'} }},
2018             );
2019              
2020 0           return %hash;
2021             }
2022              
2023              
2024             1;