| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | # | 
| 2 |  |  |  |  |  |  | # BioPerl module for Bio::SeqIO::Handler::GenericRichSeqHandler | 
| 3 |  |  |  |  |  |  | # | 
| 4 |  |  |  |  |  |  | # Please direct questions and support issues to | 
| 5 |  |  |  |  |  |  | # | 
| 6 |  |  |  |  |  |  | # Cared for by Chris Fields | 
| 7 |  |  |  |  |  |  | # | 
| 8 |  |  |  |  |  |  | # Copyright Chris Fields | 
| 9 |  |  |  |  |  |  | # | 
| 10 |  |  |  |  |  |  | # You may distribute this module under the same terms as perl itself | 
| 11 |  |  |  |  |  |  |  | 
| 12 |  |  |  |  |  |  | # POD documentation - main docs before the code | 
| 13 |  |  |  |  |  |  |  | 
| 14 |  |  |  |  |  |  | =head1 NAME | 
| 15 |  |  |  |  |  |  |  | 
| 16 |  |  |  |  |  |  | Bio::SeqIO::Handler::GenericRichSeqHandler - Bio::HandlerI-based | 
| 17 |  |  |  |  |  |  | data handler for GenBank/EMBL/UniProt (and other) sequence data | 
| 18 |  |  |  |  |  |  |  | 
| 19 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 20 |  |  |  |  |  |  |  | 
| 21 |  |  |  |  |  |  | # MyHandler is a GenericRichSeqHandler object. | 
| 22 |  |  |  |  |  |  | # inside a parser (driver) constructor.... | 
| 23 |  |  |  |  |  |  |  | 
| 24 |  |  |  |  |  |  | $self->seq_handler($handler || MyHandler->new(-format => 'genbank')); | 
| 25 |  |  |  |  |  |  |  | 
| 26 |  |  |  |  |  |  | # in next_seq() in driver... | 
| 27 |  |  |  |  |  |  |  | 
| 28 |  |  |  |  |  |  | $hobj = $self->seqhandler(); | 
| 29 |  |  |  |  |  |  |  | 
| 30 |  |  |  |  |  |  | # roll data up into hashref chunks, pass off into Handler for processing... | 
| 31 |  |  |  |  |  |  |  | 
| 32 |  |  |  |  |  |  | $hobj->data_handler($data); | 
| 33 |  |  |  |  |  |  |  | 
| 34 |  |  |  |  |  |  | # or retrieve Handler methods and pass data directly to Handler methods... | 
| 35 |  |  |  |  |  |  |  | 
| 36 |  |  |  |  |  |  | my $hmeth = $hobj->handler_methods; | 
| 37 |  |  |  |  |  |  |  | 
| 38 |  |  |  |  |  |  | if ($hmeth->{ $data->{NAME} }) { | 
| 39 |  |  |  |  |  |  | my $mth = $hmeth->{ $data->{NAME} }; | 
| 40 |  |  |  |  |  |  | $hobj->$mth($data); | 
| 41 |  |  |  |  |  |  | } | 
| 42 |  |  |  |  |  |  |  | 
| 43 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 44 |  |  |  |  |  |  |  | 
| 45 |  |  |  |  |  |  | This is an experimental implementation of a sequence-based HandlerBaseI parser | 
| 46 |  |  |  |  |  |  | and may change over time. It is possible (nay, likely) that the way handler | 
| 47 |  |  |  |  |  |  | methods are set up will change over development to allow more flexibility. | 
| 48 |  |  |  |  |  |  | Release pumpkins, please do not add this to a release until the API has settled. | 
| 49 |  |  |  |  |  |  | It is also likely that write_seq() will not work properly for some data. | 
| 50 |  |  |  |  |  |  |  | 
| 51 |  |  |  |  |  |  | Standard Developer caveats: | 
| 52 |  |  |  |  |  |  |  | 
| 53 |  |  |  |  |  |  | Do not use for production purposes. | 
| 54 |  |  |  |  |  |  | Not responsible for destroying (your data|computer|world). | 
| 55 |  |  |  |  |  |  | Do not stare directly at GenericRichSeqHandler. | 
| 56 |  |  |  |  |  |  | If GenericRichSeqHandler glows, back slowly away and call for help. | 
| 57 |  |  |  |  |  |  |  | 
| 58 |  |  |  |  |  |  | Consider yourself warned! | 
| 59 |  |  |  |  |  |  |  | 
| 60 |  |  |  |  |  |  | This class acts as a demonstration on how to handle similar data chunks derived | 
| 61 |  |  |  |  |  |  | from Bio::SeqIO::gbdriver, Bio::SeqIO::embldriver, and Bio::SeqIO::swissdriver | 
| 62 |  |  |  |  |  |  | using similar (or the same) handler methods. | 
| 63 |  |  |  |  |  |  |  | 
| 64 |  |  |  |  |  |  | The modules currently pass all previous tests in t/genbank.t, t/embl.t, and | 
| 65 |  |  |  |  |  |  | t/swiss.t yet all use the same handler methods (the collected tests for handlers | 
| 66 |  |  |  |  |  |  | can be found in t/Handler.t). Some tweaking of the methods themselves is | 
| 67 |  |  |  |  |  |  | probably in order over the long run to ensure that data is consistently handled | 
| 68 |  |  |  |  |  |  | for each parser.  Round-trip tests are probably in order here... | 
| 69 |  |  |  |  |  |  |  | 
| 70 |  |  |  |  |  |  | Though a Bio::Seq::SeqBuilder is employed for building sequence objects no | 
| 71 |  |  |  |  |  |  | bypassing of data based on builder slots has been implemented (yet); this is | 
| 72 |  |  |  |  |  |  | planned in the near future. | 
| 73 |  |  |  |  |  |  |  | 
| 74 |  |  |  |  |  |  | As a reminder: this is the current Annotation data chunk (via Data::Dumper): | 
| 75 |  |  |  |  |  |  |  | 
| 76 |  |  |  |  |  |  | $VAR1 = { | 
| 77 |  |  |  |  |  |  | 'NAME' => 'REFERENCE', | 
| 78 |  |  |  |  |  |  | 'DATA' => '1  (bases 1 to 10001)' | 
| 79 |  |  |  |  |  |  | 'AUTHORS' => 'International Human Genome Sequencing Consortium.' | 
| 80 |  |  |  |  |  |  | 'TITLE' => 'The DNA sequence of Homo sapiens' | 
| 81 |  |  |  |  |  |  | 'JOURNAL' => 'Unpublished (2003)' | 
| 82 |  |  |  |  |  |  | }; | 
| 83 |  |  |  |  |  |  | ... | 
| 84 |  |  |  |  |  |  |  | 
| 85 |  |  |  |  |  |  | This is the current SeqFeature data chunk (again via Data::Dumper): | 
| 86 |  |  |  |  |  |  |  | 
| 87 |  |  |  |  |  |  | $VAR1 = { | 
| 88 |  |  |  |  |  |  | 'mol_type' => 'genomic DNA', | 
| 89 |  |  |  |  |  |  | 'LOCATION' => '<1..>10001', | 
| 90 |  |  |  |  |  |  | 'NAME' => 'FEATURES', | 
| 91 |  |  |  |  |  |  | 'FEATURE_KEY' => 'source', | 
| 92 |  |  |  |  |  |  | 'note' => 'Accession AL451081 sequenced by The Sanger Centre', | 
| 93 |  |  |  |  |  |  | 'db_xref' => 'taxon:9606', | 
| 94 |  |  |  |  |  |  | 'clone' => 'RP11-302I18', | 
| 95 |  |  |  |  |  |  | 'organism' => 'Homo sapiens' | 
| 96 |  |  |  |  |  |  | }; | 
| 97 |  |  |  |  |  |  |  | 
| 98 |  |  |  |  |  |  | =head1 FEEDBACK | 
| 99 |  |  |  |  |  |  |  | 
| 100 |  |  |  |  |  |  | =head2 Mailing Lists | 
| 101 |  |  |  |  |  |  |  | 
| 102 |  |  |  |  |  |  | User feedback is an integral part of the evolution of this and other | 
| 103 |  |  |  |  |  |  | Bioperl modules. Send your comments and suggestions preferably to one | 
| 104 |  |  |  |  |  |  | of the Bioperl mailing lists.  Your participation is much appreciated. | 
| 105 |  |  |  |  |  |  |  | 
| 106 |  |  |  |  |  |  | bioperl-l@bioperl.org                  - General discussion | 
| 107 |  |  |  |  |  |  | http://bioperl.org/wiki/Mailing_lists  - About the mailing lists | 
| 108 |  |  |  |  |  |  |  | 
| 109 |  |  |  |  |  |  | =head2 Support | 
| 110 |  |  |  |  |  |  |  | 
| 111 |  |  |  |  |  |  | Please direct usage questions or support issues to the mailing list: | 
| 112 |  |  |  |  |  |  |  | 
| 113 |  |  |  |  |  |  | I | 
| 114 |  |  |  |  |  |  |  | 
| 115 |  |  |  |  |  |  | rather than to the module maintainer directly. Many experienced and | 
| 116 |  |  |  |  |  |  | reponsive experts will be able look at the problem and quickly | 
| 117 |  |  |  |  |  |  | address it. Please include a thorough description of the problem | 
| 118 |  |  |  |  |  |  | with code and data examples if at all possible. | 
| 119 |  |  |  |  |  |  |  | 
| 120 |  |  |  |  |  |  | =head2 Reporting Bugs | 
| 121 |  |  |  |  |  |  |  | 
| 122 |  |  |  |  |  |  | Report bugs to the Bioperl bug tracking system to help us keep track | 
| 123 |  |  |  |  |  |  | the bugs and their resolution.  Bug reports can be submitted via the | 
| 124 |  |  |  |  |  |  | web: | 
| 125 |  |  |  |  |  |  |  | 
| 126 |  |  |  |  |  |  | https://github.com/bioperl/bioperl-live/issues | 
| 127 |  |  |  |  |  |  |  | 
| 128 |  |  |  |  |  |  | =head1 AUTHOR - Chris Fields | 
| 129 |  |  |  |  |  |  |  | 
| 130 |  |  |  |  |  |  | Email cjfields at bioperl dot org | 
| 131 |  |  |  |  |  |  |  | 
| 132 |  |  |  |  |  |  | =head1 APPENDIX | 
| 133 |  |  |  |  |  |  |  | 
| 134 |  |  |  |  |  |  | The rest of the documentation details each of the object methods. Internal | 
| 135 |  |  |  |  |  |  | methods are usually preceded with a _ | 
| 136 |  |  |  |  |  |  |  | 
| 137 |  |  |  |  |  |  | =cut | 
| 138 |  |  |  |  |  |  |  | 
| 139 |  |  |  |  |  |  | # Let the code begin... | 
| 140 |  |  |  |  |  |  |  | 
| 141 |  |  |  |  |  |  | package Bio::SeqIO::Handler::GenericRichSeqHandler; | 
| 142 | 1 |  |  | 1 |  | 6 | use strict; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 23 |  | 
| 143 | 1 |  |  | 1 |  | 4 | use warnings; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 20 |  | 
| 144 |  |  |  |  |  |  |  | 
| 145 | 1 |  |  | 1 |  | 592 | use Bio::SeqIO::FTHelper; | 
|  | 1 |  |  |  |  | 3 |  | 
|  | 1 |  |  |  |  | 24 |  | 
| 146 | 1 |  |  | 1 |  | 6 | use Bio::Annotation::Collection; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 17 |  | 
| 147 | 1 |  |  | 1 |  | 706 | use Bio::Annotation::DBLink; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 28 |  | 
| 148 | 1 |  |  | 1 |  | 589 | use Bio::Annotation::Comment; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 23 |  | 
| 149 | 1 |  |  | 1 |  | 597 | use Bio::Annotation::Reference; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 26 |  | 
| 150 | 1 |  |  | 1 |  | 6 | use Bio::Annotation::Collection; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 15 |  | 
| 151 | 1 |  |  | 1 |  | 5 | use Bio::Annotation::SimpleValue; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 16 |  | 
| 152 | 1 |  |  | 1 |  | 662 | use Bio::Annotation::TagTree; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 28 |  | 
| 153 | 1 |  |  | 1 |  | 7 | use Bio::SeqFeature::Generic; | 
|  | 1 |  |  |  |  | 1 |  | 
|  | 1 |  |  |  |  | 17 |  | 
| 154 | 1 |  |  | 1 |  | 624 | use Bio::Species; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 25 |  | 
| 155 | 1 |  |  | 1 |  | 6 | use Bio::Taxon; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 14 |  | 
| 156 | 1 |  |  | 1 |  | 4 | use Bio::DB::Taxonomy; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 15 |  | 
| 157 | 1 |  |  | 1 |  | 4 | use Bio::Factory::FTLocationFactory; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 20 |  | 
| 158 | 1 |  |  | 1 |  | 4 | use Data::Dumper; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 47 |  | 
| 159 |  |  |  |  |  |  |  | 
| 160 | 1 |  |  | 1 |  | 4 | use base qw(Bio::Root::Root Bio::HandlerBaseI); | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 662 |  | 
| 161 |  |  |  |  |  |  |  | 
| 162 |  |  |  |  |  |  | my %HANDLERS = ( | 
| 163 |  |  |  |  |  |  | 'genbank'   => { | 
| 164 |  |  |  |  |  |  | 'LOCUS'         => \&_genbank_locus, | 
| 165 |  |  |  |  |  |  | 'DEFINITION'    => \&_generic_description, | 
| 166 |  |  |  |  |  |  | 'ACCESSION'     => \&_generic_accession, | 
| 167 |  |  |  |  |  |  | 'VERSION'       => \&_generic_version, | 
| 168 |  |  |  |  |  |  | 'KEYWORDS'      => \&_generic_keywords, | 
| 169 |  |  |  |  |  |  | 'DBSOURCE'      => \&_genbank_dbsource, | 
| 170 |  |  |  |  |  |  | 'DBLINK'        => \&_genbank_dbsource, | 
| 171 |  |  |  |  |  |  | 'SOURCE'        => \&_generic_species, | 
| 172 |  |  |  |  |  |  | 'REFERENCE'     => \&_generic_reference, | 
| 173 |  |  |  |  |  |  | 'COMMENT'       => \&_generic_comment, | 
| 174 |  |  |  |  |  |  | 'FEATURES'      => \&_generic_seqfeatures, | 
| 175 |  |  |  |  |  |  | 'BASE'          => \&noop,    # this is generated from scratch | 
| 176 |  |  |  |  |  |  | 'ORIGIN'        => \&_generic_seq, | 
| 177 |  |  |  |  |  |  | # handles anything else (WGS, WGS_SCAFLD, CONTIG, PROJECT) | 
| 178 |  |  |  |  |  |  | '_DEFAULT_'     => \&_generic_simplevalue, | 
| 179 |  |  |  |  |  |  | }, | 
| 180 |  |  |  |  |  |  | 'embl'      => { | 
| 181 |  |  |  |  |  |  | 'ID'            => \&_embl_id, | 
| 182 |  |  |  |  |  |  | 'DT'            => \&_embl_date, | 
| 183 |  |  |  |  |  |  | 'DR'            => \&_generic_dbsource, | 
| 184 |  |  |  |  |  |  | 'SV'            => \&_generic_version, | 
| 185 |  |  |  |  |  |  | 'RN'            => \&_generic_reference, | 
| 186 |  |  |  |  |  |  | 'KW'            => \&_generic_keywords, | 
| 187 |  |  |  |  |  |  | 'DE'            => \&_generic_description, | 
| 188 |  |  |  |  |  |  | 'AC'            => \&_generic_accession, | 
| 189 |  |  |  |  |  |  | #'AH'            => \&noop, # TPA data not dealt with yet... | 
| 190 |  |  |  |  |  |  | #'AS'            => \&noop, | 
| 191 |  |  |  |  |  |  | 'SQ'            => \&_generic_seq, | 
| 192 |  |  |  |  |  |  | 'OS'            => \&_generic_species, | 
| 193 |  |  |  |  |  |  | 'CC'            => \&_generic_comment, | 
| 194 |  |  |  |  |  |  | 'FT'            => \&_generic_seqfeatures, | 
| 195 |  |  |  |  |  |  | # handles anything else (WGS, TPA, ANN...) | 
| 196 |  |  |  |  |  |  | '_DEFAULT_'     => \&_generic_simplevalue, | 
| 197 |  |  |  |  |  |  | }, | 
| 198 |  |  |  |  |  |  | 'swiss'     => { | 
| 199 |  |  |  |  |  |  | 'ID'            => \&_swiss_id, | 
| 200 |  |  |  |  |  |  | 'DT'            => \&_swiss_date, | 
| 201 |  |  |  |  |  |  | 'GN'            => \&_swiss_genename, | 
| 202 |  |  |  |  |  |  | 'DR'            => \&_generic_dbsource, | 
| 203 |  |  |  |  |  |  | 'RN'            => \&_generic_reference, | 
| 204 |  |  |  |  |  |  | 'KW'            => \&_generic_keywords, | 
| 205 |  |  |  |  |  |  | 'DE'            => \&_generic_description, | 
| 206 |  |  |  |  |  |  | 'AC'            => \&_generic_accession, | 
| 207 |  |  |  |  |  |  | 'SQ'            => \&_generic_seq, | 
| 208 |  |  |  |  |  |  | 'OS'            => \&_generic_species, | 
| 209 |  |  |  |  |  |  | 'CC'            => \&_generic_comment, | 
| 210 |  |  |  |  |  |  | 'FT'            => \&_generic_seqfeatures, | 
| 211 |  |  |  |  |  |  | # handles anything else, though I don't know what... | 
| 212 |  |  |  |  |  |  | '_DEFAULT_'     => \&_generic_simplevalue, | 
| 213 |  |  |  |  |  |  | }, | 
| 214 |  |  |  |  |  |  | ); | 
| 215 |  |  |  |  |  |  |  | 
| 216 |  |  |  |  |  |  | # can we do this generically?  Seems like a lot of trouble... | 
| 217 |  |  |  |  |  |  | my %DBSOURCE = map {$_ => 1} qw( | 
| 218 |  |  |  |  |  |  | EchoBASE     IntAct    SWISS-2DPAGE    ECO2DBASE    ECOGENE    TIGRFAMs | 
| 219 |  |  |  |  |  |  | TIGR    GO    InterPro    Pfam    PROSITE    SGD    GermOnline | 
| 220 |  |  |  |  |  |  | HSSP    PhosSite    Ensembl    RGD    AGD    ArrayExpress    KEGG | 
| 221 |  |  |  |  |  |  | H-InvDB    HGNC    LinkHub    PANTHER    PRINTS    SMART    SMR | 
| 222 |  |  |  |  |  |  | MGI    MIM    RZPD-ProtExp    ProDom    MEROPS    TRANSFAC    Reactome | 
| 223 |  |  |  |  |  |  | UniGene    GlycoSuiteDB    PIRSF    HSC-2DPAGE    PHCI-2DPAGE | 
| 224 |  |  |  |  |  |  | PMMA-2DPAGE    Siena-2DPAGE    Rat-heart-2DPAGE    Aarhus/Ghent-2DPAGE | 
| 225 |  |  |  |  |  |  | Biocyc    MetaCyc    Biocyc:Metacyc    GenomeReviews    FlyBase | 
| 226 |  |  |  |  |  |  | TMHOBP    COMPLUYEAST-2DPAGE    OGP    DictyBase    HAMAP | 
| 227 |  |  |  |  |  |  | PhotoList    Gramene    WormBase    WormPep    Genew    ZFIN | 
| 228 |  |  |  |  |  |  | PeroxiBase    MaizeDB    TAIR    DrugBank    REBASE    HPA | 
| 229 |  |  |  |  |  |  | swissprot    GenBank    GenPept    REFSEQ    embl    PDB    UniProtKB); | 
| 230 |  |  |  |  |  |  |  | 
| 231 |  |  |  |  |  |  | my %NOPROCESS = map {$_ => 1} qw(DBSOURCE ORGANISM FEATURES); | 
| 232 |  |  |  |  |  |  |  | 
| 233 |  |  |  |  |  |  | our %VALID_ALPHABET = ( | 
| 234 |  |  |  |  |  |  | 'bp' => 'dna', | 
| 235 |  |  |  |  |  |  | 'aa' => 'protein', | 
| 236 |  |  |  |  |  |  | 'rc' => '' # rc = release candidate; file has no sequences | 
| 237 |  |  |  |  |  |  | ); | 
| 238 |  |  |  |  |  |  |  | 
| 239 |  |  |  |  |  |  | =head2 new | 
| 240 |  |  |  |  |  |  |  | 
| 241 |  |  |  |  |  |  | Title   :  new | 
| 242 |  |  |  |  |  |  | Usage   : | 
| 243 |  |  |  |  |  |  | Function: | 
| 244 |  |  |  |  |  |  | Returns : | 
| 245 |  |  |  |  |  |  | Args    :  -format    Sequence format to be mapped for handler methods | 
| 246 |  |  |  |  |  |  | -builder   Bio::Seq::SeqBuilder object (normally defined in | 
| 247 |  |  |  |  |  |  | SequenceStreamI object implementation constructor) | 
| 248 |  |  |  |  |  |  | Throws  :  On undefined '-format' sequence format parameter | 
| 249 |  |  |  |  |  |  | Note    :  Still under heavy development | 
| 250 |  |  |  |  |  |  |  | 
| 251 |  |  |  |  |  |  | =cut | 
| 252 |  |  |  |  |  |  |  | 
| 253 |  |  |  |  |  |  | sub new { | 
| 254 | 42 |  |  | 42 | 1 | 185 | my ($class, @args) = @_; | 
| 255 | 42 |  |  |  |  | 165 | my $self = $class->SUPER::new(@args); | 
| 256 | 42 |  |  |  |  | 234 | $self = {@args}; | 
| 257 | 42 |  |  |  |  | 89 | bless $self,$class; | 
| 258 | 42 |  |  |  |  | 170 | my ($format, $builder) = $self->_rearrange([qw(FORMAT BUILDER)], @args); | 
| 259 | 42 | 50 |  |  |  | 153 | $self->throw("Must define sequence record format") if !$format; | 
| 260 | 42 |  |  |  |  | 218 | $self->format($format); | 
| 261 | 42 |  |  |  |  | 171 | $self->handler_methods(); | 
| 262 | 42 | 50 |  |  |  | 220 | $builder  &&  $self->seqbuilder($builder); | 
| 263 | 42 |  |  |  |  | 170 | $self->location_factory(); | 
| 264 | 42 |  |  |  |  | 227 | return $self; | 
| 265 |  |  |  |  |  |  | } | 
| 266 |  |  |  |  |  |  |  | 
| 267 |  |  |  |  |  |  | =head1 L implementing methods | 
| 268 |  |  |  |  |  |  |  | 
| 269 |  |  |  |  |  |  | =head2 handler_methods | 
| 270 |  |  |  |  |  |  |  | 
| 271 |  |  |  |  |  |  | Title   :  handler_methods | 
| 272 |  |  |  |  |  |  | Usage   :  $handler->handler_methods('GenBank') | 
| 273 |  |  |  |  |  |  | %handlers = $handler->handler_methods(); | 
| 274 |  |  |  |  |  |  | Function:  Retrieve the handler methods used for the current format() in | 
| 275 |  |  |  |  |  |  | the handler.  This assumes the handler methods are already | 
| 276 |  |  |  |  |  |  | described in the HandlerI-implementing class. | 
| 277 |  |  |  |  |  |  | Returns :  a hash reference with the data type handled and the code ref | 
| 278 |  |  |  |  |  |  | associated with it. | 
| 279 |  |  |  |  |  |  | Args    :  [optional] String representing the sequence format.  If set here | 
| 280 |  |  |  |  |  |  | this will also set sequence_format() | 
| 281 |  |  |  |  |  |  | Throws  :  On unimplemented sequence format in %HANDLERS | 
| 282 |  |  |  |  |  |  |  | 
| 283 |  |  |  |  |  |  | =cut | 
| 284 |  |  |  |  |  |  |  | 
| 285 |  |  |  |  |  |  | sub handler_methods { | 
| 286 | 75 |  |  | 75 | 1 | 132 | my $self = shift; | 
| 287 | 75 | 100 |  |  |  | 189 | if (!($self->{'handlers'})) { | 
| 288 |  |  |  |  |  |  | $self->throw("No handlers defined for seqformat ",$self->format) | 
| 289 | 42 | 50 |  |  |  | 123 | unless exists $HANDLERS{$self->format}; | 
| 290 | 42 |  |  |  |  | 121 | $self->{'handlers'} = $HANDLERS{$self->format}; | 
| 291 |  |  |  |  |  |  | } | 
| 292 | 75 |  |  |  |  | 156 | return ($self->{'handlers'}); | 
| 293 |  |  |  |  |  |  | } | 
| 294 |  |  |  |  |  |  |  | 
| 295 |  |  |  |  |  |  | =head2 data_handler | 
| 296 |  |  |  |  |  |  |  | 
| 297 |  |  |  |  |  |  | Title   :  data_handler | 
| 298 |  |  |  |  |  |  | Usage   :  $handler->data_handler($data) | 
| 299 |  |  |  |  |  |  | Function:  Centralized method which accepts all data chunks, then distributes | 
| 300 |  |  |  |  |  |  | to the appropriate methods for processing based on the chunk name | 
| 301 |  |  |  |  |  |  | from within the HandlerBaseI object. | 
| 302 |  |  |  |  |  |  |  | 
| 303 |  |  |  |  |  |  | One can also use | 
| 304 |  |  |  |  |  |  | Returns :  None | 
| 305 |  |  |  |  |  |  | Args    :  an hash ref containing a data chunk. | 
| 306 |  |  |  |  |  |  |  | 
| 307 |  |  |  |  |  |  | =cut | 
| 308 |  |  |  |  |  |  |  | 
| 309 |  |  |  |  |  |  | sub data_handler { | 
| 310 | 1646 |  |  | 1646 | 1 | 3476 | my ($self, $data) = @_; | 
| 311 | 1646 |  | 33 |  |  | 4039 | my $nm = $data->{NAME} || $self->throw("No name tag defined!"); | 
| 312 |  |  |  |  |  |  |  | 
| 313 |  |  |  |  |  |  | # this should handle data on the fly w/o caching; any caching should be | 
| 314 |  |  |  |  |  |  | # done in the driver! | 
| 315 |  |  |  |  |  |  | my $method = (exists $self->{'handlers'}->{$nm}) ? ($self->{'handlers'}->{$nm}) : | 
| 316 | 1646 | 50 |  |  |  | 6349 | (exists $self->{'handlers'}->{'_DEFAULT_'}) ? ($self->{'handlers'}->{'_DEFAULT_'}) : | 
|  |  | 100 |  |  |  |  |  | 
| 317 |  |  |  |  |  |  | undef; | 
| 318 | 1646 | 50 |  |  |  | 3093 | if (!$method) { | 
| 319 | 0 |  |  |  |  | 0 | $self->debug("No handler defined for $nm\n"); | 
| 320 | 0 |  |  |  |  | 0 | return; | 
| 321 |  |  |  |  |  |  | }; | 
| 322 | 1646 |  |  |  |  | 4154 | $self->$method($data); | 
| 323 |  |  |  |  |  |  | } | 
| 324 |  |  |  |  |  |  |  | 
| 325 |  |  |  |  |  |  | =head2 reset_parameters | 
| 326 |  |  |  |  |  |  |  | 
| 327 |  |  |  |  |  |  | Title   :  reset_parameters | 
| 328 |  |  |  |  |  |  | Usage   :  $handler->reset_parameters() | 
| 329 |  |  |  |  |  |  | Function:  Resets the internal cache of data (normally object parameters for | 
| 330 |  |  |  |  |  |  | a builder or factory) | 
| 331 |  |  |  |  |  |  | Returns :  None | 
| 332 |  |  |  |  |  |  | Args    :  None | 
| 333 |  |  |  |  |  |  |  | 
| 334 |  |  |  |  |  |  | =cut | 
| 335 |  |  |  |  |  |  |  | 
| 336 |  |  |  |  |  |  | sub reset_parameters { | 
| 337 | 58 |  |  | 58 | 1 | 115 | my $self = shift; | 
| 338 | 58 |  |  |  |  | 592 | $self->{'_params'} = undef; | 
| 339 |  |  |  |  |  |  | } | 
| 340 |  |  |  |  |  |  |  | 
| 341 |  |  |  |  |  |  | =head2 format | 
| 342 |  |  |  |  |  |  |  | 
| 343 |  |  |  |  |  |  | Title   :  format | 
| 344 |  |  |  |  |  |  | Usage   :  $handler->format('GenBank') | 
| 345 |  |  |  |  |  |  | Function:  Get/Set the format for the report/record being parsed. This can be | 
| 346 |  |  |  |  |  |  | used to set handlers in classes which are capable of processing | 
| 347 |  |  |  |  |  |  | similar data chunks from multiple driver modules. | 
| 348 |  |  |  |  |  |  | Returns :  String with the sequence format | 
| 349 |  |  |  |  |  |  | Args    :  [optional] String with the sequence format | 
| 350 |  |  |  |  |  |  | Note    :  The format may be used to set the handlers (as in the | 
| 351 |  |  |  |  |  |  | current GenericRichSeqHandler implementation) | 
| 352 |  |  |  |  |  |  |  | 
| 353 |  |  |  |  |  |  | =cut | 
| 354 |  |  |  |  |  |  |  | 
| 355 |  |  |  |  |  |  | sub format { | 
| 356 | 1301 |  |  | 1301 | 1 | 2114 | my $self = shift; | 
| 357 | 1301 | 100 |  |  |  | 2871 | return $self->{'_seqformat'} = lc shift if @_; | 
| 358 | 1259 |  |  |  |  | 3472 | return $self->{'_seqformat'}; | 
| 359 |  |  |  |  |  |  | } | 
| 360 |  |  |  |  |  |  |  | 
| 361 |  |  |  |  |  |  | =head2 get_params | 
| 362 |  |  |  |  |  |  |  | 
| 363 |  |  |  |  |  |  | Title   :  get_params | 
| 364 |  |  |  |  |  |  | Usage   :  $handler->get_params('-species') | 
| 365 |  |  |  |  |  |  | Function:  Convenience method used to retrieve the specified | 
| 366 |  |  |  |  |  |  | parameters from the internal parameter cache | 
| 367 |  |  |  |  |  |  | Returns :  Hash ref containing parameters requested and data as | 
| 368 |  |  |  |  |  |  | key-value pairs.  Note that some parameter values may be | 
| 369 |  |  |  |  |  |  | objects, arrays, etc. | 
| 370 |  |  |  |  |  |  | Args    :  List (array) representing the parameters requested | 
| 371 |  |  |  |  |  |  |  | 
| 372 |  |  |  |  |  |  | =cut | 
| 373 |  |  |  |  |  |  |  | 
| 374 |  |  |  |  |  |  | sub get_params { | 
| 375 | 818 |  |  | 818 | 1 | 1647 | my ($self, @ids) = @_; | 
| 376 | 818 |  |  |  |  | 1306 | my %data; | 
| 377 | 818 |  |  |  |  | 1426 | for my $id (@ids) { | 
| 378 | 818 | 50 |  |  |  | 2422 | if (!index($id, '-')==0) { | 
| 379 | 818 |  |  |  |  | 2103 | $id = '-'.$id ; | 
| 380 |  |  |  |  |  |  | } | 
| 381 | 818 | 100 |  |  |  | 3653 | $data{$id} = $self->{'_params'}->{$id} if (exists $self->{'_params'}->{$id}); | 
| 382 |  |  |  |  |  |  | } | 
| 383 | 818 |  |  |  |  | 2380 | return \%data; | 
| 384 |  |  |  |  |  |  | } | 
| 385 |  |  |  |  |  |  |  | 
| 386 |  |  |  |  |  |  | =head2 set_params | 
| 387 |  |  |  |  |  |  |  | 
| 388 |  |  |  |  |  |  | Title   :  set_params | 
| 389 |  |  |  |  |  |  | Usage   :  $handler->set_param({'-species') | 
| 390 |  |  |  |  |  |  | Function:  Convenience method used to set specific parameters | 
| 391 |  |  |  |  |  |  | Returns :  None | 
| 392 |  |  |  |  |  |  | Args    :  Hash ref containing the data to be passed as key-value pairs | 
| 393 |  |  |  |  |  |  |  | 
| 394 |  |  |  |  |  |  | =cut | 
| 395 |  |  |  |  |  |  |  | 
| 396 |  |  |  |  |  |  | sub set_params { | 
| 397 | 0 |  |  | 0 | 1 | 0 | shift->throw('Not implemented yet!'); | 
| 398 |  |  |  |  |  |  | } | 
| 399 |  |  |  |  |  |  |  | 
| 400 |  |  |  |  |  |  | =head1 Methods unique to this implementation | 
| 401 |  |  |  |  |  |  |  | 
| 402 |  |  |  |  |  |  | =head2 seqbuilder | 
| 403 |  |  |  |  |  |  |  | 
| 404 |  |  |  |  |  |  | Title   :  seqbuilder | 
| 405 |  |  |  |  |  |  | Usage   : | 
| 406 |  |  |  |  |  |  | Function: | 
| 407 |  |  |  |  |  |  | Returns : | 
| 408 |  |  |  |  |  |  | Args    : | 
| 409 |  |  |  |  |  |  | Throws  : | 
| 410 |  |  |  |  |  |  | Note    : | 
| 411 |  |  |  |  |  |  |  | 
| 412 |  |  |  |  |  |  | =cut | 
| 413 |  |  |  |  |  |  |  | 
| 414 |  |  |  |  |  |  | sub seqbuilder { | 
| 415 | 104 |  |  | 104 | 1 | 208 | my $self = shift; | 
| 416 | 104 | 100 |  |  |  | 361 | return $self->{'_seqbuilder'} = shift if @_; | 
| 417 | 62 |  |  |  |  | 175 | return $self->{'_seqbuilder'}; | 
| 418 |  |  |  |  |  |  | } | 
| 419 |  |  |  |  |  |  |  | 
| 420 |  |  |  |  |  |  | =head2 build_sequence | 
| 421 |  |  |  |  |  |  |  | 
| 422 |  |  |  |  |  |  | Title   :  build_sequence | 
| 423 |  |  |  |  |  |  | Usage   : | 
| 424 |  |  |  |  |  |  | Function: | 
| 425 |  |  |  |  |  |  | Returns : | 
| 426 |  |  |  |  |  |  | Args    : | 
| 427 |  |  |  |  |  |  | Throws  : | 
| 428 |  |  |  |  |  |  | Note    : | 
| 429 |  |  |  |  |  |  |  | 
| 430 |  |  |  |  |  |  | =cut | 
| 431 |  |  |  |  |  |  |  | 
| 432 |  |  |  |  |  |  | sub build_sequence { | 
| 433 | 62 |  |  | 62 | 1 | 146 | my $self = shift; | 
| 434 | 62 |  |  |  |  | 251 | my $builder = $self->seqbuilder(); | 
| 435 | 62 |  |  |  |  | 109 | my $seq; | 
| 436 | 62 | 100 |  |  |  | 194 | if (defined($self->{'_params'})) { | 
| 437 | 58 |  |  |  |  | 111 | $builder->add_slot_value(%{ $self->{'_params'} }); | 
|  | 58 |  |  |  |  | 839 |  | 
| 438 | 58 |  |  |  |  | 340 | $seq = $builder->make_object(); | 
| 439 | 58 |  |  |  |  | 321 | $self->reset_parameters; | 
| 440 |  |  |  |  |  |  | } | 
| 441 | 62 | 100 |  |  |  | 1149 | return $seq if $seq; | 
| 442 | 4 |  |  |  |  | 25 | return 0; | 
| 443 |  |  |  |  |  |  | } | 
| 444 |  |  |  |  |  |  |  | 
| 445 |  |  |  |  |  |  | =head2 location_factory | 
| 446 |  |  |  |  |  |  |  | 
| 447 |  |  |  |  |  |  | Title   :  location_factory | 
| 448 |  |  |  |  |  |  | Usage   : | 
| 449 |  |  |  |  |  |  | Function: | 
| 450 |  |  |  |  |  |  | Returns : | 
| 451 |  |  |  |  |  |  | Args    : | 
| 452 |  |  |  |  |  |  | Throws  : | 
| 453 |  |  |  |  |  |  | Note    : | 
| 454 |  |  |  |  |  |  |  | 
| 455 |  |  |  |  |  |  | =cut | 
| 456 |  |  |  |  |  |  |  | 
| 457 |  |  |  |  |  |  | sub location_factory { | 
| 458 | 42 |  |  | 42 | 1 | 108 | my ($self, $factory) = @_; | 
| 459 | 42 | 50 |  |  |  | 213 | if ($factory) { | 
|  |  | 50 |  |  |  |  |  | 
| 460 | 0 | 0 | 0 |  |  | 0 | $self->throw("Must have a Bio::Factory::LocationFactoryI when ". | 
| 461 |  |  |  |  |  |  | "explicitly setting factory()") unless | 
| 462 |  |  |  |  |  |  | (ref($factory) && $factory->isa('Bio::Factory::LocationFactoryI')); | 
| 463 | 0 |  |  |  |  | 0 | $self->{'_locfactory'} = $factory; | 
| 464 |  |  |  |  |  |  | } elsif (!defined($self->{'_locfactory'})) { | 
| 465 | 42 |  |  |  |  | 279 | $self->{'_locfactory'} = Bio::Factory::FTLocationFactory->new() | 
| 466 |  |  |  |  |  |  | } | 
| 467 | 42 |  |  |  |  | 93 | return $self->{'_locfactory'}; | 
| 468 |  |  |  |  |  |  | } | 
| 469 |  |  |  |  |  |  |  | 
| 470 |  |  |  |  |  |  | =head2 annotation_collection | 
| 471 |  |  |  |  |  |  |  | 
| 472 |  |  |  |  |  |  | Title   :  annotation_collection | 
| 473 |  |  |  |  |  |  | Usage   : | 
| 474 |  |  |  |  |  |  | Function: | 
| 475 |  |  |  |  |  |  | Returns : | 
| 476 |  |  |  |  |  |  | Args    : | 
| 477 |  |  |  |  |  |  | Throws  : | 
| 478 |  |  |  |  |  |  | Note    : | 
| 479 |  |  |  |  |  |  |  | 
| 480 |  |  |  |  |  |  | =cut | 
| 481 |  |  |  |  |  |  |  | 
| 482 |  |  |  |  |  |  | sub annotation_collection { | 
| 483 | 746 |  |  | 746 | 1 | 1516 | my ($self, $coll) = @_; | 
| 484 | 746 | 50 |  |  |  | 2781 | if ($coll) { | 
|  |  | 100 |  |  |  |  |  | 
| 485 | 0 | 0 | 0 |  |  | 0 | $self->throw("Must have Bio::AnnotationCollectionI ". | 
| 486 |  |  |  |  |  |  | "when explicitly setting collection()") | 
| 487 |  |  |  |  |  |  | unless (ref($coll) && $coll->isa('Bio::AnnotationCollectionI')); | 
| 488 | 0 |  |  |  |  | 0 | $self->{'_params'}->{'-annotation'} = $coll; | 
| 489 |  |  |  |  |  |  | } elsif (!exists($self->{'_params'}->{'-annotation'})) { | 
| 490 | 57 |  |  |  |  | 536 | $self->{'_params'}->{'-annotation'} = Bio::Annotation::Collection->new() | 
| 491 |  |  |  |  |  |  | } | 
| 492 | 746 |  |  |  |  | 3127 | return $self->{'_params'}->{'-annotation'}; | 
| 493 |  |  |  |  |  |  | } | 
| 494 |  |  |  |  |  |  |  | 
| 495 |  |  |  |  |  |  | ####################### SEQUENCE HANDLERS ####################### | 
| 496 |  |  |  |  |  |  |  | 
| 497 |  |  |  |  |  |  | # any sequence data | 
| 498 |  |  |  |  |  |  | sub _generic_seq { | 
| 499 | 53 |  |  | 53 |  | 155 | my ($self, $data) = @_; | 
| 500 | 53 |  |  |  |  | 435 | $self->{'_params'}->{'-seq'} = $data->{DATA}; | 
| 501 |  |  |  |  |  |  | } | 
| 502 |  |  |  |  |  |  |  | 
| 503 |  |  |  |  |  |  | ####################### RAW DATA HANDLERS ####################### | 
| 504 |  |  |  |  |  |  |  | 
| 505 |  |  |  |  |  |  | # GenBank LOCUS line | 
| 506 |  |  |  |  |  |  | sub _genbank_locus { | 
| 507 | 31 |  |  | 31 |  | 71 | my ($self, $data) = @_; | 
| 508 | 31 |  |  |  |  | 237 | my (@tokens) = split m{\s+}, $data->{DATA}; | 
| 509 | 31 |  |  |  |  | 71 | my $display_id = shift @tokens; | 
| 510 | 31 |  |  |  |  | 118 | $self->{'_params'}->{'-display_id'} = $display_id; | 
| 511 | 31 |  |  |  |  | 64 | my $seqlength = shift @tokens; | 
| 512 | 31 | 50 |  |  |  | 94 | if (exists $VALID_ALPHABET{$seqlength}) { | 
| 513 |  |  |  |  |  |  | # moved one token too far.  No locus name? | 
| 514 |  |  |  |  |  |  | $self->warn("Bad LOCUS name?  Changing [".$self->{'_params'}->{'-display_id'}. | 
| 515 | 0 |  |  |  |  | 0 | "] to 'unknown' and length to ".$self->{'_params'}->{'-display_id'}); | 
| 516 | 0 |  |  |  |  | 0 | $self->{'_params'}->{'-length'} = $self->{'_params'}->{'-display_id'}; | 
| 517 | 0 |  |  |  |  | 0 | $self->{'_params'}->{'-display_id'} = 'unknown'; | 
| 518 |  |  |  |  |  |  | # add token back... | 
| 519 | 0 |  |  |  |  | 0 | unshift @tokens, $seqlength; | 
| 520 |  |  |  |  |  |  | } else { | 
| 521 | 31 |  |  |  |  | 79 | $self->{'_params'}->{'-length'} = $seqlength; | 
| 522 |  |  |  |  |  |  | } | 
| 523 | 31 |  |  |  |  | 67 | my $alphabet = lc(shift @tokens); | 
| 524 |  |  |  |  |  |  | $self->{'_params'}->{'-alphabet'} = | 
| 525 | 31 | 50 |  |  |  | 133 | (exists $VALID_ALPHABET{$alphabet}) ? $VALID_ALPHABET{$alphabet} : | 
| 526 |  |  |  |  |  |  | $self->warn("Unknown alphabet: $alphabet"); | 
| 527 | 31 | 50 | 66 |  |  | 142 | if (($self->{'_params'}->{'-alphabet'} eq 'dna') || (@tokens > 2)) { | 
| 528 | 31 |  |  |  |  | 98 | $self->{'_params'}->{'-molecule'} = shift(@tokens); | 
| 529 | 31 |  |  |  |  | 71 | my $circ = shift(@tokens); | 
| 530 | 31 | 100 |  |  |  | 83 | if ($circ eq 'circular') { | 
| 531 | 2 |  |  |  |  | 6 | $self->{'_params'}->{'-is_circular'} = 1; | 
| 532 | 2 |  |  |  |  | 7 | $self->{'_params'}->{'-division'} = shift(@tokens); | 
| 533 |  |  |  |  |  |  | } else { | 
| 534 |  |  |  |  |  |  | # 'linear' or 'circular' may actually be omitted altogether | 
| 535 | 29 | 100 |  |  |  | 96 | $self->{'_params'}->{'-division'} = | 
| 536 |  |  |  |  |  |  | (CORE::length($circ) == 3 ) ? $circ : shift(@tokens); | 
| 537 |  |  |  |  |  |  | } | 
| 538 |  |  |  |  |  |  | } else { | 
| 539 | 0 | 0 |  |  |  | 0 | $self->{'_params'}->{'-molecule'} = 'PRT' if($self->{'_params'}->{'-alphabet'} eq 'aa'); | 
| 540 | 0 |  |  |  |  | 0 | $self->{'_params'}->{'-division'} = shift(@tokens); | 
| 541 |  |  |  |  |  |  | } | 
| 542 | 31 |  |  |  |  | 103 | my $date = join(' ', @tokens); | 
| 543 |  |  |  |  |  |  | # maybe use Date::Time for dates? | 
| 544 | 31 | 100 | 66 |  |  | 287 | if($date && $date =~ s{\s*((\d{1,2})-(\w{3})-(\d{2,4})).*}{$1}) { | 
| 545 |  |  |  |  |  |  |  | 
| 546 | 30 | 50 |  |  |  | 75 | if( length($date) < 11 ) { | 
| 547 |  |  |  |  |  |  | # improperly formatted date | 
| 548 |  |  |  |  |  |  | # But we'll be nice and fix it for them | 
| 549 | 0 |  |  |  |  | 0 | my ($d,$m,$y) = ($2,$3,$4); | 
| 550 | 0 | 0 |  |  |  | 0 | if( length($d) == 1 ) { | 
| 551 | 0 |  |  |  |  | 0 | $d = "0$d"; | 
| 552 |  |  |  |  |  |  | } | 
| 553 |  |  |  |  |  |  | # guess the century here | 
| 554 | 0 | 0 |  |  |  | 0 | if( length($y) == 2 ) { | 
| 555 | 0 | 0 |  |  |  | 0 | if( $y > 60 ) { # arbitrarily guess that '60' means 1960 | 
| 556 | 0 |  |  |  |  | 0 | $y = "19$y"; | 
| 557 |  |  |  |  |  |  | } else { | 
| 558 | 0 |  |  |  |  | 0 | $y = "20$y"; | 
| 559 |  |  |  |  |  |  | } | 
| 560 | 0 |  |  |  |  | 0 | $self->warn("Date was malformed, guessing the century for $date to be $y\n"); | 
| 561 |  |  |  |  |  |  | } | 
| 562 | 0 |  |  |  |  | 0 | $self->{'_params'}->{'-dates'} = [join('-',$d,$m,$y)]; | 
| 563 |  |  |  |  |  |  | } else { | 
| 564 | 30 |  |  |  |  | 107 | $self->{'_params'}->{'-dates'} = [$date]; | 
| 565 |  |  |  |  |  |  | } | 
| 566 |  |  |  |  |  |  | } | 
| 567 |  |  |  |  |  |  | } | 
| 568 |  |  |  |  |  |  |  | 
| 569 |  |  |  |  |  |  | # EMBL ID line | 
| 570 |  |  |  |  |  |  | sub _embl_id { | 
| 571 | 10 |  |  | 10 |  | 33 | my ($self, $data) = @_; | 
| 572 | 10 |  |  |  |  | 21 | my $alphabet; | 
| 573 | 10 |  |  |  |  | 26 | my ($name, $sv, $topology, $mol, $div); | 
| 574 | 10 |  |  |  |  | 28 | my $line = $data->{DATA}; | 
| 575 |  |  |  |  |  |  | #$self->debug("$line\n"); | 
| 576 | 10 |  |  |  |  | 34 | my ($idtype) = $line =~ tr/;/;/; | 
| 577 | 10 | 100 |  |  |  | 61 | if ( $idtype == 6) {   # New style headers contain exactly six semicolons. | 
|  |  | 100 |  |  |  |  |  | 
| 578 |  |  |  |  |  |  | # New style header (EMBL Release >= 87, after June 2006) | 
| 579 | 1 |  |  |  |  | 5 | my $topology; | 
| 580 |  |  |  |  |  |  | my $sv; | 
| 581 |  |  |  |  |  |  |  | 
| 582 |  |  |  |  |  |  | # ID   DQ299383; SV 1; linear; mRNA; STD; MAM; 431 BP. | 
| 583 |  |  |  |  |  |  | # This regexp comes from the new2old.pl conversion script, from EBI | 
| 584 | 1 | 50 |  |  |  | 13 | if ($line =~ m/^(\w+);\s+SV (\d+); (\w+); ([^;]+); (\w{3}); (\w{3}); (\d+) \w{2}\./) { | 
| 585 | 1 |  |  |  |  | 9 | ($name, $sv, $topology, $mol, $div) = ($1, $2, $3, $4, $6); | 
| 586 |  |  |  |  |  |  | } else { | 
| 587 | 0 |  |  |  |  | 0 | $self->throw("Unrecognized EMBL ID line:[$line]"); | 
| 588 |  |  |  |  |  |  | } | 
| 589 | 1 | 50 |  |  |  | 5 | if (defined($sv)) { | 
| 590 | 1 |  |  |  |  | 6 | $self->{'_params'}->{'-seq_version'} = $sv; | 
| 591 | 1 |  |  |  |  | 4 | $self->{'_params'}->{'-version'} = $sv; | 
| 592 |  |  |  |  |  |  | } | 
| 593 |  |  |  |  |  |  |  | 
| 594 | 1 | 50 |  |  |  | 4 | if ($topology eq "circular") { | 
| 595 | 0 |  |  |  |  | 0 | $self->{'_params'}->{'-is_circular'} = 1; | 
| 596 |  |  |  |  |  |  | } | 
| 597 |  |  |  |  |  |  |  | 
| 598 | 1 | 50 |  |  |  | 4 | if (defined $mol ) { | 
| 599 | 1 | 50 |  |  |  | 8 | if ($mol =~ /DNA/) { | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 600 | 0 |  |  |  |  | 0 | $alphabet='dna'; | 
| 601 |  |  |  |  |  |  | } | 
| 602 |  |  |  |  |  |  | elsif ($mol =~ /RNA/) { | 
| 603 | 1 |  |  |  |  | 4 | $alphabet='rna'; | 
| 604 |  |  |  |  |  |  | } | 
| 605 |  |  |  |  |  |  | elsif ($mol =~ /AA/) { | 
| 606 | 0 |  |  |  |  | 0 | $alphabet='protein'; | 
| 607 |  |  |  |  |  |  | } | 
| 608 |  |  |  |  |  |  | } | 
| 609 |  |  |  |  |  |  | } elsif ($idtype) { # has internal ';' | 
| 610 |  |  |  |  |  |  | # Old style header (EMBL Release < 87, before June 2006) | 
| 611 | 8 | 50 |  |  |  | 99 | if ($line =~ m{^(\S+)[^;]*;\s+(\S+)[^;]*;\s+(\S+)[^;]*;}) { | 
| 612 | 8 |  |  |  |  | 52 | ($name, $mol, $div) = ($1, $2, $3); | 
| 613 |  |  |  |  |  |  | #$self->debug("[$name][$mol][$div]"); | 
| 614 |  |  |  |  |  |  | } | 
| 615 |  |  |  |  |  |  |  | 
| 616 | 8 | 50 |  |  |  | 41 | if($mol) { | 
| 617 | 8 | 50 |  |  |  | 33 | if ( $mol =~ m{circular} ) { | 
| 618 | 0 |  |  |  |  | 0 | $self->{'_params'}->{'-is_circular'} = 1; | 
| 619 | 0 |  |  |  |  | 0 | $mol =~  s{circular }{}; | 
| 620 |  |  |  |  |  |  | } | 
| 621 | 8 | 50 |  |  |  | 37 | if (defined $mol ) { | 
| 622 | 8 | 100 |  |  |  | 53 | if ($mol =~ /DNA/) { | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 623 | 7 |  |  |  |  | 30 | $alphabet='dna'; | 
| 624 |  |  |  |  |  |  | } | 
| 625 |  |  |  |  |  |  | elsif ($mol =~ /RNA/) { | 
| 626 | 1 |  |  |  |  | 5 | $alphabet='rna'; | 
| 627 |  |  |  |  |  |  | } | 
| 628 |  |  |  |  |  |  | elsif ($mol =~ /AA/) { | 
| 629 | 0 |  |  |  |  | 0 | $alphabet='protein'; | 
| 630 |  |  |  |  |  |  | } | 
| 631 |  |  |  |  |  |  | } | 
| 632 |  |  |  |  |  |  | } | 
| 633 |  |  |  |  |  |  | } else { | 
| 634 | 1 |  |  |  |  | 2 | $name = $data->{DATA}; | 
| 635 |  |  |  |  |  |  | } | 
| 636 | 10 | 50 | 33 |  |  | 85 | unless( defined $name && length($name) ) { | 
| 637 | 0 |  |  |  |  | 0 | $name = "unknown_id"; | 
| 638 |  |  |  |  |  |  | } | 
| 639 | 10 |  |  |  |  | 69 | $self->{'_params'}->{'-display_id'} = $name; | 
| 640 | 10 |  |  |  |  | 35 | $self->{'_params'}->{'-alphabet'} = $alphabet; | 
| 641 | 10 | 100 |  |  |  | 62 | $self->{'_params'}->{'-division'} = $div if $div; | 
| 642 | 10 | 100 |  |  |  | 71 | $self->{'_params'}->{'-molecule'} = $mol if $mol; | 
| 643 |  |  |  |  |  |  | } | 
| 644 |  |  |  |  |  |  |  | 
| 645 |  |  |  |  |  |  | # UniProt/SwissProt ID line | 
| 646 |  |  |  |  |  |  | sub _swiss_id { | 
| 647 | 17 |  |  | 17 |  | 55 | my ($self, $data) = @_; | 
| 648 | 17 |  |  |  |  | 46 | my ($name, $seq_div); | 
| 649 | 17 | 50 |  |  |  | 198 | if($data->{DATA} =~ m{^ | 
| 650 |  |  |  |  |  |  | (\S+)           \s+     #  $1  entryname | 
| 651 |  |  |  |  |  |  | ([^\s;]+);      \s+     #  $2  DataClass | 
| 652 |  |  |  |  |  |  | (?:PRT;)?       \s+     #  Molecule Type (optional) | 
| 653 |  |  |  |  |  |  | [0-9]+[ ]AA     \.      #  Sequencelength (capture?) | 
| 654 |  |  |  |  |  |  | $ | 
| 655 |  |  |  |  |  |  | }ox ) { | 
| 656 | 17 |  |  |  |  | 93 | ($name, $seq_div) = ($1, $2); | 
| 657 | 17 | 50 | 100 |  |  | 197 | $self->{'_params'}->{'-namespace'} = | 
|  |  | 100 | 66 |  |  |  |  | 
| 658 |  |  |  |  |  |  | ($seq_div eq 'Reviewed'   || $seq_div eq 'STANDARD')     ? 'Swiss-Prot' : | 
| 659 |  |  |  |  |  |  | ($seq_div eq 'Unreviewed' || $seq_div eq 'PRELIMINARY')  ? 'TrEMBL'     : | 
| 660 |  |  |  |  |  |  | $seq_div; | 
| 661 |  |  |  |  |  |  | # we shouldn't be setting the division, but for now... | 
| 662 | 17 |  |  |  |  | 113 | my ($junk, $division) = split q(_), $name; | 
| 663 | 17 |  |  |  |  | 87 | $self->{'_params'}->{'-division'} = $division; | 
| 664 | 17 |  |  |  |  | 53 | $self->{'_params'}->{'-alphabet'} = 'protein'; | 
| 665 |  |  |  |  |  |  | # this is important to have the id for display in e.g. FTHelper, otherwise | 
| 666 |  |  |  |  |  |  | # you won't know which entry caused an error | 
| 667 | 17 |  |  |  |  | 97 | $self->{'_params'}->{'-display_id'} = $name; | 
| 668 |  |  |  |  |  |  | } else { | 
| 669 | 0 |  |  |  |  | 0 | $self->throw("Unrecognized UniProt/SwissProt ID line:[".$data->{DATA}."]"); | 
| 670 |  |  |  |  |  |  | } | 
| 671 |  |  |  |  |  |  | } | 
| 672 |  |  |  |  |  |  |  | 
| 673 |  |  |  |  |  |  | # UniProt/SwissProt GN line | 
| 674 |  |  |  |  |  |  | sub _swiss_genename { | 
| 675 | 17 |  |  | 17 |  | 51 | my ($self, $data) = @_; | 
| 676 |  |  |  |  |  |  | #$self->debug(Dumper($data)); | 
| 677 | 17 |  |  |  |  | 68 | my $genename = $data->{DATA}; | 
| 678 | 17 |  |  |  |  | 31 | my $gn; | 
| 679 | 17 | 50 |  |  |  | 73 | if ($genename) { | 
| 680 | 17 |  |  |  |  | 45 | my @stags; | 
| 681 | 17 | 100 |  |  |  | 98 | if ($genename =~ /\w=\w/) { | 
| 682 |  |  |  |  |  |  | # new format (e.g., Name=RCHY1; Synonyms=ZNF363, CHIMP) | 
| 683 | 10 |  |  |  |  | 57 | for my $n (split(m{\s+and\s+},$genename)) { | 
| 684 | 12 |  |  |  |  | 26 | my @genenames; | 
| 685 | 12 |  |  |  |  | 136 | for my $section (split(m{\s*;\s*},$n)) { | 
| 686 | 15 |  |  |  |  | 60 | my ($tag, $rest) = split("=",$section); | 
| 687 | 15 |  | 50 |  |  | 45 | $rest ||= ''; | 
| 688 | 15 |  |  |  |  | 48 | for my $val (split(m{\s*,\s*},$rest)) { | 
| 689 | 19 |  |  |  |  | 73 | push @genenames, [$tag => $val]; | 
| 690 |  |  |  |  |  |  | } | 
| 691 |  |  |  |  |  |  | } | 
| 692 | 12 |  |  |  |  | 58 | push @stags, ['gene_name' => \@genenames]; | 
| 693 |  |  |  |  |  |  | } | 
| 694 |  |  |  |  |  |  | } else { | 
| 695 |  |  |  |  |  |  | # old format | 
| 696 | 7 |  |  |  |  | 38 | for my $section (split(/ AND /, $genename)) { | 
| 697 | 9 |  |  |  |  | 19 | my @genenames; | 
| 698 | 9 |  |  |  |  | 51 | $section =~ s/[\(\)\.]//g; | 
| 699 | 9 |  |  |  |  | 44 | my @names = split(m{\s+OR\s+}, $section); | 
| 700 | 9 |  |  |  |  | 39 | push @genenames, ['Name' => shift @names]; | 
| 701 | 9 |  |  |  |  | 24 | push @genenames, map {['Synonyms' => $_]} @names; | 
|  | 10 |  |  |  |  | 23 |  | 
| 702 | 9 |  |  |  |  | 45 | push @stags, ['gene_name' => \@genenames] | 
| 703 |  |  |  |  |  |  | } | 
| 704 |  |  |  |  |  |  | } #use Data::Dumper; print Dumper $gn, $genename;# exit; | 
| 705 | 17 |  |  |  |  | 251 | my $gn = Bio::Annotation::TagTree->new(-tagname => 'gene_name', | 
| 706 |  |  |  |  |  |  | -value => ['gene_names' => \@stags]); | 
| 707 | 17 |  |  |  |  | 78 | $self->annotation_collection->add_Annotation('gene_name', $gn); | 
| 708 |  |  |  |  |  |  | } | 
| 709 |  |  |  |  |  |  | } | 
| 710 |  |  |  |  |  |  |  | 
| 711 |  |  |  |  |  |  | # GenBank VERSION line | 
| 712 |  |  |  |  |  |  | # old EMBL SV line (now obsolete) | 
| 713 |  |  |  |  |  |  | # UniProt/SwissProt? | 
| 714 |  |  |  |  |  |  | sub _generic_version { | 
| 715 | 28 |  |  | 28 |  | 74 | my ($self, $data) = @_; | 
| 716 | 28 |  |  |  |  | 131 | my ($acc,$gi) = split(' ',$data->{DATA}); | 
| 717 | 28 | 100 |  |  |  | 157 | if($acc =~ m{^\w+\.(\d+)}xmso) { | 
| 718 | 27 |  |  |  |  | 95 | $self->{'_params'}->{'-version'} = $1; | 
| 719 | 27 |  |  |  |  | 60 | $self->{'_params'}->{'-seq_version'} = $1; | 
| 720 |  |  |  |  |  |  | } | 
| 721 | 28 | 100 | 66 |  |  | 158 | if($gi && (index($gi,"GI:") == 0)) { | 
| 722 | 24 |  |  |  |  | 90 | $self->{'_params'}->{'-primary_id'} = substr($gi,3); | 
| 723 |  |  |  |  |  |  | } | 
| 724 |  |  |  |  |  |  | } | 
| 725 |  |  |  |  |  |  |  | 
| 726 |  |  |  |  |  |  | # EMBL DT lines | 
| 727 |  |  |  |  |  |  | sub _embl_date { | 
| 728 | 5 |  |  | 5 |  | 21 | my ($self, $data) = @_; | 
| 729 | 5 |  |  |  |  | 58 | while ($data->{DATA} =~ m{(\S+)\s\((.*?)\)}g) { | 
| 730 | 10 |  |  |  |  | 44 | my ($date, $version) = ($1, $2); | 
| 731 | 10 |  |  |  |  | 25 | $date =~ tr{,}{}d; # remove comma if new version | 
| 732 | 10 | 50 |  |  |  | 51 | if ($version =~ m{\(Rel\.\s(\d+),\sCreated\)}xmso ) { | 
|  |  | 50 |  |  |  |  |  | 
| 733 | 0 |  |  |  |  | 0 | my $release = Bio::Annotation::SimpleValue->new( | 
| 734 |  |  |  |  |  |  | -tagname    => 'creation_release', | 
| 735 |  |  |  |  |  |  | -value      => $1 | 
| 736 |  |  |  |  |  |  | ); | 
| 737 | 0 |  |  |  |  | 0 | $self->annotation_collection->add_Annotation($release); | 
| 738 |  |  |  |  |  |  | } elsif ($version =~ m{\(Rel\.\s(\d+),\sLast\supdated,\sVersion\s(\d+)\)}xmso ) { | 
| 739 | 0 |  |  |  |  | 0 | my $release = Bio::Annotation::SimpleValue->new( | 
| 740 |  |  |  |  |  |  | -tagname    => 'update_release', | 
| 741 |  |  |  |  |  |  | -value      => $1 | 
| 742 |  |  |  |  |  |  | ); | 
| 743 | 0 |  |  |  |  | 0 | $self->annotation_collection->add_Annotation($release); | 
| 744 | 0 |  |  |  |  | 0 | my $update = Bio::Annotation::SimpleValue->new( | 
| 745 |  |  |  |  |  |  | -tagname    => 'update_version', | 
| 746 |  |  |  |  |  |  | -value      => $2 | 
| 747 |  |  |  |  |  |  | ); | 
| 748 | 0 |  |  |  |  | 0 | $self->annotation_collection->add_Annotation($update); | 
| 749 |  |  |  |  |  |  | } | 
| 750 | 10 |  |  |  |  | 23 | push @{ $self->{'_params'}->{'-dates'} }, $date; | 
|  | 10 |  |  |  |  | 83 |  | 
| 751 |  |  |  |  |  |  | } | 
| 752 |  |  |  |  |  |  | } | 
| 753 |  |  |  |  |  |  |  | 
| 754 |  |  |  |  |  |  | # UniProt/SwissProt DT lines | 
| 755 |  |  |  |  |  |  | sub _swiss_date { | 
| 756 | 17 |  |  | 17 |  | 71 | my ($self, $data) = @_; | 
| 757 |  |  |  |  |  |  | # swissprot | 
| 758 | 17 |  |  |  |  | 101 | my @dls = split m{\n}, $data->{DATA}; | 
| 759 | 17 |  |  |  |  | 66 | for my $dl (@dls) { | 
| 760 | 51 |  |  |  |  | 191 | my ($date, $version) = split(' ', $dl, 2); | 
| 761 | 51 |  |  |  |  | 121 | $date =~ tr{,}{}d; # remove comma if new version | 
| 762 | 51 | 100 | 100 |  |  | 718 | if ($version =~ m{\(Rel\. (\d+), Last sequence update\)} || # old | 
|  |  | 100 | 100 |  |  |  |  | 
| 763 |  |  |  |  |  |  | $version =~ m{sequence version (\d+)\.}) { #new | 
| 764 | 17 |  |  |  |  | 214 | my $update = Bio::Annotation::SimpleValue->new( | 
| 765 |  |  |  |  |  |  | -tagname    => 'seq_update', | 
| 766 |  |  |  |  |  |  | -value      => $1 | 
| 767 |  |  |  |  |  |  | ); | 
| 768 | 17 |  |  |  |  | 91 | $self->annotation_collection->add_Annotation($update); | 
| 769 |  |  |  |  |  |  | } elsif ($version =~ m{\(Rel\. (\d+), Last annotation update\)} || #old | 
| 770 |  |  |  |  |  |  | $version =~ m{entry version (\d+)\.}) { #new | 
| 771 | 17 |  |  |  |  | 112 | $self->{'_params'}->{'-version'} = $1; | 
| 772 | 17 |  |  |  |  | 74 | $self->{'_params'}->{'-seq_version'} = $1; | 
| 773 |  |  |  |  |  |  | } | 
| 774 | 51 |  |  |  |  | 92 | push @{ $self->{'_params'}->{'-dates'} }, $date; | 
|  | 51 |  |  |  |  | 233 |  | 
| 775 |  |  |  |  |  |  | } | 
| 776 |  |  |  |  |  |  | } | 
| 777 |  |  |  |  |  |  |  | 
| 778 |  |  |  |  |  |  | # GenBank KEYWORDS line | 
| 779 |  |  |  |  |  |  | # EMBL KW line | 
| 780 |  |  |  |  |  |  | # UniProt/SwissProt KW line | 
| 781 |  |  |  |  |  |  | sub _generic_keywords { | 
| 782 | 55 |  |  | 55 |  | 151 | my ($self, $data) = @_; | 
| 783 | 55 |  |  |  |  | 368 | $data->{DATA} =~ s{\.$}{}; | 
| 784 | 55 |  |  |  |  | 562 | my @kw = split m{\s*\;\s*}xo ,$data->{DATA}; | 
| 785 | 55 |  |  |  |  | 246 | $self->{'_params'}->{'-keywords'} = \@kw; | 
| 786 |  |  |  |  |  |  | } | 
| 787 |  |  |  |  |  |  |  | 
| 788 |  |  |  |  |  |  | # GenBank DEFINITION line | 
| 789 |  |  |  |  |  |  | # EMBL DE line | 
| 790 |  |  |  |  |  |  | # UniProt/SwissProt DE line | 
| 791 |  |  |  |  |  |  | sub _generic_description { | 
| 792 | 57 |  |  | 57 |  | 159 | my ($self, $data) = @_; | 
| 793 | 57 |  |  |  |  | 216 | $self->{'_params'}->{'-desc'} = $data->{DATA}; | 
| 794 |  |  |  |  |  |  | } | 
| 795 |  |  |  |  |  |  |  | 
| 796 |  |  |  |  |  |  | # GenBank ACCESSION line | 
| 797 |  |  |  |  |  |  | # EMBL AC line | 
| 798 |  |  |  |  |  |  | # UniProt/SwissProt AC line | 
| 799 |  |  |  |  |  |  | sub _generic_accession { | 
| 800 | 56 |  |  | 56 |  | 163 | my ($self, $data) = @_; | 
| 801 | 56 |  |  |  |  | 370 | my @accs = split m{[\s;]+}, $data->{DATA}; | 
| 802 | 56 |  |  |  |  | 214 | $self->{'_params'}->{'-accession_number'} = shift @accs; | 
| 803 | 56 | 100 |  |  |  | 253 | $self->{'_params'}->{'-secondary_accessions'} = \@accs if @accs; | 
| 804 |  |  |  |  |  |  | } | 
| 805 |  |  |  |  |  |  |  | 
| 806 |  |  |  |  |  |  | ####################### SPECIES HANDLERS ####################### | 
| 807 |  |  |  |  |  |  |  | 
| 808 |  |  |  |  |  |  | # uses Bio::Species | 
| 809 |  |  |  |  |  |  | # GenBank SOURCE, ORGANISM lines | 
| 810 |  |  |  |  |  |  | # EMBL O* lines | 
| 811 |  |  |  |  |  |  | # UniProt/SwissProt O* lines | 
| 812 |  |  |  |  |  |  | sub _generic_species { | 
| 813 | 56 |  |  | 56 |  | 149 | my ($self, $data) = @_; | 
| 814 |  |  |  |  |  |  |  | 
| 815 | 56 |  |  |  |  | 195 | my $seqformat = $self->format; | 
| 816 |  |  |  |  |  |  | # if data is coming in from GenBank parser... | 
| 817 | 56 | 100 | 66 |  |  | 456 | if ($seqformat eq 'genbank' && | 
| 818 |  |  |  |  |  |  | $data->{ORGANISM} =~ m{(.+?)\s(\S+;[^\n\.]+)}ox) { | 
| 819 | 30 |  |  |  |  | 135 | ($data->{ORGANISM}, $data->{CLASSIFICATION}) = ($1, $2); | 
| 820 |  |  |  |  |  |  | } | 
| 821 |  |  |  |  |  |  |  | 
| 822 |  |  |  |  |  |  | # SwissProt stuff... | 
| 823 |  |  |  |  |  |  | # hybrid names in swissprot files are no longer valid per intergration into | 
| 824 |  |  |  |  |  |  | # UniProt. Files containing these have been split into separate entries, so | 
| 825 |  |  |  |  |  |  | # it is probably a good idea to update if one has these lingering around... | 
| 826 |  |  |  |  |  |  |  | 
| 827 | 56 |  |  |  |  | 137 | my $taxid; | 
| 828 | 56 | 100 |  |  |  | 172 | if ($seqformat eq 'swiss') { | 
| 829 | 17 | 50 |  |  |  | 135 | if ($data->{DATA} =~ m{^([^,]+)}ox) { | 
| 830 | 17 |  |  |  |  | 98 | $data->{DATA} = $1; | 
| 831 |  |  |  |  |  |  | } | 
| 832 | 17 | 100 | 66 |  |  | 170 | if ($data->{CROSSREF} && $data->{CROSSREF} =~ m{NCBI_TaxID=(\d+)}) { | 
| 833 | 16 |  |  |  |  | 61 | $taxid = $1; | 
| 834 |  |  |  |  |  |  | } | 
| 835 |  |  |  |  |  |  | } | 
| 836 |  |  |  |  |  |  |  | 
| 837 |  |  |  |  |  |  | my ($sl, $class, $sci_name) = ($data->{DATA}, | 
| 838 |  |  |  |  |  |  | $data->{CLASSIFICATION}, | 
| 839 | 56 |  | 100 |  |  | 334 | $data->{ORGANISM} || ''); | 
| 840 | 56 |  |  |  |  | 109 | my ($organelle,$abbr_name, $common); | 
| 841 | 56 |  |  |  |  | 708 | my @class = reverse split m{\s*;\s*}, $class; | 
| 842 |  |  |  |  |  |  | # have to treat swiss different from everything else... | 
| 843 | 56 | 50 |  |  |  | 749 | if ($sl =~ m{^(mitochondrion|chloroplast|plastid)?   # GenBank format | 
| 844 |  |  |  |  |  |  | \s*(.*?) | 
| 845 |  |  |  |  |  |  | \s*(?: \( (.*?) \) )?\.?$ | 
| 846 |  |  |  |  |  |  | }xmso ){ | 
| 847 | 56 |  |  |  |  | 272 | ($organelle, $abbr_name, $common) = ($1, $2, $3); # optional | 
| 848 |  |  |  |  |  |  | } else { | 
| 849 | 0 |  |  |  |  | 0 | $abbr_name = $sl;	# nothing caught; this is a backup! | 
| 850 |  |  |  |  |  |  | } | 
| 851 |  |  |  |  |  |  | # there is no 'abbreviated name' for EMBL | 
| 852 | 56 | 100 |  |  |  | 208 | $sci_name = $abbr_name if $seqformat ne 'genbank'; | 
| 853 | 56 |  | 100 |  |  | 325 | $organelle ||= ''; | 
| 854 | 56 |  | 100 |  |  | 195 | $common ||= ''; | 
| 855 | 56 | 50 |  |  |  | 118 | $sci_name || return; | 
| 856 | 56 |  |  |  |  | 150 | unshift @class, $sci_name; | 
| 857 |  |  |  |  |  |  | # no genus/species parsing here; moving to Bio::Taxon-based taxonomy | 
| 858 | 56 |  |  |  |  | 532 | my $make = Bio::Species->new(); | 
| 859 | 56 |  |  |  |  | 294 | $make->scientific_name($sci_name); | 
| 860 | 56 | 50 |  |  |  | 344 | $make->classification(@class) if @class > 0; | 
| 861 | 56 | 100 |  |  |  | 302 | $common    && $make->common_name( $common ); | 
| 862 | 56 | 50 |  |  |  | 334 | $abbr_name && $make->name('abbreviated', $abbr_name); | 
| 863 | 56 | 100 |  |  |  | 247 | $organelle && $make->organelle($organelle); | 
| 864 | 56 | 100 |  |  |  | 287 | $taxid     && $make->ncbi_taxid($taxid); | 
| 865 | 56 |  |  |  |  | 668 | $self->{'_params'}->{'-species'} = $make; | 
| 866 |  |  |  |  |  |  | } | 
| 867 |  |  |  |  |  |  |  | 
| 868 |  |  |  |  |  |  | ####################### ANNOTATION HANDLERS ####################### | 
| 869 |  |  |  |  |  |  |  | 
| 870 |  |  |  |  |  |  | # GenBank DBSOURCE line | 
| 871 |  |  |  |  |  |  | sub _genbank_dbsource { | 
| 872 | 1 |  |  | 1 |  | 3 | my ($self, $data) = @_; | 
| 873 | 1 |  |  |  |  | 3 | my $dbsource = $data->{DATA}; | 
| 874 | 1 |  |  |  |  | 3 | my $annotation = $self->annotation_collection; | 
| 875 |  |  |  |  |  |  | # deal with swissprot dbsources | 
| 876 |  |  |  |  |  |  | # we could possibly parcel these out to subhandlers... | 
| 877 | 1 | 50 |  |  |  | 4 | if( $dbsource =~ s/(UniProt(?:KB)|swissprot):\s+locus\s+(\S+)\,.+\n// ) { | 
| 878 | 0 |  |  |  |  | 0 | $annotation->add_Annotation | 
| 879 |  |  |  |  |  |  | ('dblink', | 
| 880 |  |  |  |  |  |  | Bio::Annotation::DBLink->new | 
| 881 |  |  |  |  |  |  | (-primary_id => $2, | 
| 882 |  |  |  |  |  |  | -database => $1, | 
| 883 |  |  |  |  |  |  | -tagname => 'dblink')); | 
| 884 | 0 | 0 |  |  |  | 0 | if( $dbsource =~ s/\s*created:\s+([^\.]+)\.\n// ) { | 
| 885 | 0 |  |  |  |  | 0 | $annotation->add_Annotation | 
| 886 |  |  |  |  |  |  | ('swissprot_dates', | 
| 887 |  |  |  |  |  |  | Bio::Annotation::SimpleValue->new | 
| 888 |  |  |  |  |  |  | (-tagname => 'date_created', | 
| 889 |  |  |  |  |  |  | -value => $1)); | 
| 890 |  |  |  |  |  |  | } | 
| 891 | 0 |  |  |  |  | 0 | while( $dbsource =~ s/\s*(sequence|annotation)\s+updated:\s+([^\.]+)\.\n//g ) { | 
| 892 | 0 |  |  |  |  | 0 | $annotation->add_Annotation | 
| 893 |  |  |  |  |  |  | ('swissprot_dates', | 
| 894 |  |  |  |  |  |  | Bio::Annotation::SimpleValue->new | 
| 895 |  |  |  |  |  |  | (-tagname => 'date_updated', | 
| 896 |  |  |  |  |  |  | -value => $1)); | 
| 897 |  |  |  |  |  |  | } | 
| 898 | 0 |  |  |  |  | 0 | $dbsource =~ s/\n/ /g; | 
| 899 | 0 | 0 |  |  |  | 0 | if( $dbsource =~ s/\s*xrefs:\s+((?:\S+,\s+)+\S+)\s+xrefs/xrefs/ ) { | 
|  |  | 0 |  |  |  |  |  | 
| 900 |  |  |  |  |  |  | # will use $i to determine even or odd | 
| 901 |  |  |  |  |  |  | # for swissprot the accessions are paired | 
| 902 | 0 |  |  |  |  | 0 | my $i = 0; | 
| 903 | 0 |  |  |  |  | 0 | for my $dbsrc ( split(/,\s+/,$1) ) { | 
| 904 | 0 | 0 | 0 |  |  | 0 | if( $dbsrc =~ /(\S+)\.(\d+)/ || $dbsrc =~ /(\S+)/ ) { | 
| 905 | 0 |  |  |  |  | 0 | my ($id,$version) = ($1,$2); | 
| 906 | 0 | 0 |  |  |  | 0 | $version ='' unless defined $version; | 
| 907 | 0 |  |  |  |  | 0 | my $db; | 
| 908 | 0 | 0 |  |  |  | 0 | if( $id =~ /^\d\S{3}/) { | 
| 909 | 0 |  |  |  |  | 0 | $db = 'PDB'; | 
| 910 |  |  |  |  |  |  | } else { | 
| 911 | 0 | 0 |  |  |  | 0 | $db = ($i++ % 2 ) ? 'GenPept' : 'GenBank'; | 
| 912 |  |  |  |  |  |  | } | 
| 913 | 0 |  |  |  |  | 0 | $annotation->add_Annotation | 
| 914 |  |  |  |  |  |  | ('dblink', | 
| 915 |  |  |  |  |  |  | Bio::Annotation::DBLink->new | 
| 916 |  |  |  |  |  |  | (-primary_id => $id, | 
| 917 |  |  |  |  |  |  | -version => $version, | 
| 918 |  |  |  |  |  |  | -database => $db, | 
| 919 |  |  |  |  |  |  | -tagname => 'dblink')); | 
| 920 |  |  |  |  |  |  | } | 
| 921 |  |  |  |  |  |  | } | 
| 922 |  |  |  |  |  |  | } elsif( $dbsource =~ s/\s*xrefs:\s+(.+)\s+xrefs/xrefs/i ) { | 
| 923 |  |  |  |  |  |  | # download screwed up and ncbi didn't put acc in for gi numbers | 
| 924 | 0 |  |  |  |  | 0 | my $i = 0; | 
| 925 | 0 |  |  |  |  | 0 | for my $id ( split(/\,\s+/,$1) ) { | 
| 926 | 0 |  |  |  |  | 0 | my ($acc,$db); | 
| 927 | 0 | 0 |  |  |  | 0 | if( $id =~ /gi:\s+(\d+)/ ) { | 
|  |  | 0 |  |  |  |  |  | 
| 928 | 0 |  |  |  |  | 0 | $acc= $1; | 
| 929 | 0 | 0 |  |  |  | 0 | $db = ($i++ % 2 ) ? 'GenPept' : 'GenBank'; | 
| 930 |  |  |  |  |  |  | } elsif( $id =~ /pdb\s+accession\s+(\S+)/ ) { | 
| 931 | 0 |  |  |  |  | 0 | $acc= $1; | 
| 932 | 0 |  |  |  |  | 0 | $db = 'PDB'; | 
| 933 |  |  |  |  |  |  | } else { | 
| 934 | 0 |  |  |  |  | 0 | $acc= $id; | 
| 935 | 0 |  |  |  |  | 0 | $db = ''; | 
| 936 |  |  |  |  |  |  | } | 
| 937 | 0 |  |  |  |  | 0 | $annotation->add_Annotation | 
| 938 |  |  |  |  |  |  | ('dblink', | 
| 939 |  |  |  |  |  |  | Bio::Annotation::DBLink->new | 
| 940 |  |  |  |  |  |  | (-primary_id => $acc, | 
| 941 |  |  |  |  |  |  | -database => $db, | 
| 942 |  |  |  |  |  |  | -tagname => 'dblink')); | 
| 943 |  |  |  |  |  |  | } | 
| 944 |  |  |  |  |  |  | } else { | 
| 945 | 0 |  |  |  |  | 0 | $self->warn("Cannot match $dbsource\n"); | 
| 946 |  |  |  |  |  |  | } | 
| 947 | 0 | 0 |  |  |  | 0 | if( $dbsource =~ s/xrefs\s+\(non\-sequence\s+databases\):\s+ | 
| 948 |  |  |  |  |  |  | ((?:\S+,\s+)+\S+)//x ) { | 
| 949 | 0 |  |  |  |  | 0 | for my $id ( split(/\,\s+/,$1) ) { | 
| 950 | 0 |  |  |  |  | 0 | my $db; | 
| 951 |  |  |  |  |  |  | # this is because GenBank dropped the spaces!!! | 
| 952 |  |  |  |  |  |  | # I'm sure we're not going to get this right | 
| 953 |  |  |  |  |  |  | ##if( $id =~ s/^://i ) { | 
| 954 |  |  |  |  |  |  | ##    $db = $1; | 
| 955 |  |  |  |  |  |  | ##} | 
| 956 | 0 |  |  |  |  | 0 | $db = substr($id,0,index($id,':')); | 
| 957 | 0 | 0 |  |  |  | 0 | if (! exists $DBSOURCE{ $db }) { | 
| 958 | 0 |  |  |  |  | 0 | $db = '';   # do we want 'GenBank' here? | 
| 959 |  |  |  |  |  |  | } | 
| 960 | 0 |  |  |  |  | 0 | $id = substr($id,index($id,':')+1); | 
| 961 | 0 |  |  |  |  | 0 | $annotation->add_Annotation | 
| 962 |  |  |  |  |  |  | ('dblink',Bio::Annotation::DBLink->new | 
| 963 |  |  |  |  |  |  | (-primary_id => $id, | 
| 964 |  |  |  |  |  |  | -database => $db, | 
| 965 |  |  |  |  |  |  | -tagname => 'dblink')); | 
| 966 |  |  |  |  |  |  | } | 
| 967 |  |  |  |  |  |  | } | 
| 968 |  |  |  |  |  |  | } else { | 
| 969 | 1 | 50 |  |  |  | 7 | if( $dbsource =~ /^(\S*?):?\s*accession\s+(\S+)\.(\d+)/ ) { | 
|  |  | 0 |  |  |  |  |  | 
| 970 | 1 |  |  |  |  | 4 | my ($db,$id,$version) = ($1,$2,$3); | 
| 971 | 1 |  | 50 |  |  | 11 | $annotation->add_Annotation | 
| 972 |  |  |  |  |  |  | ('dblink', | 
| 973 |  |  |  |  |  |  | Bio::Annotation::DBLink->new | 
| 974 |  |  |  |  |  |  | (-primary_id => $id, | 
| 975 |  |  |  |  |  |  | -version => $version, | 
| 976 |  |  |  |  |  |  | -database => $db || 'GenBank', | 
| 977 |  |  |  |  |  |  | -tagname => 'dblink')); | 
| 978 |  |  |  |  |  |  | } elsif ( $dbsource =~ /(\S+)([\.:])(\d+)/ ) { | 
| 979 | 0 |  |  |  |  | 0 | my ($id, $db, $version); | 
| 980 | 0 | 0 |  |  |  | 0 | if ($2 eq ':') { | 
| 981 | 0 |  |  |  |  | 0 | ($db, $id) = ($1, $3); | 
| 982 |  |  |  |  |  |  | } else { | 
| 983 | 0 |  |  |  |  | 0 | ($db, $id, $version) = ('GenBank', $1, $3); | 
| 984 |  |  |  |  |  |  | } | 
| 985 | 0 |  |  |  |  | 0 | $annotation->add_Annotation('dblink', | 
| 986 |  |  |  |  |  |  | Bio::Annotation::DBLink->new( | 
| 987 |  |  |  |  |  |  | -primary_id => $id, | 
| 988 |  |  |  |  |  |  | -version => $version, | 
| 989 |  |  |  |  |  |  | -database => $db, | 
| 990 |  |  |  |  |  |  | -tagname => 'dblink') | 
| 991 |  |  |  |  |  |  | ); | 
| 992 |  |  |  |  |  |  | } else { | 
| 993 | 0 |  |  |  |  | 0 | $self->warn("Unrecognized DBSOURCE data: $dbsource\n"); | 
| 994 |  |  |  |  |  |  | } | 
| 995 |  |  |  |  |  |  | } | 
| 996 |  |  |  |  |  |  | } | 
| 997 |  |  |  |  |  |  |  | 
| 998 |  |  |  |  |  |  | # EMBL DR lines | 
| 999 |  |  |  |  |  |  | # UniProt/SwissProt DR lines | 
| 1000 |  |  |  |  |  |  | sub _generic_dbsource { | 
| 1001 | 20 |  |  | 20 |  | 76 | my ($self, $data) = @_; | 
| 1002 |  |  |  |  |  |  | #$self->debug(Dumper($data)); | 
| 1003 | 20 |  |  |  |  | 222 | while ($data->{DATA} =~ m{([^\n]+)}og) { | 
| 1004 | 351 |  |  |  |  | 1061 | my $dblink = $1; | 
| 1005 | 351 |  |  |  |  | 1515 | $dblink =~ s{\.$}{}; | 
| 1006 | 351 |  |  |  |  | 585 | my $link; | 
| 1007 | 351 |  |  |  |  | 1079 | my @linkdata = split '; ',$dblink; | 
| 1008 | 351 | 50 |  |  |  | 1899 | if ( $dblink =~ m{([^\s;]+);\s*([^\s;]+);?\s*([^\s;]+)?}) { | 
| 1009 |  |  |  |  |  |  | #if ( $dblink =~ m{([^\s;]+);\s*([^\s;]+);?\s*([^\s;]+)?}) { | 
| 1010 | 351 |  |  |  |  | 1323 | my ($databse, $prim_id, $sec_id) = ($1,$2,$3); | 
| 1011 | 351 |  |  |  |  | 1388 | $link = Bio::Annotation::DBLink->new(-database    => $databse, | 
| 1012 |  |  |  |  |  |  | -primary_id  => $prim_id, | 
| 1013 |  |  |  |  |  |  | -optional_id => $sec_id); | 
| 1014 |  |  |  |  |  |  | } else { | 
| 1015 | 0 |  |  |  |  | 0 | $self->warn("No match for $dblink"); | 
| 1016 |  |  |  |  |  |  | } | 
| 1017 | 351 |  |  |  |  | 1010 | $self->annotation_collection->add_Annotation('dblink', $link); | 
| 1018 |  |  |  |  |  |  | } | 
| 1019 |  |  |  |  |  |  | } | 
| 1020 |  |  |  |  |  |  |  | 
| 1021 |  |  |  |  |  |  |  | 
| 1022 |  |  |  |  |  |  | # GenBank REFERENCE and related lines | 
| 1023 |  |  |  |  |  |  | # EMBL R* lines | 
| 1024 |  |  |  |  |  |  | # UniProt/SwissProt R* lines | 
| 1025 |  |  |  |  |  |  | sub _generic_reference { | 
| 1026 | 301 |  |  | 301 |  | 594 | my ($self, $data) = @_; | 
| 1027 | 301 |  |  |  |  | 707 | my $seqformat = $self->format; | 
| 1028 | 301 |  |  |  |  | 526 | my ($start, $end); | 
| 1029 |  |  |  |  |  |  | # get these in EMBL/Swiss | 
| 1030 | 301 | 100 |  |  |  | 803 | if ($data->{CROSSREF}) { | 
| 1031 | 128 |  |  |  |  | 1322 | while ($data->{CROSSREF} =~ m{(pubmed|doi|medline)(?:=|;\s+)(\S+)}oig) { | 
| 1032 | 199 |  |  |  |  | 838 | my ($db, $ref) = (uc $1, $2); | 
| 1033 | 199 |  |  |  |  | 872 | $ref =~ s{[;.]+$}{}; | 
| 1034 | 199 |  |  |  |  | 1015 | $data->{$db} = $ref; | 
| 1035 |  |  |  |  |  |  | } | 
| 1036 |  |  |  |  |  |  | } | 
| 1037 |  |  |  |  |  |  | # run some cleanup for swissprot | 
| 1038 | 301 | 100 |  |  |  | 639 | if ($seqformat eq 'swiss') { | 
| 1039 | 109 |  |  |  |  | 165 | for my $val (values %{ $data }) { | 
|  | 109 |  |  |  |  | 623 |  | 
| 1040 | 959 |  |  |  |  | 2027 | $val =~ s{;$}{}; | 
| 1041 | 959 |  |  |  |  | 1776 | $val =~ s{(\w-)\s}{$1}; | 
| 1042 |  |  |  |  |  |  | } | 
| 1043 |  |  |  |  |  |  | } | 
| 1044 | 301 | 100 |  |  |  | 692 | if ( $data->{POSITION} ) { | 
| 1045 | 127 | 100 |  |  |  | 628 | if ($seqformat eq 'embl') { | 
|  |  | 100 |  |  |  |  |  | 
| 1046 | 18 |  |  |  |  | 110 | ($start, $end) = split '-', $data->{POSITION},2; | 
| 1047 |  |  |  |  |  |  | } elsif ($data->{POSITION} =~ m{.+? OF (\d+)-(\d+).*}) { #swiss | 
| 1048 | 23 |  |  |  |  | 94 | ($start, $end) = ($1, $2); | 
| 1049 |  |  |  |  |  |  | } | 
| 1050 |  |  |  |  |  |  | } | 
| 1051 | 301 | 100 |  |  |  | 925 | if ($data->{DATA} =~ m{^\d+\s+\([a-z]+\s+(\d+)\s+to\s+(\d+)\)}xmso) { | 
| 1052 | 59 |  |  |  |  | 161 | ($start, $end) = ($1, $2); | 
| 1053 |  |  |  |  |  |  | } | 
| 1054 |  |  |  |  |  |  | my $ref = Bio::Annotation::Reference->new( | 
| 1055 |  |  |  |  |  |  | -comment    => $data->{REMARK}, | 
| 1056 |  |  |  |  |  |  | -location   => $data->{JOURNAL}, | 
| 1057 |  |  |  |  |  |  | -pubmed     => $data->{PUBMED}, | 
| 1058 |  |  |  |  |  |  | -consortium => $data->{CONSRTM}, | 
| 1059 |  |  |  |  |  |  | -title      => $data->{TITLE}, | 
| 1060 |  |  |  |  |  |  | -authors    => $data->{AUTHORS}, | 
| 1061 |  |  |  |  |  |  | -medline    => $data->{MEDLINE}, | 
| 1062 |  |  |  |  |  |  | -doi        => $data->{DOI}, | 
| 1063 |  |  |  |  |  |  | -rp         => $data->{POSITION}, # JIC... | 
| 1064 | 301 |  |  |  |  | 3272 | -start      => $start, | 
| 1065 |  |  |  |  |  |  | -end        => $end, | 
| 1066 |  |  |  |  |  |  | ); | 
| 1067 | 301 | 100 |  |  |  | 1780 | if ($data->{DATA} =~  m{^\d+\s+\((.*)\)}xmso) { | 
| 1068 | 59 |  |  |  |  | 185 | $ref->gb_reference($1); | 
| 1069 |  |  |  |  |  |  | } | 
| 1070 | 301 |  |  |  |  | 925 | $self->annotation_collection->add_Annotation('reference', $ref); | 
| 1071 |  |  |  |  |  |  | } | 
| 1072 |  |  |  |  |  |  |  | 
| 1073 |  |  |  |  |  |  | # GenBank COMMENT lines | 
| 1074 |  |  |  |  |  |  | # EMBL CC lines | 
| 1075 |  |  |  |  |  |  | # UniProt/SwissProt CC lines | 
| 1076 |  |  |  |  |  |  | sub _generic_comment { | 
| 1077 | 43 |  |  | 43 |  | 137 | my ($self, $data) = @_; | 
| 1078 |  |  |  |  |  |  | $self->annotation_collection->add_Annotation('comment', | 
| 1079 | 43 |  |  |  |  | 185 | Bio::Annotation::Comment->new( -text => $data->{DATA} )); | 
| 1080 |  |  |  |  |  |  | } | 
| 1081 |  |  |  |  |  |  |  | 
| 1082 |  |  |  |  |  |  | ####################### SEQFEATURE HANDLER ####################### | 
| 1083 |  |  |  |  |  |  |  | 
| 1084 |  |  |  |  |  |  | # GenBank Feature Table | 
| 1085 |  |  |  |  |  |  | sub _generic_seqfeatures { | 
| 1086 | 848 |  |  | 848 |  | 1542 | my ($self, $data) = @_; | 
| 1087 | 848 | 100 |  |  |  | 1951 | return if $data->{FEATURE_KEY} eq 'FEATURES'; | 
| 1088 | 818 |  |  |  |  | 1479 | my $primary_tag = $data->{FEATURE_KEY}; | 
| 1089 |  |  |  |  |  |  |  | 
| 1090 |  |  |  |  |  |  | # grab the NCBI taxon ID from the source SF | 
| 1091 | 818 | 100 | 100 |  |  | 1879 | if ($primary_tag eq 'source' && exists $data->{'db_xref'}) { | 
| 1092 | 36 | 100 | 66 |  |  | 290 | if ( $self->{'_params'}->{'-species'} && | 
| 1093 |  |  |  |  |  |  | $data->{'db_xref'} =~ m{taxon:(\d+)}xmso ) { | 
| 1094 | 35 |  |  |  |  | 188 | $self->{'_params'}->{'-species'}->ncbi_taxid($1); | 
| 1095 |  |  |  |  |  |  | } | 
| 1096 |  |  |  |  |  |  | } | 
| 1097 | 818 |  |  |  |  | 1903 | my $source = $self->format; | 
| 1098 |  |  |  |  |  |  |  | 
| 1099 | 818 |  |  |  |  | 1214 | my $seqid = ${ $self->get_params('accession_number')  }{'accession_number'}; | 
|  | 818 |  |  |  |  | 1930 |  | 
| 1100 |  |  |  |  |  |  |  | 
| 1101 | 818 |  |  |  |  | 1651 | my $loc; | 
| 1102 | 818 |  |  |  |  | 1519 | eval { | 
| 1103 | 818 |  |  |  |  | 3376 | $loc = $self->{'_locfactory'}->from_string($data->{'LOCATION'}); | 
| 1104 |  |  |  |  |  |  | }; | 
| 1105 | 818 | 50 |  |  |  | 1987 | if(! $loc) { | 
| 1106 |  |  |  |  |  |  | $self->warn("exception while parsing location line [" . | 
| 1107 |  |  |  |  |  |  | $data->{'LOCATION'} . | 
| 1108 |  |  |  |  |  |  | "] in reading $source, ignoring feature " . | 
| 1109 | 0 |  |  |  |  | 0 | $data->{'primary_tag'}. | 
| 1110 |  |  |  |  |  |  | " (seqid=" . $seqid . "): " . $@); | 
| 1111 | 0 |  |  |  |  | 0 | return; | 
| 1112 |  |  |  |  |  |  | } | 
| 1113 | 818 | 50 | 33 |  |  | 1747 | if($seqid && (! $loc->is_remote())) { | 
| 1114 | 0 |  |  |  |  | 0 | $loc->seq_id($seqid); # propagates if it is a split location | 
| 1115 |  |  |  |  |  |  | } | 
| 1116 | 818 |  |  |  |  | 3299 | my $sf = Bio::SeqFeature::Generic->direct_new(); | 
| 1117 | 818 |  |  |  |  | 2458 | $sf->location($loc); | 
| 1118 | 818 |  |  |  |  | 2680 | $sf->primary_tag($primary_tag); | 
| 1119 | 818 |  |  |  |  | 2441 | $sf->seq_id($seqid); | 
| 1120 | 818 |  |  |  |  | 2149 | $sf->source_tag($source); | 
| 1121 | 818 |  |  |  |  | 1813 | delete $data->{'FEATURE_KEY'}; | 
| 1122 | 818 |  |  |  |  | 1444 | delete $data->{'LOCATION'}; | 
| 1123 | 818 |  |  |  |  | 1220 | delete $data->{'NAME'}; | 
| 1124 | 818 |  |  |  |  | 1113 | delete $data->{'DATA'}; | 
| 1125 | 818 |  |  |  |  | 2673 | $sf->set_attributes(-tag => $data); | 
| 1126 | 818 |  |  |  |  | 1546 | push @{ $self->{'_params'}->{'-features'} }, $sf; | 
|  | 818 |  |  |  |  | 3767 |  | 
| 1127 |  |  |  |  |  |  | } | 
| 1128 |  |  |  |  |  |  |  | 
| 1129 |  |  |  |  |  |  | ####################### ODDS AND ENDS ####################### | 
| 1130 |  |  |  |  |  |  |  | 
| 1131 |  |  |  |  |  |  | # Those things that don't fit anywhere else.  If a specific name | 
| 1132 |  |  |  |  |  |  | # maps to the below table, that class and method are used, otherwise | 
| 1133 |  |  |  |  |  |  | # it goes into a SimpleValue (I think this is a good argument for why | 
| 1134 |  |  |  |  |  |  | # we need a generic mechanism for storing annotation) | 
| 1135 |  |  |  |  |  |  |  | 
| 1136 |  |  |  |  |  |  | sub _generic_simplevalue { | 
| 1137 | 16 |  |  | 16 |  | 36 | my ($self, $data) = @_; | 
| 1138 |  |  |  |  |  |  | $self->annotation_collection->add_Annotation( | 
| 1139 |  |  |  |  |  |  | Bio::Annotation::SimpleValue->new(-tagname => lc($data->{NAME}), | 
| 1140 |  |  |  |  |  |  | -value => $data->{DATA}) | 
| 1141 | 16 |  |  |  |  | 49 | ); | 
| 1142 |  |  |  |  |  |  | } | 
| 1143 |  |  |  |  |  |  |  | 
| 1144 |  |  |  | 15 | 0 |  | sub noop {} | 
| 1145 |  |  |  |  |  |  |  | 
| 1146 |  |  |  |  |  |  | sub _debug { | 
| 1147 | 0 |  |  | 0 |  |  | my ($self, $data) = @_; | 
| 1148 | 0 |  |  |  |  |  | $self->debug(Dumper($data)); | 
| 1149 |  |  |  |  |  |  | } | 
| 1150 |  |  |  |  |  |  |  | 
| 1151 |  |  |  |  |  |  |  | 
| 1152 |  |  |  |  |  |  | 1; |