| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
# |
|
2
|
|
|
|
|
|
|
# BioPerl module for Bio::Phenotype::OMIM::OMIMparser |
|
3
|
|
|
|
|
|
|
# |
|
4
|
|
|
|
|
|
|
# Please direct questions and support issues to |
|
5
|
|
|
|
|
|
|
# |
|
6
|
|
|
|
|
|
|
# Cared for by Christian M. Zmasek or |
|
7
|
|
|
|
|
|
|
# |
|
8
|
|
|
|
|
|
|
# (c) Christian M. Zmasek, czmasek-at-burnham.org, 2002. |
|
9
|
|
|
|
|
|
|
# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002. |
|
10
|
|
|
|
|
|
|
# |
|
11
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself. |
|
12
|
|
|
|
|
|
|
# Refer to the Perl Artistic License (see the license accompanying this |
|
13
|
|
|
|
|
|
|
# software package, or see http://www.perl.com/language/misc/Artistic.html) |
|
14
|
|
|
|
|
|
|
# for the terms under which you may use, modify, and redistribute this module. |
|
15
|
|
|
|
|
|
|
# |
|
16
|
|
|
|
|
|
|
# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED |
|
17
|
|
|
|
|
|
|
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF |
|
18
|
|
|
|
|
|
|
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. |
|
19
|
|
|
|
|
|
|
# |
|
20
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
# POD documentation - main docs before the code |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=head1 NAME |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
Bio::Phenotype::OMIM::OMIMparser - parser for the OMIM database |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
use Bio::Phenotype::OMIM::OMIMparser; |
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
# The OMIM database is available as textfile at: |
|
33
|
|
|
|
|
|
|
# ftp://ncbi.nlm.nih.gov/repository/OMIM/omim.txt.Z |
|
34
|
|
|
|
|
|
|
# The genemap is available as textfile at: |
|
35
|
|
|
|
|
|
|
# ftp://ncbi.nlm.nih.gov/repository/OMIM/genemap |
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
$omim_parser = Bio::Phenotype::OMIM::OMIMparser->new( -genemap => "/path/to/genemap", |
|
38
|
|
|
|
|
|
|
-omimtext => "/path/to/omim.txt" ); |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
while ( my $omim_entry = $omim_parser->next_phenotype() ) { |
|
41
|
|
|
|
|
|
|
# This prints everything. |
|
42
|
|
|
|
|
|
|
print( $omim_entry->to_string() ); |
|
43
|
|
|
|
|
|
|
print "\n\n"; |
|
44
|
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
# This gets individual data (some of them object-arrays) |
|
46
|
|
|
|
|
|
|
# (and illustrates the relevant methods of OMIMentry). |
|
47
|
|
|
|
|
|
|
my $numb = $omim_entry->MIM_number(); # *FIELD* NO |
|
48
|
|
|
|
|
|
|
my $title = $omim_entry->title(); # *FIELD* TI - first line |
|
49
|
|
|
|
|
|
|
my $alt = $omim_entry->alternative_titles_and_symbols(); # *FIELD* TI - additional lines |
|
50
|
|
|
|
|
|
|
my $mtt = $omim_entry->more_than_two_genes(); # "#" before title |
|
51
|
|
|
|
|
|
|
my $sep = $omim_entry->is_separate(); # "*" before title |
|
52
|
|
|
|
|
|
|
my $desc = $omim_entry->description(); # *FIELD* TX |
|
53
|
|
|
|
|
|
|
my $mm = $omim_entry->mapping_method(); # from genemap |
|
54
|
|
|
|
|
|
|
my $gs = $omim_entry->gene_status(); # from genemap |
|
55
|
|
|
|
|
|
|
my $cr = $omim_entry->created(); # *FIELD* CD |
|
56
|
|
|
|
|
|
|
my $cont = $omim_entry->contributors(); # *FIELD* CN |
|
57
|
|
|
|
|
|
|
my $ed = $omim_entry->edited(); # *FIELD* ED |
|
58
|
|
|
|
|
|
|
my $sa = $omim_entry->additional_references(); # *FIELD* SA |
|
59
|
|
|
|
|
|
|
my $cs = $omim_entry->clinical_symptoms_raw(); # *FIELD* CS |
|
60
|
|
|
|
|
|
|
my $comm = $omim_entry->comment(); # from genemap |
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
my $mini_mim = $omim_entry->miniMIM(); # *FIELD* MN |
|
63
|
|
|
|
|
|
|
# A Bio::Phenotype::OMIM::MiniMIMentry object. |
|
64
|
|
|
|
|
|
|
# class Bio::Phenotype::OMIM::MiniMIMentry |
|
65
|
|
|
|
|
|
|
# provides the following: |
|
66
|
|
|
|
|
|
|
# - description() |
|
67
|
|
|
|
|
|
|
# - created() |
|
68
|
|
|
|
|
|
|
# - contributors() |
|
69
|
|
|
|
|
|
|
# - edited() |
|
70
|
|
|
|
|
|
|
# |
|
71
|
|
|
|
|
|
|
# Prints the contents of the MINI MIM entry (most OMIM entries do |
|
72
|
|
|
|
|
|
|
# not have MINI MIM entries, though). |
|
73
|
|
|
|
|
|
|
print $mini_mim->description()."\n"; |
|
74
|
|
|
|
|
|
|
print $mini_mim->created()."\n"; |
|
75
|
|
|
|
|
|
|
print $mini_mim->contributors()."\n"; |
|
76
|
|
|
|
|
|
|
print $mini_mim->edited()."\n"; |
|
77
|
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
my @corrs = $omim_entry->each_Correlate(); # from genemap |
|
79
|
|
|
|
|
|
|
# Array of Bio::Phenotype::Correlate objects. |
|
80
|
|
|
|
|
|
|
# class Bio::Phenotype::Correlate |
|
81
|
|
|
|
|
|
|
# provides the following: |
|
82
|
|
|
|
|
|
|
# - name() |
|
83
|
|
|
|
|
|
|
# - description() (not used) |
|
84
|
|
|
|
|
|
|
# - species() (always mouse) |
|
85
|
|
|
|
|
|
|
# - type() ("OMIM mouse correlate") |
|
86
|
|
|
|
|
|
|
# - comment() |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
my @refs = $omim_entry->each_Reference(); # *FIELD* RF |
|
89
|
|
|
|
|
|
|
# Array of Bio::Annotation::Reference objects. |
|
90
|
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
my @avs = $omim_entry->each_AllelicVariant(); # *FIELD* AV |
|
93
|
|
|
|
|
|
|
# Array of Bio::Phenotype::OMIM::OMIMentryAllelicVariant objects. |
|
94
|
|
|
|
|
|
|
# class Bio::Phenotype::OMIM::OMIMentryAllelicVariant |
|
95
|
|
|
|
|
|
|
# provides the following: |
|
96
|
|
|
|
|
|
|
# - number (e.g ".0001" ) |
|
97
|
|
|
|
|
|
|
# - title (e.g "ALCOHOL INTOLERANCE" ) |
|
98
|
|
|
|
|
|
|
# - symbol (e.g "ALDH2*2" ) |
|
99
|
|
|
|
|
|
|
# - description (e.g "The ALDH2*2-encoded protein has a change ..." ) |
|
100
|
|
|
|
|
|
|
# - aa_ori (used if information in the form "LYS123ARG" is found) |
|
101
|
|
|
|
|
|
|
# - aa_mut (used if information in the form "LYS123ARG" is found) |
|
102
|
|
|
|
|
|
|
# - position (used if information in the form "LYS123ARG" is found) |
|
103
|
|
|
|
|
|
|
# - additional_mutations (used for e.g. "1-BP DEL, 911T") |
|
104
|
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
my @cps = $omim_entry->each_CytoPosition(); # from genemap |
|
106
|
|
|
|
|
|
|
# Array of Bio::Map::CytoPosition objects. |
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
my @gss = $omim_entry->each_gene_symbol(); # from genemap |
|
109
|
|
|
|
|
|
|
# Array of strings. |
|
110
|
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
# do something ... |
|
112
|
|
|
|
|
|
|
} |
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
This parser returns Bio::Phenotype::OMIM::OMIMentry objects |
|
117
|
|
|
|
|
|
|
(which inherit from Bio::Phenotype::PhenotypeI). |
|
118
|
|
|
|
|
|
|
It parses the OMIM database available as |
|
119
|
|
|
|
|
|
|
ftp://ncbi.nlm.nih.gov/repository/OMIM/omim.txt.Z |
|
120
|
|
|
|
|
|
|
together with (optionally) the gene map file at |
|
121
|
|
|
|
|
|
|
ftp://ncbi.nlm.nih.gov/repository/OMIM/genemap. |
|
122
|
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=head1 FEEDBACK |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=head2 Mailing Lists |
|
127
|
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
|
129
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to the |
|
130
|
|
|
|
|
|
|
Bioperl mailing lists Your participation is much appreciated. |
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
|
133
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
|
134
|
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
=head2 Support |
|
136
|
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
|
138
|
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
I |
|
140
|
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
|
142
|
|
|
|
|
|
|
reponsive experts will be able look at the problem and quickly |
|
143
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
|
144
|
|
|
|
|
|
|
with code and data examples if at all possible. |
|
145
|
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
=head2 Reporting Bugs |
|
147
|
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
report bugs to the Bioperl bug tracking system to help us keep track |
|
149
|
|
|
|
|
|
|
the bugs and their resolution. Bug reports can be submitted via the |
|
150
|
|
|
|
|
|
|
web: |
|
151
|
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live/issues |
|
153
|
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
=head1 AUTHOR |
|
155
|
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
Christian M. Zmasek |
|
157
|
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
Email: czmasek-at-burnham.org or cmzmasek@yahoo.com |
|
159
|
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
WWW: http://monochrome-effect.net/ |
|
161
|
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
Address: |
|
163
|
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
Genomics Institute of the Novartis Research Foundation |
|
165
|
|
|
|
|
|
|
10675 John Jay Hopkins Drive |
|
166
|
|
|
|
|
|
|
San Diego, CA 92121 |
|
167
|
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
=head1 APPENDIX |
|
169
|
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
The rest of the documentation details each of the object |
|
171
|
|
|
|
|
|
|
methods. Internal methods are usually preceded with a _ |
|
172
|
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
=cut |
|
174
|
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
# Let the code begin... |
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
package Bio::Phenotype::OMIM::OMIMparser; |
|
180
|
|
|
|
|
|
|
|
|
181
|
1
|
|
|
1
|
|
470
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
22
|
|
|
182
|
|
|
|
|
|
|
|
|
183
|
1
|
|
|
1
|
|
306
|
use Bio::Root::IO; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
23
|
|
|
184
|
1
|
|
|
1
|
|
263
|
use Bio::Species; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
22
|
|
|
185
|
1
|
|
|
1
|
|
259
|
use Bio::Annotation::Reference; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
21
|
|
|
186
|
1
|
|
|
1
|
|
281
|
use Bio::Map::CytoPosition; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
24
|
|
|
187
|
1
|
|
|
1
|
|
324
|
use Bio::Phenotype::OMIM::OMIMentry; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
27
|
|
|
188
|
1
|
|
|
1
|
|
4
|
use Bio::Phenotype::OMIM::OMIMentryAllelicVariant; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
16
|
|
|
189
|
1
|
|
|
1
|
|
3
|
use Bio::Phenotype::Correlate; |
|
|
1
|
|
|
|
|
0
|
|
|
|
1
|
|
|
|
|
16
|
|
|
190
|
|
|
|
|
|
|
|
|
191
|
1
|
|
|
1
|
|
3
|
use base qw(Bio::Root::Root); |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
54
|
|
|
192
|
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
|
|
194
|
1
|
|
|
1
|
|
3
|
use constant DEFAULT_STATE => 0; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
44
|
|
|
195
|
1
|
|
|
1
|
|
3
|
use constant MIM_NUMBER_STATE => 1; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
33
|
|
|
196
|
1
|
|
|
1
|
|
2
|
use constant TITLE_STATE => 2; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
29
|
|
|
197
|
1
|
|
|
1
|
|
3
|
use constant TEXT_STATE => 3; |
|
|
1
|
|
|
|
|
0
|
|
|
|
1
|
|
|
|
|
29
|
|
|
198
|
1
|
|
|
1
|
|
3
|
use constant MINI_MIM_TEXT_STATE => 4; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
37
|
|
|
199
|
1
|
|
|
1
|
|
3
|
use constant ALLELIC_VARIANT_STATE => 5; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
29
|
|
|
200
|
1
|
|
|
1
|
|
3
|
use constant SEE_ALSO_STATE => 6; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
34
|
|
|
201
|
1
|
|
|
1
|
|
3
|
use constant REF_STATE => 7; |
|
|
1
|
|
|
|
|
0
|
|
|
|
1
|
|
|
|
|
35
|
|
|
202
|
1
|
|
|
1
|
|
3
|
use constant SYMPT_STATE => 8; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
31
|
|
|
203
|
1
|
|
|
1
|
|
3
|
use constant CONTRIBUTORS_STATE => 9; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
31
|
|
|
204
|
1
|
|
|
1
|
|
3
|
use constant CREATED_BY_STATE => 10; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
28
|
|
|
205
|
1
|
|
|
1
|
|
2
|
use constant EDITED_BY_STATE => 11; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
31
|
|
|
206
|
1
|
|
|
1
|
|
3
|
use constant MINI_MIM_EDITED_BY_STATE => 12; |
|
|
1
|
|
|
|
|
0
|
|
|
|
1
|
|
|
|
|
32
|
|
|
207
|
1
|
|
|
1
|
|
3
|
use constant MINI_MIM_CREATED_BY_STATE => 13; |
|
|
1
|
|
|
|
|
0
|
|
|
|
1
|
|
|
|
|
31
|
|
|
208
|
1
|
|
|
1
|
|
3
|
use constant MINI_MIM_CONTRIBUTORS_STATE => 14; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
54
|
|
|
209
|
1
|
|
|
1
|
|
3
|
use constant TRUE => 1; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
33
|
|
|
210
|
1
|
|
|
1
|
|
2
|
use constant FALSE => 0; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
2892
|
|
|
211
|
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
=head2 new |
|
215
|
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
Title : new |
|
217
|
|
|
|
|
|
|
Usage : $omim_parser = Bio::Phenotype::OMIM::OMIMparser->new( -genemap => "/path/to/genemap", |
|
218
|
|
|
|
|
|
|
-omimtext => "/path/to/omim.txt" ); |
|
219
|
|
|
|
|
|
|
Function: Creates a new OMIMparser. |
|
220
|
|
|
|
|
|
|
Returns : A new OMIMparser object. |
|
221
|
|
|
|
|
|
|
Args : -genemap => the genemap file name (optional) |
|
222
|
|
|
|
|
|
|
-omimtext => the omim text file name |
|
223
|
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
=cut |
|
225
|
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
sub new { |
|
227
|
2
|
|
|
2
|
1
|
6
|
my( $class, @args ) = @_; |
|
228
|
|
|
|
|
|
|
|
|
229
|
2
|
|
|
|
|
17
|
my $self = $class->SUPER::new( @args ); |
|
230
|
|
|
|
|
|
|
|
|
231
|
2
|
|
|
|
|
12
|
my ( $genemap_file_name, $omimtxt_file_name ) |
|
232
|
|
|
|
|
|
|
= $self->_rearrange( [ qw( GENEMAP OMIMTEXT ) ], @args ); |
|
233
|
|
|
|
|
|
|
|
|
234
|
2
|
|
|
|
|
10
|
$self->init(); |
|
235
|
|
|
|
|
|
|
|
|
236
|
2
|
50
|
|
|
|
7
|
$genemap_file_name && $self->genemap_file_name( $genemap_file_name ); |
|
237
|
|
|
|
|
|
|
|
|
238
|
1
|
50
|
|
|
|
4
|
$omimtxt_file_name && $self->omimtxt_file_name( $omimtxt_file_name); |
|
239
|
|
|
|
|
|
|
|
|
240
|
1
|
|
|
|
|
3
|
return $self; |
|
241
|
|
|
|
|
|
|
} |
|
242
|
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
=head2 next_phenotype |
|
247
|
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
Title : next_phenotype() |
|
249
|
|
|
|
|
|
|
Usage : while ( my $omim_entry = $omim_parser->next_phenotype() ) { |
|
250
|
|
|
|
|
|
|
# do something with $omim_entry |
|
251
|
|
|
|
|
|
|
} |
|
252
|
|
|
|
|
|
|
Function: Returns an Bio::Phenotype::OMIM::OMIMentry or |
|
253
|
|
|
|
|
|
|
undef once the end of the omim text file is reached. |
|
254
|
|
|
|
|
|
|
Returns : A Bio::Phenotype::OMIM::OMIMentry. |
|
255
|
|
|
|
|
|
|
Args : |
|
256
|
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
=cut |
|
258
|
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
sub next_phenotype { |
|
260
|
2
|
|
|
2
|
1
|
652
|
my ( $self ) = @_; |
|
261
|
|
|
|
|
|
|
|
|
262
|
2
|
50
|
|
|
|
5
|
unless( defined( $self->_OMIM_text_file() ) ) { |
|
263
|
0
|
|
|
|
|
0
|
$self->_no_OMIM_text_file_provided_error(); |
|
264
|
|
|
|
|
|
|
} |
|
265
|
|
|
|
|
|
|
|
|
266
|
2
|
50
|
|
|
|
4
|
if ( $self->_done() == TRUE ) { |
|
267
|
0
|
|
|
|
|
0
|
return; |
|
268
|
|
|
|
|
|
|
} |
|
269
|
|
|
|
|
|
|
|
|
270
|
2
|
|
|
|
|
3
|
my $fieldtag = ""; |
|
271
|
2
|
|
|
|
|
2
|
my $contents = ""; |
|
272
|
2
|
|
|
|
|
3
|
my $line = ""; |
|
273
|
2
|
|
|
|
|
3
|
my $state = DEFAULT_STATE; |
|
274
|
2
|
|
|
|
|
2
|
my $saw_mini_min_flag = FALSE; |
|
275
|
2
|
|
|
|
|
3
|
my %record = (); |
|
276
|
|
|
|
|
|
|
|
|
277
|
2
|
|
|
|
|
5
|
while( $line = ( $self->_OMIM_text_file )->_readline() ) { |
|
278
|
227
|
100
|
|
|
|
417
|
if ( $line =~ /^\s*\*RECORD\*/ ) { |
|
|
|
100
|
|
|
|
|
|
|
279
|
2
|
100
|
|
|
|
3
|
if ( $self->_is_not_first_record() == TRUE ) { |
|
280
|
1
|
|
|
|
|
3
|
$self->_add_to_hash( $state, $contents,\%record ); |
|
281
|
1
|
|
|
|
|
2
|
my $omim_entry = $self->_createOMIMentry( \%record ); |
|
282
|
1
|
|
|
|
|
6
|
return $omim_entry; |
|
283
|
|
|
|
|
|
|
} |
|
284
|
|
|
|
|
|
|
else { |
|
285
|
1
|
|
|
|
|
3
|
$self->_is_not_first_record( TRUE ); |
|
286
|
|
|
|
|
|
|
} |
|
287
|
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
} |
|
289
|
|
|
|
|
|
|
elsif ( $line =~ /^\s*\*FIELD\*\s*(\S+)/ ) { |
|
290
|
36
|
|
|
|
|
43
|
$fieldtag = $1; |
|
291
|
36
|
100
|
|
|
|
43
|
if ( $state != DEFAULT_STATE ) { |
|
292
|
34
|
|
|
|
|
48
|
$self->_add_to_hash( $state, $contents,\%record ); |
|
293
|
|
|
|
|
|
|
} |
|
294
|
36
|
|
|
|
|
28
|
$contents = ""; |
|
295
|
|
|
|
|
|
|
|
|
296
|
36
|
100
|
|
|
|
133
|
if ( $fieldtag eq "NO" ) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
297
|
2
|
|
|
|
|
3
|
$state = MIM_NUMBER_STATE; |
|
298
|
2
|
|
|
|
|
4
|
$saw_mini_min_flag = FALSE; |
|
299
|
|
|
|
|
|
|
} |
|
300
|
|
|
|
|
|
|
elsif ( $fieldtag eq "TI" ) { |
|
301
|
2
|
|
|
|
|
3
|
$state = TITLE_STATE; |
|
302
|
2
|
|
|
|
|
4
|
$saw_mini_min_flag = FALSE; |
|
303
|
|
|
|
|
|
|
} |
|
304
|
|
|
|
|
|
|
elsif ( $fieldtag eq "TX" ) { |
|
305
|
4
|
|
|
|
|
4
|
$state = TEXT_STATE; |
|
306
|
4
|
|
|
|
|
6
|
$saw_mini_min_flag = FALSE; |
|
307
|
|
|
|
|
|
|
} |
|
308
|
|
|
|
|
|
|
elsif ( $fieldtag eq "MN" ) { |
|
309
|
2
|
|
|
|
|
2
|
$state = MINI_MIM_TEXT_STATE; |
|
310
|
2
|
|
|
|
|
3
|
$saw_mini_min_flag = TRUE; |
|
311
|
|
|
|
|
|
|
} |
|
312
|
|
|
|
|
|
|
elsif ( $fieldtag eq "AV" ) { |
|
313
|
2
|
|
|
|
|
1
|
$state = ALLELIC_VARIANT_STATE; |
|
314
|
2
|
|
|
|
|
3
|
$saw_mini_min_flag = FALSE; |
|
315
|
|
|
|
|
|
|
} |
|
316
|
|
|
|
|
|
|
elsif ( $fieldtag eq "SA" ) { |
|
317
|
2
|
|
|
|
|
3
|
$state = SEE_ALSO_STATE; |
|
318
|
2
|
|
|
|
|
3
|
$saw_mini_min_flag = FALSE; |
|
319
|
|
|
|
|
|
|
} |
|
320
|
|
|
|
|
|
|
elsif ( $fieldtag eq "RF" ) { |
|
321
|
2
|
|
|
|
|
2
|
$state = REF_STATE; |
|
322
|
2
|
|
|
|
|
3
|
$saw_mini_min_flag = FALSE; |
|
323
|
|
|
|
|
|
|
} |
|
324
|
|
|
|
|
|
|
elsif ( $fieldtag eq "CS" ) { |
|
325
|
2
|
|
|
|
|
3
|
$state = SYMPT_STATE; |
|
326
|
2
|
|
|
|
|
2
|
$saw_mini_min_flag = FALSE; |
|
327
|
|
|
|
|
|
|
} |
|
328
|
|
|
|
|
|
|
elsif ( $fieldtag eq "CN" ) { |
|
329
|
6
|
100
|
|
|
|
7
|
if ( $saw_mini_min_flag == TRUE ) { |
|
330
|
2
|
|
|
|
|
4
|
$state = MINI_MIM_CONTRIBUTORS_STATE; |
|
331
|
|
|
|
|
|
|
} |
|
332
|
|
|
|
|
|
|
else { |
|
333
|
4
|
|
|
|
|
6
|
$state = CONTRIBUTORS_STATE; |
|
334
|
|
|
|
|
|
|
} |
|
335
|
|
|
|
|
|
|
} |
|
336
|
|
|
|
|
|
|
elsif ( $fieldtag eq "CD" ) { |
|
337
|
6
|
100
|
|
|
|
8
|
if ( $saw_mini_min_flag == TRUE ) { |
|
338
|
2
|
|
|
|
|
3
|
$state = MINI_MIM_CREATED_BY_STATE; |
|
339
|
|
|
|
|
|
|
} |
|
340
|
|
|
|
|
|
|
else { |
|
341
|
4
|
|
|
|
|
7
|
$state = CREATED_BY_STATE; |
|
342
|
|
|
|
|
|
|
} |
|
343
|
|
|
|
|
|
|
} |
|
344
|
|
|
|
|
|
|
elsif ( $fieldtag eq "ED" ) { |
|
345
|
6
|
100
|
|
|
|
8
|
if ( $saw_mini_min_flag == TRUE ) { |
|
346
|
2
|
|
|
|
|
3
|
$state = MINI_MIM_EDITED_BY_STATE; |
|
347
|
|
|
|
|
|
|
} |
|
348
|
|
|
|
|
|
|
else { |
|
349
|
4
|
|
|
|
|
7
|
$state = EDITED_BY_STATE; |
|
350
|
|
|
|
|
|
|
} |
|
351
|
|
|
|
|
|
|
} |
|
352
|
|
|
|
|
|
|
else { |
|
353
|
0
|
|
|
|
|
0
|
print "Warning: Unknown tag: $fieldtag\n"; |
|
354
|
|
|
|
|
|
|
} |
|
355
|
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
} |
|
357
|
|
|
|
|
|
|
else { |
|
358
|
189
|
|
|
|
|
244
|
$contents .= $line; |
|
359
|
|
|
|
|
|
|
} |
|
360
|
|
|
|
|
|
|
} |
|
361
|
|
|
|
|
|
|
|
|
362
|
1
|
|
|
|
|
2
|
$self->_OMIM_text_file()->close(); |
|
363
|
1
|
|
|
|
|
3
|
$self->_done( TRUE ); |
|
364
|
|
|
|
|
|
|
|
|
365
|
1
|
50
|
|
|
|
5
|
unless( %record ) { |
|
366
|
0
|
|
|
|
|
0
|
$self->_not_a_OMIM_text_file_error(); |
|
367
|
|
|
|
|
|
|
} |
|
368
|
|
|
|
|
|
|
|
|
369
|
1
|
|
|
|
|
3
|
$self->_add_to_hash( $state, $contents,\%record ); |
|
370
|
|
|
|
|
|
|
|
|
371
|
1
|
|
|
|
|
3
|
my $omim_entry = $self->_createOMIMentry( \%record ); |
|
372
|
|
|
|
|
|
|
|
|
373
|
1
|
|
|
|
|
4
|
return $omim_entry; |
|
374
|
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
} # next_phenotype |
|
376
|
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
=head2 init |
|
381
|
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
Title : init() |
|
383
|
|
|
|
|
|
|
Usage : $omim_parser->init(); |
|
384
|
|
|
|
|
|
|
Function: Initializes this OMIMparser to all "". |
|
385
|
|
|
|
|
|
|
Returns : |
|
386
|
|
|
|
|
|
|
Args : |
|
387
|
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
=cut |
|
389
|
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
sub init { |
|
391
|
2
|
|
|
2
|
1
|
3
|
my ( $self ) = @_; |
|
392
|
|
|
|
|
|
|
|
|
393
|
2
|
|
|
|
|
8
|
$self->genemap_file_name( "" ); |
|
394
|
2
|
|
|
|
|
6
|
$self->omimtxt_file_name( "" ); |
|
395
|
2
|
|
|
|
|
4
|
$self->_genemap_hash( {} ); |
|
396
|
2
|
|
|
|
|
5
|
$self->_OMIM_text_file( undef ); |
|
397
|
2
|
|
|
|
|
6
|
$self->_is_not_first_record( FALSE ); |
|
398
|
2
|
|
|
|
|
6
|
$self->_done( FALSE ); |
|
399
|
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
} # init |
|
401
|
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
=head2 genemap_file_name |
|
406
|
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
Title : genemap_file_name |
|
408
|
|
|
|
|
|
|
Usage : $omimparser->genemap_file_name( "genemap" ); |
|
409
|
|
|
|
|
|
|
Function: Set/get for the genemap file name. |
|
410
|
|
|
|
|
|
|
Returns : The genemap file name [string]. |
|
411
|
|
|
|
|
|
|
Args : The genemap file name [string] (optional). |
|
412
|
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
=cut |
|
414
|
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
sub genemap_file_name { |
|
416
|
4
|
|
|
4
|
1
|
4
|
my ( $self, $value ) = @_; |
|
417
|
|
|
|
|
|
|
|
|
418
|
4
|
50
|
|
|
|
11
|
if ( defined $value ) { |
|
419
|
4
|
|
|
|
|
7
|
$self->{ "_genemap_file_name" } = $value; |
|
420
|
4
|
|
|
|
|
9
|
$self->_genemap_hash( $self->_read_genemap( $value ) ); |
|
421
|
|
|
|
|
|
|
} |
|
422
|
|
|
|
|
|
|
|
|
423
|
3
|
|
|
|
|
4
|
return $self->{ "_genemap_file_name" }; |
|
424
|
|
|
|
|
|
|
} # genemap_file_name |
|
425
|
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
|
|
428
|
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
=head2 omimtxt_file_name |
|
430
|
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
Title : omimtxt_file_name |
|
432
|
|
|
|
|
|
|
Usage : $omimparser->omimtxt_file_name( "omim.txt" ); |
|
433
|
|
|
|
|
|
|
Function: Set/get for the omim text file name. |
|
434
|
|
|
|
|
|
|
Returns : The the omim text file name [string]. |
|
435
|
|
|
|
|
|
|
Args : The the omim text file name [string] (optional). |
|
436
|
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
=cut |
|
438
|
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
sub omimtxt_file_name { |
|
440
|
3
|
|
|
3
|
1
|
5
|
my ( $self, $value ) = @_; |
|
441
|
|
|
|
|
|
|
|
|
442
|
3
|
50
|
|
|
|
5
|
if ( defined $value ) { |
|
443
|
3
|
|
|
|
|
5
|
$self->{ "_omimtxt_file_name" } = $value; |
|
444
|
3
|
100
|
|
|
|
10
|
if ( $value =~ /\W/ ) { |
|
445
|
1
|
|
|
|
|
5
|
$self->_OMIM_text_file( Bio::Root::IO->new->new( -file => $value ) ); |
|
446
|
|
|
|
|
|
|
} |
|
447
|
|
|
|
|
|
|
} |
|
448
|
|
|
|
|
|
|
|
|
449
|
3
|
|
|
|
|
4
|
return $self->{ "_omimtxt_file_name" }; |
|
450
|
|
|
|
|
|
|
} # omimtxt_file_name |
|
451
|
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
sub _createOMIMentry { |
|
457
|
2
|
|
|
2
|
|
2
|
my ( $self, $record_ref ) = @_; |
|
458
|
|
|
|
|
|
|
|
|
459
|
2
|
|
|
|
|
14
|
my $omim_entry = Bio::Phenotype::OMIM::OMIMentry->new(); |
|
460
|
2
|
|
|
|
|
4
|
my $mini_mim = Bio::Phenotype::OMIM::MiniMIMentry->new(); |
|
461
|
|
|
|
|
|
|
|
|
462
|
2
|
|
|
|
|
9
|
while ( ( my $key, my $val ) = each( %$record_ref ) ) { |
|
463
|
|
|
|
|
|
|
|
|
464
|
28
|
|
|
|
|
36
|
$val =~ s/^\s+//; |
|
465
|
28
|
|
|
|
|
104
|
$val =~ s/\s+$//; |
|
466
|
|
|
|
|
|
|
|
|
467
|
28
|
100
|
|
|
|
120
|
if ( $key == MIM_NUMBER_STATE ) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
468
|
2
|
|
|
|
|
3
|
$val =~ s/\s+//g; |
|
469
|
2
|
|
|
|
|
3
|
$val =~ s/\D//g; |
|
470
|
|
|
|
|
|
|
|
|
471
|
2
|
|
|
|
|
3
|
$omim_entry->MIM_number( $val ); |
|
472
|
|
|
|
|
|
|
|
|
473
|
2
|
|
|
|
|
4
|
my $gm = $self->_genemap_hash(); |
|
474
|
2
|
50
|
|
|
|
5
|
if ( exists( $$gm{ $val } ) ) { |
|
475
|
2
|
|
|
|
|
10
|
$self->_parse_genemap( $omim_entry, $val ); |
|
476
|
|
|
|
|
|
|
} |
|
477
|
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
} |
|
479
|
|
|
|
|
|
|
elsif ( $key == TITLE_STATE ) { |
|
480
|
2
|
|
|
|
|
5
|
my ( $title, $alt_titles ) = $self->_parse_title( $val ); |
|
481
|
2
|
|
|
|
|
4
|
$omim_entry->title( $title ); |
|
482
|
2
|
|
|
|
|
4
|
$omim_entry->alternative_titles_and_symbols( $alt_titles ); |
|
483
|
2
|
100
|
|
|
|
7
|
if ( $title =~ /^\*/ ) { |
|
|
|
50
|
|
|
|
|
|
|
484
|
1
|
|
|
|
|
2
|
$omim_entry->is_separate( TRUE ); |
|
485
|
|
|
|
|
|
|
} |
|
486
|
|
|
|
|
|
|
elsif ( $title =~ /^#/ ) { |
|
487
|
1
|
|
|
|
|
2
|
$omim_entry->more_than_two_genes( TRUE ); |
|
488
|
|
|
|
|
|
|
} |
|
489
|
|
|
|
|
|
|
} |
|
490
|
|
|
|
|
|
|
elsif ( $key == TEXT_STATE ) { |
|
491
|
2
|
50
|
|
|
|
6
|
$val = undef if($val =~ /DESCRIPTION1\nDESCRIPTION2/); |
|
492
|
2
|
|
|
|
|
4
|
$omim_entry->description( $val ); |
|
493
|
|
|
|
|
|
|
} |
|
494
|
|
|
|
|
|
|
elsif ( $key == ALLELIC_VARIANT_STATE ) { |
|
495
|
2
|
|
|
|
|
6
|
my @allelic_variants = $self->_parse_allelic_variants( $val ); |
|
496
|
2
|
|
|
|
|
5
|
$omim_entry->add_AllelicVariants( @allelic_variants ); |
|
497
|
|
|
|
|
|
|
} |
|
498
|
|
|
|
|
|
|
elsif ( $key == SEE_ALSO_STATE ) { |
|
499
|
2
|
|
|
|
|
4
|
$omim_entry->additional_references( $val ); |
|
500
|
|
|
|
|
|
|
} |
|
501
|
|
|
|
|
|
|
elsif ( $key == REF_STATE ) { |
|
502
|
2
|
|
|
|
|
4
|
my @refs = $self->_parse_references( $val ); |
|
503
|
2
|
|
|
|
|
8
|
$omim_entry->add_References( @refs ); |
|
504
|
|
|
|
|
|
|
} |
|
505
|
|
|
|
|
|
|
elsif ( $key == SYMPT_STATE ) { |
|
506
|
2
|
50
|
|
|
|
6
|
$val = '' if($val eq 'clinical symptoms'); |
|
507
|
2
|
|
|
|
|
4
|
$omim_entry->clinical_symptoms_raw( $val ); |
|
508
|
|
|
|
|
|
|
} |
|
509
|
|
|
|
|
|
|
elsif ( $key == CONTRIBUTORS_STATE ) { |
|
510
|
2
|
50
|
|
|
|
6
|
$val = undef if($val =~ /cn1\ncn2\ncn3/); |
|
511
|
2
|
|
|
|
|
4
|
$omim_entry->contributors( $val ); |
|
512
|
|
|
|
|
|
|
} |
|
513
|
|
|
|
|
|
|
elsif ( $key == CREATED_BY_STATE ) { |
|
514
|
2
|
50
|
|
|
|
5
|
$val = undef if($val =~ /cd1\ncd2\ncd3/); |
|
515
|
2
|
|
|
|
|
3
|
$omim_entry->created( $val ); |
|
516
|
|
|
|
|
|
|
} |
|
517
|
|
|
|
|
|
|
elsif ( $key == EDITED_BY_STATE ) { |
|
518
|
2
|
50
|
|
|
|
7
|
$val = undef if($val =~ /ed1\ned2\ned3/); |
|
519
|
2
|
|
|
|
|
4
|
$omim_entry->edited( $val ); |
|
520
|
|
|
|
|
|
|
} |
|
521
|
|
|
|
|
|
|
elsif ( $key == MINI_MIM_TEXT_STATE ) { |
|
522
|
2
|
|
|
|
|
4
|
$mini_mim->description( $val ); |
|
523
|
|
|
|
|
|
|
} |
|
524
|
|
|
|
|
|
|
elsif ( $key == MINI_MIM_CONTRIBUTORS_STATE ) { |
|
525
|
2
|
|
|
|
|
4
|
$mini_mim->contributors( $val ); |
|
526
|
|
|
|
|
|
|
} |
|
527
|
|
|
|
|
|
|
elsif ( $key == MINI_MIM_CREATED_BY_STATE ) { |
|
528
|
2
|
|
|
|
|
4
|
$mini_mim->created( $val ); |
|
529
|
|
|
|
|
|
|
} |
|
530
|
|
|
|
|
|
|
elsif ( $key == MINI_MIM_EDITED_BY_STATE ) { |
|
531
|
2
|
|
|
|
|
4
|
$mini_mim->edited( $val ); |
|
532
|
|
|
|
|
|
|
} |
|
533
|
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
} |
|
535
|
|
|
|
|
|
|
|
|
536
|
2
|
|
|
|
|
6
|
my $man = Bio::Species->new(); |
|
537
|
2
|
|
|
|
|
4
|
$man->classification( qw( sapiens Homo ) ); |
|
538
|
2
|
|
|
|
|
4
|
$man->common_name( "man" ); |
|
539
|
2
|
|
|
|
|
4
|
$omim_entry->species( $man ); |
|
540
|
2
|
|
|
|
|
5
|
$omim_entry->miniMIM( $mini_mim ); |
|
541
|
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
# parse the symptoms text into a hash-based structure. |
|
543
|
2
|
|
|
|
|
5
|
$self->_finer_parse_symptoms($omim_entry); |
|
544
|
|
|
|
|
|
|
|
|
545
|
2
|
|
|
|
|
3
|
return $omim_entry; |
|
546
|
|
|
|
|
|
|
|
|
547
|
|
|
|
|
|
|
} # _createOMIMentry |
|
548
|
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
sub _finer_parse_symptoms { |
|
551
|
2
|
|
|
2
|
|
2
|
my ($self, $omim_entry) = @_; |
|
552
|
2
|
|
|
|
|
11
|
my $text = $omim_entry->clinical_symptoms_raw; |
|
553
|
2
|
50
|
|
|
|
5
|
if( $text ) { |
|
554
|
0
|
|
|
|
|
0
|
my $part; |
|
555
|
0
|
|
|
|
|
0
|
for my $line (split /\n/, $text){ |
|
556
|
0
|
0
|
|
|
|
0
|
if ($line =~ /^([\w\s,]+)\:\s*$/) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
557
|
0
|
|
|
|
|
0
|
$part = $1; |
|
558
|
|
|
|
|
|
|
} elsif( $line =~ /^\s+$/ ) { |
|
559
|
|
|
|
|
|
|
} elsif($line =~ /^(\s+)([^;]+)\;?\s*$/){ |
|
560
|
0
|
|
|
|
|
0
|
my $symptom = $2; |
|
561
|
0
|
0
|
|
|
|
0
|
if( ! $part ) { |
|
562
|
|
|
|
|
|
|
# $self->warn("$text\nline='$line'\n"); |
|
563
|
0
|
|
|
|
|
0
|
next; |
|
564
|
|
|
|
|
|
|
} |
|
565
|
0
|
|
|
|
|
0
|
$omim_entry->add_clinical_symptoms($part, $symptom); |
|
566
|
|
|
|
|
|
|
} |
|
567
|
|
|
|
|
|
|
} |
|
568
|
|
|
|
|
|
|
} |
|
569
|
2
|
|
|
|
|
4
|
$omim_entry->clinical_symptoms_raw(''); |
|
570
|
|
|
|
|
|
|
} |
|
571
|
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
sub _parse_genemap { |
|
573
|
2
|
|
|
2
|
|
2
|
my ( $self, $omim_entry, $val ) = @_; |
|
574
|
|
|
|
|
|
|
|
|
575
|
2
|
|
|
|
|
2
|
my $genemap_line = ${ $self->_genemap_hash() }{ $val }; |
|
|
2
|
|
|
|
|
4
|
|
|
576
|
2
|
|
|
|
|
11
|
my @a = split( /\|/, $genemap_line ); |
|
577
|
|
|
|
|
|
|
|
|
578
|
2
|
|
|
|
|
3
|
my $locations = $a[ 4 ]; |
|
579
|
2
|
50
|
|
|
|
4
|
if ( defined ( $locations ) ) { |
|
580
|
2
|
|
|
|
|
2
|
$locations =~ s/\s+//g; |
|
581
|
2
|
|
|
|
|
4
|
my @ls = split( /[,;]/, $locations ); |
|
582
|
2
|
|
|
|
|
2
|
my @cps; |
|
583
|
2
|
|
|
|
|
3
|
foreach my $l ( @ls ) { |
|
584
|
2
|
|
|
|
|
14
|
my $cp = Bio::Map::CytoPosition->new( -value => $l ); |
|
585
|
2
|
|
|
|
|
5
|
push( @cps, $cp ); |
|
586
|
|
|
|
|
|
|
} |
|
587
|
2
|
|
|
|
|
7
|
$omim_entry->add_CytoPositions( @cps ); |
|
588
|
|
|
|
|
|
|
} |
|
589
|
|
|
|
|
|
|
|
|
590
|
2
|
|
|
|
|
2
|
my $gene_symbols = $a[ 5 ]; |
|
591
|
2
|
50
|
|
|
|
4
|
if ( defined ( $gene_symbols ) ) { |
|
592
|
2
|
|
|
|
|
4
|
$gene_symbols =~ s/\s+//g; |
|
593
|
2
|
|
|
|
|
4
|
my @gss = split( /[,;]/, $gene_symbols ); |
|
594
|
2
|
|
|
|
|
8
|
$omim_entry->add_gene_symbols( @gss ); |
|
595
|
|
|
|
|
|
|
} |
|
596
|
|
|
|
|
|
|
|
|
597
|
2
|
|
|
|
|
1
|
my $mouse_correlates = $a[ 16 ]; |
|
598
|
2
|
50
|
|
|
|
5
|
if ( defined ( $mouse_correlates ) ) { |
|
599
|
2
|
|
|
|
|
2
|
$mouse_correlates =~ s/\s+//g; |
|
600
|
2
|
|
|
|
|
3
|
my @mcs = split( /[,;]/, $mouse_correlates ); |
|
601
|
2
|
|
|
|
|
2
|
my @cs; |
|
602
|
2
|
|
|
|
|
3
|
foreach my $mc ( @mcs ) { |
|
603
|
2
|
|
|
|
|
4
|
my $mouse = Bio::Species->new(); |
|
604
|
2
|
|
|
|
|
4
|
$mouse->classification( qw( musculus Mus ) ); |
|
605
|
2
|
|
|
|
|
6
|
$mouse->common_name( "mouse" ); |
|
606
|
2
|
|
|
|
|
10
|
my $c = Bio::Phenotype::Correlate->new(); |
|
607
|
2
|
|
|
|
|
7
|
$c->name( $mc ); |
|
608
|
2
|
|
|
|
|
4
|
$c->species( $mouse ); |
|
609
|
2
|
|
|
|
|
4
|
$c->type( "OMIM mouse correlate" ); |
|
610
|
|
|
|
|
|
|
|
|
611
|
2
|
|
|
|
|
4
|
push( @cs, $c ); |
|
612
|
|
|
|
|
|
|
} |
|
613
|
2
|
|
|
|
|
9
|
$omim_entry->add_Correlates( @cs ); |
|
614
|
|
|
|
|
|
|
} |
|
615
|
|
|
|
|
|
|
|
|
616
|
2
|
50
|
|
|
|
8
|
$omim_entry->gene_status( $a[ 6 ] ) if defined $a[ 6 ]; |
|
617
|
2
|
50
|
|
|
|
5
|
$omim_entry->mapping_method( $a[ 10 ] ) if defined $a[ 10 ]; |
|
618
|
2
|
50
|
|
|
|
6
|
$omim_entry->comment( $a[ 11 ] ) if defined $a[ 11 ]; |
|
619
|
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
} # _parse_genemap |
|
621
|
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
|
|
624
|
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
sub _parse_allelic_variants { |
|
626
|
2
|
|
|
2
|
|
3
|
my ( $self, $text ) = @_; |
|
627
|
|
|
|
|
|
|
|
|
628
|
2
|
|
|
|
|
2
|
my @allelic_variants; |
|
629
|
2
|
|
|
|
|
2
|
my $number = ""; |
|
630
|
2
|
|
|
|
|
2
|
my $title = ""; |
|
631
|
2
|
|
|
|
|
3
|
my $symbol_mut_line = ""; |
|
632
|
2
|
|
|
|
|
2
|
my $prev_line = ""; |
|
633
|
2
|
|
|
|
|
2
|
my $description = ""; |
|
634
|
2
|
|
|
|
|
2
|
my $saw_empty_line = FALSE; |
|
635
|
|
|
|
|
|
|
|
|
636
|
2
|
|
|
|
|
18
|
my @lines = split( /\n/, $text ); |
|
637
|
|
|
|
|
|
|
|
|
638
|
2
|
|
|
|
|
4
|
foreach my $line ( @lines ) { |
|
639
|
84
|
100
|
|
|
|
196
|
if ( $line !~ /\w/ ) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
640
|
22
|
|
|
|
|
16
|
$saw_empty_line = TRUE; |
|
641
|
|
|
|
|
|
|
} |
|
642
|
|
|
|
|
|
|
elsif ( $line =~ /^\s*(\.\d+)/ ) { |
|
643
|
12
|
|
|
|
|
19
|
my $current_number = $1; |
|
644
|
12
|
100
|
|
|
|
16
|
if ( $number ne "" ) { |
|
645
|
10
|
|
|
|
|
14
|
my $allelic_variant = $self->_create_allelic_variant( $number, $title, |
|
646
|
|
|
|
|
|
|
$symbol_mut_line, $description ); |
|
647
|
|
|
|
|
|
|
|
|
648
|
10
|
|
|
|
|
13
|
push( @allelic_variants, $allelic_variant ); |
|
649
|
|
|
|
|
|
|
} |
|
650
|
12
|
|
|
|
|
6
|
$number = $current_number; |
|
651
|
12
|
|
|
|
|
9
|
$title = ""; |
|
652
|
12
|
|
|
|
|
10
|
$prev_line = ""; |
|
653
|
12
|
|
|
|
|
5
|
$symbol_mut_line = ""; |
|
654
|
12
|
|
|
|
|
10
|
$description = ""; |
|
655
|
12
|
|
|
|
|
12
|
$saw_empty_line = FALSE; |
|
656
|
|
|
|
|
|
|
} |
|
657
|
|
|
|
|
|
|
elsif ( $title eq "" ) { |
|
658
|
12
|
|
|
|
|
11
|
$title = $line; |
|
659
|
|
|
|
|
|
|
} |
|
660
|
|
|
|
|
|
|
elsif ( $saw_empty_line == FALSE ) { |
|
661
|
20
|
|
|
|
|
15
|
$prev_line = $line; |
|
662
|
|
|
|
|
|
|
} |
|
663
|
|
|
|
|
|
|
elsif ( $saw_empty_line == TRUE ) { |
|
664
|
18
|
100
|
|
|
|
21
|
if ( $prev_line ne "" ) { |
|
665
|
12
|
|
|
|
|
9
|
$symbol_mut_line = $prev_line; |
|
666
|
12
|
|
|
|
|
8
|
$prev_line = ""; |
|
667
|
|
|
|
|
|
|
} |
|
668
|
18
|
100
|
|
|
|
17
|
if ( $description ne "" ) { |
|
669
|
6
|
|
|
|
|
11
|
$description .= "\n" . $line; |
|
670
|
|
|
|
|
|
|
} |
|
671
|
|
|
|
|
|
|
else { |
|
672
|
12
|
|
|
|
|
11
|
$description = $line; |
|
673
|
|
|
|
|
|
|
} |
|
674
|
|
|
|
|
|
|
} |
|
675
|
|
|
|
|
|
|
} |
|
676
|
|
|
|
|
|
|
|
|
677
|
2
|
|
|
|
|
7
|
my $allelic_variant = $self->_create_allelic_variant( $number, $title, |
|
678
|
|
|
|
|
|
|
$symbol_mut_line, $description ); |
|
679
|
|
|
|
|
|
|
|
|
680
|
2
|
|
|
|
|
2
|
push( @allelic_variants, $allelic_variant ); |
|
681
|
|
|
|
|
|
|
|
|
682
|
2
|
|
|
|
|
11
|
return @allelic_variants; |
|
683
|
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
} # _parse_allelic_variants |
|
685
|
|
|
|
|
|
|
|
|
686
|
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
|
|
688
|
|
|
|
|
|
|
|
|
689
|
|
|
|
|
|
|
sub _create_allelic_variant { |
|
690
|
12
|
|
|
12
|
|
13
|
my ( $self, $number, $title, $symbol_mut_line, $description ) = @_; |
|
691
|
|
|
|
|
|
|
|
|
692
|
12
|
|
|
|
|
9
|
my $symbol = ""; |
|
693
|
12
|
|
|
|
|
6
|
my $mutation = ""; |
|
694
|
12
|
|
|
|
|
10
|
my $aa_ori = ""; |
|
695
|
12
|
|
|
|
|
10
|
my $aa_mut = ""; |
|
696
|
12
|
|
|
|
|
5
|
my $position = ""; |
|
697
|
|
|
|
|
|
|
|
|
698
|
12
|
100
|
|
|
|
173
|
if ( $symbol_mut_line =~ /\s*(.+?)\s*,\s*([a-z]{3})(\d+)([a-z]{3})/i ) { |
|
|
|
50
|
|
|
|
|
|
|
699
|
6
|
|
|
|
|
7
|
$symbol = $1; |
|
700
|
6
|
|
|
|
|
6
|
$aa_ori = $2; |
|
701
|
6
|
|
|
|
|
7
|
$aa_mut = $4; |
|
702
|
6
|
|
|
|
|
5
|
$position = $3; |
|
703
|
|
|
|
|
|
|
} |
|
704
|
|
|
|
|
|
|
elsif ( $symbol_mut_line =~ /\s*(.+?)\s*,\s*(.+)/ ) { |
|
705
|
6
|
|
|
|
|
7
|
$symbol = $1; |
|
706
|
6
|
|
|
|
|
6
|
$mutation = $2; |
|
707
|
|
|
|
|
|
|
} |
|
708
|
|
|
|
|
|
|
else { |
|
709
|
0
|
|
|
|
|
0
|
$symbol = $symbol_mut_line; |
|
710
|
|
|
|
|
|
|
} |
|
711
|
|
|
|
|
|
|
|
|
712
|
12
|
50
|
|
|
|
20
|
if ( ! defined( $description ) ) { $self->throw("undef desc"); } |
|
|
0
|
|
|
|
|
0
|
|
|
713
|
12
|
50
|
|
|
|
14
|
if ( ! defined( $mutation ) ) { $self->throw("undef mutation"); } |
|
|
0
|
|
|
|
|
0
|
|
|
714
|
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
|
|
716
|
12
|
|
|
|
|
24
|
my $allelic_variant = Bio::Phenotype::OMIM::OMIMentryAllelicVariant->new(); |
|
717
|
12
|
|
|
|
|
19
|
$allelic_variant->number( $number ); |
|
718
|
12
|
|
|
|
|
14
|
$allelic_variant->aa_ori( $aa_ori ); |
|
719
|
12
|
|
|
|
|
36
|
$allelic_variant->aa_mut( $aa_mut ); |
|
720
|
12
|
|
|
|
|
13
|
$allelic_variant->position( $position ); |
|
721
|
12
|
|
|
|
|
13
|
$allelic_variant->title( $title ); |
|
722
|
12
|
|
|
|
|
18
|
$allelic_variant->symbol( $symbol ); |
|
723
|
12
|
|
|
|
|
12
|
$allelic_variant->description( $description ); |
|
724
|
12
|
|
|
|
|
15
|
$allelic_variant->additional_mutations( $mutation ); |
|
725
|
|
|
|
|
|
|
|
|
726
|
12
|
|
|
|
|
13
|
return $allelic_variant; |
|
727
|
|
|
|
|
|
|
|
|
728
|
|
|
|
|
|
|
} # _create_allelic_variant |
|
729
|
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
|
|
731
|
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
sub _parse_title { |
|
734
|
2
|
|
|
2
|
|
2
|
my ( $self, $text ) = @_; |
|
735
|
2
|
|
|
|
|
3
|
my $title = ""; |
|
736
|
2
|
50
|
|
|
|
7
|
if ( $text =~ /^(.+)\n/ ) { |
|
737
|
2
|
|
|
|
|
8
|
$title = $1; |
|
738
|
2
|
|
|
|
|
6
|
$text =~ s/^.+\n//; |
|
739
|
|
|
|
|
|
|
} |
|
740
|
|
|
|
|
|
|
else { |
|
741
|
0
|
|
|
|
|
0
|
$title = $text; |
|
742
|
0
|
|
|
|
|
0
|
$text = ""; |
|
743
|
|
|
|
|
|
|
|
|
744
|
|
|
|
|
|
|
} |
|
745
|
|
|
|
|
|
|
|
|
746
|
2
|
|
|
|
|
3
|
return ( $title, $text ); |
|
747
|
|
|
|
|
|
|
} # _parse_title |
|
748
|
|
|
|
|
|
|
|
|
749
|
|
|
|
|
|
|
|
|
750
|
|
|
|
|
|
|
|
|
751
|
|
|
|
|
|
|
|
|
752
|
|
|
|
|
|
|
sub _parse_references { |
|
753
|
2
|
|
|
2
|
|
3
|
my ( $self, $text ) = @_; |
|
754
|
|
|
|
|
|
|
|
|
755
|
2
|
|
|
|
|
4
|
$text =~ s/\A\s+//; |
|
756
|
2
|
|
|
|
|
12
|
$text =~ s/\s+\z//; |
|
757
|
2
|
|
|
|
|
10
|
$text =~ s/\A\d+\.\s*//; |
|
758
|
|
|
|
|
|
|
|
|
759
|
2
|
|
|
|
|
2
|
my @references; |
|
760
|
|
|
|
|
|
|
|
|
761
|
2
|
|
|
|
|
27
|
my @texts = split( /\s*\n\s*\n\s*\d+\.\s*/, $text ); |
|
762
|
|
|
|
|
|
|
|
|
763
|
2
|
|
|
|
|
3
|
foreach my $t ( @texts ) { |
|
764
|
|
|
|
|
|
|
|
|
765
|
8
|
|
|
|
|
7
|
my $authors = ""; |
|
766
|
8
|
|
|
|
|
6
|
my $title = ""; |
|
767
|
8
|
|
|
|
|
6
|
my $location = ""; |
|
768
|
|
|
|
|
|
|
|
|
769
|
8
|
|
|
|
|
49
|
$t =~ s/\s+/ /g; |
|
770
|
|
|
|
|
|
|
|
|
771
|
8
|
100
|
|
|
|
58
|
if ( $t =~ /(.+?)\s*:\s*(.+?[.?!])\s+(.+?)\s+(\S+?)\s*:\s*(\w?\d+.*)\s*,\s*(\d+)/ ) { |
|
|
|
50
|
|
|
|
|
|
|
772
|
6
|
|
|
|
|
10
|
$authors = $1; |
|
773
|
6
|
|
|
|
|
7
|
$title = $2; |
|
774
|
6
|
|
|
|
|
5
|
my $journal = $3; |
|
775
|
6
|
|
|
|
|
5
|
my $volume = $4; |
|
776
|
6
|
|
|
|
|
7
|
my $fromto = $5; |
|
777
|
6
|
|
|
|
|
7
|
my $year = $6; |
|
778
|
6
|
|
|
|
|
5
|
my $from = "", |
|
779
|
|
|
|
|
|
|
my $to = ""; |
|
780
|
6
|
50
|
|
|
|
15
|
if ( $fromto =~ /(\d+)-+(\d+)/ ) { |
|
|
|
0
|
|
|
|
|
|
|
781
|
6
|
|
|
|
|
5
|
$from = $1; |
|
782
|
6
|
|
|
|
|
8
|
$to = "-".$2; |
|
783
|
|
|
|
|
|
|
} |
|
784
|
|
|
|
|
|
|
elsif ( $fromto =~ /\A(\w+)/ ) { |
|
785
|
0
|
|
|
|
|
0
|
$from = $1; |
|
786
|
|
|
|
|
|
|
} |
|
787
|
6
|
|
|
|
|
12
|
$location = $journal." ".$volume." ".$from.$to." (".$year.")"; |
|
788
|
|
|
|
|
|
|
} |
|
789
|
|
|
|
|
|
|
|
|
790
|
|
|
|
|
|
|
|
|
791
|
|
|
|
|
|
|
elsif ( $t =~ /(.+?)\s*:\s*(.+?[.?!])\s*(.+?)\z/ ) { |
|
792
|
0
|
|
|
|
|
0
|
$authors = $1; |
|
793
|
0
|
|
|
|
|
0
|
$title = $2; |
|
794
|
0
|
|
|
|
|
0
|
$location = $3; |
|
795
|
|
|
|
|
|
|
} |
|
796
|
|
|
|
|
|
|
else { |
|
797
|
2
|
|
|
|
|
1
|
$title = $t; |
|
798
|
|
|
|
|
|
|
} |
|
799
|
|
|
|
|
|
|
|
|
800
|
8
|
|
|
|
|
28
|
my $ref = Bio::Annotation::Reference->new( -title => $title, |
|
801
|
|
|
|
|
|
|
-location => $location, |
|
802
|
|
|
|
|
|
|
-authors => $authors ); |
|
803
|
8
|
|
|
|
|
14
|
push( @references, $ref ); |
|
804
|
|
|
|
|
|
|
|
|
805
|
|
|
|
|
|
|
} |
|
806
|
2
|
|
|
|
|
4
|
return @references; |
|
807
|
|
|
|
|
|
|
|
|
808
|
|
|
|
|
|
|
} # _parse_references |
|
809
|
|
|
|
|
|
|
|
|
810
|
|
|
|
|
|
|
|
|
811
|
|
|
|
|
|
|
|
|
812
|
|
|
|
|
|
|
|
|
813
|
|
|
|
|
|
|
sub _genemap_hash { |
|
814
|
12
|
|
|
12
|
|
9
|
my ( $self, $value ) = @_; |
|
815
|
|
|
|
|
|
|
|
|
816
|
12
|
100
|
|
|
|
20
|
if ( defined $value ) { |
|
817
|
8
|
50
|
|
|
|
14
|
unless ( ref( $value ) eq "HASH" ) { |
|
818
|
0
|
|
|
|
|
0
|
$self->throw( "Argument to method \"_genemap_hash\" is not a reference to an Hash" ); |
|
819
|
|
|
|
|
|
|
} |
|
820
|
8
|
|
|
|
|
10
|
$self->{ "_genemap_hash" } = $value; |
|
821
|
|
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
} |
|
823
|
|
|
|
|
|
|
|
|
824
|
12
|
|
|
|
|
25
|
return $self->{ "_genemap_hash" }; |
|
825
|
|
|
|
|
|
|
} # _genemap_hash |
|
826
|
|
|
|
|
|
|
|
|
827
|
|
|
|
|
|
|
|
|
828
|
|
|
|
|
|
|
|
|
829
|
|
|
|
|
|
|
|
|
830
|
|
|
|
|
|
|
sub _is_not_first_record { |
|
831
|
|
|
|
|
|
|
|
|
832
|
5
|
|
|
5
|
|
7
|
my ( $self, $value ) = @_; |
|
833
|
|
|
|
|
|
|
|
|
834
|
5
|
100
|
|
|
|
8
|
if ( defined $value ) { |
|
835
|
3
|
50
|
66
|
|
|
12
|
unless ( $value == FALSE || $value == TRUE ) { |
|
836
|
0
|
|
|
|
|
0
|
$self->throw( "Found [$value] where [" . TRUE |
|
837
|
|
|
|
|
|
|
." or " . FALSE . "] expected" ); |
|
838
|
|
|
|
|
|
|
} |
|
839
|
3
|
|
|
|
|
5
|
$self->{ "_not_first_record" } = $value; |
|
840
|
|
|
|
|
|
|
} |
|
841
|
|
|
|
|
|
|
|
|
842
|
5
|
|
|
|
|
7
|
return $self->{ "_not_first_record" }; |
|
843
|
|
|
|
|
|
|
} # _is_not_first_record |
|
844
|
|
|
|
|
|
|
|
|
845
|
|
|
|
|
|
|
|
|
846
|
|
|
|
|
|
|
|
|
847
|
|
|
|
|
|
|
|
|
848
|
|
|
|
|
|
|
sub _done { |
|
849
|
5
|
|
|
5
|
|
7
|
my ( $self, $value ) = @_; |
|
850
|
|
|
|
|
|
|
|
|
851
|
5
|
100
|
|
|
|
11
|
if ( defined $value ) { |
|
852
|
3
|
50
|
66
|
|
|
11
|
unless ( $value == FALSE || $value == TRUE ) { |
|
853
|
0
|
|
|
|
|
0
|
$self->throw( "Found [$value] where [" . TRUE |
|
854
|
|
|
|
|
|
|
." or " . FALSE . "] expected" ); |
|
855
|
|
|
|
|
|
|
} |
|
856
|
3
|
|
|
|
|
5
|
$self->{ "_done" } = $value; |
|
857
|
|
|
|
|
|
|
} |
|
858
|
|
|
|
|
|
|
|
|
859
|
5
|
|
|
|
|
10
|
return $self->{ "_done" }; |
|
860
|
|
|
|
|
|
|
} # _done |
|
861
|
|
|
|
|
|
|
|
|
862
|
|
|
|
|
|
|
|
|
863
|
|
|
|
|
|
|
|
|
864
|
|
|
|
|
|
|
|
|
865
|
|
|
|
|
|
|
sub _OMIM_text_file { |
|
866
|
234
|
|
|
234
|
|
190
|
my ( $self, $value ) = @_; |
|
867
|
|
|
|
|
|
|
|
|
868
|
234
|
100
|
|
|
|
272
|
if ( defined $value ) { |
|
869
|
1
|
50
|
|
|
|
4
|
unless ( $value->isa( "Bio::Root::IO" ) ) { |
|
870
|
0
|
|
|
|
|
0
|
$self->throw( "[$value] is not a valid \"Bio::Root::IO\"" ); |
|
871
|
|
|
|
|
|
|
} |
|
872
|
1
|
|
|
|
|
2
|
$self->{ "_omimtxt_file" } = $value; |
|
873
|
|
|
|
|
|
|
|
|
874
|
|
|
|
|
|
|
} |
|
875
|
|
|
|
|
|
|
|
|
876
|
234
|
|
|
|
|
323
|
return $self->{ "_omimtxt_file" }; |
|
877
|
|
|
|
|
|
|
} # _OMIM_text_file |
|
878
|
|
|
|
|
|
|
|
|
879
|
|
|
|
|
|
|
|
|
880
|
|
|
|
|
|
|
|
|
881
|
|
|
|
|
|
|
|
|
882
|
|
|
|
|
|
|
sub _read_genemap { |
|
883
|
4
|
|
|
4
|
|
8
|
my ( $self, $genemap_file_name ) = @_; |
|
884
|
|
|
|
|
|
|
|
|
885
|
4
|
|
|
|
|
5
|
my $line = ""; |
|
886
|
4
|
|
|
|
|
7
|
my %genemap_hash = (); |
|
887
|
4
|
|
|
|
|
20
|
my $genemap_file = Bio::Root::IO->new( -file => $genemap_file_name ); |
|
888
|
3
|
|
|
|
|
5
|
my @a = (); |
|
889
|
3
|
|
|
|
|
4
|
my %gm = (); |
|
890
|
|
|
|
|
|
|
|
|
891
|
3
|
|
|
|
|
9
|
while( $line = $genemap_file->_readline() ) { |
|
892
|
137
|
|
|
|
|
449
|
@a = split( /\|/, $line ); |
|
893
|
137
|
50
|
|
|
|
170
|
unless( scalar( @a ) == 18 ) { |
|
894
|
0
|
|
|
|
|
0
|
$self->throw( "Gene map file \"".$self->genemap_file_name() |
|
895
|
|
|
|
|
|
|
. "\" is not in the expected format." |
|
896
|
|
|
|
|
|
|
. " Make sure there is a linebreak after the final line." ); |
|
897
|
|
|
|
|
|
|
} |
|
898
|
137
|
|
|
|
|
256
|
$gm{ $a[ 9 ] } = $line; |
|
899
|
|
|
|
|
|
|
} |
|
900
|
3
|
|
|
|
|
8
|
$genemap_file->close(); |
|
901
|
3
|
|
|
|
|
10
|
$self->_genemap_hash( \%gm ); |
|
902
|
|
|
|
|
|
|
|
|
903
|
|
|
|
|
|
|
} #_read_genemap |
|
904
|
|
|
|
|
|
|
|
|
905
|
|
|
|
|
|
|
|
|
906
|
|
|
|
|
|
|
|
|
907
|
|
|
|
|
|
|
|
|
908
|
|
|
|
|
|
|
sub _no_OMIM_text_file_provided_error { |
|
909
|
0
|
|
|
0
|
|
0
|
my ( $self ) = @_; |
|
910
|
|
|
|
|
|
|
|
|
911
|
0
|
|
|
|
|
0
|
my $msg = "Need to indicate a OMIM text file to read from with\n"; |
|
912
|
0
|
|
|
|
|
0
|
$msg .= "either \"OMIMparser->new( -omimtext => \"path/to/omim.txt\" );\"\n"; |
|
913
|
0
|
|
|
|
|
0
|
$msg .= "or \"\$omim_parser->omimtxt_file_name( \"path/to/omim.txt\" );\""; |
|
914
|
0
|
|
|
|
|
0
|
$self->throw( $msg ); |
|
915
|
|
|
|
|
|
|
} # _no_OMIM_text_file_provided_error |
|
916
|
|
|
|
|
|
|
|
|
917
|
|
|
|
|
|
|
|
|
918
|
|
|
|
|
|
|
|
|
919
|
|
|
|
|
|
|
|
|
920
|
|
|
|
|
|
|
sub _not_a_OMIM_text_file_error { |
|
921
|
0
|
|
|
0
|
|
0
|
my ( $self ) = @_; |
|
922
|
|
|
|
|
|
|
|
|
923
|
0
|
|
|
|
|
0
|
my $msg = "File \"".$self->omimtxt_file_name() . |
|
924
|
|
|
|
|
|
|
"\" appears not to be a OMIM text file"; |
|
925
|
0
|
|
|
|
|
0
|
$self->throw( $msg ); |
|
926
|
|
|
|
|
|
|
} # _not_a_OMIM_text_file_error |
|
927
|
|
|
|
|
|
|
|
|
928
|
|
|
|
|
|
|
|
|
929
|
|
|
|
|
|
|
|
|
930
|
|
|
|
|
|
|
|
|
931
|
|
|
|
|
|
|
sub _add_to_hash { |
|
932
|
36
|
|
|
36
|
|
36
|
my ( $self, $state, $contents, $record_ref ) = @_; |
|
933
|
|
|
|
|
|
|
|
|
934
|
36
|
100
|
|
|
|
46
|
if ( exists( $record_ref->{ $state } ) ) { |
|
935
|
8
|
|
|
|
|
10
|
chomp( $record_ref->{ $state } ); |
|
936
|
8
|
|
|
|
|
17
|
$record_ref->{ $state } = $record_ref->{ $state } . $contents; |
|
937
|
|
|
|
|
|
|
} |
|
938
|
|
|
|
|
|
|
else { |
|
939
|
28
|
|
|
|
|
46
|
$record_ref->{ $state } = $contents; |
|
940
|
|
|
|
|
|
|
} |
|
941
|
|
|
|
|
|
|
} # _add_to_hash |
|
942
|
|
|
|
|
|
|
|
|
943
|
|
|
|
|
|
|
|
|
944
|
|
|
|
|
|
|
|
|
945
|
|
|
|
|
|
|
1; |