line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package NCBIx::eUtils::GeneAliases; |
2
|
1
|
|
|
1
|
|
20614
|
use Class::Std; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
use Class::Std::Utils; |
4
|
|
|
|
|
|
|
use LWP::Simple; |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
use warnings; |
7
|
|
|
|
|
|
|
use strict; |
8
|
|
|
|
|
|
|
use Carp; |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
use version; our $VERSION = qv('0.9.0'); |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# Base URL of the NCBI Entrez E-utilities CGI endpoints; START copies it into
# each new object's utils_url attribute.  NCBI documents the HTTPS
# eutils.ncbi.nlm.nih.gov host as the base for all E-utility requests.
our $utils = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils";

# Package-level page size.  The retmax attribute carries its own default of
# 500, so this variable is kept only as part of the package's public globals.
our $retmax = 500;

# Field labels found in a Gene docsum text record.  get_aliases() inserts a
# newline before each label so that every field starts on its own line.
our @keywords = (
    'Official Symbol:',
    'and Name:',
    'Name:',
    'Other Aliases:',
    'Other Designations:',
    'Chromosome:',
    'Location:',
    'Annotation:',
    'MIM:',
    'Genomic context:',
    'Macronuclear:',
    'GeneID:',
);
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
{ |
17
|
|
|
|
|
|
|
# Inside-out attribute storage (Class::Std).  :get<NAME> / :set<NAME> generate
# the get_NAME / set_NAME accessors that START and _get_docsums call, and
# :init_arg<NAME> names the constructor key that may initialise the attribute.
# Without the <NAME> part no accessor is generated.

# Base URL used to build the esearch/efetch requests.
my %utils_url_of :ATTR( :get<utils_url> :set<utils_url> :default<''> :init_arg<utils_url> );

# Number of docsums requested per efetch call.
my %retmax_of :ATTR( :get<retmax> :set<retmax> :default<'500'> :init_arg<retmax> );
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
# Class::Std post-construction hook: makes sure the new object has a base URL.
#
#   $self    - the object being constructed
#   $ident   - Class::Std identifier of $self (unused here)
#   $arg_ref - constructor arguments (unused here)
#
# Returns nothing.
sub START {
    my ($self, $ident, $arg_ref) = @_;

    # Fall back to the package default only when the attribute is still empty;
    # assigning unconditionally would discard a utils_url given to new().
    my $current_url = $self->get_utils_url();
    if ( !defined $current_url || $current_url eq '' ) {
        $self->set_utils_url( $utils );
    }

    return;
}
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# Collect the alternative names found in the NCBI Gene docsums for a term.
#
#   $gene_id - search term handed to _get_docsums()
#
# Returns the distinct tokens taken from the "Official Symbol:" and
# "Other Aliases:" fields of the fetched text, sorted as strings.  Official
# symbols equal to $gene_id are left out.  When nothing was fetched the result
# is the empty list.  In scalar context a non-empty result yields its count.
sub get_aliases {
    my ( $self, $gene_id ) = @_;

    # Get NCBI records for gene
    my $gene_alts = $self->_get_docsums( $gene_id );
    return if !$gene_alts;

    # Remove newlines
    $gene_alts =~ s/\n/ /g;

    # Break into lines before keywords.  \Q...\E keeps each label literal.
    foreach my $keyword ( @keywords ) {
        $gene_alts =~ s/\Q$keyword\E/\n$keyword/g;
    }

    # Process lines: only the two name-bearing fields are of interest.
    my %seen;
    foreach my $alt_line ( split /\n/, $gene_alts ) {
        next if $alt_line !~ m/^(Official Symbol|Other Aliases):(.*)$/;
        my ( $field, $value ) = ( $1, $2 );

        # Commas and semicolons separate names just like whitespace does.
        $value =~ s/[,;]/ /g;

        foreach my $token ( split /\s+/, $value ) {
            # split yields an empty leading field when $value starts with a blank.
            next if !$token;

            # The official symbol is not an alias of itself.
            next if $field eq 'Official Symbol' && $token eq $gene_id;

            $seen{$token}++;
        }
    }

    # Sort into a named array first: "return sort ..." has undefined behaviour
    # when the caller is in scalar context.
    my @names = sort keys %seen;
    return @names;
}
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
# Fetch the text docsums of every Gene record matching a search term.
#
#   $gene_id - search term placed verbatim in the esearch "term" parameter
#              (it is not URL-escaped here)
#
# Runs one esearch with usehistory=y, then pages through the stored result set
# with efetch, retmax records per request, sleeping after every request.
#
# Returns the concatenated efetch responses, or '' when the search request
# fails, its reply cannot be parsed, or it reports no hits.
# Croaks when the retmax attribute is not a positive integer.
sub _get_docsums {
    my ( $self, $gene_id ) = @_;
    my $gene_data = '';
    my $retmax    = $self->get_retmax();
    my $utils_url = $self->get_utils_url();

    # A zero, negative or non-numeric page size would keep the paging loop
    # below from ever reaching $Count.
    if ( !defined $retmax || $retmax !~ m/\A\d+\z/ || $retmax < 1 ) {
        croak 'retmax must be a positive integer';
    }

    # Identification parameters sent with every request.
    my $ident_params = '&tool=cpan_ncbix_eutils_genealiases&email=roger@iosea.com';

    # Get the query
    my $esearch = $utils_url . "/esearch.fcgi?"
        . "db=gene&retmax=1&usehistory=y&term=$gene_id"
        . $ident_params;
    my $esearch_result = get( $esearch );
    sleep(3);

    # LWP::Simple's get() returns undef when the request fails.
    return $gene_data if !defined $esearch_result;

    # Parse the count, query_key, and webenv.  Each capture is tied to its XML
    # element: unanchored (\d+)/(\S+) groups would pick up whatever digits and
    # text come first in the reply.  Taking the captures from the match's
    # return list also avoids reading stale $1..$3 after a failed match.
    my ( $Count, $QueryKey, $WebEnv ) = $esearch_result =~
        m|<Count>(\d+)</Count>.*<QueryKey>(\d+)</QueryKey>.*<WebEnv>(\S+)</WebEnv>|s;

    # Nothing to fetch when the reply was unparseable or reported zero hits.
    return $gene_data if !$Count;

    for ( my $retstart = 0; $retstart < $Count; $retstart += $retmax ) {
        my $efetch = $utils_url . "/efetch.fcgi?"
            . "rettype=docsum&retmode=text&retstart=$retstart&retmax=$retmax&"
            . "db=gene&query_key=$QueryKey&WebEnv=$WebEnv"
            . $ident_params;

        # Skip a failed page instead of appending undef.
        my $page = get( $efetch );
        $gene_data .= $page if defined $page;
        sleep(2);
    }

    return $gene_data;
}
98
|
|
|
|
|
|
|
} |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
1; # Magic true value required at end of module |
101
|
|
|
|
|
|
|
__END__ |