line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
# BioPerl module for Bio::DB::NextProt |
2
|
|
|
|
|
|
|
# |
3
|
|
|
|
|
|
|
# Please direct questions and support issues to <bioperl-l@bioperl.org> |
4
|
|
|
|
|
|
|
# |
5
|
|
|
|
|
|
|
# Copyright Felipe da Veiga Leprevost |
6
|
|
|
|
|
|
|
# |
7
|
|
|
|
|
|
|
# You may distribute this module under the same terms as perl itself. |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=head1 NAME |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
Bio::DB::NextProt - Object interface to NextProt REST API. |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
=head1 SYNOPSIS |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
my $np = Bio::DB::NextProt->new(); |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
my @result_1 = $np->search_cv(-query => "kinase#"); |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
my @result_2 = $np->get_isoform_info(-query => "NX_O00142-2"); |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
my @result_3 = $np->get_protein_cv_info(-query => "PTM-0205", -format => "html"); |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
=head1 DESCRIPTION |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
The module allows the dynamic retrieval of information from the NextProt Database |
26
|
|
|
|
|
|
|
through its API service. All the information below was extracted from the API webpage. |
27
|
|
|
|
|
|
|
For the moment the results obtained from the API are in pure HTML, XML or JSON, so |
28
|
|
|
|
|
|
|
you will have to parse them yourself. |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
=head2 Search functionalities |
31
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
=head3 Search Protein |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
Search proteins matching the query or search proteins for which the filter is true. |
35
|
|
|
|
|
|
|
Available filter values are: structure, disease, expression, mutagenesis or proteomics. |
36
|
|
|
|
|
|
|
Note: only one filter parameter at a time is possible for the moment. |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
@result = $np->search_protein(-query => "kinase"); |
39
|
|
|
|
|
|
|
@result = $np->search_protein(-query => "kinase", -filter => "disease"); |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
=head3 Controlled Vocabulary Terms |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
Search controlled vocabulary terms matching the query or search controlled vocabulary terms in the category specified by the filter. |
44
|
|
|
|
|
|
|
Available filter values are: enzyme, go, mesh, cell, domain, family, tissue, metal, pathway, disease, keyword, ptm, subcell. |
45
|
|
|
|
|
|
|
Note: only one category at a time is possible. |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
@result = $np->search_cv(-query => "colon"); |
48
|
|
|
|
|
|
|
@result = $np->search_cv(-query => "colon", -filter => "keyword"); |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
=head3 Format: |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
Output format may be JSON (default), HTML or XML. |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
@result = $np->search_protein(-query => "kinase", -filter => "disease", -format => "html"); |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
=head2 Find information by protein entry |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=head3 Protein ID |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
ID is neXtProt identifier. |
62
|
|
|
|
|
|
|
Retrieve gene name as well as main identifier and isoform sequences |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
@result = $np->get_protein_info(-query => "NX_P13051"); |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=head3 Post-translational modifications |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
For each isoform of the specified entry, retrieve all post-translational modifications. |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
@result = $np->get_protein_info(-query => "NX_P13051", -retrieve => "ptm"); |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
=head3 Variant |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
For each isoform of the specified entry, retrieve all variants. |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
@result = $np->get_protein_info(-query => "NX_P13051", -retrieve => "variant"); |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
=head3 Localisation |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
For each isoform of the specified entry, retrieve all subcellular locations. |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
@result = $np->get_protein_info(-query => "NX_P13051", -retrieve => "localisation"); |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
=head3 Expression |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
Retrieve all expression information by tissue for the specified entry. |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
@result = $np->get_protein_info(-query => "NX_P13051", -retrieve => "expression"); |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
=head3 Format: |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
Output format may be JSON (default), HTML or XML. |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
@result = $np->get_protein_info(-query => "NX_P13051", -retrieve => "expression", -format => "html"); |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
=head2 Find information by isoform |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
=head3 Protein ID |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
ID is neXtProt identifier. |
102
|
|
|
|
|
|
|
Retrieve gene name as well as main identifier and isoform sequences |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
@result = $np->get_isoform_info(-query => "NX_O00142-2"); |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
=head3 Post-translational modifications |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
For each isoform of the specified entry, retrieve all post-translational modifications. |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
@result = $np->get_isoform_info(-query => "NX_P01116-2", -retrieve => "ptm"); |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
=head3 Variant |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
For each isoform of the specified entry, retrieve all variants. |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
@result = $np->get_isoform_info(-query => "NX_P01116-2", -retrieve => "variant"); |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=head3 Localisation |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
For each isoform of the specified entry, retrieve all subcellular locations. |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
@result = $np->get_isoform_info(-query => "NX_P01116-2", -retrieve => "localisation"); |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
=head2 Find information by controlled vocabulary term |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
=head3 Protein ID |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
ID is neXtProt identifier. |
130
|
|
|
|
|
|
|
Retrieve the accession, the name and the category of the CV term. |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
@result = $np->get_protein_cv_info(-query => "PTM-0205"); |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=head3 Protein List |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
List all the proteins associated with the term in neXtProt. |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
@result = $np->get_protein_cv_info(-query => "PTM-0205", -retrieve => "proteins"); |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
=head3 Format: |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
Output format may be JSON (default), HTML or XML. |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
@result = $np->get_protein_cv_info(-query => "PTM-0205", -retrieve => "proteins", -format => "html"); |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=head2 Retrieving Accession Lists |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
Allows the retrieval of all accession codes from individual chromosomes or from the entire NextProt database. |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
@result = $np->get_accession_list(-chromosome => "10"); |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
@result = $np->get_accession_list(-chromosome => "all"); |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
=head3 entries with a protein existence "at protein level" (PE 1) |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
@result = $np->get_accession_list(-evidence => "protein_level"); |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
=head3 entries with a protein existence "at transcript level" (PE 2) |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
@result = $np->get_accession_list(-evidence => "transcript_level"); |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
=head3 entries with a protein existence "by homology" (PE 3) |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
@result = $np->get_accession_list(-evidence => "homology"); |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
=head3 entries with a protein existence "predicted" (PE 4) |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
@result = $np->get_accession_list(-evidence => "predicted"); |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
=head3 entries with a protein existence "uncertain" (PE 5) |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
@result = $np->get_accession_list(-evidence => "uncertain"); |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
=head2 Customized Report Files for the HUPO Human Proteome Project (HPP) |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
=head3 Individual report files for each chromosome (1 to 22, X, Y and MT) |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
my @list = $np->get_hpp_report(-chromosome => 10); |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
=head3 Annotated phosphorylated residues per chromosome |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
my @list = $np->get_hpp_report(-phospho => "true"); |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
=head3 Annotated N-Acetyl residues per chromosome |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
my @list = $np->get_hpp_report(-nacetyl => "true"); |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
=head2 NextProt Mapping |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
Mapping of neXtProt accession numbers to external resources. |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
=head3 Ensembl gene identifiers |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
@list = $np->get_mapping(-map => "ensembl_gene"); |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
=head3 Ensembl protein identifiers |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
@list = $np->get_mapping(-map => "ensembl_protein"); |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
=head3 protein ids that cannot be mapped to any isoform in neXtProt |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
@list = $np->get_mapping(-map => "ensembl_unmapped"); |
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
=head3 Ensembl transcript identifiers |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
@list = $np->get_mapping(-map => "ensembl_transcript"); |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
=head3 transcript is considered as coding by Ensembl that cannot be mapped to any isoform in neXtProt |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
@list = $np->get_mapping(-map => "ensembl_transcript_unmapped"); |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
=head3 NCBI GeneID gene accession numbers |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
@list = $np->get_mapping(-map => "geneid"); |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
=head3 HGNC gene accession numbers |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
@list = $np->get_mapping(-map => "hgnc"); |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
=head3 MGI mouse gene accession numbers |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
@list = $np->get_mapping(-map => "mgi"); |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
=head3 NCBI RefSeq gene accession numbers |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
@list = $np->get_mapping(-map => "refseq"); |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
=head2 Chromosome Report |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
The module also allows programmatic access to chromosome information by accessing and formatting the |
236
|
|
|
|
|
|
|
chr_report tables from the nextprot ftp server. |
237
|
|
|
|
|
|
|
The retrieved structure is a hash of hashes whose first-level key is the NextProt Accession Number. |
238
|
|
|
|
|
|
|
The internal hashes have the following values: |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
* Gene Name |
241
|
|
|
|
|
|
|
* Chromosomal position |
242
|
|
|
|
|
|
|
* Start position |
243
|
|
|
|
|
|
|
* Stop position |
244
|
|
|
|
|
|
|
* Protein existence |
245
|
|
|
|
|
|
|
* Proteomics |
246
|
|
|
|
|
|
|
* Antibody |
247
|
|
|
|
|
|
|
* 3D |
248
|
|
|
|
|
|
|
* Disease |
249
|
|
|
|
|
|
|
* Isoforms |
250
|
|
|
|
|
|
|
* Variants |
251
|
|
|
|
|
|
|
* PTMs |
252
|
|
|
|
|
|
|
* Description |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
This is how the data is represented in the hashes: |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
NX_A7E2V4 { |
257
|
|
|
|
|
|
|
antibody "yes", |
258
|
|
|
|
|
|
|
description "Zinc finger SWIM domain-containing protein 8", |
259
|
|
|
|
|
|
|
disease "no", |
260
|
|
|
|
|
|
|
existence "protein level", |
261
|
|
|
|
|
|
|
has_3d "no", |
262
|
|
|
|
|
|
|
isoforms 5, |
263
|
|
|
|
|
|
|
gene_name "ZSWIM8", |
264
|
|
|
|
|
|
|
position "10q22.2", |
265
|
|
|
|
|
|
|
proteomics "yes", |
266
|
|
|
|
|
|
|
ptms 6, |
267
|
|
|
|
|
|
|
start_position 75545340, |
268
|
|
|
|
|
|
|
stop_position 75561551, |
269
|
|
|
|
|
|
|
variants 67 |
270
|
|
|
|
|
|
|
} |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
=head3 Loading the Chromosome table. |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
Loads all the information from the table. |
275
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
my %data = $np->get_chromosome(-chromosome => 10); |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
=head3 Accessing Protein information: |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
say $data{NX_A7E2V4}->{isoforms}; |
282
|
|
|
|
|
|
|
say $data{NX_A7E2V4}->{proteomics}; |
283
|
|
|
|
|
|
|
say $data{NX_A7E2V4}->{description}; |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
|
286
|
|
|
|
|
|
|
=head3 Counting the number of Proteins in the Chromosome |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
$sum = (keys %data); |
289
|
|
|
|
|
|
|
say $sum; |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
=head3 Retrieve all Gene Names from a given Chromosome |
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
for my $prot (keys %data) { |
295
|
|
|
|
|
|
|
say $data{$prot}->{gene_name}; |
296
|
|
|
|
|
|
|
} |
297
|
|
|
|
|
|
|
|
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
=head1 FEEDBACK |
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
=head2 Mailing Lists |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
User feedback is an integral part of the evolution of this and other |
304
|
|
|
|
|
|
|
Bioperl modules. Send your comments and suggestions preferably to one |
305
|
|
|
|
|
|
|
of the Bioperl mailing lists. Your participation is much appreciated. |
306
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
bioperl-l@bioperl.org - General discussion |
308
|
|
|
|
|
|
|
http://bioperl.org/wiki/Mailing_lists - About the mailing lists |
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
=head2 Support |
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
Please direct usage questions or support issues to the mailing list: |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
I<bioperl-l@bioperl.org> |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
rather than to the module maintainer directly. Many experienced and |
317
|
|
|
|
|
|
|
responsive experts will be able to look at the problem and quickly |
318
|
|
|
|
|
|
|
address it. Please include a thorough description of the problem |
319
|
|
|
|
|
|
|
with code and data examples if at all possible. |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
=head2 Reporting Bugs |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
Report bugs to the Bioperl bug tracking system to help us keep track |
324
|
|
|
|
|
|
|
of the bugs and their resolution. Bug reports can be submitted via the |
325
|
|
|
|
|
|
|
web: |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
https://github.com/bioperl/bioperl-live |
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
=head1 AUTHOR - Felipe da Veiga Leprevost |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
Email leprevost@cpan.org |
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
=cut |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
package Bio::DB::NextProt; |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
our $VERSION = '1.02'; |
338
|
|
|
|
|
|
|
|
339
|
2
|
|
|
2
|
|
109699
|
use strict; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
62
|
|
340
|
2
|
|
|
2
|
|
8
|
use warnings; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
51
|
|
341
|
2
|
|
|
2
|
|
720
|
use REST::Client; |
|
2
|
|
|
|
|
107453
|
|
|
2
|
|
|
|
|
55
|
|
342
|
2
|
|
|
2
|
|
1565
|
use Net::FTP::Tiny qw(ftp_get); |
|
2
|
|
|
|
|
13825
|
|
|
2
|
|
|
|
|
3643
|
|
343
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
# Constructor. Creates the object around a REST::Client configured for the
# neXtProt web site and sets "json" as the initial response format.
# Arguments after the class name are accepted but not used.
sub new {
    my $class = shift;

    my $client = REST::Client->new({host=> "http://www.nextprot.org", timeout => 10,});

    # Query, filter and chromosome start out unset.
    my %state = (
        _client     => $client,
        _query      => undef,
        _filter     => undef,
        _chromosome => undef,
        _format     => "json",
    );

    return bless(\%state, $class);
}
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
# Search protein entries through the "/rest/protein/list" endpoint.
#
# Named parameters (all optional):
#   -query  => search term, sent as the "query" URL parameter
#   -filter => filter name, sent as the "filter" URL parameter
#   -format => response format; when given it replaces the format stored
#              on the object and therefore also applies to later calls
#
# Returns whatever the REST client reports as response content. When
# neither -query nor -filter is given no request is issued, so the value
# comes from whatever the client already holds.
#
# NOTE(review): parameter values are concatenated into the URL as-is; they
# are not URL-escaped here.
sub search_protein {
    my ($self, %param) = @_;

    my $path = "/rest/protein/list";

    $self->{_format} = $param{'-format'} if defined $param{'-format'};

    # Collect the search criteria in the order query, filter.
    my @criteria;
    push @criteria, "query=".$param{'-query'}   if defined $param{'-query'};
    push @criteria, "filter=".$param{'-filter'} if defined $param{'-filter'};

    if (@criteria) {
        $self->{_client}->GET($path."?".join("&", @criteria, "format=".$self->{_format}));
    }

    # Call as a method so the reset acts on this object; the former
    # "&reset_params()" passed no invocant at all.
    $self->reset_params();

    return $self->{_client}->responseContent();
}
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
# Search controlled vocabulary terms through the "/rest/cv/list" endpoint.
#
# Named parameters (all optional):
#   -query  => search term, sent as the "query" URL parameter
#   -filter => category name, sent as the "filter" URL parameter
#   -format => response format; when given it replaces the format stored
#              on the object and therefore also applies to later calls
#
# Returns whatever the REST client reports as response content. When
# neither -query nor -filter is given no request is issued, so the value
# comes from whatever the client already holds.
#
# NOTE(review): parameter values are concatenated into the URL as-is; they
# are not URL-escaped here.
sub search_cv {
    my ($self, %param) = @_;

    my $path = "/rest/cv/list";

    $self->{_format} = $param{'-format'} if defined $param{'-format'};

    # Collect the search criteria in the order query, filter.
    my @criteria;
    push @criteria, "query=".$param{'-query'}   if defined $param{'-query'};
    push @criteria, "filter=".$param{'-filter'} if defined $param{'-filter'};

    if (@criteria) {
        $self->{_client}->GET($path."?".join("&", @criteria, "format=".$self->{_format}));
    }

    # Call as a method so the reset acts on this object; the former
    # "&reset_params()" passed no invocant at all.
    $self->reset_params();

    return $self->{_client}->responseContent();
}
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
# Fetch information about one protein entry from "/rest/entry/<id>".
#
# Named parameters:
#   -query    => entry identifier appended to the path; without it no
#                request is issued
#   -retrieve => optional sub-resource appended as "/<retrieve>" (only
#                used when it has a true value)
#   -format   => response format; when given it replaces the format stored
#                on the object and therefore also applies to later calls
#
# Returns whatever the REST client reports as response content.
sub get_protein_info {
    my ($self, %param) = @_;

    my $path = "/rest/entry/";

    $self->{_format} = $param{'-format'} if defined $param{'-format'};

    if (defined $param{'-query'}) {
        my $resource = $path.$param{'-query'};
        $resource .= "/".$param{'-retrieve'} if $param{'-retrieve'};

        $self->{_client}->GET($resource."?format=".$self->{_format});
    }

    # Call as a method so the reset acts on this object; the former
    # "&reset_params()" passed no invocant at all.
    $self->reset_params();

    return $self->{_client}->responseContent();
}
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
# Fetch information about one isoform from "/rest/isoform/<id>".
#
# Named parameters:
#   -query    => isoform identifier appended to the path; without it no
#                request is issued
#   -retrieve => optional sub-resource appended as "/<retrieve>" (only
#                used when it has a true value)
#   -format   => response format; when given it replaces the format stored
#                on the object and therefore also applies to later calls
#
# Returns whatever the REST client reports as response content.
sub get_isoform_info {
    my ($self, %param) = @_;

    my $path = "/rest/isoform/";

    $self->{_format} = $param{'-format'} if defined $param{'-format'};

    if (defined $param{'-query'}) {
        my $resource = $path.$param{'-query'};
        $resource .= "/".$param{'-retrieve'} if $param{'-retrieve'};

        $self->{_client}->GET($resource."?format=".$self->{_format});
    }

    # Call as a method so the reset acts on this object; the former
    # "&reset_params()" passed no invocant at all.
    $self->reset_params();

    return $self->{_client}->responseContent();
}
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
# Fetch information about one controlled vocabulary term from
# "/rest/cv/<id>".
#
# Named parameters:
#   -query    => term identifier appended to the path; without it no
#                request is issued
#   -retrieve => optional sub-resource appended as "/<retrieve>" (only
#                used when it has a true value)
#   -format   => response format; when given it replaces the format stored
#                on the object and therefore also applies to later calls
#
# Returns whatever the REST client reports as response content.
sub get_protein_cv_info {
    my ($self, %param) = @_;

    my $path = "/rest/cv/";

    $self->{_format} = $param{'-format'} if defined $param{'-format'};

    if (defined $param{'-query'}) {
        my $resource = $path.$param{'-query'};
        $resource .= "/".$param{'-retrieve'} if $param{'-retrieve'};

        $self->{_client}->GET($resource."?format=".$self->{_format});
    }

    # Call as a method so the reset acts on this object; the former
    # "&reset_params()" passed no invocant at all.
    $self->reset_params();

    return $self->{_client}->responseContent();
}
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
# Download an accession list file from the neXtProt FTP area.
#
# Named parameters (the first one that is defined wins):
#   -chromosome => "all" for the complete list, anything else is inserted
#                  into the per-chromosome file name; the value is also
#                  stored on the object as _chromosome
#   -evidence   => one of protein_level, transcript_level, homology,
#                  predicted, uncertain; other values download nothing
#
# Returns the result of ftp_get() evaluated in list context, or an empty
# list when no file was selected.
sub get_accession_list {
    my ($self, %param) = @_;

    my $path = "ftp://ftp.nextprot.org/pub/current_release/ac_lists";
    my @file = ();

    # Evidence keyword => file name on the server.
    my %file_for_evidence = (
        protein_level    => "nextprot_ac_list_PE1_at_protein_level.txt",
        transcript_level => "nextprot_ac_list_PE2_at_transcript_level.txt",
        homology         => "nextprot_ac_list_PE3_homology.txt",
        predicted        => "nextprot_ac_list_PE4_predicted.txt",
        uncertain        => "nextprot_ac_list_PE5_uncertain.txt",
    );

    if ( defined $param{'-chromosome'} ) {

        $self->{_chromosome} = $param{'-chromosome'};
        my $chrom = $self->{_chromosome};

        my $name = $chrom eq "all"
            ? "nextprot_ac_list_all.txt"
            : "nextprot_ac_list_chromosome_".$chrom.".txt";

        @file = ftp_get($path."/".$name);

    } elsif ( defined $param{'-evidence'} ) {

        my $name = $file_for_evidence{ $param{'-evidence'} };
        @file = ftp_get($path."/".$name) if defined $name;
    }

    # Call as a method so the reset acts on this object; the former
    # "&reset_params()" passed no invocant at all.
    $self->reset_params();

    return @file;
}
518
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
# Download one of the HPP report files from the neXtProt FTP area.
#
# Named parameters (the first one that is defined wins):
#   -chromosome => value inserted into the per-chromosome report file name
#   -phospho    => selects the phospho report
#   -nacetyl    => selects the N-acetyl report
#
# Only definedness of -phospho / -nacetyl is tested, so any defined value
# (including a false one) selects the report.
#
# Returns the result of ftp_get() evaluated in list context, or an empty
# list when none of the parameters is defined.
sub get_hpp_report {
    my ($self, %param) = @_;

    my $path = "ftp://ftp.nextprot.org/pub/current_release/custom/hpp";
    my @file = ();

    my $name;
    if ( defined $param{'-chromosome'} ) {
        $name = "HPP_chromosome_".$param{'-chromosome'}.".txt";
    } elsif ( defined $param{'-phospho'} ) {
        $name = "HPP_entries_with_phospho_by_chromosome.txt";
    } elsif ( defined $param{'-nacetyl'} ) {
        $name = "HPP_entries_with_nacetyl_by_chromosome.txt";
    }

    @file = ftp_get($path."/".$name) if defined $name;

    # Call as a method so the reset acts on this object; the former
    # "&reset_params()" passed no invocant at all.
    $self->reset_params();

    return @file;
}
542
|
|
|
|
|
|
|
|
543
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
# Download one of the mapping files from the neXtProt FTP area.
#
# Named parameters:
#   -map => one of ensembl_gene, ensembl_protein, ensembl_unmapped,
#           ensembl_transcript, ensembl_transcript_unmapped, geneid,
#           hgnc, mgi, refseq; other values download nothing
#
# Returns the result of ftp_get() evaluated in list context, or an empty
# list when -map is missing or not recognised.
sub get_mapping {
    my ($self, %param) = @_;

    my $path = "ftp://ftp.nextprot.org/pub/current_release/mapping";
    my @file = ();

    # Mapping keyword => file name on the server.
    my %file_for_map = (
        ensembl_gene                => "nextprot_ensg.txt",
        ensembl_protein             => "nextprot_ensp.txt",
        ensembl_unmapped            => "nextprot_ensp_unmapped.txt",
        ensembl_transcript          => "nextprot_enst.txt",
        ensembl_transcript_unmapped => "nextprot_enst_unmapped.txt",
        geneid                      => "nextprot_geneid.txt",
        hgnc                        => "nextprot_hgnc.txt",
        mgi                         => "nextprot_mgi.txt",
        refseq                      => "nextprot_refseq.txt",
    );

    if ( defined $param{'-map'} ) {
        my $name = $file_for_map{ $param{'-map'} };
        @file = ftp_get($path."/".$name) if defined $name;
    }

    # Call as a method so the reset acts on this object; the former
    # "&reset_params()" passed no invocant at all.
    $self->reset_params();

    return @file;
}
579
|
|
|
|
|
|
|
|
580
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
# Download a chromosome report and turn it into a hash of hashes.
#
# Named parameters:
#   -chromosome => value inserted into the report file name; it is also
#                  stored on the object as _chromosome. Without it nothing
#                  is downloaded and an empty hash is returned.
#
# Only lines that start with a token made of letters, digits, "|" or "-"
# followed by whitespace and "NX" are used. In those lines every run of
# two or more whitespace characters is turned into a tab, and the line is
# split on tabs. The second field becomes the key of the outer hash; the
# remaining fields are stored by position under the keys listed below.
#
# Returns the table as a flat key/value list (a hash in list context).
sub get_chromosome {
    my ($self, %param) = @_;

    my %table = ();

    my $path = "ftp://ftp.nextprot.org/pub/current_release/chr_reports";

    if ( defined $param{'-chromosome'} ) {

        $self->{_chromosome} = $param{'-chromosome'};
        my $chrom = $self->{_chromosome};

        # ftp_get() is called in scalar context here and its result is
        # treated as the complete file content.
        my $file = ftp_get($path."/"."nextprot_"."chromosome_".$chrom.".txt");

        # Nothing usable came back: treat it like an empty report instead
        # of feeding undef to split().
        my @rows = defined $file ? split(/^/m, $file) : ();

        for my $row (@rows) {
            chomp $row;

            next unless $row =~ m/^[A-Za-z|0-9\-]+\s+NX/;

            $row =~ s/\s{2,}/\t/g;
            my @field = split(/\t/, $row);

            $table{$field[1]} = {
                gene_name      => $field[0],
                position       => $field[2],
                start_position => $field[3],
                stop_position  => $field[4],
                existence      => $field[5],
                proteomics     => $field[6],
                antibody       => $field[7],
                has_3d         => $field[8],
                disease        => $field[9],
                isoforms       => $field[10],
                variants       => $field[11],
                ptms           => $field[12],
                description    => $field[13],
            };
        }
    }

    # Call as a method so the reset acts on this object; the former
    # "&reset_params()" passed no invocant at all.
    $self->reset_params();

    return %table;
}
635
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
# Clear the per-request search state (_query and _filter) of the object.
# Other fields, such as _format and _chromosome, are left untouched.
#
# Declared without a prototype: the sub takes the object as its first
# argument, which an empty "()" prototype contradicted.
#
# When called without an invocant (as the "&reset_params()" form does)
# there is nothing to reset, so it returns immediately.
sub reset_params {
    my ($self) = @_;

    return unless defined $self;

    $self->{_query}  = undef;
    $self->{_filter} = undef;

    return;
}
643
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
1; |