line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package NCBIx::Geo; |
2
|
1
|
|
|
1
|
|
61687
|
use base qw(NCBIx::Geo::Base); |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
649
|
|
3
|
1
|
|
|
1
|
|
653
|
use NCBIx::Geo::Sample; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
28
|
|
4
|
1
|
|
|
1
|
|
648
|
use NCBIx::Geo::Item; |
|
1
|
|
|
|
|
4
|
|
|
1
|
|
|
|
|
42
|
|
5
|
1
|
|
|
1
|
|
9
|
use LWP::Simple; |
|
1
|
|
|
|
|
10
|
|
|
1
|
|
|
|
|
10
|
|
6
|
1
|
|
|
1
|
|
954
|
use XML::Simple; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
use warnings; |
9
|
|
|
|
|
|
|
use strict; |
10
|
|
|
|
|
|
|
use Carp; |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
use version; our $VERSION = qv('1.0.0'); |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
# Base URL of the NCBI Entrez E-utilities; get_doc() appends the script name
# (esearch.fcgi / esummary.fcgi) and the query string to it.
use constant UTILS_URL => 'http://www.ncbi.nlm.nih.gov/entrez/eutils/';

# Accession type prefixes, kept as the keys of a lookup hash.
our $accn_types = {
    GDS => 1,
    GPL => 1,
    GSE => 1,
    GSM => 1,
};

# Entrez database name interpolated into the E-utilities query strings.
our $db = 'gds';

# Directory START() falls back to when no data_dir is supplied.
our $default_dir = '/tmp/geo/';
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
{ |
21
|
|
|
|
|
|
|
# Per-object attribute storage, declared with Class::Std-style :ATTR markers.
# Each declaration requests a getter, a setter, a default and a constructor
# argument.  The names in angle brackets are the ones the rest of this file
# relies on (get_query(), set_data_dir(), get_data_sets_count(), ...).
# NOTE(review): "geo_item" is not referenced in this file; its name is inferred
# from the hash name by the same pattern - confirm against the upstream module.

# The query (accession) and its type prefix, the current item, and the cache dir.
my %query_of           :ATTR( :get<query>           :set<query>           :default<''> :init_arg<query>           );
my %query_type_of      :ATTR( :get<query_type>      :set<query_type>      :default<''> :init_arg<query_type>      );
my %geo_item_of        :ATTR( :get<geo_item>        :set<geo_item>        :default<''> :init_arg<geo_item>        );
my %data_dir_of        :ATTR( :get<data_dir>        :set<data_dir>        :default<''> :init_arg<data_dir>        );

# Item objects collected by add_item(), one array reference per entry type.
my %platforms_of       :ATTR( :get<platforms>       :set<platforms>       :default<[]> :init_arg<platforms>       );
my %data_sets_of       :ATTR( :get<data_sets>       :set<data_sets>       :default<[]> :init_arg<data_sets>       );
my %series_of          :ATTR( :get<series>          :set<series>          :default<[]> :init_arg<series>          );
my %samples_of         :ATTR( :get<samples>         :set<samples>         :default<[]> :init_arg<samples>         );

# Element counts for the lists above; add_item() keeps them in step.
my %platforms_count_of :ATTR( :get<platforms_count> :set<platforms_count> :default<''> :init_arg<platforms_count> );
my %data_sets_count_of :ATTR( :get<data_sets_count> :set<data_sets_count> :default<''> :init_arg<data_sets_count> );
my %series_count_of    :ATTR( :get<series_count>    :set<series_count>    :default<''> :init_arg<series_count>    );
my %samples_count_of   :ATTR( :get<samples_count>   :set<samples_count>   :default<''> :init_arg<samples_count>   );
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
# Post-construction set-up: normalise data_dir, create it when missing, and
# optionally load metadata (and sample data) for a supplied accession.
# NOTE(review): the ($self, $ident, $arg_ref) signature suggests this is the
# Class::Std START hook - confirm against NCBIx::Geo::Base.
#
# Arguments:
#   $self    - the object
#   $ident   - object identifier (unused here)
#   $arg_ref - hash reference of constructor arguments; {accn} and {data} are read
# Returns: nothing.
sub START {
    my ( $self, $ident, $arg_ref ) = @_;

    # Use the default directory, or make sure a caller-supplied one ends in a
    # slash, because get_doc() builds file names by plain concatenation.
    # (The original test "! $data_dir =~ m#/$#" negated the string before
    # matching, so the slash was never appended.)
    my $data_dir = $self->get_data_dir();
    if ( $data_dir eq '' ) {
        $self->set_data_dir( $default_dir );
    }
    elsif ( $data_dir !~ m{/\z} ) {
        $self->set_data_dir( $data_dir . '/' );
    }

    # Create the directory if it does not exist yet.  -d is the right test:
    # -s reports a size, which says nothing reliable about a directory.
    $data_dir = $self->get_data_dir();
    if ( !-d $data_dir ) {
        mkdir $data_dir
            or carp "Unable to create data_dir '$data_dir': $!";
    }

    # Load metadata when an accession was given, and sample data on request.
    if ( defined $arg_ref->{accn} ) {
        $self->meta( $arg_ref );

        if ( defined $arg_ref->{data} && $arg_ref->{data} == 1 ) {
            $self->data();
        }
    }

    return;
}
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
# Return a textual description of the current query, chosen by the query's
# accession type (GPL, GDS, GSE or GSM).  Yields undef for any other type.
sub desc {
    my ( $self ) = @_;

    # Accession type => name of the private method that renders it.
    my %describer_for = (
        GPL => '_describe_platform',
        GDS => '_describe_data_set',
        GSE => '_describe_series',
        GSM => '_describe_sample',
    );

    my $query_type = $self->get_query_type();
    my $method     = $describer_for{$query_type};

    my $text;
    if ( defined $method ) {
        $text = $self->$method();
    }

    return $text;
}
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
# Construct an NCBIx::Geo::Sample from $arg_ref.  This object's data_dir and
# debug setting are written into the caller's hash before it is passed on.
sub sample {
    my ( $self, $arg_ref ) = @_;

    $arg_ref->{data_dir} = $self->get_data_dir();
    $arg_ref->{debug}    = $self->debug();

    return NCBIx::Geo::Sample->new( $arg_ref );
}
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
# Load the metadata for one accession: record the query and its type, fetch
# (or reuse) the summary XML via get_doc(), and parse it with parse_doc().
#
# Arguments: $arg_ref - hash reference; {accn} is the accession ('' if absent).
# Returns:   $self.
sub meta {
    my ( $self, $arg_ref ) = @_;

    my $accession = '';
    if ( defined $arg_ref->{accn} ) {
        $accession = $arg_ref->{accn};
    }
    $self->set_query( $accession );

    my $type = $self->get_accn_type( $arg_ref );
    $self->set_query_type( $type );

    my $xml_file = $self->get_doc( { accn => $accession } );
    $self->parse_doc( { file => $xml_file } );

    return $self;
}
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
# Download sample data for every sample of every loaded series, but only when
# exactly one data set is loaded; otherwise emit a debug note and do nothing.
# Returns nothing.
sub data {
    my ( $self ) = @_;

    # Anything other than exactly one data set: report and stop.
    if ( $self->get_data_sets_count() != 1 ) {
        my $found = $self->get_data_sets_count();
        $self->_debug( "\nNote: Too many data_sets found ($found); no samples downloaded.\n Use a GDS, GSE, or GSM accession to download data." );
        return;
    }

    for my $series ( @{ $self->get_series() } ) {

        # Supplementary file types are whitespace-separated,
        # sample accessions are semicolon-separated.
        my @file_exts    = split( /\s/, $series->get_supp_file() );
        my @sample_accns = split( /;/,  $series->get_sample_ids() );

        $self->get_sample_data( { accn => $_, exts => \@file_exts } )
            for @sample_accns;
    }

    return;
}
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
# Compare the first sample in a list against each of the others and report,
# per pair, which transcript ids are called 'P' in one and 'A' in the other.
#
# Arguments: $arg_ref - hash reference; {list} is an array reference of sample
#            accessions, the first being the "left" sample.
# Returns:   a string with four lines per right-hand sample:
#              <<LEFT_ACCN / ids P-left,A-right / >>RIGHT_ACCN / ids A-left,P-right
#            (ids joined with ';'), or undef when there is nothing to compare.
sub diff {
    my ( $self, $arg_ref ) = @_;
    my @list = defined $arg_ref->{list} ? @{ $arg_ref->{list} } : ();
    $self->_debug( "DIFF: " . join( ', ', @list ) );

    my $results;

    # Without a left-hand accession there is nothing to load or compare.
    my $left_accn = shift( @list );
    if ( !defined $left_accn ) {
        return $results;
    }

    my $left_sample      = $self->sample( { accn => $left_accn } );
    my $left_transcripts = $left_sample->get_transcripts();
    my @ids              = split( ';', $left_sample->transcript_ids() );

    foreach my $right_accn ( @list ) {
        my ( @left_present, @right_present );

        my $right_sample      = $self->sample( { accn => $right_accn } );
        my $right_transcripts = $right_sample->get_transcripts();

        foreach my $transcript_id ( @ids ) {

            # Look each entry up in two steps: chaining ->{id}->{call} would
            # autovivify an empty entry in the sample's own transcript hash
            # whenever the id is missing on that side.
            my $left_entry  = $left_transcripts->{$transcript_id};
            my $right_entry = $right_transcripts->{$transcript_id};

            my $ltran = ( ref $left_entry eq 'HASH' && defined $left_entry->{call} )
                      ? $left_entry->{call}
                      : '';
            my $rtran = ( ref $right_entry eq 'HASH' && defined $right_entry->{call} )
                      ? $right_entry->{call}
                      : '';

            if ( $ltran eq 'P' && $rtran eq 'A' ) { push @left_present,  $transcript_id; }
            if ( $ltran eq 'A' && $rtran eq 'P' ) { push @right_present, $transcript_id; }
        }

        # Append this pair's section to the report.
        $results .= "<<$left_accn\n";
        $results .= join( ';', @left_present ) . "\n";
        $results .= ">>$right_accn\n";
        $results .= join( ';', @right_present ) . "\n";
    }

    return $results;
}
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
# Return the path of the cached summary XML for an accession, downloading it
# first when no non-empty cached copy exists.
#
# The download takes two requests: esearch (usehistory=y) to obtain a
# QueryKey/WebEnv pair, then esummary with that pair.  On any failure a
# warning is issued and the path is still returned, without writing a file.
#
# Arguments: $arg_ref - hash reference; {accn} is the accession ('' if absent).
# Returns:   the file path, <data_dir><accn>.xml.
sub get_doc {
    my ( $self, $arg_ref ) = @_;
    my $accn = defined $arg_ref->{accn} ? $arg_ref->{accn} : '';
    my $file = $self->get_data_dir() . $accn . '.xml';

    # A non-empty cached copy is reused as-is.
    return $file if -s $file;

    # NOTE(review): $accn goes into the URL unescaped - validate or
    # URI-escape it if accessions can come from untrusted input.
    my $query   = $accn . '[ACCN]';
    my $esearch = UTILS_URL . 'esearch.fcgi?';
    $esearch .= "db=$db&retmax=1&usehistory=y&term=";
    my $result = get( $esearch . $query );
    if ( !defined $result ) {
        carp "esearch request failed for '$accn'";
        return $file;
    }

    # Pull the hit count and history handles out of the esearch response.
    my ( $Count, $QueryKey, $WebEnv ) = $result
        =~ m{<Count>(\d+)</Count>.*<QueryKey>(\d+)</QueryKey>.*<WebEnv>([^<\s]+)</WebEnv>}s;
    if ( !defined $WebEnv ) {
        carp "esearch response for '$accn' had no Count/QueryKey/WebEnv";
        return $file;
    }
    $self->_debug( "FOUND: $Count, $QueryKey, $WebEnv" );

    my $esummary = UTILS_URL . 'esummary.fcgi?';
    $esummary .= "db=$db&query_key=$QueryKey&WebEnv=$WebEnv";
    $result = get( $esummary );
    if ( !defined $result ) {
        carp "esummary request failed for '$accn'";
        return $file;
    }

    # Cache the summary document.
    my $out_fh;
    if ( !open( $out_fh, '>', $file ) ) {
        carp "Cannot write '$file': $!";
        return $file;
    }
    print {$out_fh} $result;
    close( $out_fh ) or carp "Error closing '$file': $!";

    return $file;
}
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
# Parse a cached summary XML file into NCBIx::Geo::Item objects.
#
# Each DocSum becomes one Item.  Every child item of the DocSum is handed to
# the __parse_* handler registered for its Name, the finished Item is filed
# on this object through add_item(), and all Items are returned.
#
# Arguments: $arg_ref - hash reference; {file} is the XML path ('' if absent).
# Returns:   the list of Item objects created.
sub parse_doc {
    my ( $self, $arg_ref ) = @_;
    my $file = defined $arg_ref->{file} ? $arg_ref->{file} : '';
    $self->_debug( "PARSE EXISTING FILE: $file" );
    my $doc = XMLin( $file );
    my @items;

    # Item Name => handler called as handler( $item_obj, $attr ).
    my $triggers = {
        SeriesTitle   => \&__parse_SeriesTitle,
        n_samples     => \&__parse_n_samples,
        gdsType       => \&__parse_gdsType,
        ptechType     => \&__parse_ptechType,
        Samples       => \&__parse_Samples,
        Projects      => \&__parse_Projects,
        GSE           => \&__parse_GSE,
        summary       => \&__parse_summary,
        GSM_titles_L  => \&__parse_GSM_titles_L,
        PubMedIds     => \&__parse_PubMedIds,
        Relations     => \&__parse_Relations,
        GPL           => \&__parse_GPL,
        SSInfo        => \&__parse_SSInfo,
        suppFile      => \&__parse_suppFile,
        taxon         => \&__parse_taxon,
        GSM_L         => \&__parse_GSM_L,
        entryType     => \&__parse_entryType,
        valType       => \&__parse_valType,
        PDAT          => \&__parse_PDAT,
        PlatformTaxa  => \&__parse_PlatformTaxa,
        SamplesTaxa   => \&__parse_SamplesTaxa,
        GDS           => \&__parse_GDS,
        subsetInfo    => \&__parse_subsetInfo,
        title         => \&__parse_title,
        PlatformTitle => \&__parse_PlatformTitle,
    };

    # XML::Simple folds a lone child element into a hash instead of a
    # one-element array, so normalise both nesting levels to lists.
    my $doc_sums = ref $doc eq 'HASH' ? $doc->{DocSum} : undef;
    my @item_list = ref $doc_sums eq 'ARRAY' ? @{$doc_sums}
                  : ref $doc_sums eq 'HASH'  ? ( $doc_sums )
                  :                            ();
    my $count = @item_list;
    $self->_debug( "Found $count items." );

    foreach my $item ( @item_list ) {
        my $item_obj  = NCBIx::Geo::Item->new();
        my $attrs     = $item->{Item};
        my @attr_list = ref $attrs eq 'ARRAY' ? @{$attrs}
                      : ref $attrs eq 'HASH'  ? ( $attrs )
                      :                         ();

        foreach my $attr ( @attr_list ) {
            my $name    = $attr->{Name};
            my $handler = defined $name ? $triggers->{$name} : undef;

            # Skip item names with no registered handler rather than dying
            # on an undefined code reference.
            if ( !defined $handler ) {
                $self->_debug( 'PARSE SKIP: no handler for item '
                        . ( defined $name ? $name : '(unnamed)' ) );
                next;
            }
            $handler->( $item_obj, $attr );
        }

        my $item_type = $item_obj->get_entry_type();
        my $item_id   = '';
        if    ( $item_type eq 'GPL' ) { $item_id = $item_obj->get_gpl(); }
        elsif ( $item_type eq 'GDS' ) { $item_id = $item_obj->get_gds(); }
        elsif ( $item_type eq 'GSE' ) { $item_id = $item_obj->get_gse(); }
        $self->_debug( "PARSE ITEM: $item_type$item_id" );

        # The result was never used; the call is kept in case the method has
        # side effects on the item.
        scalar $item_obj->check_sample_count();

        # File the item under its entry type.
        $self->add_item( { $item_type => $item_obj } );

        push @items, $item_obj;
    }

    return @items;
}
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
# File one item object under its entry type.
#
# $arg_ref is a hash reference with the item stored under 'GPL', 'GDS', 'GSE'
# or 'GSM'.  The first of those keys (in that order) holding a true value
# decides which list is extended; the matching count attribute is updated as
# well.  Returns $self.
sub add_item {
    my ( $self, $arg_ref ) = @_;

    # Entry type key => attribute name, in priority order.
    my @kinds = (
        [ GPL => 'platforms' ],
        [ GDS => 'data_sets' ],
        [ GSE => 'series'    ],
        [ GSM => 'samples'   ],
    );

    KIND:
    for my $kind ( @kinds ) {
        my ( $key, $attr ) = @{$kind};

        my $candidate = defined $arg_ref->{$key} ? $arg_ref->{$key} : '';
        next KIND if !$candidate;

        my $getter  = "get_$attr";
        my $setter  = "set_$attr";
        my $counter = "set_${attr}_count";

        # Store a fresh list: the existing items followed by the new one.
        my @extended = ( @{ $self->$getter() }, $arg_ref->{$key} );
        $self->$setter( \@extended );
        $self->$counter( scalar( @extended ) );

        last KIND;
    }

    return $self;
}
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
# Describe the queried platform (GPL accession) as a multi-line text block.
#
# The numeric part of the query is matched against get_gpl() of each loaded
# platform; the first match is rendered.  Returns "\n<description>\n", which
# is "\n\n" when no loaded platform matches.
#
# Declared without the empty prototype the original carried: prototypes are
# ignored on method calls and would forbid passing $self in a plain call.
sub _describe_platform {
    my ( $self ) = @_;
    my $accn = $self->get_query();

    # Drop the three-letter type prefix to get the bare id.
    ( my $accn_id = $accn ) =~ s/^.{3}//;
    my $desc = '';

    my $platforms = $self->get_platforms();
    foreach my $platform ( @$platforms ) {
        next if $platform->get_gpl() ne $accn_id;

        my $title      = $platform->get_title();
        my $summary    = $platform->get_summary();
        my $taxon      = $platform->get_taxon();
        my $ptech_type = $platform->get_ptech_type();

        # Related ids are stored as semicolon-separated strings.
        my @gds            = split( ';', $platform->get_gds() );
        my $data_set_count = scalar( @gds );

        my @gse          = split( ';', $platform->get_gse() );
        my $series_count = scalar( @gse );

        $desc  = " Platform: $accn \n";
        $desc .= " Title: $title \n";
        $desc .= "Tech Type: $ptech_type \n";
        $desc .= " Taxon: $taxon \n";
        $desc .= " Datasets: $data_set_count \n";
        $desc .= " Series: $series_count \n";
        $desc .= " Summary: $summary \n";

        last;
    }

    return "\n$desc\n";
}
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
# Describe the queried data set (GDS accession) as a multi-line text block,
# including its sample list.
#
# The numeric part of the query is matched against get_gds() of each loaded
# data set; the first match is rendered.  A Platform line is added only when
# the data set names exactly one platform and a platform object is loaded.
# Returns "\n<description>\n" ("\n\n" when nothing matches).
#
# Declared without the empty prototype the original carried: prototypes are
# ignored on method calls and would forbid passing $self in a plain call.
sub _describe_data_set {
    my ( $self ) = @_;
    my $accn = $self->get_query();

    # Drop the three-letter type prefix to get the bare id.
    ( my $accn_id = $accn ) =~ s/^.{3}//;
    my $desc = '';

    my $data_sets = $self->get_data_sets();
    foreach my $data_set ( @$data_sets ) {
        next if $data_set->get_gds() ne $accn_id;

        my $title   = $data_set->get_title();
        my $summary = $data_set->get_summary();
        my $taxon   = $data_set->get_taxon();

        # Related ids and titles are stored as semicolon-separated strings.
        my @gse          = split( ';', $data_set->get_gse() );
        my $series_count = scalar( @gse );

        my @gsm_ids       = split( ';', $data_set->get_gsm_l() );
        my @gsm_titles    = split( ';', $data_set->get_gsm_titles_l() );
        my $samples_count = scalar( @gsm_ids );

        my @gpl            = split( ';', $data_set->get_gpl() );
        my $platform_count = scalar( @gpl );

        # Only dereference the first platform when one is actually loaded.
        my ( $gpl_accn, $gpl_title );
        my $first_platform = $self->get_platforms()->[0];
        if ( $platform_count == 1 && defined $first_platform ) {
            $gpl_accn  = 'GPL' . $first_platform->get_gpl();
            $gpl_title = $first_platform->get_title();
        }

        $desc  = " Data Set: $accn $title \n";
        $desc .= " Taxon: $taxon \n";
        if ( $gpl_accn ) { $desc .= " Platform: $gpl_accn $gpl_title \n"; }
        $desc .= "GSE Count: $series_count \n";
        $desc .= "GSM Count: $samples_count \n";
        $desc .= " Summary: $summary \n";

        # One line per sample; a missing title is rendered as empty text.
        my @samples;
        foreach my $i ( 0 .. $#gsm_ids ) {
            my $sample_title = defined $gsm_titles[$i] ? $gsm_titles[$i] : '';
            push @samples, "GSM$gsm_ids[$i] $sample_title";
        }
        $desc .= "\n Samples: " . join( "\n ", @samples ) . "\n";

        last;
    }

    return "\n$desc\n";
}
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
# Describe the queried series (GSE accession) as a multi-line text block,
# including its sample list.
#
# The numeric part of the query is matched against get_gse() of each loaded
# series; the first match is rendered.  Platform / Data Set lines are added
# only when the series names exactly one of them and the corresponding object
# is loaded.  Returns "\n<description>\n" ("\n\n" when nothing matches).
#
# Declared without the empty prototype the original carried: prototypes are
# ignored on method calls and would forbid passing $self in a plain call.
sub _describe_series {
    my ( $self ) = @_;
    my $accn = $self->get_query();

    # Drop the three-letter type prefix to get the bare id.
    ( my $accn_id = $accn ) =~ s/^.{3}//;
    my $desc = '';

    my $series_list = $self->get_series();
    foreach my $series ( @$series_list ) {
        next if $series->get_gse() ne $accn_id;

        my $title   = $series->get_title();
        my $summary = $series->get_summary();
        my $taxon   = $series->get_taxon();

        # Only dereference the first platform when one is actually loaded.
        my @gpl            = split( ';', $series->get_gpl() );
        my $platform_count = scalar( @gpl );
        my ( $gpl_accn, $gpl_title );
        my $first_platform = $self->get_platforms()->[0];
        if ( $platform_count == 1 && defined $first_platform ) {
            $gpl_accn  = 'GPL' . $first_platform->get_gpl();
            $gpl_title = $first_platform->get_title();
        }

        # Same guard for the first data set.
        my @gds            = split( ';', $series->get_gds() );
        my $data_set_count = scalar( @gds );
        my ( $gds_accn, $gds_title );
        my $first_data_set = $self->get_data_sets()->[0];
        if ( $data_set_count == 1 && defined $first_data_set ) {
            $gds_accn  = 'GDS' . $first_data_set->get_gds();
            $gds_title = $first_data_set->get_title();
        }

        my @gsm_ids       = split( ';', $series->get_gsm_l() );
        my @gsm_titles    = split( ';', $series->get_gsm_titles_l() );
        my $samples_count = scalar( @gsm_ids );

        $desc  = " Series: $accn $title \n";
        $desc .= " Taxon: $taxon \n";
        if ( $gpl_accn ) { $desc .= " Platform: $gpl_accn $gpl_title \n"; }
        if ( $gds_accn ) { $desc .= " Data Set: $gds_accn $gds_title \n"; }
        $desc .= "GDS Count: $data_set_count \n";
        $desc .= "GSM Count: $samples_count \n";
        $desc .= " Summary: $summary \n";

        # One line per sample; a missing title is rendered as empty text.
        my @samples;
        foreach my $i ( 0 .. $#gsm_ids ) {
            my $sample_title = defined $gsm_titles[$i] ? $gsm_titles[$i] : '';
            push @samples, "GSM$gsm_ids[$i] $sample_title";
        }
        $desc .= "\n Samples: " . join( "\n ", @samples ) . "\n";

        last;
    }

    return "\n$desc\n";
}
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
# Describe the queried sample (GSM accession) as a multi-line text block.
#
# The first loaded series whose semicolon-separated get_gsm_l() list contains
# the numeric part of the query is used for the series, taxon and summary
# lines.  Platform / Data Set lines are added only when such objects are
# loaded.  Returns "\n<description>\n" ("\n\n" when no series lists the sample).
#
# Declared without the empty prototype the original carried: prototypes are
# ignored on method calls and would forbid passing $self in a plain call.
sub _describe_sample {
    my ( $self ) = @_;
    my $gsm_accn = $self->get_query();

    # Drop the three-letter type prefix to get the bare id.
    ( my $accn_id = $gsm_accn ) =~ s/^.{3}//;
    my $desc = '';

    my $series_list = $self->get_series();
    foreach my $series ( @$series_list ) {
        my @gsm_ids    = split( ';', $series->get_gsm_l() );
        my @gsm_titles = split( ';', $series->get_gsm_titles_l() );

        # Compare whole ids.  A pattern match on the joined string would let
        # id "12" match a series that only lists "123", and an empty id would
        # silently reuse the last successful regex.
        my @hits = grep { $gsm_ids[$_] eq $accn_id } 0 .. $#gsm_ids;
        next if !@hits;

        # The last matching position wins, as in the original loop.
        my $gsm_title = $gsm_titles[ $hits[-1] ];
        $gsm_title = '' if !defined $gsm_title;

        my $gse_title = $series->get_title();
        my $gse_accn  = 'GSE' . $series->get_gse();
        my $summary   = $series->get_summary();
        my $taxon     = $series->get_taxon();

        # Only dereference the first platform / data set when one is loaded.
        my ( $gpl_accn, $gpl_title, $gds_accn, $gds_title );
        my $first_platform = $self->get_platforms()->[0];
        if ( defined $first_platform ) {
            $gpl_accn  = 'GPL' . $first_platform->get_gpl();
            $gpl_title = $first_platform->get_title();
        }
        my $first_data_set = $self->get_data_sets()->[0];
        if ( defined $first_data_set ) {
            $gds_accn  = 'GDS' . $first_data_set->get_gds();
            $gds_title = $first_data_set->get_title();
        }

        $desc  = " Sample: $gsm_accn $gsm_title \n";
        $desc .= " Taxon: $taxon \n";
        if ( $gpl_accn ) { $desc .= " Platform: $gpl_accn $gpl_title \n"; }
        if ( $gds_accn ) { $desc .= " Data Set: $gds_accn $gds_title \n"; }
        $desc .= " Series: $gse_accn $gse_title \n";
        $desc .= " Summary: $summary \n";

        last;
    }

    return "\n$desc\n";
}
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
# Handler for the "SeriesTitle" item: stores its string content on the item
# object through set_series_title().
sub __parse_SeriesTitle {
    my ( $item_obj, $data ) = @_;
    my $series_title = __parse_string( $data );
    return $item_obj->set_series_title( $series_title );
}
461
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
# Handler for the "n_samples" item: stores its integer content on the item
# object through set_n_samples().
sub __parse_n_samples {
    my ( $item_obj, $data ) = @_;
    my $sample_total = __parse_integer( $data );
    return $item_obj->set_n_samples( $sample_total );
}
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
# Handler for the "gdsType" item: stores its string content on the item
# object through set_gds_type().
sub __parse_gdsType {
    my ( $item_obj, $data ) = @_;
    my $gds_type = __parse_string( $data );
    return $item_obj->set_gds_type( $gds_type );
}
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
# Handler for the "ptechType" item: stores its string content on the item
# object through set_ptech_type().
sub __parse_ptechType {
    my ( $item_obj, $data ) = @_;
    my $ptech_type = __parse_string( $data );
    return $item_obj->set_ptech_type( $ptech_type );
}
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
# Handler for the "Samples" item: collect one { Accession, Title } hash per
# nested sample structure and store the list and its length on the item
# object through set_samples() / set_samples_count().
#
# $data->{Item} may be an array reference of sample structures or a single
# structure as a hash reference (how XML::Simple folds a lone child element),
# the same two shapes __parse_PubMedIds accepts.  Anything else, and any
# entry whose Type is not "Structure", is reported through __exception().
sub __parse_Samples {
    my ( $item_obj, $data ) = @_;
    my @values;

    if ( defined $data->{Item} ) {
        my $entries = $data->{Item};

        # Treat a lone sample as a one-element list.
        if ( ref $entries eq 'HASH' ) {
            $entries = [ $entries ];
        }

        if ( ref $entries eq 'ARRAY' ) {
            foreach my $entry ( @{$entries} ) {
                my $is_structure = ref $entry eq 'HASH'
                    && defined $entry->{Type}
                    && $entry->{Type} =~ m/Structure/;
                if ( !$is_structure ) {
                    __exception( $data );
                    next;
                }

                # The structure's own children may be folded the same way.
                my $fields     = $entry->{Item};
                my @field_list = ref $fields eq 'ARRAY' ? @{$fields}
                               : ref $fields eq 'HASH'  ? ( $fields )
                               :                          ();

                my %attr;
                foreach my $field ( @field_list ) {
                    next if ref $field ne 'HASH' || !defined $field->{Name};
                    $attr{ $field->{Name} } = $field->{content};
                }
                push @values, { Accession => $attr{Accession}, Title => $attr{Title} };
            }
        }
        else {
            __exception( $data );
        }
    }

    $item_obj->set_samples_count( scalar( @values ) );
    return $item_obj->set_samples( \@values );
}
503
|
|
|
|
|
|
|
|
504
|
|
|
|
|
|
|
# Handler for the "Projects" item.  The item's content is ignored: the
# projects attribute is always set to the empty string (no populated example
# has been seen yet).
sub __parse_Projects {
    my ( $item_obj, $data ) = @_;
    my $no_projects = '';
    return $item_obj->set_projects( $no_projects );
}
508
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
# Handler for the "GSE" item: stores its string content on the item object
# through set_gse().
sub __parse_GSE {
    my ( $item_obj, $data ) = @_;
    my $gse = __parse_string( $data );
    return $item_obj->set_gse( $gse );
}
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
# Handler for the "summary" item: stores its string content on the item
# object through set_summary().
sub __parse_summary {
    my ( $item_obj, $data ) = @_;
    my $summary = __parse_string( $data );
    return $item_obj->set_summary( $summary );
}
518
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
# Handler for the "GSM_titles_L" item: stores its string content on the item
# object through set_gsm_titles_l().
sub __parse_GSM_titles_L {
    my ( $item_obj, $data ) = @_;
    my $gsm_titles = __parse_string( $data );
    return $item_obj->set_gsm_titles_l( $gsm_titles );
}
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
# Handler for the "PubMedIds" item: gather the content of every nested item
# and store the values, joined with ';', through set_pubmed_ids().
#
# $data->{Item} may be an array reference of items or a single item as a hash
# reference; any other defined value is reported through __exception().
# Entries without content are skipped.
sub __parse_PubMedIds {
    my ( $item_obj, $data ) = @_;

    # Normalise the two accepted shapes into one list of entries.
    my @entries;
    my $nested = $data->{Item};
    if ( defined $nested ) {
        if    ( $nested =~ m/ARRAY/ ) { @entries = @{$nested}; }
        elsif ( $nested =~ m/HASH/ )  { @entries = ( $nested ); }
        else                          { __exception( $data ); }
    }

    my @pubmed_ids;
    for my $entry ( @entries ) {
        next if !defined $entry->{content};
        push @pubmed_ids, $entry->{content};
    }

    return $item_obj->set_pubmed_ids( join( ';', @pubmed_ids ) );
}
545
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
# Handler for the "Relations" item.  The item's content is ignored: the
# relations attribute is always set to the empty string (no populated example
# has been seen yet).
sub __parse_Relations {
    my ( $item_obj, $data ) = @_;
    my $no_relations = '';
    return $item_obj->set_relations( $no_relations );
}
550
|
|
|
|
|
|
|
|
551
|
|
|
|
|
|
|
# Handler for the "GPL" item: stores its string content on the item object
# through set_gpl().
sub __parse_GPL {
    my ( $item_obj, $data ) = @_;
    my $gpl = __parse_string( $data );
    return $item_obj->set_gpl( $gpl );
}
555
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
# Handler for the "SSInfo" item: stores its string content on the item object
# through set_ss_info().
sub __parse_SSInfo {
    my ( $item_obj, $data ) = @_;
    my $ss_info = __parse_string( $data );
    return $item_obj->set_ss_info( $ss_info );
}
560
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
# Handler for the "suppFile" item: stores its string content on the item
# object through set_supp_file().
sub __parse_suppFile {
    my ( $item_obj, $data ) = @_;
    my $supp_file = __parse_string( $data );
    return $item_obj->set_supp_file( $supp_file );
}
565
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
# Handler for the "taxon" item: stores its string content on the item object
# through set_taxon().
sub __parse_taxon {
    my ( $item_obj, $data ) = @_;
    my $taxon = __parse_string( $data );
    return $item_obj->set_taxon( $taxon );
}
570
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
# Handler for the "GSM_L" item: stores its string content on the item object
# through set_gsm_l().
sub __parse_GSM_L {
    my ( $item_obj, $data ) = @_;
    my $gsm_list = __parse_string( $data );
    return $item_obj->set_gsm_l( $gsm_list );
}
575
|
|
|
|
|
|
|
|
576
|
|
|
|
|
|
|
# Handler for the "entryType" item: stores its string content on the item
# object through set_entry_type().
sub __parse_entryType {
    my ( $item_obj, $data ) = @_;
    my $entry_type = __parse_string( $data );
    return $item_obj->set_entry_type( $entry_type );
}
580
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
# Handler for the "valType" item: stores its string content on the item
# object through set_val_type().
sub __parse_valType {
    my ( $item_obj, $data ) = @_;
    my $val_type = __parse_string( $data );
    return $item_obj->set_val_type( $val_type );
}
585
|
|
|
|
|
|
|
|
586
|
|
|
|
|
|
|
# Handler for the "PDAT" item: stores its string content on the item object
# through set_pdat().
sub __parse_PDAT {
    my ( $item_obj, $data ) = @_;
    my $pdat = __parse_string( $data );
    return $item_obj->set_pdat( $pdat );
}
590
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
# Handler for the "PlatformTaxa" item: stores its string content on the item
# object through set_platform_taxa().
sub __parse_PlatformTaxa {
    my ( $item_obj, $data ) = @_;
    my $platform_taxa = __parse_string( $data );
    return $item_obj->set_platform_taxa( $platform_taxa );
}
595
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
# Handler for the "SamplesTaxa" item: stores its string content on the item
# object through set_samples_taxa().
sub __parse_SamplesTaxa {
    my ( $item_obj, $data ) = @_;
    my $samples_taxa = __parse_string( $data );
    return $item_obj->set_samples_taxa( $samples_taxa );
}
600
|
|
|
|
|
|
|
|
601
|
|
|
|
|
|
|
# Handler for the "GDS" item: stores its string content on the item object
# through set_gds().
sub __parse_GDS {
    my ( $item_obj, $data ) = @_;
    my $gds = __parse_string( $data );
    return $item_obj->set_gds( $gds );
}
605
|
|
|
|
|
|
|
|
606
|
|
|
|
|
|
|
# Handler for the "subsetInfo" item: stores its string content on the item
# object through set_subset_info().
sub __parse_subsetInfo {
    my ( $item_obj, $data ) = @_;
    my $subset_info = __parse_string( $data );
    return $item_obj->set_subset_info( $subset_info );
}
610
|
|
|
|
|
|
|
|
611
|
|
|
|
|
|
|
# Handler for the "title" item: stores its string content on the item object
# through set_title().
sub __parse_title {
    my ( $item_obj, $data ) = @_;
    my $title = __parse_string( $data );
    return $item_obj->set_title( $title );
}
615
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
# Handler for the "PlatformTitle" item: stores its string content on the item
# object through set_platform_title().
sub __parse_PlatformTitle {
    my ( $item_obj, $data ) = @_;
    my $platform_title = __parse_string( $data );
    return $item_obj->set_platform_title( $platform_title );
}
620
|
|
|
|
|
|
|
|
621
|
|
|
|
|
|
|
# Extract the content of an item whose Type matches "String".
#
# Returns the item's content, or '' when the content is undefined.  An item
# of any other Type is reported through __exception() and also yields ''.
sub __parse_string {
    my ( $data ) = @_;

    # Wrong type: report it and fall back to the empty string.
    if ( $data->{Type} !~ m/String/ ) {
        __exception( $data );
        return '';
    }

    return defined $data->{content} ? $data->{content} : '';
}
633
|
|
|
|
|
|
|
|
634
|
|
|
|
|
|
|
# Extract the content of an item whose Type matches "Integer".
#
# Returns the item's content, or '' when the content is undefined.  An item
# of any other Type is reported through __exception() and also yields ''.
sub __parse_integer {
    my ( $data ) = @_;

    # Wrong type: report it and fall back to the empty string.
    if ( $data->{Type} !~ m/Integer/ ) {
        __exception( $data );
        return '';
    }

    return defined $data->{content} ? $data->{content} : '';
}
646
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
} |
648
|
|
|
|
|
|
|
|
649
|
|
|
|
|
|
|
1; # Magic true value required at end of module |
650
|
|
|
|
|
|
|
__END__ |