| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package NCBIx::Geo::Base; |
|
2
|
1
|
|
|
1
|
|
3360
|
use Class::Std; |
|
|
1
|
|
|
|
|
32380
|
|
|
|
1
|
|
|
|
|
10
|
|
|
3
|
1
|
|
|
1
|
|
9285
|
use Class::Std::Utils; |
|
|
1
|
|
|
|
|
4170
|
|
|
|
1
|
|
|
|
|
6
|
|
|
4
|
1
|
|
|
1
|
|
7837
|
use LWP::Simple; |
|
|
1
|
|
|
|
|
193306
|
|
|
|
1
|
|
|
|
|
10
|
|
|
5
|
1
|
|
|
1
|
|
8647
|
use Data::Dumper; |
|
|
1
|
|
|
|
|
7302
|
|
|
|
1
|
|
|
|
|
113
|
|
|
6
|
|
|
|
|
|
|
|
|
7
|
1
|
|
|
1
|
|
11
|
use warnings; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
37
|
|
|
8
|
1
|
|
|
1
|
|
5
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
36
|
|
|
9
|
1
|
|
|
1
|
|
6
|
use Carp; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
60
|
|
|
10
|
|
|
|
|
|
|
|
|
11
|
1
|
|
|
1
|
|
6
|
use version; our $VERSION = qv('1.0.0'); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
8
|
|
|
12
|
|
|
|
|
|
|
|
|
13
|
1
|
|
|
1
|
|
87
|
use constant GEO_FTP_URL => 'ftp://ftp.ncbi.nih.gov/pub/geo/DATA/supplementary/samples/'; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
57
|
|
|
14
|
1
|
|
|
1
|
|
6
|
use constant GEO_ACC_URL => 'http://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?view=data&db=GeoDb_blob01&acc='; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
1107
|
|
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
{ |
|
17
|
|
|
|
|
|
|
my %debug_of :ATTR( :get :set :default<'0'> :init_arg ); |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
sub START { |
|
20
|
0
|
|
|
0
|
0
|
|
my ($self, $ident, $arg_ref) = @_; |
|
21
|
|
|
|
|
|
|
|
|
22
|
0
|
0
|
|
|
|
|
if ( defined $arg_ref->{debug} ) { $self->set_debug( $arg_ref->{debug} ); } |
|
|
0
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
|
|
24
|
0
|
|
|
|
|
|
return; |
|
25
|
|
|
|
|
|
|
} |
|
26
|
|
|
|
|
|
|
|
|
27
|
0
|
|
|
0
|
0
|
|
sub debug { my ( $self ) = @_; return $self->get_debug(); } |
|
|
0
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
sub get_accn_type { |
|
30
|
0
|
|
|
0
|
0
|
|
my ( $self, $arg_ref ) = @_; |
|
31
|
|
|
|
|
|
|
|
|
32
|
0
|
0
|
|
|
|
|
my $accn = defined $arg_ref->{accn} ? $arg_ref->{accn} : ''; |
|
33
|
0
|
|
|
|
|
|
$accn =~ m/^(\w{3})/; |
|
34
|
0
|
|
|
|
|
|
return $1; |
|
35
|
|
|
|
|
|
|
} |
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
sub get_sample_data { |
|
38
|
0
|
|
|
0
|
0
|
|
my ( $self, $arg_ref ) = @_; |
|
39
|
0
|
0
|
|
|
|
|
my $sample_accn = defined $arg_ref->{accn} ? $arg_ref->{accn} : ''; |
|
40
|
0
|
0
|
|
|
|
|
my @file_exts = defined $arg_ref->{exts} ? @{ $arg_ref->{exts} } : (); |
|
|
0
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
# Calculate the NCBI GSM directory |
|
43
|
0
|
|
|
|
|
|
my $gsm_dir = $sample_accn; |
|
44
|
0
|
|
|
|
|
|
$gsm_dir =~ s/.{3}$/nnn/; |
|
45
|
0
|
|
|
|
|
|
my $gsm_url = GEO_FTP_URL . "$gsm_dir/$sample_accn/"; |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
# Get the raw data |
|
48
|
0
|
|
|
|
|
|
foreach my $ext ( @file_exts ) { |
|
49
|
0
|
0
|
|
|
|
|
if ( $ext eq 'TXT' ) { $ext = 'txt'; } |
|
|
0
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
# Calculate file_name and file_path_name |
|
52
|
0
|
|
|
|
|
|
my $file = $sample_accn . '.' . $ext . '.gz'; |
|
53
|
0
|
|
|
|
|
|
my $path_name = $self->get_data_dir() . $file; |
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
# Download if it doesn't exist |
|
56
|
0
|
0
|
|
|
|
|
if (! -s $path_name ) { |
|
57
|
0
|
|
|
|
|
|
my $url = $gsm_url . $file; |
|
58
|
0
|
|
|
|
|
|
$self->_debug( "DOWNLOAD: $url" ); |
|
59
|
0
|
|
|
|
|
|
my $result = get( $url ); |
|
60
|
0
|
|
|
|
|
|
$self->_debug( "SAVE FILE: $path_name" ); |
|
61
|
0
|
|
|
|
|
|
$self->_set_file_text({ file => $path_name, text => $result }); |
|
62
|
|
|
|
|
|
|
} else { |
|
63
|
0
|
|
|
|
|
|
$self->_debug( "FILE EXISTS: $path_name" ); |
|
64
|
|
|
|
|
|
|
} |
|
65
|
|
|
|
|
|
|
} |
|
66
|
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
# Calculate call data file_name and file_path_name |
|
68
|
0
|
|
|
|
|
|
my $file = $sample_accn . '.html'; |
|
69
|
0
|
|
|
|
|
|
my $path_name = $self->get_data_dir() . $file; |
|
70
|
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
# Download call data if it doesn't exist |
|
72
|
0
|
0
|
|
|
|
|
if (! -s $path_name ) { |
|
73
|
0
|
|
|
|
|
|
my $acc_url = GEO_ACC_URL . $sample_accn; |
|
74
|
0
|
|
|
|
|
|
$self->_debug( "DOWNLOAD: $acc_url" ); |
|
75
|
0
|
|
|
|
|
|
my $result = get( $acc_url ); |
|
76
|
0
|
|
|
|
|
|
$self->_debug( "SAVE FILE: $path_name" ); |
|
77
|
0
|
|
|
|
|
|
$self->_set_file_text({ file => $path_name, text => $result }); |
|
78
|
|
|
|
|
|
|
} else { |
|
79
|
0
|
|
|
|
|
|
$self->_debug( "FILE EXISTS: $path_name" ); |
|
80
|
|
|
|
|
|
|
} |
|
81
|
|
|
|
|
|
|
} |
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
sub _set_file_text { |
|
84
|
0
|
|
|
0
|
|
|
my ( $self, $arg_ref ) = @_; |
|
85
|
0
|
0
|
|
|
|
|
my $file = defined $arg_ref->{file} ? $arg_ref->{file} : ''; |
|
86
|
0
|
0
|
|
|
|
|
my $text = defined $arg_ref->{text} ? $arg_ref->{text} : ''; |
|
87
|
|
|
|
|
|
|
|
|
88
|
0
|
0
|
|
|
|
|
if ( $file ) { |
|
89
|
0
|
|
|
|
|
|
open( OUTFILE, '>', $file ); |
|
90
|
0
|
|
|
|
|
|
print OUTFILE $text; |
|
91
|
0
|
|
|
|
|
|
close( OUTFILE ); |
|
92
|
|
|
|
|
|
|
} |
|
93
|
|
|
|
|
|
|
|
|
94
|
0
|
|
|
|
|
|
return; |
|
95
|
|
|
|
|
|
|
} |
|
96
|
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
sub _get_file_text { |
|
98
|
0
|
|
|
0
|
|
|
my ( $self, $arg_ref ) = @_; |
|
99
|
0
|
0
|
|
|
|
|
my $file = defined $arg_ref->{file} ? $arg_ref->{file} : ''; |
|
100
|
0
|
|
|
|
|
|
$self->_debug( "INFILE: $file" ); |
|
101
|
0
|
|
|
|
|
|
my $lines; |
|
102
|
|
|
|
|
|
|
|
|
103
|
0
|
0
|
|
|
|
|
if ( $file ) { |
|
104
|
0
|
|
|
|
|
|
open( INFILE, '<', $file ); |
|
105
|
0
|
|
|
|
|
|
while ( my $line = ) { $lines .= $line; } |
|
|
0
|
|
|
|
|
|
|
|
106
|
0
|
|
|
|
|
|
close( INFILE ); |
|
107
|
|
|
|
|
|
|
} |
|
108
|
|
|
|
|
|
|
|
|
109
|
0
|
|
|
|
|
|
return $lines; |
|
110
|
|
|
|
|
|
|
} |
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
sub _get_url { |
|
113
|
0
|
|
|
0
|
|
|
my ( $self, $url ) = @_; |
|
114
|
0
|
|
|
|
|
|
my $response; |
|
115
|
0
|
|
|
|
|
|
eval { $response = get( $url ); }; |
|
|
0
|
|
|
|
|
|
|
|
116
|
0
|
0
|
|
|
|
|
if( $@ ) { |
|
117
|
0
|
|
|
|
|
|
$self->warn("Can't query website: $@"); |
|
118
|
0
|
|
|
|
|
|
return; |
|
119
|
|
|
|
|
|
|
} |
|
120
|
0
|
|
|
|
|
|
$self->debug( "resp is $response\n"); |
|
121
|
0
|
|
|
|
|
|
return $response; |
|
122
|
|
|
|
|
|
|
} |
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
sub _debug { |
|
125
|
0
|
|
|
0
|
|
|
my ( $self, $message ) = @_; |
|
126
|
0
|
0
|
|
|
|
|
if ( $self->debug() ) { print( $message . "\n" ); } |
|
|
0
|
|
|
|
|
|
|
|
127
|
0
|
|
|
|
|
|
return $self; |
|
128
|
|
|
|
|
|
|
} |
|
129
|
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
sub __exception { |
|
131
|
0
|
|
|
0
|
|
|
my ( $data ) = @_; |
|
132
|
0
|
|
|
|
|
|
print "\n#######################################\n"; |
|
133
|
0
|
|
|
|
|
|
print "# Exception #\n"; |
|
134
|
0
|
|
|
|
|
|
print "#######################################\n"; |
|
135
|
0
|
|
|
|
|
|
print Dumper( $data ); |
|
136
|
0
|
|
|
|
|
|
print "#######################################\n\n"; |
|
137
|
|
|
|
|
|
|
} |
|
138
|
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
} |
|
140
|
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
1; # Magic true value required at end of module |
|
142
|
|
|
|
|
|
|
__END__ |