line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package HackaMol::Roles::RcsbRole; |
2
|
|
|
|
|
|
|
$HackaMol::Roles::RcsbRole::VERSION = '0.051'; |
3
|
|
|
|
|
|
|
# ABSTRACT: Read files with molecular information |
4
|
11
|
|
|
11
|
|
7303
|
use Moose::Role; |
|
11
|
|
|
|
|
39
|
|
|
11
|
|
|
|
|
100
|
|
5
|
11
|
|
|
11
|
|
64676
|
use MooseX::Types::Path::Tiny qw/Path Paths AbsPath AbsPaths/; |
|
11
|
|
|
|
|
30
|
|
|
11
|
|
|
|
|
172
|
|
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
has 'sync_overwrite' => ( |
8
|
|
|
|
|
|
|
is => 'ro', |
9
|
|
|
|
|
|
|
isa => 'Bool', |
10
|
|
|
|
|
|
|
default => 0, |
11
|
|
|
|
|
|
|
lazy => 1, |
12
|
|
|
|
|
|
|
); |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
has 'local_pdb_path' => ( |
15
|
|
|
|
|
|
|
is => 'ro', |
16
|
|
|
|
|
|
|
isa => Path, |
17
|
|
|
|
|
|
|
coerce => 1, |
18
|
|
|
|
|
|
|
default => '~/myPDB/pdb', |
19
|
|
|
|
|
|
|
lazy => 1, |
20
|
|
|
|
|
|
|
); |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
has 'local_cif_path' => ( |
23
|
|
|
|
|
|
|
is => 'ro', |
24
|
|
|
|
|
|
|
isa => Path, |
25
|
|
|
|
|
|
|
coerce => 1, |
26
|
|
|
|
|
|
|
default => '~/myPDB/cif', |
27
|
|
|
|
|
|
|
lazy => 1, |
28
|
|
|
|
|
|
|
); |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
has 'rcsb_rest_addr' => ( |
31
|
|
|
|
|
|
|
is => 'ro', |
32
|
|
|
|
|
|
|
isa => 'Str', |
33
|
|
|
|
|
|
|
default => 'http://www.rcsb.org/pdb/rest/', |
34
|
|
|
|
|
|
|
lazy => 1, |
35
|
|
|
|
|
|
|
); |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
has 'rcsb_ftp_addr' => ( |
38
|
|
|
|
|
|
|
is => 'ro', |
39
|
|
|
|
|
|
|
isa => 'Str', |
40
|
|
|
|
|
|
|
default => 'ftp.rcsb.org', |
41
|
|
|
|
|
|
|
lazy => 1, |
42
|
|
|
|
|
|
|
); |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
has 'ftp_user' => ( |
45
|
|
|
|
|
|
|
is => 'ro', |
46
|
|
|
|
|
|
|
isa => 'Str', |
47
|
|
|
|
|
|
|
default => 'anonymous', |
48
|
|
|
|
|
|
|
lazy => 1, |
49
|
|
|
|
|
|
|
); |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
has 'ftp_password' => ( |
52
|
|
|
|
|
|
|
is => 'ro', |
53
|
|
|
|
|
|
|
isa => 'Str', |
54
|
|
|
|
|
|
|
default => 'anonymous', |
55
|
|
|
|
|
|
|
lazy => 1, |
56
|
|
|
|
|
|
|
); |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
sub pdbid_local_path { |
59
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
60
|
0
|
|
|
|
|
|
my $pdbid = lc( shift ); |
61
|
|
|
|
|
|
|
|
62
|
0
|
0
|
|
|
|
|
die "Invocation: ->pdb_id_local_path(pdbid,[cif|pdb]?)" |
63
|
|
|
|
|
|
|
unless length($pdbid) == 4; |
64
|
|
|
|
|
|
|
|
65
|
0
|
|
0
|
|
|
|
my $type = shift || 'cif'; |
66
|
|
|
|
|
|
|
|
67
|
0
|
|
|
|
|
|
my $path_method = "local_${type}_path"; |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
# may or may not exist |
70
|
0
|
|
|
|
|
|
my $path = $self->$path_method->child( |
71
|
|
|
|
|
|
|
substr( $pdbid, 1, 2 ) . "/$pdbid.$type" ); |
72
|
|
|
|
|
|
|
|
73
|
0
|
|
|
|
|
|
return $path; |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
} |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
|
78
|
0
|
|
|
0
|
0
|
|
sub local_pdbs { shift->_local_cifs_pdbs('pdb') } |
79
|
0
|
|
|
0
|
0
|
|
sub local_cifs { shift->_local_cifs_pdbs('cif') } |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
sub _local_cifs_pdbs { |
82
|
0
|
|
|
0
|
|
|
my $self = shift; |
83
|
0
|
|
|
|
|
|
my $type = shift; |
84
|
0
|
|
|
|
|
|
my $path_method = "local_${type}_path"; |
85
|
0
|
|
|
|
|
|
my @files = map { $_->children(qr/\.$type$/) } |
86
|
0
|
|
|
|
|
|
grep { $_->is_dir } $self->$path_method->children; |
|
0
|
|
|
|
|
|
|
87
|
0
|
|
|
|
|
|
return @files; |
88
|
|
|
|
|
|
|
} |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
sub rcsb_sync_local { |
92
|
0
|
|
|
0
|
0
|
|
my $self = shift; |
93
|
0
|
|
|
|
|
|
my $type = shift; |
94
|
0
|
0
|
|
|
|
|
die "Invocation-> rcsb_sync_local('cif|pdb')" unless $type =~ /(?:cif|pdb)/; |
95
|
0
|
|
|
|
|
|
my @pdbids = map{ lc($_) } @_; |
|
0
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
|
97
|
0
|
|
|
|
|
|
my $local_types = "local_${type}s"; |
98
|
0
|
|
|
|
|
|
my @local_pdbids = map{ $_->basename(qr/\.$type$/)} $self->$local_types; |
|
0
|
|
|
|
|
|
|
99
|
0
|
|
|
|
|
|
my %seen = map {$_ => 1} @local_pdbids; |
|
0
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
101
|
0
|
0
|
|
|
|
|
unless ($self->sync_overwrite){ |
102
|
0
|
|
|
|
|
|
my $count = @pdbids; |
103
|
0
|
|
|
|
|
|
@pdbids = grep {! exists($seen{$_})} @pdbids; |
|
0
|
|
|
|
|
|
|
104
|
0
|
0
|
|
|
|
|
if ($count != @pdbids){ |
105
|
0
|
|
|
|
|
|
my $local_path = "local_${type}_path"; |
106
|
0
|
|
|
|
|
|
warn "ignoring @{[$count - @pdbids]} files contained in @{[$self->$local_path]}\n"; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
} |
108
|
|
|
|
|
|
|
} |
109
|
|
|
|
|
|
|
|
110
|
0
|
0
|
|
|
|
|
return ([],[]) unless @pdbids; |
111
|
0
|
|
|
|
|
|
print "syncing @{[scalar @pdbids]} $type files\n"; |
|
0
|
|
|
|
|
|
|
112
|
0
|
|
|
|
|
|
my ($synced_pdbids,$missed_pdbids) = $self->rcsb_ftp_fetch($type, \@pdbids ); |
113
|
0
|
|
|
|
|
|
return ($synced_pdbids,$missed_pdbids); |
114
|
|
|
|
|
|
|
} |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
sub rcsb_ftp_fetch { |
117
|
|
|
|
|
|
|
|
118
|
0
|
|
|
0
|
0
|
|
require IO::Uncompress::Gunzip; |
119
|
0
|
|
|
|
|
|
my $self = shift; |
120
|
0
|
|
|
|
|
|
my $type = shift; |
121
|
0
|
0
|
|
|
|
|
die "Invocation-> rcsb_sync_local('cif|pdb')" unless $type =~ /(?:cif|pdb)/; |
122
|
0
|
|
|
|
|
|
my $pdbids = shift; |
123
|
0
|
|
|
|
|
|
my $parent_path = shift; |
124
|
0
|
|
|
|
|
|
my $local_path = "local_${type}_path"; |
125
|
0
|
0
|
|
|
|
|
$parent_path = $self->$local_path unless $parent_path; |
126
|
|
|
|
|
|
|
|
127
|
0
|
0
|
|
|
|
|
my $cwd_base = $type eq 'cif' ? 'mmCIF' : 'pdb'; |
128
|
0
|
|
|
|
|
|
my $ftp = $self->ftp_connect("/pub/pdb/data/structures/divided/$cwd_base"); |
129
|
|
|
|
|
|
|
|
130
|
0
|
|
|
|
|
|
my @pdbids = map { lc($_) } @$pdbids; |
|
0
|
|
|
|
|
|
|
131
|
0
|
|
|
|
|
|
my @fetched; |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
my @missed; |
134
|
0
|
|
|
|
|
|
foreach my $pdbid (@pdbids) { |
135
|
0
|
|
|
|
|
|
print "fetching $pdbid\n"; |
136
|
0
|
|
|
|
|
|
my $gz = "$pdbid.cif.gz"; |
137
|
0
|
|
|
|
|
|
my $subdir = substr( $pdbid, 1, 2 ); |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
$ftp->get("$subdir/$gz") |
140
|
0
|
0
|
|
|
|
|
or do { |
141
|
0
|
|
|
|
|
|
print "unable to fetch $pdbid\n"; |
142
|
0
|
|
|
|
|
|
print $ftp->message; |
143
|
0
|
0
|
|
|
|
|
die "ftp download problems" if ($ftp->message =~ /load was .+ when you connected/); |
144
|
0
|
|
|
|
|
|
push @missed,$pdbid; |
145
|
|
|
|
|
|
|
next |
146
|
0
|
|
|
|
|
|
}; |
147
|
|
|
|
|
|
|
|
148
|
0
|
|
|
|
|
|
my $dest_par = $parent_path->child("$subdir"); |
149
|
0
|
0
|
|
|
|
|
$dest_par->mkpath unless $dest_par->exists; |
150
|
0
|
|
|
|
|
|
my $dest = $dest_par->child("$pdbid.$type"); |
151
|
0
|
0
|
|
|
|
|
IO::Uncompress::Gunzip::gunzip( $gz => $dest->stringify ) |
152
|
|
|
|
|
|
|
or die "unable to gunzip $gz"; |
153
|
0
|
|
|
|
|
|
unlink $gz; |
154
|
0
|
|
|
|
|
|
push @fetched, $pdbid; |
155
|
|
|
|
|
|
|
} |
156
|
|
|
|
|
|
|
|
157
|
0
|
|
|
|
|
|
$ftp->quit; |
158
|
0
|
|
|
|
|
|
return (\@fetched,\@missed); |
159
|
|
|
|
|
|
|
} |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
sub ftp_connect { |
163
|
|
|
|
|
|
|
# connects to FTP via rcsb_ftp_addr and sets working directory to path if passed |
164
|
0
|
|
|
0
|
0
|
|
require Net::FTP; |
165
|
|
|
|
|
|
|
|
166
|
0
|
|
|
|
|
|
my $self = shift; |
167
|
0
|
|
|
|
|
|
my $path = shift; |
168
|
0
|
|
|
|
|
|
my $host = $self->rcsb_ftp_addr; |
169
|
0
|
|
|
|
|
|
my $user = $self->ftp_user; |
170
|
0
|
|
|
|
|
|
my $pass = $self->ftp_password; |
171
|
|
|
|
|
|
|
|
172
|
0
|
|
|
|
|
|
my $ftp = Net::FTP->new($host); |
173
|
0
|
0
|
|
|
|
|
$ftp->login( $user, $pass ) or die "cannot login to rcsb ftp addr"; |
174
|
0
|
0
|
|
|
|
|
if($path){ |
175
|
0
|
0
|
|
|
|
|
$ftp->cwd($path) or die "cannont cwd to $path"; |
176
|
|
|
|
|
|
|
} |
177
|
0
|
|
|
|
|
|
$ftp->binary(); |
178
|
0
|
|
|
|
|
|
return $ftp; |
179
|
|
|
|
|
|
|
} |
180
|
|
|
|
|
|
|
|
181
|
11
|
|
|
11
|
|
55395
|
no Moose::Role; |
|
11
|
|
|
|
|
43
|
|
|
11
|
|
|
|
|
61
|
|
182
|
|
|
|
|
|
|
1; |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
__END__ |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
=pod |
187
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
=head1 NAME |
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
HackaMol::Roles::RcsbRole - Read files with molecular information |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
=head1 VERSION |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
version 0.051 |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
=head1 AUTHOR |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
Demian Riccardi <demianriccardi@gmail.com> |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
This software is copyright (c) 2017 by Demian Riccardi. |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
205
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
=cut |