line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
$HackaMol::Roles::RcsbRole::VERSION = '0.052'; |
2
|
|
|
|
|
|
|
# ABSTRACT: Read files with molecular information |
3
|
|
|
|
|
|
|
use Moose::Role; |
4
|
11
|
|
|
11
|
|
6434
|
use MooseX::Types::Path::Tiny qw/Path Paths AbsPath AbsPaths/; |
|
11
|
|
|
|
|
29
|
|
|
11
|
|
|
|
|
76
|
|
5
|
11
|
|
|
11
|
|
53059
|
|
|
11
|
|
|
|
|
28
|
|
|
11
|
|
|
|
|
128
|
|
6
|
|
|
|
|
|
|
has 'sync_overwrite' => ( |
7
|
|
|
|
|
|
|
is => 'ro', |
8
|
|
|
|
|
|
|
isa => 'Bool', |
9
|
|
|
|
|
|
|
default => 0, |
10
|
|
|
|
|
|
|
lazy => 1, |
11
|
|
|
|
|
|
|
); |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
has 'local_pdb_path' => ( |
14
|
|
|
|
|
|
|
is => 'ro', |
15
|
|
|
|
|
|
|
isa => Path, |
16
|
|
|
|
|
|
|
coerce => 1, |
17
|
|
|
|
|
|
|
default => '~/myPDB/pdb', |
18
|
|
|
|
|
|
|
lazy => 1, |
19
|
|
|
|
|
|
|
); |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
has 'local_cif_path' => ( |
22
|
|
|
|
|
|
|
is => 'ro', |
23
|
|
|
|
|
|
|
isa => Path, |
24
|
|
|
|
|
|
|
coerce => 1, |
25
|
|
|
|
|
|
|
default => '~/myPDB/cif', |
26
|
|
|
|
|
|
|
lazy => 1, |
27
|
|
|
|
|
|
|
); |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
has 'rcsb_rest_addr' => ( |
30
|
|
|
|
|
|
|
is => 'ro', |
31
|
|
|
|
|
|
|
isa => 'Str', |
32
|
|
|
|
|
|
|
default => 'http://www.rcsb.org/pdb/rest/', |
33
|
|
|
|
|
|
|
lazy => 1, |
34
|
|
|
|
|
|
|
); |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
has 'rcsb_ftp_addr' => ( |
37
|
|
|
|
|
|
|
is => 'ro', |
38
|
|
|
|
|
|
|
isa => 'Str', |
39
|
|
|
|
|
|
|
default => 'ftp.rcsb.org', |
40
|
|
|
|
|
|
|
lazy => 1, |
41
|
|
|
|
|
|
|
); |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
has 'ftp_user' => ( |
44
|
|
|
|
|
|
|
is => 'ro', |
45
|
|
|
|
|
|
|
isa => 'Str', |
46
|
|
|
|
|
|
|
default => 'anonymous', |
47
|
|
|
|
|
|
|
lazy => 1, |
48
|
|
|
|
|
|
|
); |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
has 'ftp_password' => ( |
51
|
|
|
|
|
|
|
is => 'ro', |
52
|
|
|
|
|
|
|
isa => 'Str', |
53
|
|
|
|
|
|
|
default => 'anonymous', |
54
|
|
|
|
|
|
|
lazy => 1, |
55
|
|
|
|
|
|
|
); |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
my $self = shift; |
58
|
|
|
|
|
|
|
my $pdbid = lc( shift ); |
59
|
0
|
|
|
0
|
0
|
|
|
60
|
0
|
|
|
|
|
|
die "Invocation: ->pdb_id_local_path(pdbid,[cif|pdb]?)" |
61
|
|
|
|
|
|
|
unless length($pdbid) == 4; |
62
|
0
|
0
|
|
|
|
|
|
63
|
|
|
|
|
|
|
my $type = shift || 'cif'; |
64
|
|
|
|
|
|
|
|
65
|
0
|
|
0
|
|
|
|
my $path_method = "local_${type}_path"; |
66
|
|
|
|
|
|
|
|
67
|
0
|
|
|
|
|
|
# may or may not exist |
68
|
|
|
|
|
|
|
my $path = $self->$path_method->child( |
69
|
|
|
|
|
|
|
substr( $pdbid, 1, 2 ) . "/$pdbid.$type" ); |
70
|
0
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
return $path; |
72
|
|
|
|
|
|
|
|
73
|
0
|
|
|
|
|
|
} |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
my $self = shift; |
78
|
0
|
|
|
0
|
0
|
|
my $type = shift; |
79
|
0
|
|
|
0
|
0
|
|
my $path_method = "local_${type}_path"; |
80
|
|
|
|
|
|
|
my @files = map { $_->children(qr/\.$type$/) } |
81
|
|
|
|
|
|
|
grep { $_->is_dir } $self->$path_method->children; |
82
|
0
|
|
|
0
|
|
|
return @files; |
83
|
0
|
|
|
|
|
|
} |
84
|
0
|
|
|
|
|
|
|
85
|
0
|
|
|
|
|
|
|
86
|
0
|
|
|
|
|
|
my $self = shift; |
|
0
|
|
|
|
|
|
|
87
|
0
|
|
|
|
|
|
my $type = shift; |
88
|
|
|
|
|
|
|
die "Invocation-> rcsb_sync_local('cif|pdb')" unless $type =~ /(?:cif|pdb)/; |
89
|
|
|
|
|
|
|
my @pdbids = map{ lc($_) } @_; |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
my $local_types = "local_${type}s"; |
92
|
0
|
|
|
0
|
0
|
|
my @local_pdbids = map{ $_->basename(qr/\.$type$/)} $self->$local_types; |
93
|
0
|
|
|
|
|
|
my %seen = map {$_ => 1} @local_pdbids; |
94
|
0
|
0
|
|
|
|
|
|
95
|
0
|
|
|
|
|
|
unless ($self->sync_overwrite){ |
|
0
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
my $count = @pdbids; |
97
|
0
|
|
|
|
|
|
@pdbids = grep {! exists($seen{$_})} @pdbids; |
98
|
0
|
|
|
|
|
|
if ($count != @pdbids){ |
|
0
|
|
|
|
|
|
|
99
|
0
|
|
|
|
|
|
my $local_path = "local_${type}_path"; |
|
0
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
warn "ignoring @{[$count - @pdbids]} files contained in @{[$self->$local_path]}\n"; |
101
|
0
|
0
|
|
|
|
|
} |
102
|
0
|
|
|
|
|
|
} |
103
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
104
|
0
|
0
|
|
|
|
|
return ([],[]) unless @pdbids; |
105
|
0
|
|
|
|
|
|
print "syncing @{[scalar @pdbids]} $type files\n"; |
106
|
0
|
|
|
|
|
|
my ($synced_pdbids,$missed_pdbids) = $self->rcsb_ftp_fetch($type, \@pdbids ); |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
return ($synced_pdbids,$missed_pdbids); |
108
|
|
|
|
|
|
|
} |
109
|
|
|
|
|
|
|
|
110
|
0
|
0
|
|
|
|
|
|
111
|
0
|
|
|
|
|
|
require IO::Uncompress::Gunzip; |
|
0
|
|
|
|
|
|
|
112
|
0
|
|
|
|
|
|
my $self = shift; |
113
|
0
|
|
|
|
|
|
my $type = shift; |
114
|
|
|
|
|
|
|
die "Invocation-> rcsb_sync_local('cif|pdb')" unless $type =~ /(?:cif|pdb)/; |
115
|
|
|
|
|
|
|
my $pdbids = shift; |
116
|
|
|
|
|
|
|
my $parent_path = shift; |
117
|
|
|
|
|
|
|
my $local_path = "local_${type}_path"; |
118
|
0
|
|
|
0
|
0
|
|
$parent_path = $self->$local_path unless $parent_path; |
119
|
0
|
|
|
|
|
|
|
120
|
0
|
|
|
|
|
|
my $cwd_base = $type eq 'cif' ? 'mmCIF' : 'pdb'; |
121
|
0
|
0
|
|
|
|
|
my $ftp = $self->ftp_connect("/pub/pdb/data/structures/divided/$cwd_base"); |
122
|
0
|
|
|
|
|
|
|
123
|
0
|
|
|
|
|
|
my @pdbids = map { lc($_) } @$pdbids; |
124
|
0
|
|
|
|
|
|
my @fetched; |
125
|
0
|
0
|
|
|
|
|
|
126
|
|
|
|
|
|
|
my @missed; |
127
|
0
|
0
|
|
|
|
|
foreach my $pdbid (@pdbids) { |
128
|
0
|
|
|
|
|
|
print "fetching $pdbid\n"; |
129
|
|
|
|
|
|
|
my $gz = "$pdbid.cif.gz"; |
130
|
0
|
|
|
|
|
|
my $subdir = substr( $pdbid, 1, 2 ); |
|
0
|
|
|
|
|
|
|
131
|
0
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
$ftp->get("$subdir/$gz") |
133
|
|
|
|
|
|
|
or do { |
134
|
0
|
|
|
|
|
|
print "unable to fetch $pdbid\n"; |
135
|
0
|
|
|
|
|
|
print $ftp->message; |
136
|
0
|
|
|
|
|
|
die "ftp download problems" if ($ftp->message =~ /load was .+ when you connected/); |
137
|
0
|
|
|
|
|
|
push @missed,$pdbid; |
138
|
|
|
|
|
|
|
next |
139
|
|
|
|
|
|
|
}; |
140
|
0
|
0
|
|
|
|
|
|
141
|
0
|
|
|
|
|
|
my $dest_par = $parent_path->child("$subdir"); |
142
|
0
|
|
|
|
|
|
$dest_par->mkpath unless $dest_par->exists; |
143
|
0
|
0
|
|
|
|
|
my $dest = $dest_par->child("$pdbid.$type"); |
144
|
0
|
|
|
|
|
|
IO::Uncompress::Gunzip::gunzip( $gz => $dest->stringify ) |
145
|
|
|
|
|
|
|
or die "unable to gunzip $gz"; |
146
|
0
|
|
|
|
|
|
unlink $gz; |
147
|
|
|
|
|
|
|
push @fetched, $pdbid; |
148
|
0
|
|
|
|
|
|
} |
149
|
0
|
0
|
|
|
|
|
|
150
|
0
|
|
|
|
|
|
$ftp->quit; |
151
|
0
|
0
|
|
|
|
|
return (\@fetched,\@missed); |
152
|
|
|
|
|
|
|
} |
153
|
0
|
|
|
|
|
|
|
154
|
0
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
# connects to FTP via rcsb_ftp_addr and sets working directory to path if passed |
156
|
|
|
|
|
|
|
require Net::FTP; |
157
|
0
|
|
|
|
|
|
|
158
|
0
|
|
|
|
|
|
my $self = shift; |
159
|
|
|
|
|
|
|
my $path = shift; |
160
|
|
|
|
|
|
|
my $host = $self->rcsb_ftp_addr; |
161
|
|
|
|
|
|
|
my $user = $self->ftp_user; |
162
|
|
|
|
|
|
|
my $pass = $self->ftp_password; |
163
|
|
|
|
|
|
|
|
164
|
0
|
|
|
0
|
0
|
|
my $ftp = Net::FTP->new($host); |
165
|
|
|
|
|
|
|
$ftp->login( $user, $pass ) or die "cannot login to rcsb ftp addr"; |
166
|
0
|
|
|
|
|
|
if($path){ |
167
|
0
|
|
|
|
|
|
$ftp->cwd($path) or die "cannont cwd to $path"; |
168
|
0
|
|
|
|
|
|
} |
169
|
0
|
|
|
|
|
|
$ftp->binary(); |
170
|
0
|
|
|
|
|
|
return $ftp; |
171
|
|
|
|
|
|
|
} |
172
|
0
|
|
|
|
|
|
|
173
|
0
|
0
|
|
|
|
|
no Moose::Role; |
174
|
0
|
0
|
|
|
|
|
1; |
175
|
0
|
0
|
|
|
|
|
|
176
|
|
|
|
|
|
|
|
177
|
0
|
|
|
|
|
|
=pod |
178
|
0
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
=head1 NAME |
180
|
|
|
|
|
|
|
|
181
|
11
|
|
|
11
|
|
45100
|
HackaMol::Roles::RcsbRole - Read files with molecular information |
|
11
|
|
|
|
|
25
|
|
|
11
|
|
|
|
|
58
|
|
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
=head1 VERSION |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
version 0.052 |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
=head1 AUTHOR |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
Demian Riccardi <demianriccardi@gmail.com> |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
This software is copyright (c) 2017 by Demian Riccardi. |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
196
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
=cut |