line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package HackaMol::Roles::ReadPdbRole; |
2
|
|
|
|
|
|
|
$HackaMol::Roles::ReadPdbRole::VERSION = '0.051'; |
3
|
|
|
|
|
|
|
# ABSTRACT: Read files with molecular information |
4
|
11
|
|
|
11
|
|
5701
|
use Moose::Role; |
|
11
|
|
|
|
|
34
|
|
|
11
|
|
|
|
|
87
|
|
5
|
11
|
|
|
11
|
|
57716
|
use HackaMol::PeriodicTable qw(_element_name _trim _qstring_num); |
|
11
|
|
|
|
|
60
|
|
|
11
|
|
|
|
|
677
|
|
6
|
11
|
|
|
11
|
|
81
|
use Math::Vector::Real; |
|
11
|
|
|
|
|
41
|
|
|
11
|
|
|
|
|
438
|
|
7
|
11
|
|
|
11
|
|
74
|
use Carp; |
|
11
|
|
|
|
|
35
|
|
|
11
|
|
|
|
|
10169
|
|
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
requires 'readline_func'; |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
sub read_pdb_parts{ |
12
|
1
|
|
|
1
|
0
|
4
|
my $self = shift; |
13
|
1
|
|
|
|
|
2
|
my $fh = shift; |
14
|
|
|
|
|
|
|
|
15
|
1
|
|
|
|
|
83
|
my $header = $self->read_pdb_header( $fh ); |
16
|
1
|
|
|
|
|
15
|
my ($atoms,$model_ids) = $self->read_pdb_atoms( $fh ); |
17
|
|
|
|
|
|
|
|
18
|
1
|
|
|
|
|
53
|
return ($header,$atoms,$model_ids); |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
} |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
sub read_pdb_header{ |
23
|
1
|
|
|
1
|
0
|
15
|
my $self = shift; |
24
|
1
|
|
|
|
|
2
|
my $fh = shift; |
25
|
1
|
|
|
|
|
3
|
my $header; |
26
|
|
|
|
|
|
|
my $line; |
27
|
1
|
|
|
|
|
59
|
while($line = <$fh>){ |
28
|
3
|
100
|
|
|
|
15
|
if ($line =~ m/^(?:MODEL|ATOM|HETATM)\s/){ |
29
|
1
|
|
|
|
|
16
|
seek($fh, -length($line),1); # rewind back and end |
30
|
1
|
|
|
|
|
3
|
last; |
31
|
|
|
|
|
|
|
} |
32
|
|
|
|
|
|
|
else{ |
33
|
2
|
|
|
|
|
9
|
$header .= $line; |
34
|
|
|
|
|
|
|
} |
35
|
|
|
|
|
|
|
} |
36
|
1
|
|
|
|
|
6
|
return ($header); |
37
|
|
|
|
|
|
|
} |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
sub read_pdb_atoms { |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
#read pdb file and generate list of Atom objects |
42
|
15
|
|
|
15
|
1
|
45
|
my $self = shift; |
43
|
|
|
|
|
|
|
|
44
|
15
|
|
|
|
|
43
|
my $fh = shift; |
45
|
|
|
|
|
|
|
#my $file = shift; |
46
|
|
|
|
|
|
|
#my $fh = FileHandle->new("<$file") or croak "unable to open $file"; |
47
|
|
|
|
|
|
|
|
48
|
15
|
|
|
|
|
50
|
my @atoms; |
49
|
|
|
|
|
|
|
my @model_ids; |
50
|
15
|
|
|
|
|
61
|
my ( $n, $t ) = ( 0, 0 ); |
51
|
15
|
|
|
|
|
49
|
my $q_tbad = 0; |
52
|
15
|
|
|
|
|
42
|
my $something_dirty = 0; |
53
|
15
|
|
|
|
|
38
|
my $t0_atom_count = 0; |
54
|
15
|
|
|
|
|
35
|
my $t_atom_count = 0; |
55
|
15
|
|
|
|
|
41
|
my $pdb_model_id; |
56
|
|
|
|
|
|
|
|
57
|
15
|
|
|
|
|
2029
|
while (<$fh>) { |
58
|
8525
|
100
|
|
|
|
268192
|
if($self->has_readline_func){ |
59
|
921
|
100
|
|
|
|
19969
|
next if $self->readline_func->($_) eq 'PDB_SKIP'; |
60
|
|
|
|
|
|
|
} |
61
|
7670
|
100
|
|
|
|
39666
|
if (/^(?:MODEL\s+(\d+))/) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
62
|
|
|
|
|
|
|
|
63
|
19
|
|
|
|
|
58
|
$n = 0; |
64
|
19
|
|
|
|
|
45
|
$q_tbad = 0; # flag a bad model and never read again! |
65
|
19
|
|
|
|
|
56
|
$t_atom_count = 0 ; |
66
|
19
|
|
|
|
|
55
|
$pdb_model_id = $1; |
67
|
19
|
|
|
|
|
139
|
$model_ids[$t] = $pdb_model_id; |
68
|
|
|
|
|
|
|
} |
69
|
|
|
|
|
|
|
elsif (/^(?:ENDMDL)/) { |
70
|
|
|
|
|
|
|
# delete coords if the number of atoms on t is != to that at start |
71
|
19
|
100
|
|
|
|
91
|
if ($t){ |
72
|
9
|
50
|
|
|
|
43
|
if ($t_atom_count != $t0_atom_count){ |
73
|
0
|
|
|
|
|
0
|
my $carp_message = |
74
|
|
|
|
|
|
|
"BAD t->$t PDB atom list length changed from $t0_atom_count to $t_atom_count: ignoring model $t"; |
75
|
0
|
|
|
|
|
0
|
carp $carp_message; |
76
|
0
|
|
|
|
|
0
|
$_->delete_coords($t) foreach @atoms; |
77
|
0
|
|
|
|
|
0
|
$t--; |
78
|
|
|
|
|
|
|
} |
79
|
|
|
|
|
|
|
} |
80
|
19
|
|
|
|
|
191
|
$t++; |
81
|
|
|
|
|
|
|
} |
82
|
|
|
|
|
|
|
elsif (/^(?:HETATM|ATOM)/) { |
83
|
7326
|
100
|
|
|
|
16819
|
next if $q_tbad; |
84
|
|
|
|
|
|
|
my ( |
85
|
7257
|
|
|
|
|
58289
|
$record_name, $serial, $name, $altloc, |
86
|
|
|
|
|
|
|
$resName, $chainID, $resSeq, $icod, |
87
|
|
|
|
|
|
|
$x, $y, $z, $occ, |
88
|
|
|
|
|
|
|
$B, $segID, $element, $charge |
89
|
|
|
|
|
|
|
) = unpack "A6A5x1A4A1A3x1A1A4A1x3A8A8A8A6A6x6A4A2A2", $_ . (" " x 12); # padded out to accommodate truncated pdbs |
90
|
|
|
|
|
|
|
|
91
|
7257
|
100
|
|
|
|
19543
|
if ( $charge =~ m/\d/ ) { $charge = _qstring_num($charge) } |
|
7
|
|
|
|
|
49
|
|
92
|
7250
|
|
|
|
|
11860
|
else { $charge = 0 } |
93
|
|
|
|
|
|
|
|
94
|
7257
|
100
|
|
|
|
20451
|
if ( $chainID =~ m/\w/ ) { $chainID = _trim($chainID) } |
|
7242
|
|
|
|
|
22164
|
|
95
|
15
|
|
|
|
|
38
|
else { $chainID = ' ' } |
96
|
|
|
|
|
|
|
|
97
|
7257
|
|
|
|
|
15260
|
$name = _trim($name); |
98
|
7257
|
|
|
|
|
14812
|
$resName = _trim($resName); |
99
|
7257
|
|
|
|
|
14718
|
$resSeq = _trim($resSeq); |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
#$resSeq = 0 if ( $resSeq < 0 ); |
102
|
7257
|
|
|
|
|
15387
|
$serial = _trim($serial); |
103
|
7257
|
|
|
|
|
14366
|
$segID = _trim($segID); |
104
|
|
|
|
|
|
|
|
105
|
7257
|
|
|
|
|
14563
|
$element = ucfirst( lc( _trim($element) ) ); |
106
|
7257
|
|
|
|
|
12524
|
my $qdirt = 0; |
107
|
7257
|
100
|
|
|
|
21054
|
( $element, $qdirt ) = _element_name($name) |
108
|
|
|
|
|
|
|
unless ( $element =~ /\w+/ ); |
109
|
7257
|
100
|
|
|
|
14759
|
$something_dirty++ if ($qdirt); |
110
|
7257
|
|
|
|
|
37614
|
my $xyz = V( $x, $y, $z ); |
111
|
|
|
|
|
|
|
|
112
|
7257
|
100
|
|
|
|
17974
|
if ( $t == 0 ) { |
113
|
4863
|
|
|
|
|
147935
|
$atoms[$n] = HackaMol::Atom->new( |
114
|
|
|
|
|
|
|
name => $name, |
115
|
|
|
|
|
|
|
record_name => $record_name, |
116
|
|
|
|
|
|
|
serial => $serial, |
117
|
|
|
|
|
|
|
chain => $chainID, |
118
|
|
|
|
|
|
|
symbol => $element, |
119
|
|
|
|
|
|
|
charges => [$charge], |
120
|
|
|
|
|
|
|
coords => [$xyz], |
121
|
|
|
|
|
|
|
occ => $occ * 1, |
122
|
|
|
|
|
|
|
bfact => $B * 1, |
123
|
|
|
|
|
|
|
resname => $resName, |
124
|
|
|
|
|
|
|
resid => $resSeq, |
125
|
|
|
|
|
|
|
icode => $icod, |
126
|
|
|
|
|
|
|
segid => $segID, |
127
|
|
|
|
|
|
|
altloc => $altloc, |
128
|
|
|
|
|
|
|
); |
129
|
4863
|
50
|
|
|
|
126673
|
$atoms[$n]->is_dirty($qdirt) unless $atoms[$n]->is_dirty; |
130
|
4863
|
|
|
|
|
8800
|
$t0_atom_count++; |
131
|
|
|
|
|
|
|
} |
132
|
|
|
|
|
|
|
else { |
133
|
|
|
|
|
|
|
# croak condition if atom changes between models |
134
|
|
|
|
|
|
|
# does not catch case were number of atoms shrinks! |
135
|
2394
|
100
|
66
|
|
|
71248
|
if ( $n > $#atoms or $name ne $atoms[$n]->name |
|
|
|
100
|
|
|
|
|
136
|
|
|
|
|
|
|
or $element ne $atoms[$n]->symbol ) |
137
|
|
|
|
|
|
|
{ |
138
|
2
|
|
|
|
|
22
|
my $carp_message = |
139
|
|
|
|
|
|
|
"BAD t->$t PDB Atom $n " |
140
|
|
|
|
|
|
|
. "serial $serial resname $resName " |
141
|
|
|
|
|
|
|
. "has changed"; |
142
|
2
|
|
|
|
|
85
|
carp $carp_message; |
143
|
2
|
|
|
|
|
1312
|
$q_tbad = $t; # this is a bad model! |
144
|
|
|
|
|
|
|
# wipe out all the coords prior |
145
|
2
|
|
|
|
|
85
|
$atoms[$_]->delete_coords($t) foreach 0 .. $n - 1; |
146
|
2
|
|
|
|
|
7
|
$t--; |
147
|
2
|
|
|
|
|
23
|
next; |
148
|
|
|
|
|
|
|
} |
149
|
2392
|
|
|
|
|
80285
|
$atoms[$n]->set_coords( $t, $xyz ); |
150
|
2392
|
|
|
|
|
3386
|
$t_atom_count++; |
151
|
|
|
|
|
|
|
} |
152
|
7255
|
|
|
|
|
63746
|
$n++; |
153
|
|
|
|
|
|
|
} |
154
|
|
|
|
|
|
|
} |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
# set iatom to track the array. diff from serial which refers to pdb |
157
|
15
|
|
|
|
|
715
|
$atoms[$_]->iatom($_) foreach ( 0 .. $#atoms ); |
158
|
15
|
100
|
|
|
|
80
|
if ($something_dirty) { |
159
|
2
|
100
|
|
|
|
80
|
if ( $self->hush_read <= 0 ) { |
160
|
1
|
|
|
|
|
6
|
my $message = "MolReadRole> found $something_dirty dirty atoms. "; |
161
|
1
|
|
|
|
|
10
|
$message .= "Check symbols and lookup names PeriodicTable.pm:"; |
162
|
1
|
|
|
|
|
4
|
my @sprintf; |
163
|
1
|
|
|
|
|
4
|
foreach my $atom (grep {$_->is_dirty} @atoms){ |
|
36
|
|
|
|
|
837
|
|
164
|
2
|
|
|
|
|
58
|
push @sprintf, sprintf(" DIRTY: index %s name %s element %s %10.3f %10.3f %10.3f;", $atom->iatom, $atom->name, $atom->symbol, @{$atom->xyz}); |
|
2
|
|
|
|
|
27
|
|
165
|
|
|
|
|
|
|
} |
166
|
1
|
|
|
|
|
8
|
$message .= $_ foreach @sprintf; |
167
|
1
|
|
|
|
|
31
|
carp $message; |
168
|
|
|
|
|
|
|
} |
169
|
|
|
|
|
|
|
} |
170
|
15
|
|
|
|
|
853
|
return (\@atoms,\@model_ids); |
171
|
|
|
|
|
|
|
} |
172
|
|
|
|
|
|
|
|
173
|
11
|
|
|
11
|
|
120
|
no Moose::Role; |
|
11
|
|
|
|
|
39
|
|
|
11
|
|
|
|
|
69
|
|
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
1; |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
__END__ |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
=pod |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
=head1 NAME |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
HackaMol::Roles::ReadPdbRole - Read files with molecular information |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
=head1 VERSION |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
version 0.051 |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
=head1 SYNOPSIS |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
my @atoms = HackaMol->new |
192
|
|
|
|
|
|
|
->read_pdb_atoms("some.pdb"); |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
=head1 DESCRIPTION |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
The HackaMol::Roles::ReadPdbRole provides read_pdb_atoms reading protein database files. |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
=head1 METHODS |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
=head2 read_pdb_atoms |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
One argument: the filename |
203
|
|
|
|
|
|
|
Returns a list of HackaMol::Atom objects. |
204
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
=head1 Additional information |
206
|
|
|
|
|
|
|
|
207
|
|
|
|
|
|
|
According to the PDB specification, the element symbol should be present in columns |
208
|
|
|
|
|
|
|
77-78. The element is often ommitted by programs, such as charmm, that can write |
209
|
|
|
|
|
|
|
pdbs because it makes the file larger, and the information is accessible somewhere |
210
|
|
|
|
|
|
|
else (protein structure file). Unfortunately, other programs require the information. |
211
|
|
|
|
|
|
|
HackaMol::MolReadRole, uses HackaMol::PeriodicTable to map common names to the element |
212
|
|
|
|
|
|
|
(e.g. POT => 'K'). read_pdb_atoms will carp if the name is not in the hash, and then |
213
|
|
|
|
|
|
|
set the element to the first letter of the name. If you see one of these messages for |
214
|
|
|
|
|
|
|
a common atom, please submit an issue or pull request so the atom can be added! |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
=head1 SEE ALSO |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
=over 4 |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
=item * |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
L<HackaMol> |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
=item * |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
L<HackaMol::Atom> |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
=item * |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
L<HackaMol::Roles::MolReadRole> |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
=item * |
233
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
L<Protein Data Bank|http://pdb.org> |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
=back |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
=head1 AUTHOR |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
Demian Riccardi <demianriccardi@gmail.com> |
241
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
This software is copyright (c) 2017 by Demian Riccardi. |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
247
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
=cut |