| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
$HackaMol::Roles::ReadPdbRole::VERSION = '0.053'; |
|
2
|
|
|
|
|
|
|
# ABSTRACT: Read files with molecular information |
|
3
|
|
|
|
|
|
|
use Moose::Role; |
|
4
|
11
|
|
|
11
|
|
5452
|
use HackaMol::PeriodicTable qw(_element_name _trim _qstring_num); |
|
|
11
|
|
|
|
|
26
|
|
|
|
11
|
|
|
|
|
78
|
|
|
5
|
11
|
|
|
11
|
|
48231
|
use Math::Vector::Real; |
|
|
11
|
|
|
|
|
28
|
|
|
|
11
|
|
|
|
|
626
|
|
|
6
|
11
|
|
|
11
|
|
61
|
use Carp; |
|
|
11
|
|
|
|
|
24
|
|
|
|
11
|
|
|
|
|
389
|
|
|
7
|
11
|
|
|
11
|
|
58
|
|
|
|
11
|
|
|
|
|
24
|
|
|
|
11
|
|
|
|
|
8348
|
|
|
8
|
|
|
|
|
|
|
requires 'readline_func'; |
|
9
|
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
my $self = shift; |
|
11
|
|
|
|
|
|
|
my $fh = shift; |
|
12
|
1
|
|
|
1
|
0
|
3
|
|
|
13
|
1
|
|
|
|
|
2
|
my $header = $self->read_pdb_header( $fh ); |
|
14
|
|
|
|
|
|
|
my ($atoms,$model_ids) = $self->read_pdb_atoms( $fh ); |
|
15
|
1
|
|
|
|
|
98
|
|
|
16
|
1
|
|
|
|
|
6
|
return ($header,$atoms,$model_ids); |
|
17
|
|
|
|
|
|
|
|
|
18
|
1
|
|
|
|
|
32
|
} |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
my $self = shift; |
|
21
|
|
|
|
|
|
|
my $fh = shift; |
|
22
|
|
|
|
|
|
|
my $header; |
|
23
|
1
|
|
|
1
|
0
|
5
|
my $line; |
|
24
|
1
|
|
|
|
|
3
|
while($line = <$fh>){ |
|
25
|
1
|
|
|
|
|
3
|
if ($line =~ m/^(?:MODEL|ATOM|HETATM)\s/){ |
|
26
|
|
|
|
|
|
|
seek($fh, -length($line),1); # rewind back and end |
|
27
|
1
|
|
|
|
|
48
|
last; |
|
28
|
3
|
100
|
|
|
|
14
|
} |
|
29
|
1
|
|
|
|
|
14
|
else{ |
|
30
|
1
|
|
|
|
|
4
|
$header .= $line; |
|
31
|
|
|
|
|
|
|
} |
|
32
|
|
|
|
|
|
|
} |
|
33
|
2
|
|
|
|
|
7
|
return ($header); |
|
34
|
|
|
|
|
|
|
} |
|
35
|
|
|
|
|
|
|
|
|
36
|
1
|
|
|
|
|
6
|
|
|
37
|
|
|
|
|
|
|
#read pdb file and generate list of Atom objects |
|
38
|
|
|
|
|
|
|
my $self = shift; |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
my $fh = shift; |
|
41
|
|
|
|
|
|
|
#my $file = shift; |
|
42
|
15
|
|
|
15
|
1
|
43
|
#my $fh = FileHandle->new("<$file") or croak "unable to open $file"; |
|
43
|
|
|
|
|
|
|
|
|
44
|
15
|
|
|
|
|
29
|
my @atoms; |
|
45
|
|
|
|
|
|
|
my @model_ids; |
|
46
|
|
|
|
|
|
|
my ( $n, $t ) = ( 0, 0 ); |
|
47
|
|
|
|
|
|
|
my $q_tbad = 0; |
|
48
|
15
|
|
|
|
|
42
|
my $something_dirty = 0; |
|
49
|
|
|
|
|
|
|
my $t0_atom_count = 0; |
|
50
|
15
|
|
|
|
|
48
|
my $t_atom_count = 0; |
|
51
|
15
|
|
|
|
|
34
|
my $pdb_model_id; |
|
52
|
15
|
|
|
|
|
29
|
|
|
53
|
15
|
|
|
|
|
32
|
while (<$fh>) { |
|
54
|
15
|
|
|
|
|
38
|
if($self->has_readline_func){ |
|
55
|
15
|
|
|
|
|
30
|
next if $self->readline_func->($_) eq 'PDB_SKIP'; |
|
56
|
|
|
|
|
|
|
} |
|
57
|
15
|
|
|
|
|
2149
|
if (/^(?:MODEL\s+(\d+))/) { |
|
58
|
8525
|
100
|
|
|
|
215209
|
|
|
59
|
921
|
100
|
|
|
|
16230
|
$n = 0; |
|
60
|
|
|
|
|
|
|
$q_tbad = 0; # flag a bad model and never read again! |
|
61
|
7670
|
100
|
|
|
|
31958
|
$t_atom_count = 0 ; |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
$pdb_model_id = $1; |
|
63
|
19
|
|
|
|
|
44
|
$model_ids[$t] = $pdb_model_id; |
|
64
|
19
|
|
|
|
|
34
|
} |
|
65
|
19
|
|
|
|
|
33
|
elsif (/^(?:ENDMDL)/) { |
|
66
|
19
|
|
|
|
|
42
|
# delete coords if the number of atoms on t is != to that at start |
|
67
|
19
|
|
|
|
|
155
|
if ($t){ |
|
68
|
|
|
|
|
|
|
if ($t_atom_count != $t0_atom_count){ |
|
69
|
|
|
|
|
|
|
my $carp_message = |
|
70
|
|
|
|
|
|
|
"BAD t->$t PDB atom list length changed from $t0_atom_count to $t_atom_count: ignoring model $t"; |
|
71
|
19
|
100
|
|
|
|
62
|
carp $carp_message; |
|
72
|
9
|
50
|
|
|
|
35
|
$_->delete_coords($t) foreach @atoms; |
|
73
|
0
|
|
|
|
|
0
|
$t--; |
|
74
|
|
|
|
|
|
|
} |
|
75
|
0
|
|
|
|
|
0
|
} |
|
76
|
0
|
|
|
|
|
0
|
$t++; |
|
77
|
0
|
|
|
|
|
0
|
} |
|
78
|
|
|
|
|
|
|
elsif (/^(?:HETATM|ATOM)/) { |
|
79
|
|
|
|
|
|
|
next if $q_tbad; |
|
80
|
19
|
|
|
|
|
193
|
my ( |
|
81
|
|
|
|
|
|
|
$record_name, $serial, $name, $altloc, |
|
82
|
|
|
|
|
|
|
$resName, $chainID, $resSeq, $icod, |
|
83
|
7326
|
100
|
|
|
|
13836
|
$x, $y, $z, $occ, |
|
84
|
|
|
|
|
|
|
$B, $segID, $element, $charge |
|
85
|
7257
|
|
|
|
|
46140
|
) = unpack "A6A5x1A4A1A3A2A4A1x3A8A8A8A6A6x6A4A2A2", $_ . (" " x 12); # padded out to accommodate truncated pdbs |
|
86
|
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
if ( $charge =~ m/\d/ ) { $charge = _qstring_num($charge) } |
|
88
|
|
|
|
|
|
|
else { $charge = 0 } |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
if ( $chainID =~ m/\w/ ) { $chainID = _trim($chainID) } |
|
91
|
7257
|
100
|
|
|
|
14416
|
else { $chainID = ' ' } |
|
|
7
|
|
|
|
|
44
|
|
|
92
|
7250
|
|
|
|
|
10304
|
|
|
93
|
|
|
|
|
|
|
$name = _trim($name); |
|
94
|
7257
|
100
|
|
|
|
15939
|
$resName = _trim($resName); |
|
|
7242
|
|
|
|
|
17713
|
|
|
95
|
15
|
|
|
|
|
30
|
$resSeq = _trim($resSeq); |
|
96
|
|
|
|
|
|
|
|
|
97
|
7257
|
|
|
|
|
13020
|
#$resSeq = 0 if ( $resSeq < 0 ); |
|
98
|
7257
|
|
|
|
|
12534
|
$serial = _trim($serial); |
|
99
|
7257
|
|
|
|
|
11719
|
$segID = _trim($segID); |
|
100
|
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
$element = ucfirst( lc( _trim($element) ) ); |
|
102
|
7257
|
|
|
|
|
11600
|
my $qdirt = 0; |
|
103
|
7257
|
|
|
|
|
12197
|
( $element, $qdirt ) = _element_name($name) |
|
104
|
|
|
|
|
|
|
unless ( $element =~ /\w+/ ); |
|
105
|
7257
|
|
|
|
|
11783
|
$something_dirty++ if ($qdirt); |
|
106
|
7257
|
|
|
|
|
10325
|
my $xyz = V( $x, $y, $z ); |
|
107
|
7257
|
100
|
|
|
|
16732
|
|
|
108
|
|
|
|
|
|
|
if ( $t == 0 ) { |
|
109
|
7257
|
100
|
|
|
|
12388
|
$atoms[$n] = HackaMol::Atom->new( |
|
110
|
7257
|
|
|
|
|
32425
|
name => $name, |
|
111
|
|
|
|
|
|
|
record_name => $record_name, |
|
112
|
7257
|
100
|
|
|
|
12847
|
serial => $serial, |
|
113
|
4863
|
|
|
|
|
117553
|
chain => $chainID, |
|
114
|
|
|
|
|
|
|
symbol => $element, |
|
115
|
|
|
|
|
|
|
charges => [$charge], |
|
116
|
|
|
|
|
|
|
coords => [$xyz], |
|
117
|
|
|
|
|
|
|
occ => $occ * 1, |
|
118
|
|
|
|
|
|
|
bfact => $B * 1, |
|
119
|
|
|
|
|
|
|
resname => $resName, |
|
120
|
|
|
|
|
|
|
resid => $resSeq, |
|
121
|
|
|
|
|
|
|
icode => $icod, |
|
122
|
|
|
|
|
|
|
segid => $segID, |
|
123
|
|
|
|
|
|
|
altloc => $altloc, |
|
124
|
|
|
|
|
|
|
); |
|
125
|
|
|
|
|
|
|
$atoms[$n]->is_dirty($qdirt) unless $atoms[$n]->is_dirty; |
|
126
|
|
|
|
|
|
|
$t0_atom_count++; |
|
127
|
|
|
|
|
|
|
} |
|
128
|
|
|
|
|
|
|
else { |
|
129
|
4863
|
50
|
|
|
|
103351
|
# croak condition if atom changes between models |
|
130
|
4863
|
|
|
|
|
7004
|
# does not catch case were number of atoms shrinks! |
|
131
|
|
|
|
|
|
|
if ( $n > $#atoms or $name ne $atoms[$n]->name |
|
132
|
|
|
|
|
|
|
or $element ne $atoms[$n]->symbol ) |
|
133
|
|
|
|
|
|
|
{ |
|
134
|
|
|
|
|
|
|
my $carp_message = |
|
135
|
2394
|
100
|
66
|
|
|
55400
|
"BAD t->$t PDB Atom $n " |
|
|
|
|
100
|
|
|
|
|
|
136
|
|
|
|
|
|
|
. "serial $serial resname $resName " |
|
137
|
|
|
|
|
|
|
. "has changed"; |
|
138
|
2
|
|
|
|
|
15
|
carp $carp_message; |
|
139
|
|
|
|
|
|
|
$q_tbad = $t; # this is a bad model! |
|
140
|
|
|
|
|
|
|
# wipe out all the coords prior |
|
141
|
|
|
|
|
|
|
$atoms[$_]->delete_coords($t) foreach 0 .. $n - 1; |
|
142
|
2
|
|
|
|
|
67
|
$t--; |
|
143
|
2
|
|
|
|
|
1085
|
next; |
|
144
|
|
|
|
|
|
|
} |
|
145
|
2
|
|
|
|
|
51
|
$atoms[$n]->set_coords( $t, $xyz ); |
|
146
|
2
|
|
|
|
|
7
|
$t_atom_count++; |
|
147
|
2
|
|
|
|
|
16
|
} |
|
148
|
|
|
|
|
|
|
$n++; |
|
149
|
2392
|
|
|
|
|
63498
|
} |
|
150
|
2392
|
|
|
|
|
2765
|
} |
|
151
|
|
|
|
|
|
|
|
|
152
|
7255
|
|
|
|
|
52225
|
# set iatom to track the array. diff from serial which refers to pdb |
|
153
|
|
|
|
|
|
|
$atoms[$_]->iatom($_) foreach ( 0 .. $#atoms ); |
|
154
|
|
|
|
|
|
|
if ($something_dirty) { |
|
155
|
|
|
|
|
|
|
if ( $self->hush_read <= 0 ) { |
|
156
|
|
|
|
|
|
|
my $message = "MolReadRole> found $something_dirty dirty atoms. "; |
|
157
|
15
|
|
|
|
|
541
|
$message .= "Check symbols and lookup names PeriodicTable.pm:"; |
|
158
|
15
|
100
|
|
|
|
58
|
my @sprintf; |
|
159
|
2
|
100
|
|
|
|
56
|
foreach my $atom (grep {$_->is_dirty} @atoms){ |
|
160
|
1
|
|
|
|
|
6
|
push @sprintf, sprintf(" DIRTY: index %s name %s element %s %10.3f %10.3f %10.3f;", $atom->iatom, $atom->name, $atom->symbol, @{$atom->xyz}); |
|
161
|
1
|
|
|
|
|
4
|
} |
|
162
|
1
|
|
|
|
|
3
|
$message .= $_ foreach @sprintf; |
|
163
|
1
|
|
|
|
|
4
|
carp $message; |
|
|
36
|
|
|
|
|
653
|
|
|
164
|
2
|
|
|
|
|
46
|
} |
|
|
2
|
|
|
|
|
12
|
|
|
165
|
|
|
|
|
|
|
} |
|
166
|
1
|
|
|
|
|
7
|
return (\@atoms,\@model_ids); |
|
167
|
1
|
|
|
|
|
31
|
} |
|
168
|
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
no Moose::Role; |
|
170
|
15
|
|
|
|
|
620
|
|
|
171
|
|
|
|
|
|
|
1; |
|
172
|
|
|
|
|
|
|
|
|
173
|
11
|
|
|
11
|
|
93
|
|
|
|
11
|
|
|
|
|
26
|
|
|
|
11
|
|
|
|
|
70
|
|
|
174
|
|
|
|
|
|
|
=pod |
|
175
|
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
=head1 NAME |
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
HackaMol::Roles::ReadPdbRole - Read files with molecular information |
|
179
|
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
=head1 VERSION |
|
181
|
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
version 0.053 |
|
183
|
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
185
|
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
my @atoms = HackaMol->new |
|
187
|
|
|
|
|
|
|
->read_pdb_atoms("some.pdb"); |
|
188
|
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
190
|
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
The HackaMol::Roles::ReadPdbRole provides read_pdb_atoms reading protein database files. |
|
192
|
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
=head1 METHODS |
|
194
|
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
=head2 read_pdb_atoms |
|
196
|
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
One argument: the filename |
|
198
|
|
|
|
|
|
|
Returns a list of HackaMol::Atom objects. |
|
199
|
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
=head1 Additional information |
|
201
|
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
According to the PDB specification, the element symbol should be present in columns |
|
203
|
|
|
|
|
|
|
77-78. The element is often ommitted by programs, such as charmm, that can write |
|
204
|
|
|
|
|
|
|
pdbs because it makes the file larger, and the information is accessible somewhere |
|
205
|
|
|
|
|
|
|
else (protein structure file). Unfortunately, other programs require the information. |
|
206
|
|
|
|
|
|
|
HackaMol::MolReadRole, uses HackaMol::PeriodicTable to map common names to the element |
|
207
|
|
|
|
|
|
|
(e.g. POT => 'K'). read_pdb_atoms will carp if the name is not in the hash, and then |
|
208
|
|
|
|
|
|
|
set the element to the first letter of the name. If you see one of these messages for |
|
209
|
|
|
|
|
|
|
a common atom, please submit an issue or pull request so the atom can be added! |
|
210
|
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
212
|
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
=over 4 |
|
214
|
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
=item * |
|
216
|
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
L<HackaMol> |
|
218
|
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
=item * |
|
220
|
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
L<HackaMol::Atom> |
|
222
|
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
=item * |
|
224
|
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
L<HackaMol::Roles::MolReadRole> |
|
226
|
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
=item * |
|
228
|
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
L<Protein Data Bank|http://pdb.org> |
|
230
|
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
=back |
|
232
|
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
=head1 AUTHOR |
|
234
|
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
Demian Riccardi <demianriccardi@gmail.com> |
|
236
|
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
|
238
|
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
This software is copyright (c) 2017 by Demian Riccardi. |
|
240
|
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
|
242
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
|
243
|
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
=cut |