line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
$HackaMol::Roles::ReadPdbRole::VERSION = '0.052'; |
2
|
|
|
|
|
|
|
# ABSTRACT: Read files with molecular information |
3
|
|
|
|
|
|
|
use Moose::Role; |
4
|
11
|
|
|
11
|
|
5960
|
use HackaMol::PeriodicTable qw(_element_name _trim _qstring_num); |
|
11
|
|
|
|
|
36
|
|
|
11
|
|
|
|
|
104
|
|
5
|
11
|
|
|
11
|
|
51217
|
use Math::Vector::Real; |
|
11
|
|
|
|
|
28
|
|
|
11
|
|
|
|
|
668
|
|
6
|
11
|
|
|
11
|
|
60
|
use Carp; |
|
11
|
|
|
|
|
28
|
|
|
11
|
|
|
|
|
410
|
|
7
|
11
|
|
|
11
|
|
61
|
|
|
11
|
|
|
|
|
37
|
|
|
11
|
|
|
|
|
9071
|
|
8
|
|
|
|
|
|
|
requires 'readline_func'; |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
my $self = shift; |
11
|
|
|
|
|
|
|
my $fh = shift; |
12
|
1
|
|
|
1
|
0
|
3
|
|
13
|
1
|
|
|
|
|
2
|
my $header = $self->read_pdb_header( $fh ); |
14
|
|
|
|
|
|
|
my ($atoms,$model_ids) = $self->read_pdb_atoms( $fh ); |
15
|
1
|
|
|
|
|
82
|
|
16
|
1
|
|
|
|
|
9
|
return ($header,$atoms,$model_ids); |
17
|
|
|
|
|
|
|
|
18
|
1
|
|
|
|
|
27
|
} |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
my $self = shift; |
21
|
|
|
|
|
|
|
my $fh = shift; |
22
|
|
|
|
|
|
|
my $header; |
23
|
1
|
|
|
1
|
0
|
3
|
my $line; |
24
|
1
|
|
|
|
|
3
|
while($line = <$fh>){ |
25
|
1
|
|
|
|
|
2
|
if ($line =~ m/^(?:MODEL|ATOM|HETATM)\s/){ |
26
|
|
|
|
|
|
|
seek($fh, -length($line),1); # rewind back and end |
27
|
1
|
|
|
|
|
49
|
last; |
28
|
3
|
100
|
|
|
|
15
|
} |
29
|
1
|
|
|
|
|
17
|
else{ |
30
|
1
|
|
|
|
|
5
|
$header .= $line; |
31
|
|
|
|
|
|
|
} |
32
|
|
|
|
|
|
|
} |
33
|
2
|
|
|
|
|
8
|
return ($header); |
34
|
|
|
|
|
|
|
} |
35
|
|
|
|
|
|
|
|
36
|
1
|
|
|
|
|
4
|
|
37
|
|
|
|
|
|
|
#read pdb file and generate list of Atom objects |
38
|
|
|
|
|
|
|
my $self = shift; |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
my $fh = shift; |
41
|
|
|
|
|
|
|
#my $file = shift; |
42
|
15
|
|
|
15
|
1
|
36
|
#my $fh = FileHandle->new("<$file") or croak "unable to open $file"; |
43
|
|
|
|
|
|
|
|
44
|
15
|
|
|
|
|
33
|
my @atoms; |
45
|
|
|
|
|
|
|
my @model_ids; |
46
|
|
|
|
|
|
|
my ( $n, $t ) = ( 0, 0 ); |
47
|
|
|
|
|
|
|
my $q_tbad = 0; |
48
|
15
|
|
|
|
|
40
|
my $something_dirty = 0; |
49
|
|
|
|
|
|
|
my $t0_atom_count = 0; |
50
|
15
|
|
|
|
|
58
|
my $t_atom_count = 0; |
51
|
15
|
|
|
|
|
29
|
my $pdb_model_id; |
52
|
15
|
|
|
|
|
42
|
|
53
|
15
|
|
|
|
|
33
|
while (<$fh>) { |
54
|
15
|
|
|
|
|
32
|
if($self->has_readline_func){ |
55
|
15
|
|
|
|
|
28
|
next if $self->readline_func->($_) eq 'PDB_SKIP'; |
56
|
|
|
|
|
|
|
} |
57
|
15
|
|
|
|
|
1421
|
if (/^(?:MODEL\s+(\d+))/) { |
58
|
8525
|
100
|
|
|
|
219760
|
|
59
|
921
|
100
|
|
|
|
16040
|
$n = 0; |
60
|
|
|
|
|
|
|
$q_tbad = 0; # flag a bad model and never read again! |
61
|
7670
|
100
|
|
|
|
33112
|
$t_atom_count = 0 ; |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
62
|
|
|
|
|
|
|
$pdb_model_id = $1; |
63
|
19
|
|
|
|
|
53
|
$model_ids[$t] = $pdb_model_id; |
64
|
19
|
|
|
|
|
45
|
} |
65
|
19
|
|
|
|
|
29
|
elsif (/^(?:ENDMDL)/) { |
66
|
19
|
|
|
|
|
47
|
# delete coords if the number of atoms on t is != to that at start |
67
|
19
|
|
|
|
|
145
|
if ($t){ |
68
|
|
|
|
|
|
|
if ($t_atom_count != $t0_atom_count){ |
69
|
|
|
|
|
|
|
my $carp_message = |
70
|
|
|
|
|
|
|
"BAD t->$t PDB atom list length changed from $t0_atom_count to $t_atom_count: ignoring model $t"; |
71
|
19
|
100
|
|
|
|
70
|
carp $carp_message; |
72
|
9
|
50
|
|
|
|
40
|
$_->delete_coords($t) foreach @atoms; |
73
|
0
|
|
|
|
|
0
|
$t--; |
74
|
|
|
|
|
|
|
} |
75
|
0
|
|
|
|
|
0
|
} |
76
|
0
|
|
|
|
|
0
|
$t++; |
77
|
0
|
|
|
|
|
0
|
} |
78
|
|
|
|
|
|
|
elsif (/^(?:HETATM|ATOM)/) { |
79
|
|
|
|
|
|
|
next if $q_tbad; |
80
|
19
|
|
|
|
|
186
|
my ( |
81
|
|
|
|
|
|
|
$record_name, $serial, $name, $altloc, |
82
|
|
|
|
|
|
|
$resName, $chainID, $resSeq, $icod, |
83
|
7326
|
100
|
|
|
|
12617
|
$x, $y, $z, $occ, |
84
|
|
|
|
|
|
|
$B, $segID, $element, $charge |
85
|
7257
|
|
|
|
|
47702
|
) = unpack "A6A5x1A4A1A3A2A4A1x3A8A8A8A6A6x6A4A2A2", $_ . (" " x 12); # padded out to accommodate truncated pdbs |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
if ( $charge =~ m/\d/ ) { $charge = _qstring_num($charge) } |
88
|
|
|
|
|
|
|
else { $charge = 0 } |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
if ( $chainID =~ m/\w/ ) { $chainID = _trim($chainID) } |
91
|
7257
|
100
|
|
|
|
14484
|
else { $chainID = ' ' } |
|
7
|
|
|
|
|
42
|
|
92
|
7250
|
|
|
|
|
9320
|
|
93
|
|
|
|
|
|
|
$name = _trim($name); |
94
|
7257
|
100
|
|
|
|
16333
|
$resName = _trim($resName); |
|
7242
|
|
|
|
|
17934
|
|
95
|
15
|
|
|
|
|
29
|
$resSeq = _trim($resSeq); |
96
|
|
|
|
|
|
|
|
97
|
7257
|
|
|
|
|
12982
|
#$resSeq = 0 if ( $resSeq < 0 ); |
98
|
7257
|
|
|
|
|
11450
|
$serial = _trim($serial); |
99
|
7257
|
|
|
|
|
11170
|
$segID = _trim($segID); |
100
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
$element = ucfirst( lc( _trim($element) ) ); |
102
|
7257
|
|
|
|
|
12474
|
my $qdirt = 0; |
103
|
7257
|
|
|
|
|
11846
|
( $element, $qdirt ) = _element_name($name) |
104
|
|
|
|
|
|
|
unless ( $element =~ /\w+/ ); |
105
|
7257
|
|
|
|
|
11218
|
$something_dirty++ if ($qdirt); |
106
|
7257
|
|
|
|
|
10438
|
my $xyz = V( $x, $y, $z ); |
107
|
7257
|
100
|
|
|
|
17024
|
|
108
|
|
|
|
|
|
|
if ( $t == 0 ) { |
109
|
7257
|
100
|
|
|
|
11529
|
$atoms[$n] = HackaMol::Atom->new( |
110
|
7257
|
|
|
|
|
31444
|
name => $name, |
111
|
|
|
|
|
|
|
record_name => $record_name, |
112
|
7257
|
100
|
|
|
|
13738
|
serial => $serial, |
113
|
4863
|
|
|
|
|
118271
|
chain => $chainID, |
114
|
|
|
|
|
|
|
symbol => $element, |
115
|
|
|
|
|
|
|
charges => [$charge], |
116
|
|
|
|
|
|
|
coords => [$xyz], |
117
|
|
|
|
|
|
|
occ => $occ * 1, |
118
|
|
|
|
|
|
|
bfact => $B * 1, |
119
|
|
|
|
|
|
|
resname => $resName, |
120
|
|
|
|
|
|
|
resid => $resSeq, |
121
|
|
|
|
|
|
|
icode => $icod, |
122
|
|
|
|
|
|
|
segid => $segID, |
123
|
|
|
|
|
|
|
altloc => $altloc, |
124
|
|
|
|
|
|
|
); |
125
|
|
|
|
|
|
|
$atoms[$n]->is_dirty($qdirt) unless $atoms[$n]->is_dirty; |
126
|
|
|
|
|
|
|
$t0_atom_count++; |
127
|
|
|
|
|
|
|
} |
128
|
|
|
|
|
|
|
else { |
129
|
4863
|
50
|
|
|
|
101617
|
# croak condition if atom changes between models |
130
|
4863
|
|
|
|
|
6638
|
# does not catch case were number of atoms shrinks! |
131
|
|
|
|
|
|
|
if ( $n > $#atoms or $name ne $atoms[$n]->name |
132
|
|
|
|
|
|
|
or $element ne $atoms[$n]->symbol ) |
133
|
|
|
|
|
|
|
{ |
134
|
|
|
|
|
|
|
my $carp_message = |
135
|
2394
|
100
|
66
|
|
|
56857
|
"BAD t->$t PDB Atom $n " |
|
|
|
100
|
|
|
|
|
136
|
|
|
|
|
|
|
. "serial $serial resname $resName " |
137
|
|
|
|
|
|
|
. "has changed"; |
138
|
2
|
|
|
|
|
17
|
carp $carp_message; |
139
|
|
|
|
|
|
|
$q_tbad = $t; # this is a bad model! |
140
|
|
|
|
|
|
|
# wipe out all the coords prior |
141
|
|
|
|
|
|
|
$atoms[$_]->delete_coords($t) foreach 0 .. $n - 1; |
142
|
2
|
|
|
|
|
56
|
$t--; |
143
|
2
|
|
|
|
|
1011
|
next; |
144
|
|
|
|
|
|
|
} |
145
|
2
|
|
|
|
|
47
|
$atoms[$n]->set_coords( $t, $xyz ); |
146
|
2
|
|
|
|
|
5
|
$t_atom_count++; |
147
|
2
|
|
|
|
|
18
|
} |
148
|
|
|
|
|
|
|
$n++; |
149
|
2392
|
|
|
|
|
65402
|
} |
150
|
2392
|
|
|
|
|
2689
|
} |
151
|
|
|
|
|
|
|
|
152
|
7255
|
|
|
|
|
50446
|
# set iatom to track the array. diff from serial which refers to pdb |
153
|
|
|
|
|
|
|
$atoms[$_]->iatom($_) foreach ( 0 .. $#atoms ); |
154
|
|
|
|
|
|
|
if ($something_dirty) { |
155
|
|
|
|
|
|
|
if ( $self->hush_read <= 0 ) { |
156
|
|
|
|
|
|
|
my $message = "MolReadRole> found $something_dirty dirty atoms. "; |
157
|
15
|
|
|
|
|
591
|
$message .= "Check symbols and lookup names PeriodicTable.pm:"; |
158
|
15
|
100
|
|
|
|
53
|
my @sprintf; |
159
|
2
|
100
|
|
|
|
46
|
foreach my $atom (grep {$_->is_dirty} @atoms){ |
160
|
1
|
|
|
|
|
4
|
push @sprintf, sprintf(" DIRTY: index %s name %s element %s %10.3f %10.3f %10.3f;", $atom->iatom, $atom->name, $atom->symbol, @{$atom->xyz}); |
161
|
1
|
|
|
|
|
3
|
} |
162
|
1
|
|
|
|
|
2
|
$message .= $_ foreach @sprintf; |
163
|
1
|
|
|
|
|
4
|
carp $message; |
|
36
|
|
|
|
|
643
|
|
164
|
2
|
|
|
|
|
46
|
} |
|
2
|
|
|
|
|
9
|
|
165
|
|
|
|
|
|
|
} |
166
|
1
|
|
|
|
|
4
|
return (\@atoms,\@model_ids); |
167
|
1
|
|
|
|
|
44
|
} |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
no Moose::Role; |
170
|
15
|
|
|
|
|
553
|
|
171
|
|
|
|
|
|
|
1; |
172
|
|
|
|
|
|
|
|
173
|
11
|
|
|
11
|
|
101
|
|
|
11
|
|
|
|
|
43
|
|
|
11
|
|
|
|
|
66
|
|
174
|
|
|
|
|
|
|
=pod |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
=head1 NAME |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
HackaMol::Roles::ReadPdbRole - Read files with molecular information |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
=head1 VERSION |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
version 0.052 |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
=head1 SYNOPSIS |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
my @atoms = HackaMol->new |
187
|
|
|
|
|
|
|
->read_pdb_atoms("some.pdb"); |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
=head1 DESCRIPTION |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
The HackaMol::Roles::ReadPdbRole provides read_pdb_atoms reading protein database files. |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
=head1 METHODS |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
=head2 read_pdb_atoms |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
One argument: the filename |
198
|
|
|
|
|
|
|
Returns a list of HackaMol::Atom objects. |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
=head1 Additional information |
201
|
|
|
|
|
|
|
|
202
|
|
|
|
|
|
|
According to the PDB specification, the element symbol should be present in columns |
203
|
|
|
|
|
|
|
77-78. The element is often ommitted by programs, such as charmm, that can write |
204
|
|
|
|
|
|
|
pdbs because it makes the file larger, and the information is accessible somewhere |
205
|
|
|
|
|
|
|
else (protein structure file). Unfortunately, other programs require the information. |
206
|
|
|
|
|
|
|
HackaMol::MolReadRole, uses HackaMol::PeriodicTable to map common names to the element |
207
|
|
|
|
|
|
|
(e.g. POT => 'K'). read_pdb_atoms will carp if the name is not in the hash, and then |
208
|
|
|
|
|
|
|
set the element to the first letter of the name. If you see one of these messages for |
209
|
|
|
|
|
|
|
a common atom, please submit an issue or pull request so the atom can be added! |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
=head1 SEE ALSO |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
=over 4 |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
=item * |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
L<HackaMol> |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
=item * |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
L<HackaMol::Atom> |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
=item * |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
L<HackaMol::Roles::MolReadRole> |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
=item * |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
L<Protein Data Bank|http://pdb.org> |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
=back |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
=head1 AUTHOR |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
Demian Riccardi <demianriccardi@gmail.com> |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
=head1 COPYRIGHT AND LICENSE |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
This software is copyright (c) 2017 by Demian Riccardi. |
240
|
|
|
|
|
|
|
|
241
|
|
|
|
|
|
|
This is free software; you can redistribute it and/or modify it under |
242
|
|
|
|
|
|
|
the same terms as the Perl 5 programming language system itself. |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
=cut |