line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Chemistry::Bond::Find; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
$VERSION = '0.23'; |
4
|
|
|
|
|
|
|
our $DEBUG = 0; |
5
|
|
|
|
|
|
|
# $Id: Find.pm,v 1.9 2009/05/10 20:03:44 itubert Exp $ |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
=head1 NAME |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
Chemistry::Bond::Find - Detect bonds in a molecule from atomic 3D coordinates and assign formal bond orders |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
=head1 SYNOPSIS |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
use Chemistry::Bond::Find ':all'; # export all available functions |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
# $mol is a Chemistry::Mol object |
16
|
|
|
|
|
|
|
find_bonds($mol); |
17
|
|
|
|
|
|
|
assign_bond_orders($mol); |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=head1 DESCRIPTION |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
This module provides functions for detecting the bonds in a molecule from its |
22
|
|
|
|
|
|
|
3D coordinates by using simple cutoffs, and for guessing the formal bond |
23
|
|
|
|
|
|
|
orders. |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
This module is part of the PerlMol project, L. |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
=head1 FUNCTIONS |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
These functions may be exported, although nothing is exported by default. |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
=over |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
=cut |
34
|
|
|
|
|
|
|
|
35
|
1
|
|
|
1
|
|
24871
|
use strict; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
56
|
|
36
|
1
|
|
|
1
|
|
1033
|
use Chemistry::Mol; |
|
1
|
|
|
|
|
102308
|
|
|
1
|
|
|
|
|
77
|
|
37
|
1
|
|
|
1
|
|
13
|
use Exporter; |
|
1
|
|
|
|
|
8
|
|
|
1
|
|
|
|
|
28
|
|
38
|
1
|
|
|
1
|
|
6
|
use Carp; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
511
|
|
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
our @ISA = qw(Exporter); |
41
|
|
|
|
|
|
|
our @EXPORT_OK = qw(find_bonds assign_bond_orders ); |
42
|
|
|
|
|
|
|
our %EXPORT_TAGS = (all => \@EXPORT_OK); |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
# table taken from |
45
|
|
|
|
|
|
|
# http://environmentalchemistry.com/yogi/periodic/covalentradius.html |
46
|
|
|
|
|
|
|
our %Covalent_Radius = ( |
47
|
|
|
|
|
|
|
Ag => 1.34, Al => 1.18, Ar => 0.98, As => 1.20, At => 1.45, Au => 1.34, |
48
|
|
|
|
|
|
|
B => 0.82, Ba => 1.98, Be => 0.90, Bi => 1.46, Br => 1.14, C => 0.77, |
49
|
|
|
|
|
|
|
Ca => 1.74, Cd => 1.48, Ce => 1.65, Cl => 0.99, Co => 1.16, Cr => 1.18, |
50
|
|
|
|
|
|
|
Cs => 2.35, Cu => 1.17, Dy => 1.59, Er => 1.57, Eu => 1.85, F => 0.72, |
51
|
|
|
|
|
|
|
Fe => 1.17, Ga => 1.26, Gd => 1.61, Ge => 1.22, H => 0.32, He => 0.93, |
52
|
|
|
|
|
|
|
Hf => 1.44, Hg => 1.49, Ho => 1.58, I => 1.33, In => 1.44, Ir => 1.27, |
53
|
|
|
|
|
|
|
K => 2.03, Kr => 1.12, La => 1.69, Li => 1.23, Lu => 1.56, Mg => 1.36, |
54
|
|
|
|
|
|
|
Mn => 1.17, Mo => 1.30, N => 0.75, Na => 1.54, Nb => 1.34, Nd => 1.64, |
55
|
|
|
|
|
|
|
Ne => 0.71, Ni => 1.15, O => 0.73, Os => 1.26, P => 1.06, Pb => 1.47, |
56
|
|
|
|
|
|
|
Pd => 1.28, Pm => 1.63, Po => 1.46, Pr => 1.65, Pt => 1.30, Rb => 2.16, |
57
|
|
|
|
|
|
|
Re => 1.28, Rh => 1.25, Ru => 1.25, S => 1.02, Sb => 1.40, Sc => 1.44, |
58
|
|
|
|
|
|
|
Se => 1.16, Si => 1.11, Sm => 1.62, Sn => 1.41, Sr => 1.91, Ta => 1.34, |
59
|
|
|
|
|
|
|
Tb => 1.59, Tc => 1.27, Te => 1.36, Th => 1.65, Ti => 1.32, Tl => 1.48, |
60
|
|
|
|
|
|
|
Tm => 1.56, U => 1.42, V => 1.22, W => 1.30, Xe => 1.31, Y => 1.62, |
61
|
|
|
|
|
|
|
Yb => 1.74, Zn => 1.25, Zr => 1.45, |
62
|
|
|
|
|
|
|
); |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
our $Default_Radius = 1.5; # radius for unknown elements |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
# I considered inlining this function, but the performance gain was minimal |
67
|
|
|
|
|
|
|
# ( < 5 % ), so it's probably better to leave it here |
68
|
|
|
|
|
|
|
# $opts->{cuttof} Hash Table of cutoffs. |
69
|
|
|
|
|
|
|
# Example: key = "H C", value = [0.76, 1.42] |
70
|
|
|
|
|
|
|
sub are_bonded { |
71
|
0
|
|
|
0
|
0
|
|
my ($symbol_a, $symbol_b, $r, $opts) = @_; |
72
|
0
|
|
0
|
|
|
|
return $r < ($opts->{cuttoffs}{"$symbol_a $symbol_b"} ||= |
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
73
|
|
|
|
|
|
|
(($Covalent_Radius{$symbol_a} || $Default_Radius) |
74
|
|
|
|
|
|
|
+ ($Covalent_Radius{$symbol_b} || $Default_Radius)) |
75
|
|
|
|
|
|
|
* $opts->{tolerance}); |
76
|
|
|
|
|
|
|
} |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
=item C |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
Finds and adds the bonds in a molecule. Only use it in molecules that have no |
82
|
|
|
|
|
|
|
explicit bonds; for example, after reading a file with 3D coordinates but no |
83
|
|
|
|
|
|
|
bond orders. |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
Available options: |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
=over |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
=item tolerance |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
Defaults to 1.1. Two atoms are considered to be bound if the distance between |
92
|
|
|
|
|
|
|
them is less than the sum of their covalent radii multiplied by the tolerance. |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
=item margin |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
NOTE: in general setting this option is not recommended, unless you know what |
97
|
|
|
|
|
|
|
you are doing. It is used by the space partitioning algorithm to determine the |
98
|
|
|
|
|
|
|
"bucket size". It defaults to 2 * Rmax * tolerance, where Rmax is the largest |
99
|
|
|
|
|
|
|
covalent radius among the elements found in the molecule. For example, if a |
100
|
|
|
|
|
|
|
molecule has C, H, N, O, and I, Rmax = R(I) = 1.33, so the margin defaults to 2 |
101
|
|
|
|
|
|
|
* 1.33 * 1.1 = 2.926. This margin ensures that no bonds are missed by the |
102
|
|
|
|
|
|
|
partitioning algorithm. |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
Using a smaller value gives faster results, but at the risk of missing some |
105
|
|
|
|
|
|
|
bonds. In this example, if you are certain that your molecule doesn't contain |
106
|
|
|
|
|
|
|
I-I bonds (but it has C-I bonds), you can set margin to (0.77 + 1.33) * 1.1 = |
107
|
|
|
|
|
|
|
2.31 and you still won't miss any bonds (0.77 is the radius of carbon). This |
108
|
|
|
|
|
|
|
only has a significant impact for molecules with a thousand atoms or more, but |
109
|
|
|
|
|
|
|
it can reduce the execution time by 50% in some cases. |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
=item orders |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
If true, assign the bond orders after finding them, by calling |
114
|
|
|
|
|
|
|
C. |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
=item bond_class |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
The class that will be used for creating the new bonds. The default is |
119
|
|
|
|
|
|
|
the bond class returned by C<< $mol->bond_class >>. |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
=back |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
=cut |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
# The new algorithm based on a suggestion by BrowserUK |
126
|
|
|
|
|
|
|
sub find_bonds { |
127
|
0
|
|
|
0
|
1
|
|
my ($mol, %opts) = @_; |
128
|
0
|
|
|
|
|
|
%opts = (min_atoms => 20, tolerance => 1.1, %opts, |
129
|
|
|
|
|
|
|
cutoffs => {}); # set defaults |
130
|
0
|
|
|
|
|
|
my $margin = guess_margin($mol, \%opts); |
131
|
0
|
|
|
|
|
|
%opts = (margin => $margin, %opts); |
132
|
0
|
|
|
|
|
|
my $grid = {}; |
133
|
0
|
|
|
|
|
|
partition_space($mol, $grid, \%opts); |
134
|
0
|
|
|
|
|
|
find_bonds_grid($mol, $grid, \%opts); |
135
|
0
|
0
|
|
|
|
|
if ($opts{orders}) { |
136
|
0
|
|
|
|
|
|
assign_bond_orders($mol, %opts); |
137
|
|
|
|
|
|
|
} |
138
|
|
|
|
|
|
|
} |
139
|
|
|
|
|
|
|
|
140
|
1
|
|
|
1
|
|
1141
|
use POSIX 'floor'; |
|
1
|
|
|
|
|
14546
|
|
|
1
|
|
|
|
|
8
|
|
141
|
|
|
|
|
|
|
my $Y = 1000; |
142
|
|
|
|
|
|
|
my $X = $Y * $Y; |
143
|
|
|
|
|
|
|
my $Z = 1; |
144
|
|
|
|
|
|
|
my $ORIGIN = int(($Y**3 + $Y**2 + $Y)/2); |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
# used by the new BrowserUK algorithm |
147
|
|
|
|
|
|
|
sub partition_space { |
148
|
0
|
|
|
0
|
0
|
|
my ($mol, $grid, $opts) = @_; |
149
|
0
|
|
|
|
|
|
my $margin = $opts->{margin}; |
150
|
0
|
|
|
|
|
|
for my $atom ($mol->atoms) { |
151
|
0
|
|
|
|
|
|
my (@vec) = $atom->coords->array; |
152
|
0
|
|
|
|
|
|
my (@norm_vec) = map { floor($_ / $margin) } @vec; |
|
0
|
|
|
|
|
|
|
153
|
0
|
|
|
|
|
|
my $n = $X * $norm_vec[0] + $Y * $norm_vec[1] |
154
|
|
|
|
|
|
|
+ $norm_vec[2] + $ORIGIN; |
155
|
0
|
|
|
|
|
|
push @{$grid->{$n}}, $atom; |
|
0
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
} |
157
|
|
|
|
|
|
|
} |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
# used by the new BrowserUK algorithm |
160
|
|
|
|
|
|
|
sub find_bonds_grid { |
161
|
0
|
|
|
0
|
0
|
|
my ($mol, $grid, $opts) = @_; |
162
|
0
|
|
|
|
|
|
while (my ($n, $atoms) = each %$grid) { |
163
|
|
|
|
|
|
|
#print "Cell $n has ". @$atoms . " atoms\n"; |
164
|
0
|
|
|
|
|
|
find_bonds_n2_one_set($mol, $atoms, $opts); |
165
|
0
|
|
|
|
|
|
for my $neigh_n ( |
166
|
|
|
|
|
|
|
$n+$Z, |
167
|
|
|
|
|
|
|
$n+$Z+$Y, $n+$Z-$Y, $n+$Z+$X, $n+$Z-$X, |
168
|
|
|
|
|
|
|
$n+$Z+$Y+$X, $n+$Z+$Y-$X, $n+$Z-$Y+$X, $n+$Z-$Y-$X, |
169
|
|
|
|
|
|
|
$n+$Y, $n+$Y+$X, $n+$Y-$X, |
170
|
|
|
|
|
|
|
$n+$X, |
171
|
|
|
|
|
|
|
) { |
172
|
0
|
0
|
|
|
|
|
if ($grid->{$neigh_n}) { |
173
|
0
|
|
|
|
|
|
find_bonds_n2_two_sets($mol, $atoms, $grid->{$neigh_n}, $opts); |
174
|
|
|
|
|
|
|
} |
175
|
|
|
|
|
|
|
} |
176
|
|
|
|
|
|
|
} |
177
|
|
|
|
|
|
|
} |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
# used by both find_bonds variants to figure out the maximum cutoff |
180
|
|
|
|
|
|
|
sub guess_margin { |
181
|
0
|
|
|
0
|
0
|
|
my ($mol, $opts) = @_; |
182
|
0
|
|
|
|
|
|
my $formula = $mol->formula_hash; |
183
|
0
|
|
|
|
|
|
my $max = 0; |
184
|
0
|
|
|
|
|
|
for my $elem (keys %$formula) { |
185
|
0
|
0
|
|
|
|
|
$max = $Covalent_Radius{$elem} if $Covalent_Radius{$elem} > $max; |
186
|
|
|
|
|
|
|
} |
187
|
0
|
|
|
|
|
|
$max *= 2 * $opts->{tolerance}; |
188
|
|
|
|
|
|
|
#printf "MARGIN guessed at (%.2f)\n", $max; |
189
|
0
|
|
|
|
|
|
$max; |
190
|
|
|
|
|
|
|
} |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
# brute-force N^2 algorithm |
193
|
|
|
|
|
|
|
sub find_bonds_n2_one_set { |
194
|
0
|
|
|
0
|
0
|
|
my ($mol, $atoms, $opts) = @_; |
195
|
0
|
|
0
|
|
|
|
my $bond_class = $opts->{bond_class} || $mol->bond_class; |
196
|
0
|
|
|
|
|
|
for (my $i = 0; $i < @$atoms; ++$i) { |
197
|
0
|
|
|
|
|
|
for (my $j = $i + 1; $j < @$atoms; ++$j) { |
198
|
0
|
|
|
|
|
|
my ($a1, $a2) = ($atoms->[$i], $atoms->[$j]); |
199
|
0
|
0
|
|
|
|
|
if (are_bonded($a1->symbol, $a2->symbol, scalar $a1->distance($a2), $opts)) { |
200
|
0
|
|
|
|
|
|
$mol->add_bond($bond_class->new(atoms=>[$a1, $a2])); |
201
|
|
|
|
|
|
|
} |
202
|
|
|
|
|
|
|
} |
203
|
|
|
|
|
|
|
} |
204
|
|
|
|
|
|
|
} |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
# brute force N*M algorithm for finding bonds between to sets of atoms |
207
|
|
|
|
|
|
|
sub find_bonds_n2_two_sets { |
208
|
0
|
|
|
0
|
0
|
|
my ($mol, $atoms1, $atoms2, $opts) = @_; |
209
|
0
|
|
0
|
|
|
|
my $bond_class = $opts->{bond_class} || $mol->bond_class; |
210
|
0
|
|
|
|
|
|
for my $a1 (@$atoms1) { |
211
|
0
|
|
|
|
|
|
for my $a2 (@$atoms2) { |
212
|
0
|
0
|
|
|
|
|
if (are_bonded($a1->symbol, $a2->symbol, scalar $a1->distance($a2), $opts)) { |
213
|
0
|
|
|
|
|
|
$mol->add_bond($bond_class->new(atoms=>[$a1, $a2])); |
214
|
|
|
|
|
|
|
} |
215
|
|
|
|
|
|
|
} |
216
|
|
|
|
|
|
|
} |
217
|
|
|
|
|
|
|
} |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
=item C |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
Assign the formal bond orders in a molecule. The bonds must already be defined, |
223
|
|
|
|
|
|
|
either by C or because the molecule was read from a file that |
224
|
|
|
|
|
|
|
included bonds but no bond orders. If the bond orders were already defined |
225
|
|
|
|
|
|
|
(maybe the molecule came from a file that did include bond orders after all), |
226
|
|
|
|
|
|
|
the original bond orders are erased and the process begins from scratch. Two |
227
|
|
|
|
|
|
|
different algorithms are available, and may be selected by using the "method" |
228
|
|
|
|
|
|
|
option: |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
assign_bond_orders($mol, method => 'itub'); |
231
|
|
|
|
|
|
|
assign_bond_orders($mol, method => 'baber'); |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
=over |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
=item itub |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
This is the default if no method is specified. Developed from scratch by the |
238
|
|
|
|
|
|
|
author of this module, this algorithm requires only the connection table |
239
|
|
|
|
|
|
|
information, and it requires that all hydrogen atoms be explicit. It looks for |
240
|
|
|
|
|
|
|
an atom with unsatisfied valence, increases a bond order, and then does the |
241
|
|
|
|
|
|
|
same recursively for the neighbors. If everybody's not happy at the end, it |
242
|
|
|
|
|
|
|
backtracks and tries another bond. The recursive part does not cover the whole |
243
|
|
|
|
|
|
|
molecule, but only the contiguous region of "unhappy" atoms next to the |
244
|
|
|
|
|
|
|
starting atom and their neighbors. This permits separating the molecule into |
245
|
|
|
|
|
|
|
independent regions, so that if one is solved and there's a problem in another, |
246
|
|
|
|
|
|
|
we don't have to backtrack to the first one. |
247
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
The itub algorithm has the following additional options: |
249
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
=over |
251
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
=item use_coords |
253
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
Although the algorithm does not I 3D coordinates, it uses them by |
255
|
|
|
|
|
|
|
default to improve the initial guesses of which bond orders should be |
256
|
|
|
|
|
|
|
increased. To avoid using coordinates, add the C option with a |
257
|
|
|
|
|
|
|
false value: |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
assign_bond_orders($mol, use_coords => 0); |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
The results are the same most of the time, but using good coordinates improves |
262
|
|
|
|
|
|
|
the results for complicated cases such as fused heteroaromatic systems. |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
=item scratch |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
If true, start the bond order assignment from scratch by assuming that all bond |
267
|
|
|
|
|
|
|
orders are 1. If false, start from the current bond orders and try to fix the |
268
|
|
|
|
|
|
|
unsatisfied valences. This option is true by default. |
269
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
=back |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
=item baber |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
A bond order assignment algorithm based on Baber, J. C.; Hodgkin, E. E. |
275
|
|
|
|
|
|
|
J. Chem. Inf. Comp. Sci. 1992, 32, 401-406 (with some interpretation). |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
This algorithm uses the 3D coordinates along with various cutoffs and |
278
|
|
|
|
|
|
|
confidence formulas to guess the bond orders. It then tries to resolve |
279
|
|
|
|
|
|
|
conflicts by looping through the atoms (but is not recursive or backtracking). |
280
|
|
|
|
|
|
|
It does not require explicit hydrogens (although it's better when they are |
281
|
|
|
|
|
|
|
available) because it was designed for use with real crystallographic data |
282
|
|
|
|
|
|
|
which often doesn't have hydrogen atoms. |
283
|
|
|
|
|
|
|
|
284
|
|
|
|
|
|
|
This method doesn't always give a good answer, especially for conjugated and |
285
|
|
|
|
|
|
|
aromatic systems. The variation used in this module adds some random numbers to |
286
|
|
|
|
|
|
|
resolve some ambiguities and break loops, so the results are not even entirely |
287
|
|
|
|
|
|
|
deterministic (the 'itub' method is deterministic but the result may depend on |
288
|
|
|
|
|
|
|
the input order of the atoms). |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
=back |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
=cut |
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
sub assign_bond_orders { |
295
|
0
|
|
|
0
|
1
|
|
my ($mol, %opts) = @_; |
296
|
0
|
0
|
0
|
|
|
|
if ($opts{method} and $opts{method} eq 'baber') { |
297
|
0
|
|
|
|
|
|
assign_bond_orders_baber($mol, %opts); |
298
|
|
|
|
|
|
|
} else { |
299
|
0
|
|
|
|
|
|
assign_bond_orders_itub($mol, %opts); |
300
|
|
|
|
|
|
|
} |
301
|
|
|
|
|
|
|
} |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
####### Bond order assignment algorithm by Ivan Tubert-Brohman |
304
|
|
|
|
|
|
|
|
305
|
|
|
|
|
|
|
# The "typical" valence that we expect an atom to have satisfied. If not |
306
|
|
|
|
|
|
|
# given, a value of 1 is assumed. |
307
|
|
|
|
|
|
|
my %MIN_VALENCES = ( O => 2, C => 4, S => 2, H => 1, N => 3, P => 3, Si => 2, |
308
|
|
|
|
|
|
|
F => 1, Cl => 1, Br => 1, I => 1 ); |
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
# $ALLOWED_INCREASES{$from}{$to} means that element $from is willing to |
311
|
|
|
|
|
|
|
# exceed its minimum valence by having a multiple bond to element $to. |
312
|
|
|
|
|
|
|
my %ALLOWED_INCREASES = ( |
313
|
|
|
|
|
|
|
Cl => { O => 7 }, |
314
|
|
|
|
|
|
|
Br => { O => 7 }, |
315
|
|
|
|
|
|
|
I => { O => 7 }, |
316
|
|
|
|
|
|
|
S => { O => 6, C => 3, S => 4 }, |
317
|
|
|
|
|
|
|
N => { O => 5, C => 4, N => 4 }, |
318
|
|
|
|
|
|
|
Si => { O => 4, C => 4 }, |
319
|
|
|
|
|
|
|
O => { C => 3, O => 3 }, |
320
|
|
|
|
|
|
|
P => { O => 5, C => 4 }, |
321
|
|
|
|
|
|
|
); |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
sub assign_bond_orders_itub { |
324
|
0
|
|
|
0
|
0
|
|
my ($mol, %opts) = @_; |
325
|
0
|
|
|
|
|
|
%opts = (use_coords => 1, scratch => 1, funny => {}, %opts); |
326
|
|
|
|
|
|
|
#my @funny_atoms; |
327
|
|
|
|
|
|
|
|
328
|
0
|
0
|
|
|
|
|
if ($opts{scratch}) { |
329
|
0
|
|
|
|
|
|
$_->order(1) for $mol->bonds; |
330
|
|
|
|
|
|
|
} |
331
|
0
|
|
|
|
|
|
for my $atom ($mol->atoms) { |
332
|
0
|
0
|
|
|
|
|
if (wants_more_bonds($atom)) { |
333
|
0
|
|
|
|
|
|
my $ret = make_happy($atom, \%opts, []); |
334
|
0
|
0
|
|
|
|
|
print "Atom $atom made happy? '$ret'\n" if $DEBUG; |
335
|
0
|
0
|
|
|
|
|
unless ($ret) { # atom is funny (has formal charge or unpaired e-) |
336
|
0
|
|
|
|
|
|
$opts{funny}{$atom} = 1; |
337
|
|
|
|
|
|
|
#push @funny_atoms, $atom; |
338
|
|
|
|
|
|
|
} |
339
|
|
|
|
|
|
|
} |
340
|
|
|
|
|
|
|
} |
341
|
|
|
|
|
|
|
} |
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
sub wants_more_bonds { |
344
|
0
|
|
|
0
|
0
|
|
my ($atom) = @_; |
345
|
0
|
|
|
|
|
|
my $valence = $atom->valence; |
346
|
|
|
|
|
|
|
#$n_bonds += $_->order for $atom->bonds; |
347
|
|
|
|
|
|
|
|
348
|
0
|
|
0
|
|
|
|
my $ret = ($valence < ($MIN_VALENCES{$atom->symbol} || 1)); |
349
|
0
|
0
|
|
|
|
|
print " $atom wants more bonds? '$ret'\n" if $DEBUG; |
350
|
0
|
|
|
|
|
|
$ret; |
351
|
|
|
|
|
|
|
} |
352
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
sub valid_bonds { |
354
|
0
|
|
|
0
|
0
|
|
my ($atom) = @_; |
355
|
0
|
|
|
|
|
|
my $valence = $atom->valence; |
356
|
|
|
|
|
|
|
#$n_bonds += $_->order for $atom->bonds; |
357
|
|
|
|
|
|
|
|
358
|
0
|
|
0
|
|
|
|
my $ret = ($valence <= ($MIN_VALENCES{$atom->symbol} || 1)); |
359
|
0
|
0
|
|
|
|
|
print " $atom wants more bonds? '$ret'\n" if $DEBUG; |
360
|
0
|
|
|
|
|
|
$ret; |
361
|
|
|
|
|
|
|
} |
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
# the heart of the algorithm; increase a bond order, and then do the same |
364
|
|
|
|
|
|
|
# recursively for the neighbors. If everybody's not happy at the end, |
365
|
|
|
|
|
|
|
# backtrack and try another bond. Note that this search does not cover the |
366
|
|
|
|
|
|
|
# whole molecule, but only the "unhappy" atoms and their neighbors. This |
367
|
|
|
|
|
|
|
# permits separating the molecule into independent regions, so that if one |
368
|
|
|
|
|
|
|
# is solved and there's a problem in another, we don't have to backtrack |
369
|
|
|
|
|
|
|
# to the first one. |
370
|
|
|
|
|
|
|
sub make_happy { |
371
|
0
|
|
|
0
|
0
|
|
my ($atom, $opts, $q) = @_; |
372
|
|
|
|
|
|
|
|
373
|
0
|
0
|
0
|
|
|
|
if (!$opts->{funny}{$atom} and wants_more_bonds($atom)) { |
374
|
0
|
|
|
|
|
|
push @$q, ($atom->neighbors); # the queue of atoms to be checked |
375
|
0
|
|
|
|
|
|
for my $bn (sorted_neighbors($atom, $opts)) { |
376
|
0
|
|
|
|
|
|
my ($nei, $bond) = @$bn{'to', 'bond'}; |
377
|
|
|
|
|
|
|
# note: it would be better to find the bond that is most likely to |
378
|
|
|
|
|
|
|
# be increased by taking bond length into account |
379
|
0
|
0
|
|
|
|
|
if (accepts_more_bonds($nei, $atom)) { |
380
|
0
|
|
|
|
|
|
my $order = $bond->order; |
381
|
0
|
0
|
|
|
|
|
print "increasing $bond($atom-$nei)\n" if $DEBUG; |
382
|
0
|
|
|
|
|
|
$bond->order($order+1); # increase order, be happy |
383
|
|
|
|
|
|
|
# now make sure everybody's happy |
384
|
0
|
|
|
|
|
|
push @$q, $nei->neighbors($atom); # our friend's neighbors |
385
|
|
|
|
|
|
|
# need to be happy too |
386
|
0
|
0
|
|
|
|
|
if (make_happy($atom, $opts, $q)) { # are we happy now? |
387
|
0
|
|
|
|
|
|
return 1; # everybody's happy? |
388
|
|
|
|
|
|
|
} else { |
389
|
|
|
|
|
|
|
# something's wrong, will have to backtrack |
390
|
0
|
0
|
|
|
|
|
print "decreasing $bond($atom-$nei)\n" if $DEBUG; |
391
|
0
|
|
|
|
|
|
$bond->order($order); |
392
|
0
|
|
|
|
|
|
push @$q, $nei; # remember; neighbor's not happy either |
393
|
|
|
|
|
|
|
} |
394
|
|
|
|
|
|
|
} |
395
|
|
|
|
|
|
|
} |
396
|
|
|
|
|
|
|
# couldn't find happiness |
397
|
|
|
|
|
|
|
} else { |
398
|
0
|
|
|
|
|
|
my $next = shift @$q; |
399
|
0
|
0
|
|
|
|
|
unless ($next) { # no one left; everybody is happy! |
400
|
0
|
0
|
|
|
|
|
print "no more atoms left to check at $atom\n" if $DEBUG; |
401
|
0
|
|
|
|
|
|
return 1; |
402
|
|
|
|
|
|
|
} |
403
|
0
|
|
|
|
|
|
return (make_happy($next, $opts, $q)); # happy if next atom is happy |
404
|
|
|
|
|
|
|
} |
405
|
0
|
0
|
|
|
|
|
print "not happy at $atom\n" if $DEBUG; |
406
|
0
|
|
|
|
|
|
0; # not happy |
407
|
|
|
|
|
|
|
} |
408
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
sub sorted_neighbors { |
410
|
0
|
|
|
0
|
0
|
|
my ($atom, $opts) = @_; |
411
|
0
|
|
|
|
|
|
my $use_coords = $opts->{use_coords}; |
412
|
|
|
|
|
|
|
|
413
|
0
|
0
|
|
|
|
|
print "sorting neighbors\n" if $DEBUG; |
414
|
0
|
|
|
|
|
|
return map { |
415
|
0
|
0
|
|
|
|
|
$_->{bn} |
|
|
0
|
|
|
|
|
|
416
|
|
|
|
|
|
|
} sort { |
417
|
0
|
|
0
|
|
|
|
($b->{wants} cmp $a->{wants}) # those who want go first |
418
|
|
|
|
|
|
|
|| ($use_coords ? ($a->{len} <=> $b->{len}) : 0); |
419
|
|
|
|
|
|
|
# if they both want to the same degree, the shortest |
420
|
|
|
|
|
|
|
# bond goes first if we are using coords |
421
|
|
|
|
|
|
|
} map { |
422
|
0
|
|
|
|
|
|
+{ |
423
|
|
|
|
|
|
|
wants => wants_more_bonds($_->{to}), |
424
|
|
|
|
|
|
|
bn => $_, |
425
|
|
|
|
|
|
|
len => !$use_coords || $_->{bond}->length, |
426
|
|
|
|
|
|
|
} |
427
|
|
|
|
|
|
|
} $atom->bonds_neighbors; |
428
|
|
|
|
|
|
|
} |
429
|
|
|
|
|
|
|
|
430
|
|
|
|
|
|
|
sub accepts_more_bonds { |
431
|
0
|
|
|
0
|
0
|
|
my ($atom, $to) = @_; |
432
|
0
|
|
|
|
|
|
my ($symbol, $to_symbol) = ($atom->symbol, $to->symbol); |
433
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
#my $n_bonds = 0; |
435
|
|
|
|
|
|
|
#$n_bonds += $_->order for $atom->bonds; |
436
|
0
|
|
|
|
|
|
my $valence = $atom->valence; |
437
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
# not enough bonds even for the minimum valence? |
439
|
0
|
0
|
0
|
|
|
|
return 1 if ($valence < ($MIN_VALENCES{$atom->symbol} || 1)); |
440
|
|
|
|
|
|
|
|
441
|
0
|
0
|
0
|
|
|
|
if ($valence < ($ALLOWED_INCREASES{$symbol}{$to_symbol} || 0)) { |
442
|
|
|
|
|
|
|
# make sure we are willing to make multiple bonds with this guy |
443
|
0
|
|
|
|
|
|
return 1; |
444
|
|
|
|
|
|
|
} else { |
445
|
0
|
|
|
|
|
|
return 0; # max. valence satisfied |
446
|
|
|
|
|
|
|
} |
447
|
|
|
|
|
|
|
} |
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
############ |
451
|
|
|
|
|
|
|
# Bond order assignment algorithm based on Baber, J. C.; Hodgkin, E. E. |
452
|
|
|
|
|
|
|
# J. Chem. Inf. Comp. Sci. 1992, 32, 401-406 |
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
# this algorithm uses the 3D coordinates along with various cutoffs and |
455
|
|
|
|
|
|
|
# confidence formulas to assign the bond orders. It does not require all |
456
|
|
|
|
|
|
|
# explicit hydrogens (although it's better when they are available) because |
457
|
|
|
|
|
|
|
# it's design for use with real crystallographic data which often doesn't |
458
|
|
|
|
|
|
|
# have hydrogen. |
459
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
my %Valences = ( |
461
|
|
|
|
|
|
|
C => [4], N => [3,4], O => [2], |
462
|
|
|
|
|
|
|
P => [4, 5], S => [2, 4, 6], As => [4, 5], Se => [2, 4, 6], |
463
|
|
|
|
|
|
|
Te => [2, 4, 6], |
464
|
|
|
|
|
|
|
F => [1], Cl => [1, 3], Br => [1, 3, 5], I => [1, 3, 5, 7], |
465
|
|
|
|
|
|
|
H => [1], |
466
|
|
|
|
|
|
|
); |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
# note; this table should only have single-letter symbols |
469
|
|
|
|
|
|
|
my %Bond_Orders = ( |
470
|
|
|
|
|
|
|
"C C" => { is => 1.49, id => 1.31, it => 1.18, wsd => 1.38, wdt => 1.21 }, |
471
|
|
|
|
|
|
|
"C N" => { is => 1.42, id => 1.32, it => 1.14, wsd => 1.34, wdt => 1.20 }, |
472
|
|
|
|
|
|
|
"C O" => { is => 1.41, id => 1.22, wsd => 1.28 }, |
473
|
|
|
|
|
|
|
"C S" => { is => 1.78, id => 1.68, wsd => 1.70 }, |
474
|
|
|
|
|
|
|
"N N" => { is => 1.40, id => 1.22, wsd => 1.32 }, |
475
|
|
|
|
|
|
|
"N O" => { is => 1.39, id => 1.22, wsd => 1.25 }, |
476
|
|
|
|
|
|
|
"O S" => { is => 1.58, id => 1.45, wsd => 1.54 }, |
477
|
|
|
|
|
|
|
#"O S" => { is => 1.58, id => 1.45, wsd => 1.51 }, # modified value |
478
|
|
|
|
|
|
|
"O P" => { is => 1.60, id => 1.48, wsd => 1.52 }, |
479
|
|
|
|
|
|
|
); |
480
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
# add keys for reverse bond for ease of lookup |
482
|
|
|
|
|
|
|
for (keys %Bond_Orders) { |
483
|
|
|
|
|
|
|
$Bond_Orders{ scalar reverse $_ } = $Bond_Orders{$_}; |
484
|
|
|
|
|
|
|
} |
485
|
|
|
|
|
|
|
|
486
|
|
|
|
|
|
|
sub assign_bond_orders_baber { |
487
|
0
|
|
|
0
|
0
|
|
my ($mol, %opts) = @_; |
488
|
0
|
|
0
|
|
|
|
my $max_tries = $opts{max_tries} || 10; |
489
|
|
|
|
|
|
|
|
490
|
0
|
|
|
|
|
|
assign_initial_bonds($mol); |
491
|
0
|
|
|
|
|
|
assign_initial_coordinations($mol); |
492
|
0
|
|
|
|
|
|
my $tries = 0; |
493
|
0
|
|
|
|
|
|
while (resolve_conflicts($mol)) { |
494
|
0
|
0
|
|
|
|
|
if ($tries++ > $max_tries) { |
495
|
0
|
0
|
|
|
|
|
print "too many tries\n" if $DEBUG; |
496
|
0
|
|
|
|
|
|
last; |
497
|
|
|
|
|
|
|
} |
498
|
0
|
0
|
|
|
|
|
print "try again\n" if $DEBUG; |
499
|
|
|
|
|
|
|
} |
500
|
0
|
|
|
|
|
|
$tries; |
501
|
|
|
|
|
|
|
} |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
sub assign_initial_bonds { |
504
|
0
|
|
|
0
|
0
|
|
my ($mol) = @_; |
505
|
|
|
|
|
|
|
|
506
|
0
|
|
|
|
|
|
for my $bond ($mol->bonds) { |
507
|
|
|
|
|
|
|
#my %bond_has = map { ($_->symbol, 1 ) } $bond->atoms; |
508
|
0
|
|
|
|
|
|
my $symbols = join " ", map { $_->symbol } $bond->atoms; |
|
0
|
|
|
|
|
|
|
509
|
0
|
|
|
|
|
|
my ($order, $confidence); |
510
|
0
|
|
|
|
|
|
my $l_obs = $bond->length; |
511
|
0
|
0
|
|
|
|
|
if ($symbols =~ /\b(H|F|Cl|Br|I)\b/) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
512
|
0
|
|
|
|
|
|
$order = 1; |
513
|
0
|
|
|
|
|
|
$confidence = 10000; |
514
|
|
|
|
|
|
|
} elsif ($symbols =~ /\bSi\b/) { |
515
|
0
|
|
|
|
|
|
$order = 1; |
516
|
0
|
|
|
|
|
|
$confidence = 20 * ($l_obs - 1.4); |
517
|
|
|
|
|
|
|
} elsif ($symbols =~ /\bB\b/) { |
518
|
0
|
|
|
|
|
|
$order = 1; |
519
|
0
|
|
|
|
|
|
$confidence = 20 * ($l_obs - 1.2); |
520
|
|
|
|
|
|
|
} elsif (my $pars = $Bond_Orders{$symbols}) { |
521
|
0
|
0
|
|
|
|
|
if ($pars->{it}) { # may have triple bonds |
522
|
0
|
0
|
0
|
|
|
|
if ($l_obs > $pars->{wsd}) { |
|
|
0
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
523
|
0
|
|
|
|
|
|
$order = 1; |
524
|
0
|
|
|
|
|
|
$confidence = ($l_obs - $pars->{wsd}) / |
525
|
|
|
|
|
|
|
(2 * ($pars->{is} - $pars->{wsd})); |
526
|
|
|
|
|
|
|
} elsif ($pars->{id} < $l_obs and $l_obs < $pars->{wsd}) { |
527
|
0
|
|
|
|
|
|
$order = 2; |
528
|
0
|
|
|
|
|
|
$confidence = ($pars->{wsd} - $l_obs) / |
529
|
|
|
|
|
|
|
(2 * ($pars->{wsd} - $pars->{id})); |
530
|
|
|
|
|
|
|
} elsif ($pars->{id} > $l_obs and $l_obs > $pars->{wdt}) { |
531
|
0
|
|
|
|
|
|
$order = 2; |
532
|
0
|
|
|
|
|
|
$confidence = ($l_obs - $pars->{wdt}) / |
533
|
|
|
|
|
|
|
(2 * ($pars->{id} - $pars->{wdt})); |
534
|
|
|
|
|
|
|
} else { |
535
|
0
|
|
|
|
|
|
$order = 3; |
536
|
0
|
|
|
|
|
|
$confidence = ($pars->{wdt} - $l_obs) / |
537
|
|
|
|
|
|
|
(2 * ($pars->{wdt} - $pars->{it})); |
538
|
|
|
|
|
|
|
} |
539
|
|
|
|
|
|
|
} else { # only single and double |
540
|
0
|
0
|
|
|
|
|
if ($l_obs > $pars->{wsd}) { |
541
|
0
|
|
|
|
|
|
$order = 1; |
542
|
0
|
|
|
|
|
|
$confidence = ($l_obs - $pars->{wsd}) / |
543
|
|
|
|
|
|
|
(2 * ($pars->{is} - $pars->{wsd})); |
544
|
|
|
|
|
|
|
} else { |
545
|
0
|
|
|
|
|
|
$order = 2; |
546
|
0
|
|
|
|
|
|
$confidence = ($pars->{wsd} - $l_obs) / |
547
|
|
|
|
|
|
|
(2 * ($pars->{wsd} - $pars->{id})); |
548
|
|
|
|
|
|
|
} |
549
|
|
|
|
|
|
|
} |
550
|
|
|
|
|
|
|
} else { # unknown atom pair; let's assume it's always single |
551
|
0
|
|
|
|
|
|
$order = 1; |
552
|
0
|
|
|
|
|
|
$confidence = 100_000; |
553
|
|
|
|
|
|
|
} |
554
|
|
|
|
|
|
|
|
555
|
0
|
|
|
|
|
|
$confidence *= 10; # normalization constant |
556
|
0
|
|
|
|
|
|
$bond->order($order); |
557
|
0
|
|
|
|
|
|
$bond->attr("bond-find/confidence", $confidence); |
558
|
0
|
0
|
|
|
|
|
print "bond $bond($symbols) len=$l_obs, order=$order, ", |
559
|
|
|
|
|
|
|
"conf=$confidence\n" |
560
|
|
|
|
|
|
|
if $DEBUG; |
561
|
|
|
|
|
|
|
} |
562
|
|
|
|
|
|
|
} |
563
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
sub assign_initial_coordinations { |
565
|
0
|
|
|
0
|
0
|
|
my ($mol) = @_; |
566
|
|
|
|
|
|
|
|
567
|
0
|
|
|
|
|
|
for my $atom ($mol->atoms) { |
568
|
0
|
|
|
|
|
|
my @neighbors = $atom->neighbors; |
569
|
0
|
|
|
|
|
|
my $a_obs = 0; |
570
|
0
|
|
|
|
|
|
my $n = 0; |
571
|
0
|
|
|
|
|
|
my ($max_conns, $confidence); |
572
|
0
|
|
|
|
|
|
for (my $i = 0; $i < @neighbors - 1; $i++) { |
573
|
0
|
|
|
|
|
|
for (my $j = $i + 1; $j < @neighbors; $j++) { |
574
|
0
|
|
|
|
|
|
my $angle = Chemistry::Atom::angle_deg($neighbors[$i], |
575
|
|
|
|
|
|
|
$atom, $neighbors[$j]); |
576
|
|
|
|
|
|
|
# don't count linear angles for octahedral geometries |
577
|
0
|
0
|
0
|
|
|
|
$n++, $a_obs += $angle unless @neighbors > 2 and $angle > 150; |
578
|
|
|
|
|
|
|
} |
579
|
|
|
|
|
|
|
} |
580
|
0
|
0
|
|
|
|
|
$a_obs /= $n if $n; |
581
|
|
|
|
|
|
|
|
582
|
0
|
0
|
0
|
|
|
|
if ($n == 0) { |
|
|
0
|
0
|
|
|
|
|
|
|
0
|
0
|
|
|
|
|
|
|
0
|
0
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
583
|
|
|
|
|
|
|
# this condition is not described explicitly in the article |
584
|
0
|
|
|
|
|
|
$max_conns = 1; # undefined |
585
|
0
|
|
|
|
|
|
$confidence = 0; |
586
|
0
|
|
|
|
|
|
$n = 1; |
587
|
|
|
|
|
|
|
} elsif ($a_obs > 150) { |
588
|
0
|
|
|
|
|
|
$max_conns = 2; # linear |
589
|
0
|
|
|
|
|
|
$confidence = ($a_obs - 150) / 30; |
590
|
|
|
|
|
|
|
} elsif (120 > $a_obs and $a_obs >= 115) { |
591
|
0
|
|
|
|
|
|
$max_conns = 3; # trigonal |
592
|
0
|
|
|
|
|
|
$confidence = ($a_obs - 115) / 5; |
593
|
|
|
|
|
|
|
} elsif (150 > $a_obs and $a_obs >= 120) { |
594
|
0
|
|
|
|
|
|
$max_conns = 3; # trigonal |
595
|
0
|
|
|
|
|
|
$confidence = (150 - $a_obs) / 30; |
596
|
|
|
|
|
|
|
} elsif (109.5 > $a_obs and $a_obs >= 99) { |
597
|
0
|
|
|
|
|
|
$max_conns = 4; # tetrahedral |
598
|
0
|
|
|
|
|
|
$confidence = ($a_obs - 99) / 10.5; |
599
|
|
|
|
|
|
|
} elsif (115 > $a_obs and $a_obs >= 109.5) { |
600
|
0
|
|
|
|
|
|
$max_conns = 4; # tetrahedral |
601
|
0
|
|
|
|
|
|
$confidence = (115 - $a_obs) / 5.5; |
602
|
|
|
|
|
|
|
} elsif ($a_obs < 99) { |
603
|
0
|
|
|
|
|
|
$max_conns = 6; # octahedral |
604
|
0
|
|
|
|
|
|
$confidence = (99 - $a_obs) / 9; |
605
|
|
|
|
|
|
|
} else { |
606
|
0
|
|
|
|
|
|
confess("impossible coordination angle $a_obs!"); |
607
|
|
|
|
|
|
|
} |
608
|
|
|
|
|
|
|
|
609
|
0
|
|
|
|
|
|
$confidence *= 100 * $n; |
610
|
0
|
0
|
|
|
|
|
if ($Valences{$atom->symbol}) { |
611
|
0
|
|
|
|
|
|
$atom->attr("bond-find/valence", $Valences{$atom->symbol}[0]); |
612
|
|
|
|
|
|
|
} else { |
613
|
0
|
|
|
|
|
|
$atom->attr("bond-find/valence", $max_conns); |
614
|
|
|
|
|
|
|
} |
615
|
0
|
|
|
|
|
|
$atom->attr("bond-find/confidence", $confidence); |
616
|
0
|
|
|
|
|
|
$atom->attr("bond-find/max_conns", $max_conns); |
617
|
0
|
0
|
|
|
|
|
print "Atom $atom has CN $max_conns with a conf. of $confidence\n" |
618
|
|
|
|
|
|
|
if $DEBUG; |
619
|
|
|
|
|
|
|
} |
620
|
|
|
|
|
|
|
} |
621
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
sub resolve_conflicts { |
623
|
0
|
|
|
0
|
0
|
|
my ($mol) = @_; |
624
|
|
|
|
|
|
|
|
625
|
0
|
|
|
|
|
|
my $changes = 0; |
626
|
0
|
|
|
|
|
|
for my $atom ($mol->atoms) { |
627
|
0
|
|
|
|
|
|
my $valence = $atom->attr('bond-find/valence'); |
628
|
0
|
|
|
|
|
|
my $max_conns = $atom->attr('bond-find/max_conns'); |
629
|
0
|
|
|
|
|
|
my $confidence = $atom->attr('bond-find/confidence'); |
630
|
0
|
|
|
|
|
|
my $n_conns = $atom->bonds; |
631
|
|
|
|
|
|
|
|
632
|
0
|
|
|
|
|
|
my $n_bonds = 0; |
633
|
0
|
|
|
|
|
|
$n_bonds += $_->order for $atom->bonds; |
634
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
# this is a modification to avoid cummulative double bonds on |
636
|
|
|
|
|
|
|
# non linear carbon |
637
|
0
|
|
|
|
|
|
my $n_multiple_bonds = grep { $_->order > 1 } $atom->bonds; |
|
0
|
|
|
|
|
|
|
638
|
|
|
|
|
|
|
|
639
|
0
|
0
|
0
|
|
|
|
if ($n_conns > $valence) { |
|
|
0
|
0
|
|
|
|
|
|
|
0
|
0
|
|
|
|
|
640
|
0
|
|
|
|
|
|
my $next_valence = next_valence($atom->symbol, $valence); |
641
|
0
|
0
|
|
|
|
|
if ($next_valence) { |
642
|
0
|
0
|
|
|
|
|
print "increasing valence of $atom to $next_valence\n" |
643
|
|
|
|
|
|
|
if $DEBUG; |
644
|
0
|
|
|
|
|
|
$atom->attr('bond-find/valence', $next_valence); |
645
|
0
|
|
|
|
|
|
++$changes, redo; |
646
|
|
|
|
|
|
|
} else { |
647
|
0
|
|
|
|
|
|
warn "too many conns $n_conns to $atom with valence $valence\n"; |
648
|
0
|
|
|
|
|
|
next; |
649
|
|
|
|
|
|
|
} |
650
|
|
|
|
|
|
|
} elsif ($n_bonds > $valence |
651
|
|
|
|
|
|
|
or $n_multiple_bonds > 1 and $max_conns > 2 and $atom->symbol eq 'C' |
652
|
|
|
|
|
|
|
) { |
653
|
0
|
|
|
|
|
|
my $next_valence = next_valence($atom->symbol, $valence); |
654
|
0
|
0
|
|
|
|
|
if ($next_valence) { |
655
|
0
|
0
|
|
|
|
|
print "increasing valence of $atom to $next_valence\n" |
656
|
|
|
|
|
|
|
if $DEBUG; |
657
|
0
|
|
|
|
|
|
$atom->attr('bond-find/valence', $next_valence); |
658
|
0
|
|
|
|
|
|
++$changes, next; |
659
|
|
|
|
|
|
|
} |
660
|
0
|
|
|
|
|
|
my ($min_conf, $min_bond); |
661
|
0
|
|
|
|
|
|
for my $bond ($atom->bonds) { |
662
|
0
|
0
|
0
|
|
|
|
if ($bond->order > 1 and |
|
|
|
0
|
|
|
|
|
663
|
|
|
|
|
|
|
(not defined $min_conf |
664
|
|
|
|
|
|
|
or $bond->attr("bond-find/confidence") < $min_conf)) { |
665
|
0
|
|
|
|
|
|
$min_conf = $bond->attr("bond-find/confidence"); |
666
|
0
|
|
|
|
|
|
$min_bond = $bond; |
667
|
|
|
|
|
|
|
} |
668
|
|
|
|
|
|
|
} |
669
|
0
|
|
|
|
|
|
my $new_order = $min_bond->order - 1; |
670
|
0
|
|
|
|
|
|
$min_bond->order($new_order); |
671
|
0
|
0
|
|
|
|
|
print "Decreasing order of $min_bond to $new_order\n" if $DEBUG; |
672
|
|
|
|
|
|
|
#$min_bond->attr('bond-find/confidence', $min_conf*1.2+rand()); |
673
|
0
|
|
|
|
|
|
++$changes, next; |
674
|
|
|
|
|
|
|
} elsif ($n_bonds + $max_conns - $n_conns < $valence) { |
675
|
0
|
|
|
|
|
|
my ($min_conf, $min_bond); |
676
|
0
|
|
|
|
|
|
for my $bond ($atom->bonds) { |
677
|
0
|
0
|
0
|
|
|
|
if (($bond->order == 1 |
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
678
|
|
|
|
|
|
|
or $atom->symbol =~ /^[CN]$/ and $bond->order == 2) and |
679
|
|
|
|
|
|
|
(not defined $min_conf |
680
|
|
|
|
|
|
|
or $bond->attr("bond-find/confidence") < $min_conf)) { |
681
|
0
|
|
|
|
|
|
$min_conf = $bond->attr("bond-find/confidence"); |
682
|
0
|
|
|
|
|
|
$min_bond = $bond; |
683
|
|
|
|
|
|
|
} |
684
|
|
|
|
|
|
|
} |
685
|
|
|
|
|
|
|
|
686
|
|
|
|
|
|
|
#print "\t($confidence, $min_conf)\n" if ($atom->id eq 'a1'); |
687
|
0
|
0
|
0
|
|
|
|
if ($min_bond and ($confidence > 95 or $min_conf < $confidence)) { |
|
|
|
0
|
|
|
|
|
688
|
|
|
|
|
|
|
#print "XXX: $atom\n"; |
689
|
0
|
|
|
|
|
|
my $new_order = $min_bond->order + 1; |
690
|
0
|
|
|
|
|
|
$min_bond->order($new_order); |
691
|
0
|
0
|
|
|
|
|
print "Increasing order of $min_bond to $new_order\n" if $DEBUG; |
692
|
|
|
|
|
|
|
#$min_bond->attr('bond-find/confidence', $min_conf*1.2+rand()); |
693
|
|
|
|
|
|
|
#$atom->attr('bond-find/confidence', $confidence / 1.5); |
694
|
|
|
|
|
|
|
} else { |
695
|
0
|
|
|
|
|
|
$max_conns++; |
696
|
0
|
|
|
|
|
|
$atom->attr("bond-find/max_conns", $max_conns); |
697
|
0
|
0
|
|
|
|
|
print "Increasing coord. num. of $atom to $max_conns\n" |
698
|
|
|
|
|
|
|
if $DEBUG; |
699
|
|
|
|
|
|
|
} |
700
|
0
|
|
|
|
|
|
++$changes, next; |
701
|
|
|
|
|
|
|
} |
702
|
|
|
|
|
|
|
} |
703
|
0
|
|
|
|
|
|
$changes; |
704
|
|
|
|
|
|
|
} |
705
|
|
|
|
|
|
|
|
706
|
|
|
|
|
|
|
sub next_valence { |
707
|
0
|
|
|
0
|
0
|
|
my ($symbol, $current) = @_; |
708
|
|
|
|
|
|
|
|
709
|
0
|
0
|
|
|
|
|
if ($Valences{$symbol}) { |
710
|
0
|
|
|
|
|
|
for my $v (@{$Valences{$symbol}}) { |
|
0
|
|
|
|
|
|
|
711
|
0
|
0
|
|
|
|
|
return $v if $v > $current; |
712
|
|
|
|
|
|
|
} |
713
|
|
|
|
|
|
|
} |
714
|
0
|
|
|
|
|
|
return undef; |
715
|
|
|
|
|
|
|
} |
716
|
|
|
|
|
|
|
|
717
|
|
|
|
|
|
|
1; |
718
|
|
|
|
|
|
|
|
719
|
|
|
|
|
|
|
=back |
720
|
|
|
|
|
|
|
|
721
|
|
|
|
|
|
|
=head1 VERSION |
722
|
|
|
|
|
|
|
|
723
|
|
|
|
|
|
|
0.23 |
724
|
|
|
|
|
|
|
|
725
|
|
|
|
|
|
|
=head1 TO DO |
726
|
|
|
|
|
|
|
|
727
|
|
|
|
|
|
|
Some future version should let the user specify the desired cutoffs, and |
728
|
|
|
|
|
|
|
not always create a bond but call a user-supplied function instead. This way |
729
|
|
|
|
|
|
|
these functions could be used for other purposes such as finding hydrogen bonds |
730
|
|
|
|
|
|
|
or neighbor lists. |
731
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
Add some tests. |
733
|
|
|
|
|
|
|
|
734
|
|
|
|
|
|
|
=head1 SEE ALSO |
735
|
|
|
|
|
|
|
|
736
|
|
|
|
|
|
|
L, L, L, |
737
|
|
|
|
|
|
|
L. |
738
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
=head1 AUTHOR |
740
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
Ivan Tubert-BrohmanEitub@cpan.orgE |
742
|
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
The new C algorithm was loosely based on a suggestion by BrowserUK |
744
|
|
|
|
|
|
|
on perlmonks.org (L). |
745
|
|
|
|
|
|
|
|
746
|
|
|
|
|
|
|
=head1 COPYRIGHT |
747
|
|
|
|
|
|
|
|
748
|
|
|
|
|
|
|
Copyright (c) 2009 Ivan Tubert-Brohman. All rights reserved. This program is |
749
|
|
|
|
|
|
|
free software; you can redistribute it and/or modify it under the same terms as |
750
|
|
|
|
|
|
|
Perl itself. |
751
|
|
|
|
|
|
|
|
752
|
|
|
|
|
|
|
=cut |
753
|
|
|
|
|
|
|
|