File Coverage

blib/lib/Chemistry/Bond/Find.pm
Criterion Covered Total %
statement 15 254 5.9
branch 0 128 0.0
condition 0 85 0.0
subroutine 5 24 20.8
pod 2 19 10.5
total 22 510 4.3


line stmt bran cond sub pod time code
1             package Chemistry::Bond::Find;
2              
3             $VERSION = '0.23';
4             our $DEBUG = 0;
5             # $Id: Find.pm,v 1.9 2009/05/10 20:03:44 itubert Exp $
6              
7             =head1 NAME
8              
9             Chemistry::Bond::Find - Detect bonds in a molecule from atomic 3D coordinates and assign formal bond orders
10              
11             =head1 SYNOPSIS
12              
13             use Chemistry::Bond::Find ':all'; # export all available functions
14              
15             # $mol is a Chemistry::Mol object
16             find_bonds($mol);
17             assign_bond_orders($mol);
18              
19             =head1 DESCRIPTION
20              
21             This module provides functions for detecting the bonds in a molecule from its
22             3D coordinates by using simple cutoffs, and for guessing the formal bond
23             orders.
24              
25             This module is part of the PerlMol project, L.
26              
27             =head1 FUNCTIONS
28              
29             These functions may be exported, although nothing is exported by default.
30              
31             =over
32              
33             =cut
34              
35 1     1   24871 use strict;
  1         3  
  1         56  
36 1     1   1033 use Chemistry::Mol;
  1         102308  
  1         77  
37 1     1   13 use Exporter;
  1         8  
  1         28  
38 1     1   6 use Carp;
  1         2  
  1         511  
39              
40             our @ISA = qw(Exporter);
41             our @EXPORT_OK = qw(find_bonds assign_bond_orders );
42             our %EXPORT_TAGS = (all => \@EXPORT_OK);
43              
44             # table taken from
45             # http://environmentalchemistry.com/yogi/periodic/covalentradius.html
46             our %Covalent_Radius = (
47             Ag => 1.34, Al => 1.18, Ar => 0.98, As => 1.20, At => 1.45, Au => 1.34,
48             B => 0.82, Ba => 1.98, Be => 0.90, Bi => 1.46, Br => 1.14, C => 0.77,
49             Ca => 1.74, Cd => 1.48, Ce => 1.65, Cl => 0.99, Co => 1.16, Cr => 1.18,
50             Cs => 2.35, Cu => 1.17, Dy => 1.59, Er => 1.57, Eu => 1.85, F => 0.72,
51             Fe => 1.17, Ga => 1.26, Gd => 1.61, Ge => 1.22, H => 0.32, He => 0.93,
52             Hf => 1.44, Hg => 1.49, Ho => 1.58, I => 1.33, In => 1.44, Ir => 1.27,
53             K => 2.03, Kr => 1.12, La => 1.69, Li => 1.23, Lu => 1.56, Mg => 1.36,
54             Mn => 1.17, Mo => 1.30, N => 0.75, Na => 1.54, Nb => 1.34, Nd => 1.64,
55             Ne => 0.71, Ni => 1.15, O => 0.73, Os => 1.26, P => 1.06, Pb => 1.47,
56             Pd => 1.28, Pm => 1.63, Po => 1.46, Pr => 1.65, Pt => 1.30, Rb => 2.16,
57             Re => 1.28, Rh => 1.25, Ru => 1.25, S => 1.02, Sb => 1.40, Sc => 1.44,
58             Se => 1.16, Si => 1.11, Sm => 1.62, Sn => 1.41, Sr => 1.91, Ta => 1.34,
59             Tb => 1.59, Tc => 1.27, Te => 1.36, Th => 1.65, Ti => 1.32, Tl => 1.48,
60             Tm => 1.56, U => 1.42, V => 1.22, W => 1.30, Xe => 1.31, Y => 1.62,
61             Yb => 1.74, Zn => 1.25, Zr => 1.45,
62             );
63              
64             our $Default_Radius = 1.5; # radius for unknown elements
65              
66             # I considered inlining this function, but the performance gain was minimal
67             # ( < 5 % ), so it's probably better to leave it here
68             # $opts->{cuttof} Hash Table of cutoffs.
69             # Example: key = "H C", value = [0.76, 1.42]
70             sub are_bonded {
71 0     0 0   my ($symbol_a, $symbol_b, $r, $opts) = @_;
72 0   0       return $r < ($opts->{cuttoffs}{"$symbol_a $symbol_b"} ||=
      0        
      0        
73             (($Covalent_Radius{$symbol_a} || $Default_Radius)
74             + ($Covalent_Radius{$symbol_b} || $Default_Radius))
75             * $opts->{tolerance});
76             }
77              
78              
79             =item C
80              
81             Finds and adds the bonds in a molecule. Only use it in molecules that have no
82             explicit bonds; for example, after reading a file with 3D coordinates but no
83             bond orders.
84              
85             Available options:
86              
87             =over
88              
89             =item tolerance
90              
91             Defaults to 1.1. Two atoms are considered to be bound if the distance between
92             them is less than the sum of their covalent radii multiplied by the tolerance.
93              
94             =item margin
95              
96             NOTE: in general setting this option is not recommended, unless you know what
97             you are doing. It is used by the space partitioning algorithm to determine the
98             "bucket size". It defaults to 2 * Rmax * tolerance, where Rmax is the largest
99             covalent radius among the elements found in the molecule. For example, if a
100             molecule has C, H, N, O, and I, Rmax = R(I) = 1.33, so the margin defaults to 2
101             * 1.33 * 1.1 = 2.926. This margin ensures that no bonds are missed by the
102             partitioning algorithm.
103              
104             Using a smaller value gives faster results, but at the risk of missing some
105             bonds. In this example, if you are certain that your molecule doesn't contain
106             I-I bonds (but it has C-I bonds), you can set margin to (0.77 + 1.33) * 1.1 =
107             2.31 and you still won't miss any bonds (0.77 is the radius of carbon). This
108             only has a significant impact for molecules with a thousand atoms or more, but
109             it can reduce the execution time by 50% in some cases.
110              
111             =item orders
112              
113             If true, assign the bond orders after finding them, by calling
114             C.
115              
116             =item bond_class
117              
118             The class that will be used for creating the new bonds. The default is
119             the bond class returned by C<< $mol->bond_class >>.
120              
121             =back
122              
123             =cut
124              
125             # The new algorithm based on a suggestion by BrowserUK
126             sub find_bonds {
127 0     0 1   my ($mol, %opts) = @_;
128 0           %opts = (min_atoms => 20, tolerance => 1.1, %opts,
129             cutoffs => {}); # set defaults
130 0           my $margin = guess_margin($mol, \%opts);
131 0           %opts = (margin => $margin, %opts);
132 0           my $grid = {};
133 0           partition_space($mol, $grid, \%opts);
134 0           find_bonds_grid($mol, $grid, \%opts);
135 0 0         if ($opts{orders}) {
136 0           assign_bond_orders($mol, %opts);
137             }
138             }
139              
140 1     1   1141 use POSIX 'floor';
  1         14546  
  1         8  
141             my $Y = 1000;
142             my $X = $Y * $Y;
143             my $Z = 1;
144             my $ORIGIN = int(($Y**3 + $Y**2 + $Y)/2);
145              
146             # used by the new BrowserUK algorithm
147             sub partition_space {
148 0     0 0   my ($mol, $grid, $opts) = @_;
149 0           my $margin = $opts->{margin};
150 0           for my $atom ($mol->atoms) {
151 0           my (@vec) = $atom->coords->array;
152 0           my (@norm_vec) = map { floor($_ / $margin) } @vec;
  0            
153 0           my $n = $X * $norm_vec[0] + $Y * $norm_vec[1]
154             + $norm_vec[2] + $ORIGIN;
155 0           push @{$grid->{$n}}, $atom;
  0            
156             }
157             }
158              
159             # used by the new BrowserUK algorithm
160             sub find_bonds_grid {
161 0     0 0   my ($mol, $grid, $opts) = @_;
162 0           while (my ($n, $atoms) = each %$grid) {
163             #print "Cell $n has ". @$atoms . " atoms\n";
164 0           find_bonds_n2_one_set($mol, $atoms, $opts);
165 0           for my $neigh_n (
166             $n+$Z,
167             $n+$Z+$Y, $n+$Z-$Y, $n+$Z+$X, $n+$Z-$X,
168             $n+$Z+$Y+$X, $n+$Z+$Y-$X, $n+$Z-$Y+$X, $n+$Z-$Y-$X,
169             $n+$Y, $n+$Y+$X, $n+$Y-$X,
170             $n+$X,
171             ) {
172 0 0         if ($grid->{$neigh_n}) {
173 0           find_bonds_n2_two_sets($mol, $atoms, $grid->{$neigh_n}, $opts);
174             }
175             }
176             }
177             }
178              
179             # used by both find_bonds variants to figure out the maximum cutoff
180             sub guess_margin {
181 0     0 0   my ($mol, $opts) = @_;
182 0           my $formula = $mol->formula_hash;
183 0           my $max = 0;
184 0           for my $elem (keys %$formula) {
185 0 0         $max = $Covalent_Radius{$elem} if $Covalent_Radius{$elem} > $max;
186             }
187 0           $max *= 2 * $opts->{tolerance};
188             #printf "MARGIN guessed at (%.2f)\n", $max;
189 0           $max;
190             }
191              
192             # brute-force N^2 algorithm
193             sub find_bonds_n2_one_set {
194 0     0 0   my ($mol, $atoms, $opts) = @_;
195 0   0       my $bond_class = $opts->{bond_class} || $mol->bond_class;
196 0           for (my $i = 0; $i < @$atoms; ++$i) {
197 0           for (my $j = $i + 1; $j < @$atoms; ++$j) {
198 0           my ($a1, $a2) = ($atoms->[$i], $atoms->[$j]);
199 0 0         if (are_bonded($a1->symbol, $a2->symbol, scalar $a1->distance($a2), $opts)) {
200 0           $mol->add_bond($bond_class->new(atoms=>[$a1, $a2]));
201             }
202             }
203             }
204             }
205              
206             # brute force N*M algorithm for finding bonds between to sets of atoms
207             sub find_bonds_n2_two_sets {
208 0     0 0   my ($mol, $atoms1, $atoms2, $opts) = @_;
209 0   0       my $bond_class = $opts->{bond_class} || $mol->bond_class;
210 0           for my $a1 (@$atoms1) {
211 0           for my $a2 (@$atoms2) {
212 0 0         if (are_bonded($a1->symbol, $a2->symbol, scalar $a1->distance($a2), $opts)) {
213 0           $mol->add_bond($bond_class->new(atoms=>[$a1, $a2]));
214             }
215             }
216             }
217             }
218              
219              
220             =item C
221              
222             Assign the formal bond orders in a molecule. The bonds must already be defined,
223             either by C or because the molecule was read from a file that
224             included bonds but no bond orders. If the bond orders were already defined
225             (maybe the molecule came from a file that did include bond orders after all),
226             the original bond orders are erased and the process begins from scratch. Two
227             different algorithms are available, and may be selected by using the "method"
228             option:
229              
230             assign_bond_orders($mol, method => 'itub');
231             assign_bond_orders($mol, method => 'baber');
232              
233             =over
234              
235             =item itub
236              
237             This is the default if no method is specified. Developed from scratch by the
238             author of this module, this algorithm requires only the connection table
239             information, and it requires that all hydrogen atoms be explicit. It looks for
240             an atom with unsatisfied valence, increases a bond order, and then does the
241             same recursively for the neighbors. If everybody's not happy at the end, it
242             backtracks and tries another bond. The recursive part does not cover the whole
243             molecule, but only the contiguous region of "unhappy" atoms next to the
244             starting atom and their neighbors. This permits separating the molecule into
245             independent regions, so that if one is solved and there's a problem in another,
246             we don't have to backtrack to the first one.
247              
248             The itub algorithm has the following additional options:
249              
250             =over
251              
252             =item use_coords
253              
254             Although the algorithm does not I 3D coordinates, it uses them by
255             default to improve the initial guesses of which bond orders should be
256             increased. To avoid using coordinates, add the C option with a
257             false value:
258              
259             assign_bond_orders($mol, use_coords => 0);
260              
261             The results are the same most of the time, but using good coordinates improves
262             the results for complicated cases such as fused heteroaromatic systems.
263              
264             =item scratch
265              
266             If true, start the bond order assignment from scratch by assuming that all bond
267             orders are 1. If false, start from the current bond orders and try to fix the
268             unsatisfied valences. This option is true by default.
269              
270             =back
271              
272             =item baber
273              
274             A bond order assignment algorithm based on Baber, J. C.; Hodgkin, E. E.
275             J. Chem. Inf. Comp. Sci. 1992, 32, 401-406 (with some interpretation).
276              
277             This algorithm uses the 3D coordinates along with various cutoffs and
278             confidence formulas to guess the bond orders. It then tries to resolve
279             conflicts by looping through the atoms (but is not recursive or backtracking).
280             It does not require explicit hydrogens (although it's better when they are
281             available) because it was designed for use with real crystallographic data
282             which often doesn't have hydrogen atoms.
283              
284             This method doesn't always give a good answer, especially for conjugated and
285             aromatic systems. The variation used in this module adds some random numbers to
286             resolve some ambiguities and break loops, so the results are not even entirely
287             deterministic (the 'itub' method is deterministic but the result may depend on
288             the input order of the atoms).
289              
290             =back
291              
292             =cut
293              
294             sub assign_bond_orders {
295 0     0 1   my ($mol, %opts) = @_;
296 0 0 0       if ($opts{method} and $opts{method} eq 'baber') {
297 0           assign_bond_orders_baber($mol, %opts);
298             } else {
299 0           assign_bond_orders_itub($mol, %opts);
300             }
301             }
302              
303             ####### Bond order assignment algorithm by Ivan Tubert-Brohman
304              
305             # The "typical" valence that we expect an atom to have satisfied. If not
306             # given, a value of 1 is assumed.
307             my %MIN_VALENCES = ( O => 2, C => 4, S => 2, H => 1, N => 3, P => 3, Si => 2,
308             F => 1, Cl => 1, Br => 1, I => 1 );
309              
310             # $ALLOWED_INCREASES{$from}{$to} means that element $from is willing to
311             # exceed its minimum valence by having a multiple bond to element $to.
312             my %ALLOWED_INCREASES = (
313             Cl => { O => 7 },
314             Br => { O => 7 },
315             I => { O => 7 },
316             S => { O => 6, C => 3, S => 4 },
317             N => { O => 5, C => 4, N => 4 },
318             Si => { O => 4, C => 4 },
319             O => { C => 3, O => 3 },
320             P => { O => 5, C => 4 },
321             );
322              
323             sub assign_bond_orders_itub {
324 0     0 0   my ($mol, %opts) = @_;
325 0           %opts = (use_coords => 1, scratch => 1, funny => {}, %opts);
326             #my @funny_atoms;
327              
328 0 0         if ($opts{scratch}) {
329 0           $_->order(1) for $mol->bonds;
330             }
331 0           for my $atom ($mol->atoms) {
332 0 0         if (wants_more_bonds($atom)) {
333 0           my $ret = make_happy($atom, \%opts, []);
334 0 0         print "Atom $atom made happy? '$ret'\n" if $DEBUG;
335 0 0         unless ($ret) { # atom is funny (has formal charge or unpaired e-)
336 0           $opts{funny}{$atom} = 1;
337             #push @funny_atoms, $atom;
338             }
339             }
340             }
341             }
342              
343             sub wants_more_bonds {
344 0     0 0   my ($atom) = @_;
345 0           my $valence = $atom->valence;
346             #$n_bonds += $_->order for $atom->bonds;
347              
348 0   0       my $ret = ($valence < ($MIN_VALENCES{$atom->symbol} || 1));
349 0 0         print " $atom wants more bonds? '$ret'\n" if $DEBUG;
350 0           $ret;
351             }
352              
353             sub valid_bonds {
354 0     0 0   my ($atom) = @_;
355 0           my $valence = $atom->valence;
356             #$n_bonds += $_->order for $atom->bonds;
357              
358 0   0       my $ret = ($valence <= ($MIN_VALENCES{$atom->symbol} || 1));
359 0 0         print " $atom wants more bonds? '$ret'\n" if $DEBUG;
360 0           $ret;
361             }
362              
363             # the heart of the algorithm; increase a bond order, and then do the same
364             # recursively for the neighbors. If everybody's not happy at the end,
365             # backtrack and try another bond. Note that this search does not cover the
366             # whole molecule, but only the "unhappy" atoms and their neighbors. This
367             # permits separating the molecule into independent regions, so that if one
368             # is solved and there's a problem in another, we don't have to backtrack
369             # to the first one.
370             sub make_happy {
371 0     0 0   my ($atom, $opts, $q) = @_;
372              
373 0 0 0       if (!$opts->{funny}{$atom} and wants_more_bonds($atom)) {
374 0           push @$q, ($atom->neighbors); # the queue of atoms to be checked
375 0           for my $bn (sorted_neighbors($atom, $opts)) {
376 0           my ($nei, $bond) = @$bn{'to', 'bond'};
377             # note: it would be better to find the bond that is most likely to
378             # be increased by taking bond length into account
379 0 0         if (accepts_more_bonds($nei, $atom)) {
380 0           my $order = $bond->order;
381 0 0         print "increasing $bond($atom-$nei)\n" if $DEBUG;
382 0           $bond->order($order+1); # increase order, be happy
383             # now make sure everybody's happy
384 0           push @$q, $nei->neighbors($atom); # our friend's neighbors
385             # need to be happy too
386 0 0         if (make_happy($atom, $opts, $q)) { # are we happy now?
387 0           return 1; # everybody's happy?
388             } else {
389             # something's wrong, will have to backtrack
390 0 0         print "decreasing $bond($atom-$nei)\n" if $DEBUG;
391 0           $bond->order($order);
392 0           push @$q, $nei; # remember; neighbor's not happy either
393             }
394             }
395             }
396             # couldn't find happiness
397             } else {
398 0           my $next = shift @$q;
399 0 0         unless ($next) { # no one left; everybody is happy!
400 0 0         print "no more atoms left to check at $atom\n" if $DEBUG;
401 0           return 1;
402             }
403 0           return (make_happy($next, $opts, $q)); # happy if next atom is happy
404             }
405 0 0         print "not happy at $atom\n" if $DEBUG;
406 0           0; # not happy
407             }
408              
409             sub sorted_neighbors {
410 0     0 0   my ($atom, $opts) = @_;
411 0           my $use_coords = $opts->{use_coords};
412              
413 0 0         print "sorting neighbors\n" if $DEBUG;
414 0           return map {
415 0 0         $_->{bn}
    0          
416             } sort {
417 0   0       ($b->{wants} cmp $a->{wants}) # those who want go first
418             || ($use_coords ? ($a->{len} <=> $b->{len}) : 0);
419             # if they both want to the same degree, the shortest
420             # bond goes first if we are using coords
421             } map {
422 0           +{
423             wants => wants_more_bonds($_->{to}),
424             bn => $_,
425             len => !$use_coords || $_->{bond}->length,
426             }
427             } $atom->bonds_neighbors;
428             }
429              
430             sub accepts_more_bonds {
431 0     0 0   my ($atom, $to) = @_;
432 0           my ($symbol, $to_symbol) = ($atom->symbol, $to->symbol);
433              
434             #my $n_bonds = 0;
435             #$n_bonds += $_->order for $atom->bonds;
436 0           my $valence = $atom->valence;
437              
438             # not enough bonds even for the minimum valence?
439 0 0 0       return 1 if ($valence < ($MIN_VALENCES{$atom->symbol} || 1));
440              
441 0 0 0       if ($valence < ($ALLOWED_INCREASES{$symbol}{$to_symbol} || 0)) {
442             # make sure we are willing to make multiple bonds with this guy
443 0           return 1;
444             } else {
445 0           return 0; # max. valence satisfied
446             }
447             }
448              
449              
450             ############
451             # Bond order assignment algorithm based on Baber, J. C.; Hodgkin, E. E.
452             # J. Chem. Inf. Comp. Sci. 1992, 32, 401-406
453              
454             # this algorithm uses the 3D coordinates along with various cutoffs and
455             # confidence formulas to assign the bond orders. It does not require all
456             # explicit hydrogens (although it's better when they are available) because
457             # it's design for use with real crystallographic data which often doesn't
458             # have hydrogen.
459              
460             my %Valences = (
461             C => [4], N => [3,4], O => [2],
462             P => [4, 5], S => [2, 4, 6], As => [4, 5], Se => [2, 4, 6],
463             Te => [2, 4, 6],
464             F => [1], Cl => [1, 3], Br => [1, 3, 5], I => [1, 3, 5, 7],
465             H => [1],
466             );
467              
468             # note; this table should only have single-letter symbols
469             my %Bond_Orders = (
470             "C C" => { is => 1.49, id => 1.31, it => 1.18, wsd => 1.38, wdt => 1.21 },
471             "C N" => { is => 1.42, id => 1.32, it => 1.14, wsd => 1.34, wdt => 1.20 },
472             "C O" => { is => 1.41, id => 1.22, wsd => 1.28 },
473             "C S" => { is => 1.78, id => 1.68, wsd => 1.70 },
474             "N N" => { is => 1.40, id => 1.22, wsd => 1.32 },
475             "N O" => { is => 1.39, id => 1.22, wsd => 1.25 },
476             "O S" => { is => 1.58, id => 1.45, wsd => 1.54 },
477             #"O S" => { is => 1.58, id => 1.45, wsd => 1.51 }, # modified value
478             "O P" => { is => 1.60, id => 1.48, wsd => 1.52 },
479             );
480              
481             # add keys for reverse bond for ease of lookup
482             for (keys %Bond_Orders) {
483             $Bond_Orders{ scalar reverse $_ } = $Bond_Orders{$_};
484             }
485              
486             sub assign_bond_orders_baber {
487 0     0 0   my ($mol, %opts) = @_;
488 0   0       my $max_tries = $opts{max_tries} || 10;
489              
490 0           assign_initial_bonds($mol);
491 0           assign_initial_coordinations($mol);
492 0           my $tries = 0;
493 0           while (resolve_conflicts($mol)) {
494 0 0         if ($tries++ > $max_tries) {
495 0 0         print "too many tries\n" if $DEBUG;
496 0           last;
497             }
498 0 0         print "try again\n" if $DEBUG;
499             }
500 0           $tries;
501             }
502              
503             sub assign_initial_bonds {
504 0     0 0   my ($mol) = @_;
505              
506 0           for my $bond ($mol->bonds) {
507             #my %bond_has = map { ($_->symbol, 1 ) } $bond->atoms;
508 0           my $symbols = join " ", map { $_->symbol } $bond->atoms;
  0            
509 0           my ($order, $confidence);
510 0           my $l_obs = $bond->length;
511 0 0         if ($symbols =~ /\b(H|F|Cl|Br|I)\b/) {
    0          
    0          
    0          
512 0           $order = 1;
513 0           $confidence = 10000;
514             } elsif ($symbols =~ /\bSi\b/) {
515 0           $order = 1;
516 0           $confidence = 20 * ($l_obs - 1.4);
517             } elsif ($symbols =~ /\bB\b/) {
518 0           $order = 1;
519 0           $confidence = 20 * ($l_obs - 1.2);
520             } elsif (my $pars = $Bond_Orders{$symbols}) {
521 0 0         if ($pars->{it}) { # may have triple bonds
522 0 0 0       if ($l_obs > $pars->{wsd}) {
    0 0        
    0          
523 0           $order = 1;
524 0           $confidence = ($l_obs - $pars->{wsd}) /
525             (2 * ($pars->{is} - $pars->{wsd}));
526             } elsif ($pars->{id} < $l_obs and $l_obs < $pars->{wsd}) {
527 0           $order = 2;
528 0           $confidence = ($pars->{wsd} - $l_obs) /
529             (2 * ($pars->{wsd} - $pars->{id}));
530             } elsif ($pars->{id} > $l_obs and $l_obs > $pars->{wdt}) {
531 0           $order = 2;
532 0           $confidence = ($l_obs - $pars->{wdt}) /
533             (2 * ($pars->{id} - $pars->{wdt}));
534             } else {
535 0           $order = 3;
536 0           $confidence = ($pars->{wdt} - $l_obs) /
537             (2 * ($pars->{wdt} - $pars->{it}));
538             }
539             } else { # only single and double
540 0 0         if ($l_obs > $pars->{wsd}) {
541 0           $order = 1;
542 0           $confidence = ($l_obs - $pars->{wsd}) /
543             (2 * ($pars->{is} - $pars->{wsd}));
544             } else {
545 0           $order = 2;
546 0           $confidence = ($pars->{wsd} - $l_obs) /
547             (2 * ($pars->{wsd} - $pars->{id}));
548             }
549             }
550             } else { # unknown atom pair; let's assume it's always single
551 0           $order = 1;
552 0           $confidence = 100_000;
553             }
554              
555 0           $confidence *= 10; # normalization constant
556 0           $bond->order($order);
557 0           $bond->attr("bond-find/confidence", $confidence);
558 0 0         print "bond $bond($symbols) len=$l_obs, order=$order, ",
559             "conf=$confidence\n"
560             if $DEBUG;
561             }
562             }
563              
564             sub assign_initial_coordinations {
565 0     0 0   my ($mol) = @_;
566              
567 0           for my $atom ($mol->atoms) {
568 0           my @neighbors = $atom->neighbors;
569 0           my $a_obs = 0;
570 0           my $n = 0;
571 0           my ($max_conns, $confidence);
572 0           for (my $i = 0; $i < @neighbors - 1; $i++) {
573 0           for (my $j = $i + 1; $j < @neighbors; $j++) {
574 0           my $angle = Chemistry::Atom::angle_deg($neighbors[$i],
575             $atom, $neighbors[$j]);
576             # don't count linear angles for octahedral geometries
577 0 0 0       $n++, $a_obs += $angle unless @neighbors > 2 and $angle > 150;
578             }
579             }
580 0 0         $a_obs /= $n if $n;
581              
582 0 0 0       if ($n == 0) {
    0 0        
    0 0        
    0 0        
    0          
    0          
    0          
583             # this condition is not described explicitly in the article
584 0           $max_conns = 1; # undefined
585 0           $confidence = 0;
586 0           $n = 1;
587             } elsif ($a_obs > 150) {
588 0           $max_conns = 2; # linear
589 0           $confidence = ($a_obs - 150) / 30;
590             } elsif (120 > $a_obs and $a_obs >= 115) {
591 0           $max_conns = 3; # trigonal
592 0           $confidence = ($a_obs - 115) / 5;
593             } elsif (150 > $a_obs and $a_obs >= 120) {
594 0           $max_conns = 3; # trigonal
595 0           $confidence = (150 - $a_obs) / 30;
596             } elsif (109.5 > $a_obs and $a_obs >= 99) {
597 0           $max_conns = 4; # tetrahedral
598 0           $confidence = ($a_obs - 99) / 10.5;
599             } elsif (115 > $a_obs and $a_obs >= 109.5) {
600 0           $max_conns = 4; # tetrahedral
601 0           $confidence = (115 - $a_obs) / 5.5;
602             } elsif ($a_obs < 99) {
603 0           $max_conns = 6; # octahedral
604 0           $confidence = (99 - $a_obs) / 9;
605             } else {
606 0           confess("impossible coordination angle $a_obs!");
607             }
608              
609 0           $confidence *= 100 * $n;
610 0 0         if ($Valences{$atom->symbol}) {
611 0           $atom->attr("bond-find/valence", $Valences{$atom->symbol}[0]);
612             } else {
613 0           $atom->attr("bond-find/valence", $max_conns);
614             }
615 0           $atom->attr("bond-find/confidence", $confidence);
616 0           $atom->attr("bond-find/max_conns", $max_conns);
617 0 0         print "Atom $atom has CN $max_conns with a conf. of $confidence\n"
618             if $DEBUG;
619             }
620             }
621              
622             sub resolve_conflicts {
623 0     0 0   my ($mol) = @_;
624            
625 0           my $changes = 0;
626 0           for my $atom ($mol->atoms) {
627 0           my $valence = $atom->attr('bond-find/valence');
628 0           my $max_conns = $atom->attr('bond-find/max_conns');
629 0           my $confidence = $atom->attr('bond-find/confidence');
630 0           my $n_conns = $atom->bonds;
631              
632 0           my $n_bonds = 0;
633 0           $n_bonds += $_->order for $atom->bonds;
634              
635             # this is a modification to avoid cummulative double bonds on
636             # non linear carbon
637 0           my $n_multiple_bonds = grep { $_->order > 1 } $atom->bonds;
  0            
638              
639 0 0 0       if ($n_conns > $valence) {
    0 0        
    0 0        
640 0           my $next_valence = next_valence($atom->symbol, $valence);
641 0 0         if ($next_valence) {
642 0 0         print "increasing valence of $atom to $next_valence\n"
643             if $DEBUG;
644 0           $atom->attr('bond-find/valence', $next_valence);
645 0           ++$changes, redo;
646             } else {
647 0           warn "too many conns $n_conns to $atom with valence $valence\n";
648 0           next;
649             }
650             } elsif ($n_bonds > $valence
651             or $n_multiple_bonds > 1 and $max_conns > 2 and $atom->symbol eq 'C'
652             ) {
653 0           my $next_valence = next_valence($atom->symbol, $valence);
654 0 0         if ($next_valence) {
655 0 0         print "increasing valence of $atom to $next_valence\n"
656             if $DEBUG;
657 0           $atom->attr('bond-find/valence', $next_valence);
658 0           ++$changes, next;
659             }
660 0           my ($min_conf, $min_bond);
661 0           for my $bond ($atom->bonds) {
662 0 0 0       if ($bond->order > 1 and
      0        
663             (not defined $min_conf
664             or $bond->attr("bond-find/confidence") < $min_conf)) {
665 0           $min_conf = $bond->attr("bond-find/confidence");
666 0           $min_bond = $bond;
667             }
668             }
669 0           my $new_order = $min_bond->order - 1;
670 0           $min_bond->order($new_order);
671 0 0         print "Decreasing order of $min_bond to $new_order\n" if $DEBUG;
672             #$min_bond->attr('bond-find/confidence', $min_conf*1.2+rand());
673 0           ++$changes, next;
674             } elsif ($n_bonds + $max_conns - $n_conns < $valence) {
675 0           my ($min_conf, $min_bond);
676 0           for my $bond ($atom->bonds) {
677 0 0 0       if (($bond->order == 1
      0        
      0        
678             or $atom->symbol =~ /^[CN]$/ and $bond->order == 2) and
679             (not defined $min_conf
680             or $bond->attr("bond-find/confidence") < $min_conf)) {
681 0           $min_conf = $bond->attr("bond-find/confidence");
682 0           $min_bond = $bond;
683             }
684             }
685              
686             #print "\t($confidence, $min_conf)\n" if ($atom->id eq 'a1');
687 0 0 0       if ($min_bond and ($confidence > 95 or $min_conf < $confidence)) {
      0        
688             #print "XXX: $atom\n";
689 0           my $new_order = $min_bond->order + 1;
690 0           $min_bond->order($new_order);
691 0 0         print "Increasing order of $min_bond to $new_order\n" if $DEBUG;
692             #$min_bond->attr('bond-find/confidence', $min_conf*1.2+rand());
693             #$atom->attr('bond-find/confidence', $confidence / 1.5);
694             } else {
695 0           $max_conns++;
696 0           $atom->attr("bond-find/max_conns", $max_conns);
697 0 0         print "Increasing coord. num. of $atom to $max_conns\n"
698             if $DEBUG;
699             }
700 0           ++$changes, next;
701             }
702             }
703 0           $changes;
704             }
705              
706             sub next_valence {
707 0     0 0   my ($symbol, $current) = @_;
708              
709 0 0         if ($Valences{$symbol}) {
710 0           for my $v (@{$Valences{$symbol}}) {
  0            
711 0 0         return $v if $v > $current;
712             }
713             }
714 0           return undef;
715             }
716              
717             1;
718              
719             =back
720              
721             =head1 VERSION
722              
723             0.23
724              
725             =head1 TO DO
726              
727             Some future version should let the user specify the desired cutoffs, and
728             not always create a bond but call a user-supplied function instead. This way
729             these functions could be used for other purposes such as finding hydrogen bonds
730             or neighbor lists.
731              
732             Add some tests.
733              
734             =head1 SEE ALSO
735              
736             L, L, L,
737             L.
738              
739             =head1 AUTHOR
740              
741             Ivan Tubert-BrohmanEitub@cpan.orgE
742              
743             The new C algorithm was loosely based on a suggestion by BrowserUK
744             on perlmonks.org (L).
745              
746             =head1 COPYRIGHT
747              
748             Copyright (c) 2009 Ivan Tubert-Brohman. All rights reserved. This program is
749             free software; you can redistribute it and/or modify it under the same terms as
750             Perl itself.
751              
752             =cut
753