File Coverage

blib/lib/Chemistry/OpenSMILES.pm
Criterion Covered Total %
statement 325 389 83.5
branch 154 214 71.9
condition 68 93 73.1
subroutine 44 54 81.4
pod 0 21 0.0
total 591 771 76.6


line stmt bran cond sub pod time code
1             package Chemistry::OpenSMILES;
2              
3             # ABSTRACT: OpenSMILES format reader and writer
4             our $VERSION = '0.12.4'; # VERSION
5              
6 45     45   70253 use strict;
  45         65  
  45         1374  
7 45     45   193 use warnings;
  45         85  
  45         1881  
8 45     45   667 use 5.0100;
  45         119  
9              
10 45     45   18207 use Chemistry::OpenSMILES::Stereo::Tables qw( @OH @TB );
  45         156  
  45         5547  
11 45     45   18614 use Graph::MoreUtils qw( SSSR );
  45         2432990  
  45         2585  
12 45     45   323 use Graph::MoreUtils::SSSR;
  45         73  
  45         763  
13 45     45   15613 use Graph::Traversal::BFS;
  45         124426  
  45         1428  
14 45     45   259 use List::Util qw( all any first max min none sum0 );
  45         79  
  45         5354  
15 45     45   233 use Set::Object qw( set );
  45         59  
  45         225242  
16              
17             require Exporter;
18             our @ISA = qw( Exporter );
19             our @EXPORT_OK = qw(
20             %bond_order_to_symbol
21             %bond_symbol_to_order
22             %normal_valence
23             can_be_aromatic_ring
24             can_unsprout_hydrogen
25             clean_chiral_centers
26             is_aromatic
27             is_aromatic_bond
28             is_chiral
29             is_chiral_allenal
30             is_chiral_octahedral
31             is_chiral_planar
32             is_chiral_tetrahedral
33             is_chiral_trigonal_bipyramidal
34             is_cis_trans_bond
35             is_double_bond
36             is_ring_atom
37             is_ring_bond
38             is_single_bond
39             is_triple_bond
40             mirror
41             rings
42             toggle_cistrans
43             valence
44             );
45              
46             sub is_chiral($);
47             sub is_chiral_planar($);
48             sub is_chiral_tetrahedral($);
49             sub mirror($);
50             sub rings($@);
51             sub toggle_cistrans($);
52              
53             our %normal_valence = (
54             B => [ 3 ],
55             C => [ 4 ],
56             N => [ 3, 5 ],
57             O => [ 2 ],
58             P => [ 3, 5 ],
59             S => [ 2, 4, 6 ],
60             F => [ 1 ],
61             Cl => [ 1 ],
62             Br => [ 1 ],
63             I => [ 1 ],
64             c => [ 3 ], # Not from OpenSMILES specification
65             );
66              
67             our %bond_order_to_symbol = (
68             1 => '-',
69             1.5 => ':',
70             2 => '=',
71             3 => '#',
72             4 => '$',
73             );
74              
75             our %bond_symbol_to_order = (
76             '-' => 1,
77             ':' => 1.5,
78             '=' => 2,
79             '#' => 3,
80             '$' => 4,
81             );
82              
83             sub can_be_aromatic_ring
84             {
85 4     4 0 13 my @atoms = @_;
86 4     20   16 return all { /^(Se|As|[BCNOPS])$/i } map { $_->{symbol} } @atoms;
  20         70  
  23         47  
87             }
88              
89             sub can_unsprout_hydrogen
90             {
91 3687     3687 0 4704 my( $moiety, $atom ) = @_;
92              
93 3687 100       8434 return undef unless $atom->{symbol} eq 'H';
94 712 100   1424   2767 return '' if any { exists $atom->{$_} } qw( chirality isotope );
  1424         2252  
95 711 50       2028 return '' if $atom->{charge};
96 711 50       1220 return '' if $atom->{class};
97 711 100       1363 return '' unless $moiety->degree( $atom ) == 1;
98              
99 699         174359 my( $neighbour ) = $moiety->neighbours( $atom );
100 699 100       43895 return '' if $neighbour->{symbol} eq 'H';
101              
102             # Can unsprout only those H atoms with cis/trans bonds, where a substitution could be found.
103 695 100       1429 if( is_cis_trans_bond( $moiety, $atom, $neighbour ) ) {
104             my $n_cis_trans =
105 4         1419 grep { is_cis_trans_bond( $moiety, $neighbour, $_ ) }
  12         3024  
106             $moiety->neighbours( $neighbour );
107 4 50       1273 return '' if $n_cis_trans == 1;
108             }
109              
110             # TODO: Support other chiralities as well
111 695 100       118198 if( is_chiral $neighbour ) {
112 63 50       112 return '' unless is_chiral_tetrahedral $neighbour;
113 63 50       137 return '' unless $neighbour->{chirality_neighbours};
114 63 50       74 return '' unless @{$neighbour->{chirality_neighbours}} == 4;
  63         122  
115             }
116              
117 695         1794 return 1;
118             }
119              
120             # Removes chiral setting from allenal, square planar, tetrahedral and trigonal bipyramidal chiral centers if deemed unimportant.
121             # For allenal, tetrahedral and trigonal bipyramidal arrangements when not all the neighbours are distinct.
122             # For square planar arrangements this means situations when all neighbours are the same.
123             # Chiral centers with lone pairs are left untouched.
124             # Returns the affected atoms.
125             #
126             # TODO: check other chiral centers
127             sub clean_chiral_centers($$)
128             {
129 7     7 0 4257 my( $moiety, $color_sub ) = @_;
130              
131 7         13 my @affected;
132 7         21 for my $atom ($moiety->vertices) {
133 65 100 33     238 next unless is_chiral_allenal( $atom ) ||
      66        
      66        
134             is_chiral_planar( $atom ) ||
135             is_chiral_tetrahedral( $atom ) ||
136             is_chiral_trigonal_bipyramidal( $atom );
137              
138             # Find neighbours which constitute ring bonds with the atom in question
139 7         25 my @ring_neighbours = grep { is_ring_bond( $moiety, $atom, $_, scalar $moiety->edges ) }
  24         2532  
140             $moiety->neighbours( $atom );
141              
142 7 100       769 my $hcount = exists $atom->{hcount} ? $atom->{hcount} : 0;
143 7         16 my @neighbours = $moiety->neighbours( $atom );
144 7 50       491 if( is_chiral_allenal( $atom ) ) {
145 0         0 @neighbours = grep { $_ != $atom }
146 0         0 map { $moiety->neighbours( $_ ) }
  0         0  
147             @neighbours;
148             }
149              
150 7 50       31 if( is_chiral_trigonal_bipyramidal( $atom ) ) {
151 0 0       0 next if @neighbours + $hcount != 5;
152             } else {
153 7 50       21 next if @neighbours + $hcount != 4;
154             }
155              
156 7         10 my %colors;
157 7         27 for (@neighbours, ( { symbol => 'H' } ) x $hcount) {
158 28         82 $colors{$color_sub->( $_ )}++;
159             }
160              
161 7 50       25 if( is_chiral_planar( $atom ) ) {
    50          
162             # Chiral planar center markers make sense even if only two types of atoms are there.
163 0 0       0 next if scalar keys %colors > 2;
164 0 0 0 0   0 next if scalar keys %colors == 2 && all { $_ == 2 } values %colors;
  0         0  
165             } elsif( is_chiral_trigonal_bipyramidal( $atom ) ) {
166 0 0       0 next if scalar keys %colors == 5;
167             } else {
168 7 100       23 next if scalar keys %colors == 4;
169             }
170              
171             # Special treatment for anomers
172 5 100       20 if( @ring_neighbours ) {
173 2 50       6 next unless is_chiral_tetrahedral( $atom );
174 2 50       9 next unless @ring_neighbours == 2;
175 0 0       0 next if $hcount == 1;
176 0 0       0 if( !$hcount ) {
177 0 0       0 my @non_ring_neighbours = grep { $_ != $ring_neighbours[0] &&
  0         0  
178             $_ != $ring_neighbours[1] }
179             @neighbours;
180 0 0       0 next unless $color_sub->( $non_ring_neighbours[0] ) eq
181             $color_sub->( $non_ring_neighbours[1] );
182             }
183             }
184              
185 3         6 delete $atom->{chirality};
186 3         10 push @affected, $atom;
187             }
188              
189 7         25 return @affected;
190             }
191              
192             sub is_aromatic($)
193             {
194 5193     5193 0 7289 my( $atom ) = @_;
195 5193         14227 return $atom->{symbol} ne ucfirst $atom->{symbol};
196             }
197              
198             sub is_aromatic_bond
199             {
200 971     971 0 1374 my( $moiety, $a, $b ) = @_;
201 971   100     11866 return $moiety->has_edge_attribute( $a, $b, 'bond' ) &&
202             $moiety->get_edge_attribute( $a, $b, 'bond' ) eq ':';
203             }
204              
205             sub is_chiral($)
206             {
207 7655     7655 0 9874 my( $what ) = @_;
208 7655 100       11358 if( ref $what eq 'HASH' ) { # Single atom
209 7650         17588 return exists $what->{chirality};
210             } else { # Graph representing moiety
211 5     16   21 return any { is_chiral( $_ ) } $what->vertices;
  16         90  
212             }
213             }
214              
215             sub is_chiral_allenal($)
216             {
217 698     698 0 916 my( $what ) = @_;
218 698 50       1137 if( ref $what eq 'HASH' ) { # Single atom
219 698   100     1936 return $what->{chirality} && $what->{chirality} =~ /^\@AL[12]$/;
220             } else { # Graph representing moiety
221 0     0   0 return any { is_chiral_allenal( $_ ) } $what->vertices;
  0         0  
222             }
223             }
224              
225             sub is_chiral_planar($)
226             {
227 93     93 0 149 my( $what ) = @_;
228 93 50       127 if( ref $what eq 'HASH' ) { # Single atom
229 93   100     313 return $what->{chirality} && $what->{chirality} =~ /^\@SP[123]$/;
230             } else { # Graph representing moiety
231 0     0   0 return any { is_chiral_planar( $_ ) } $what->vertices;
  0         0  
232             }
233             }
234              
235             sub is_chiral_tetrahedral($)
236             {
237 866     866 0 1052 my( $what ) = @_;
238 866 100       1233 if( ref $what eq 'HASH' ) { # Single atom
239             # CAVEAT: will fail for allenal configurations of @/@@ in raw mode
240 861   100     2846 return $what->{chirality} && $what->{chirality} =~ /^@@?$/;
241             } else { # Graph representing moiety
242 5     19   28 return any { is_chiral_tetrahedral( $_ ) } $what->vertices;
  19         120  
243             }
244             }
245              
246             sub is_chiral_trigonal_bipyramidal($)
247             {
248 112     112 0 150 my( $what ) = @_;
249 112 50       149 if( ref $what eq 'HASH' ) { # Single atom
250 112   100     351 return $what->{chirality} && $what->{chirality} =~ /^\@TB([1-9]|1[0-9]|20)$/;
251             } else { # Graph representing moiety
252 0     0   0 return any { is_chiral_trigonal_bipyramidal( $_ ) } $what->vertices;
  0         0  
253             }
254             }
255              
256             sub is_chiral_octahedral($)
257             {
258 32     32 0 38 my( $what ) = @_;
259 32 50       43 if( ref $what eq 'HASH' ) { # Single atom
260 32   100     100 return $what->{chirality} && $what->{chirality} =~ /^\@OH([1-9]|[12][0-9]|30)$/;
261             } else { # Graph representing moiety
262 0     0   0 return any { is_chiral_octahedral( $_ ) } $what->vertices;
  0         0  
263             }
264             }
265              
266             sub is_cis_trans_bond
267             {
268 1616     1616 0 127972 my( $moiety, $a, $b ) = @_;
269 1616   100     23907 return $moiety->has_edge_attribute( $a, $b, 'bond' ) &&
270             $moiety->get_edge_attribute( $a, $b, 'bond' ) =~ /^[\\\/]$/;
271             }
272              
273             sub is_double_bond
274             {
275 1350     1350 0 6773 my( $moiety, $a, $b ) = @_;
276 1350   100     16348 return $moiety->has_edge_attribute( $a, $b, 'bond' ) &&
277             $moiety->get_edge_attribute( $a, $b, 'bond' ) eq '=';
278             }
279              
280             # An atom is deemed to be a ring atom if any of its bonds is a ring bond.
281             sub is_ring_atom
282             {
283 58     58 0 5880 my( $moiety, $atom, $max_length ) = @_;
284 58 100       116 return '' unless $moiety->degree( $atom ) > 1;
285 69     69   7198 return any { is_ring_bond( $moiety, $atom, $_, $max_length ) }
286 34         13000 $moiety->neighbours( $atom );
287             }
288              
289             # A bond is deemed to be a ring bond if there is an alternative path
290             # joining its atoms not including the bond in consideration and this
291             # alternative path is not longer than 7 bonds. This is based on
292             # O'Boyle (2012) saying that Open Babel SMILES writer does not output
293             # cis/trans markers for double bonds in rings of size 8 or less due to
294             # them implicilty being cis bonds.
295             #
296             # If maximum ring size is given negative, ring size is not limited.
297             sub is_ring_bond
298             {
299 152     152 0 7965 my( $moiety, $a, $b, $max_length ) = @_;
300 152 100       297 $max_length = 7 unless $max_length;
301              
302             # A couple of shortcuts to reduce the complexity
303 152 100   300   520 return '' if any { $moiety->degree( $_ ) == 1 } ( $a, $b );
  300         56774  
304 96 100       34076 return '' if $moiety->vertices > $moiety->edges;
305              
306 69 100       3586 if( $max_length < 0 ) {
307             # Due to the issue in Graph, bridges() returns strings instead of real objects.
308             # Graph issue: https://github.com/graphviz-perl/Graph/issues/29
309 27         47 my %vertices_by_name = map { $_ => $_ } $moiety->vertices;
  648         1253  
310 329 100 100 329   926 return none { ( $_->[0] == $a && $_->[1] == $b ) ||
      100        
311             ( $_->[0] == $b && $_->[1] == $a ) }
312 27         147 map { [ map { $vertices_by_name{$_} } @$_ ] } $moiety->bridges;
  405         2664  
  810         1155  
313             }
314              
315 42         143 my $copy = $moiety->copy;
316 42         92254 $copy->delete_edge( $a, $b );
317              
318 42         4796 my %distance = ( $a => 0 );
319             my $record_length = sub {
320             # Record number of bonds between $a and any other vertex
321 811     811   267586 my( $u, $v ) = @_;
322 811         1153 my @seen = grep { exists $distance{$_} } ( $u, $v );
  1622         2891  
323 811 50       1497 return '' if @seen != 1; # Can this be 0?
324              
325 811         1042 my $seen = shift @seen;
326 811         2781 my $unseen = first { !exists $distance{$_} } ( $u, $v );
  1622         2437  
327 811         2932 $distance{$unseen} = $distance{$seen} + 1;
328 42         187 };
329              
330             my $operations = {
331 42     42   1616 start => sub { $a },
332 42         139 tree_edge => $record_length,
333             };
334              
335 42         223 my $traversal = Graph::Traversal::BFS->new( $copy, %$operations );
336 42         8769 $traversal->bfs;
337              
338             # $distance{$b} is the distance in bonds. In 8-member rings adjacent
339             # ring atoms have distance of 7 bonds.
340 42   66     26321 return exists $distance{$b} && $distance{$b} <= $max_length;
341             }
342              
343             sub is_single_bond
344             {
345 305     305 0 413 my( $moiety, $a, $b ) = @_;
346 305   66     3831 return !$moiety->has_edge_attribute( $a, $b, 'bond' ) ||
347             $moiety->get_edge_attribute( $a, $b, 'bond' ) eq '-';
348             }
349              
350             sub is_triple_bond
351             {
352 0     0 0 0 my( $moiety, $a, $b ) = @_;
353 0   0     0 return $moiety->has_edge_attribute( $a, $b, 'bond' ) &&
354             $moiety->get_edge_attribute( $a, $b, 'bond' ) eq '#';
355             }
356              
357             sub mirror($)
358             {
359 30     30 0 41 my( $what ) = @_;
360 30 100       45 if( ref $what eq 'HASH' ) { # Single atom
361 25 100       32 if( is_chiral_tetrahedral( $what ) ) {
362 2 100       5 $what->{chirality} = $what->{chirality} eq '@' ? '@@' : '@';
363             }
364 25 100       32 if( is_chiral_allenal( $what ) ) {
365 1 50       5 $what->{chirality} = $what->{chirality} eq '@AL1' ? '@AL2' : '@AL1';
366             }
367             # Square planar centers are not affected by mirroring, doing nothing
368 25 50       31 if( is_chiral_trigonal_bipyramidal( $what ) ) {
369 0         0 my $number = substr $what->{chirality}, 3;
370 0         0 my $setting = $TB[$number-1];
371             my $opposite = first { $TB[$_]->{axis}[0] == $setting->{axis}[0] &&
372             $TB[$_]->{axis}[1] == $setting->{axis}[1] &&
373 0 0 0 0   0 $TB[$_]->{order} ne $setting->{order} }
374 0         0 0..$#TB;
375 0         0 $what->{chirality} = '@TB' . ($opposite + 1);
376             }
377 25 50       52 if( is_chiral_octahedral( $what ) ) {
378 0         0 my $number = substr $what->{chirality}, 3;
379 0         0 my $setting = $OH[$number-1];
380             my $opposite = first { $OH[$_]->{shape} eq $setting->{shape} &&
381             $OH[$_]->{axis}[0] == $setting->{axis}[0] &&
382             $OH[$_]->{axis}[1] == $setting->{axis}[1] &&
383 0 0 0 0   0 $OH[$_]->{order} ne $setting->{order} }
      0        
384 0         0 0..$#OH;
385 0         0 $what->{chirality} = '@OH' . ($opposite + 1);
386             }
387             } else {
388 5         14 for ($what->vertices) {
389 25         122 mirror( $_ );
390             }
391             }
392             }
393              
394             sub _order_rings
395             {
396 7     7   26 my( $A, $B ) = @_;
397 7 100       23 return @$A <=> @$B if @$A <=> @$B;
398              
399 5         14 for (0..$#$A) {
400 5 50       18 next unless $A->[$_]{number} <=> $B->[$_]{number};
401 5         19 return $A->[$_]{number} <=> $B->[$_]{number};
402             }
403              
404 0         0 return 0;
405             }
406              
407             sub rings($@)
408             {
409 8     8 0 2248 my( $graph, $atom, $max_length ) = @_;
410 8 50       19 $max_length = 7 unless defined $max_length;
411              
412 8         11 my @rings;
413 8 100       15 if( $atom ) {
414 6         25 @rings = Graph::MoreUtils::SSSR::detect_rings( $graph, $atom, undef, undef, $max_length );
415             } else {
416 2         11 @rings = SSSR( $graph, $max_length );
417             }
418              
419             # Find unique rings
420 8         257779 my %rings_by_name = map { join( '', @$_ ) => $_ } @rings;
  70         243  
421 8         20 my @rings_now;
422 8         16 for (@rings) {
423 70         121 my $name = join '', @$_;
424 70 100       127 next unless $rings_by_name{$name};
425 15         19 push @rings_now, $_;
426 15         29 delete $rings_by_name{$name};
427             }
428 8         29 @rings_now = sort { _order_rings( $a, $b ) } @rings_now;
  7         22  
429              
430 8         51 return @rings_now;
431             }
432              
433             sub toggle_cistrans($)
434             {
435 190 100   190 0 688 return $_[0] unless $_[0] =~ /^[\\\/]$/;
436 79 100       230 return $_[0] eq '/' ? '\\' : '/';
437             }
438              
439             # TODO: The actual unsprouting has to happen during print.
440             sub _unsprout_hydrogens($)
441             {
442 0     0   0 my( $moiety ) = @_;
443              
444 0         0 for my $atom ($moiety->vertices) {
445 0 0       0 next unless can_unsprout_hydrogen( $moiety, $atom );
446              
447 0         0 my( $neighbour ) = $moiety->neighbours( $atom );
448              
449             # TODO: Adjust other chiralities as well
450 0 0       0 if( is_chiral_tetrahedral $neighbour ) {
451 0     0   0 my $pos = first { $neighbour->{chirality_neighbours}[$_] == $atom } 0..3;
  0         0  
452 0         0 @{$neighbour->{chirality_neighbours}} =
453 0         0 grep { $_ != $atom } @{$neighbour->{chirality_neighbours}};
  0         0  
  0         0  
454 0 0       0 mirror $neighbour unless $pos % 2;
455             }
456              
457 0         0 $neighbour->{hcount}++;
458 0         0 $moiety->delete_vertex( $atom );
459             }
460             }
461              
462             sub valence($$)
463             {
464 456     456 0 742 my( $moiety, $atom ) = @_;
465             return ($atom->{hcount} ? $atom->{hcount} : 0) +
466             sum0 map { exists $bond_symbol_to_order{$_}
467 1125 100       89834 ? $bond_symbol_to_order{$_}
468             : 1 }
469 456 50       1227 map { $moiety->has_edge_attribute( $atom, $_, 'bond' )
  1125 100       176290  
470             ? $moiety->get_edge_attribute( $atom, $_, 'bond' )
471             : 1 }
472             $moiety->neighbours( $atom );
473             }
474              
475             # CAVEAT: requires output from non-raw parsing due issue similar to GH#2
476             sub _validate($@)
477             {
478 32     32   2849 my( $moiety, $color_sub ) = @_;
479              
480             # Identify islands of allene systems
481 32         80 my $allenes = _allene_graph( $moiety );
482              
483 32     128   147 my $color_by_element = sub { $_[0]->{symbol} };
  128         283  
484              
485 32         91 for my $atom (sort { $a->{number} <=> $b->{number} } $moiety->vertices) {
  756         1539  
486              
487             # Warn about aromatic atoms with less than two aromatic bonds
488 601 100 100     140689 if( is_aromatic($atom) && $moiety->degree($atom) &&
      100        
489 267         93073 2 > grep { is_aromatic_bond( $moiety, $atom, $_ ) }
490             $moiety->neighbours( $atom ) ) {
491             warn sprintf 'aromatic atom %s(%d) has less than 2 aromatic bonds' . "\n",
492             $atom->{symbol},
493 1         158 $atom->{number};
494             }
495              
496 601 100 66     24857 if( is_chiral_allenal($atom) ) {
    100          
    100          
497 6 50       70 if( $moiety->degree($atom) != 2 ) {
498             warn sprintf 'tetrahedral chiral allenal setting for %s(%d) ' .
499             'has %d bonds while 2 are needed' . "\n",
500             $atom->{symbol},
501             $atom->{number},
502 0         0 $moiety->degree($atom);
503 0         0 next;
504             }
505 6 50       1766 if( !$allenes->has_vertex($atom) ) {
506             warn sprintf 'tetrahedral chiral allenal setting for %s(%d) ' .
507             'is not a part of any allenal system' . "\n",
508             $atom->{symbol},
509 0         0 $atom->{number};
510 0         0 next;
511             }
512 6 100   9   165 if( none { $allenes->has_edge_attribute( $atom, $_, 'allene' ) &&
  9 100       1057  
513             $allenes->get_edge_attribute( $atom, $_, 'allene' ) eq 'mid' }
514             $allenes->neighbours($atom) ) {
515             warn sprintf 'tetrahedral chiral allenal setting for %s(%d) ' .
516             'observed for an atom which is not a center of ' .
517             'an allenal system' . "\n",
518             $atom->{symbol},
519 1         165 $atom->{number};
520 1         9 next;
521             }
522 5 100       1670 next unless $color_sub;
523 3 50       9 next if is_ring_atom( $moiety, $atom, scalar $moiety->edges );
524              
525 3 100       143 my @ends = grep { $allenes->has_edge_attribute( $atom, $_, 'allene' ) &&
  8         1555  
526             $allenes->get_edge_attribute( $atom, $_, 'allene' ) eq 'mid' }
527             $allenes->neighbours($atom);
528 24 100       59 my @neighbours = grep { $_ ne $ends[0] && $_ ne $ends[1] }
529 12         157 map { @$_ }
530 18         912 grep { !$allenes->has_edge( @$_ ) }
531 3         984 map { $moiety->edges_at($_) } @ends;
  6         227  
532 3         9 my %colors = map { ($color_sub->( $_ ) => 1) } @neighbours;
  12         32  
533 3 100       19 if( scalar keys %colors != 4 ) {
534             # FIXME: Emits false positives for coordinating metals.
535             # Need to think of a heuristic to exclude them.
536             warn sprintf 'tetrahedral chiral allenal setting for ' .
537             '%s(%d) is not needed as not all 4 neighbours ' .
538             'are distinct' . "\n",
539             $atom->{symbol},
540 2         19 $atom->{number};
541             }
542             } elsif( is_chiral_tetrahedral($atom) ) {
543 8 100       19 if( $moiety->degree($atom) < 3 ) {
544             # TODO: there should be a strict mode to forbid lone pairs
545             warn sprintf 'tetrahedral chiral center %s(%d) has %d bonds ' .
546             'while at least 3 are required' . "\n",
547             $atom->{symbol},
548             $atom->{number},
549 1         189 $moiety->degree($atom);
550 1         153 next;
551             }
552 7 50       3368 if( $moiety->degree($atom) > 4 ) {
553             warn sprintf 'tetrahedral chiral center %s(%d) has %d bonds ' .
554             'while at most 4 are allowed' . "\n",
555             $atom->{symbol},
556             $atom->{number},
557 0         0 $moiety->degree($atom);
558 0         0 next;
559             }
560              
561 7 50       3121 next unless $color_sub;
562 7 100       20 next if is_ring_atom( $moiety, $atom, scalar $moiety->edges );
563              
564 3         130 my $has_lone_pair = $moiety->degree($atom) == 3;
565 3         1178 my %colors = map { ($color_sub->( $_ ) => 1) }
  12         214  
566             $moiety->neighbours($atom);
567 3 100       22 if( scalar keys %colors != 4 - $has_lone_pair ) {
568             warn sprintf 'tetrahedral chiral setting for %s(%d) ' .
569             'is not needed as not all 4 neighbours ' .
570             '(including possible lone pair) are distinct' . "\n",
571             $atom->{symbol},
572 1         12 $atom->{number};
573             }
574             } elsif( !is_chiral($atom) && $moiety->degree($atom) == 4 ) {
575             # Warn about unmarked tetrahedral chiral centers
576 62 100       26865 my %colors = map { $color_sub
  248         4783  
577             ? ($color_sub->($_) => 1)
578             : ($color_by_element->($_) => 1) }
579             $moiety->neighbours($atom);
580 62 100       308 if( scalar keys %colors == 4 ) {
581             warn sprintf 'atom %s(%d) has 4 distinct neighbours, ' .
582             'but does not have a chiral setting' . "\n",
583             $atom->{symbol},
584 4         44 $atom->{number};
585             }
586             }
587             }
588              
589 32         7038 for my $bond (sort { min( map { $_->{number} } @$a ) <=> min( map { $_->{number} } @$b ) ||
  3390         4339  
  3390         4743  
590 1695 50       3136 max( map { $_->{number} } @$a ) <=> max( map { $_->{number} } @$b ) }
  668         861  
  668         933  
591             $moiety->edges) {
592 594         30468 my( $A, $B ) = sort { $a->{number} <=> $b->{number} } @$bond;
  594         1369  
593 594 50       1199 if( $A eq $B ) {
594             warn sprintf 'atom %s(%d) has bond to itself' . "\n",
595             $A->{symbol},
596 0         0 $A->{number};
597 0         0 next;
598             }
599              
600 594 100       933 if( is_double_bond( $moiety, @$bond ) ) {
    100          
601             # Test cis/trans bonds
602             # Detect conflicting cis/trans markers, see COD entry 1547257, r297409
603 51         16203 my $cis_trans_A = grep { is_cis_trans_bond( $moiety, $A, $_ ) }
  136         27016  
604             $moiety->neighbours($A);
605 51         13909 my $cis_trans_B = grep { is_cis_trans_bond( $moiety, $B, $_ ) }
  130         24266  
606             $moiety->neighbours($B);
607 51 100 100     13701 if( $cis_trans_A && $cis_trans_B ) {
    100 100        
608             # If any of the bond atoms lack cis/trans markers, it means that the other markers are from some other bond
609 12         26 for my $atom (@$bond) {
610 24         48 my %bond_types = _neighbours_per_bond_type( $moiety, $atom );
611 24         46 for ('/', '\\') {
612 48 100 100     116 if( $bond_types{$_} && @{$bond_types{$_}} > 1 ) {
  27         87  
613             warn sprintf 'atom %s(%d) has %d bonds of type \'%s\', ' .
614             'cis/trans definitions must not conflict' . "\n",
615             $atom->{symbol},
616             $atom->{number},
617 1         3 scalar @{$bond_types{$_}},
  1         11  
618             $_;
619             }
620             }
621             }
622             } elsif( !$allenes->has_edge( @$bond ) && # Allene systems are checked below
623             $cis_trans_A + $cis_trans_B == 1 ) {
624             # FIXME: Source of false-positives.
625             # Cis/trans bond is out of place if none of neighbouring double bonds have other cis/trans bonds.
626             # This has to include allenal systems.
627             warn sprintf 'double bond between atoms %s(%d) and %s(%d) ' .
628             'has only one cis/trans marker' . "\n",
629             $A->{symbol}, $A->{number},
630 3         154 $B->{symbol}, $B->{number};
631             }
632             } elsif( is_cis_trans_bond( $moiety, @$bond ) ) {
633             # Test if next to a double bond.
634             # FIXME: Yields false-positives for delocalised bonds,
635             # see COD entry 1501863.
636             # FIXME: What about triple bond? See COD entry 4103591.
637 33         10683 my %bond_types;
638 33         58 for my $atom (@$bond) {
639 66         117 my %bond_types_now = _neighbours_per_bond_type( $moiety, $atom );
640 66         136 for my $key (keys %bond_types_now) {
641 163         155 push @{$bond_types{$key}}, @{$bond_types_now{$key}};
  163         219  
  163         303  
642             }
643             }
644 33 100       98 if( !$bond_types{'='} ) {
645             warn sprintf 'cis/trans bond is defined between atoms ' .
646             '%s(%d) and %s(%d), but neither of them ' .
647             'is attached to a double bond' . "\n",
648             $A->{symbol},
649             $A->{number},
650             $B->{symbol},
651 2         29 $B->{number};
652             }
653             }
654              
655             # Warn about 2 aromatic atoms in a cycle not having an aromatic bond between them
656 594 100 100     100750 if( is_aromatic( $A ) &&
      100        
      66        
657             is_aromatic( $B ) &&
658             !is_aromatic_bond( $moiety, $A, $B ) &&
659             is_ring_bond( $moiety, $A, $B ) ) {
660              
661 2         9 my %A_rings = map { join( '', @$_ ) => $_ } rings( $moiety, $A );
  4         15  
662 2         5 my %B_rings = map { join( '', @$_ ) => $_ } rings( $moiety, $B );
  4         16  
663              
664 2         8 my $common_rings = set( keys %A_rings ) * set( keys %B_rings );
665 2         173 for (map { $A_rings{$_} } @$common_rings) {
  3         80  
666 2 100       29 next unless can_be_aromatic_ring( @$_ );
667             warn sprintf 'aromatic atoms %s(%d) and %s(%d) belong to same cycle, ' .
668             'but the bond between them is not aromatic' . "\n",
669             $A->{symbol},
670             $A->{number},
671             $B->{symbol},
672 1         17 $B->{number};
673 1         12 last;
674             }
675             }
676             }
677              
678             # Check allene systems
679 32         315 for my $system (sort { min( map { $_->{number} } @$a ) <=>
  0         0  
680 0         0 min( map { $_->{number} } @$b ) }
  0         0  
681             $allenes->connected_components) {
682 9 100       13321 next if @$system % 2;
683              
684 4         15 my @ends = sort { $a->{number} <=> $b->{number} }
685 4         1212 map { @$_ }
686 4 100       21 grep { $allenes->has_edge_attribute( @$_, 'allene' ) &&
  16         5682  
687             $allenes->get_edge_attribute( @$_, 'allene' ) eq 'end' }
688             $allenes->subgraph($system)->edges;
689 24         5048 my $cis_trans_bonds = grep { is_cis_trans_bond( $moiety, @$_ ) }
690 4         31 map { $moiety->edges_at( $_ ) } @ends;
  8         337  
691 4 100       909 if( $cis_trans_bonds == 1 ) {
692             warn sprintf 'allene system between atoms %s(%d) and %s(%d) ' .
693             'has only one cis/trans marker' . "\n",
694             $ends[0]->{symbol}, $ends[0]->{number},
695 1         13 $ends[1]->{symbol}, $ends[1]->{number};
696             }
697 4 100       16 next if $cis_trans_bonds;
698              
699 8 100       23 my @neighbours_at_ends = grep { $_ ne $ends[0] && $_ ne $ends[1] }
700 4         181 map { @$_ }
701 6         1138 grep { !is_double_bond( $moiety, @$_ ) }
702 1         2 map { $moiety->edges_at( $_ ) } @ends;
  2         76  
703 1 50       4 next unless @neighbours_at_ends == 4;
704             warn sprintf 'allene system between atoms %s(%d) and %s(%d) ' .
705             'has 4 neighbours, but does not have cis/trans ' .
706             'setting' . "\n",
707             $ends[0]->{symbol}, $ends[0]->{number},
708 1         12 $ends[1]->{symbol}, $ends[1]->{number};
709             }
710              
711             # Check for bridging aromatic bonds
712 32         1441 my $aromatic = $moiety->copy_graph;
713 505         5371 $aromatic->delete_edges( map { @$_ }
714 32         58810 grep { !is_aromatic_bond( $moiety, @$_ ) }
  594         119298  
715             $moiety->edges );
716              
717             # Due to the issue in Graph, bridges() returns strings instead of real objects.
718             # Graph issue: https://github.com/graphviz-perl/Graph/issues/29
719             # The code below works on buggy (< 0.9727) as well as fixed (>= 0.9727) versions.
720 32         49411 my %vertices_by_name = map { $_ => $_ } $aromatic->vertices;
  601         1618  
721 32         170 my @bridges = map { [ map { $vertices_by_name{$_} } @$_ ] } $aromatic->bridges;
  1         1411  
  2         7  
722 32         22909 for my $bridge (sort { min( map { $_->{number} } @$a ) <=> min( map { $_->{number} } @$b ) ||
  0         0  
  0         0  
723 0 0       0 max( map { $_->{number} } @$a ) <=> max( map { $_->{number} } @$b ) }
  0         0  
  0         0  
724             @bridges) {
725 1         5 my( $A, $B ) = sort { $a->{number} <=> $b->{number} } @$bridge;
  1         5  
726             warn sprintf 'aromatic bond between atoms %s(%d) and %s(%d) ' .
727             'is outside an aromatic ring' . "\n",
728 1         13 $A->{symbol}, $A->{number}, $B->{symbol}, $B->{number};
729             }
730              
731             # TODO: SP, TB, OH chiral centers
732             }
733              
734             sub _allene_graph
735             {
736 35     35   72 my( $moiety ) = @_;
737              
738 35         116 my $graph = $moiety->copy;
739 561         5725 $graph->delete_edges( map { @$_ }
740 35         59530 grep { !is_double_bond( $moiety, @$_ ) }
  620         122676  
741             $moiety->edges );
742 35         51829 $graph->delete_vertices( grep { !$graph->degree( $_ ) } $graph->vertices );
  630         98448  
743              
744 35         56846 for my $system ($graph->connected_components) {
745 39         43502 my @d1 = grep { $graph->degree( $_ ) == 1 } @$system;
  98         15003  
746 39         8616 my @d2 = grep { $graph->degree( $_ ) == 2 } @$system;
  98         14032  
747 39 100 66     8575 if (@d1 == 2 && @d2 && @d1 + @d2 == @$system ) {
      66        
748 12 100       37 if( @d2 % 2 ) {
749 6         43 my( $center ) = $graph->subgraph( $system )->center_vertices;
750 6         50116 $graph->set_edge_attribute( $center, $d1[0], 'allene', 'mid' );
751 6         1403 $graph->set_edge_attribute( $center, $d1[1], 'allene', 'mid' );
752             }
753 12         1308 $graph->set_edge_attribute( @d1, 'allene', 'end' );
754             } else {
755 27         51 $graph->delete_vertices( @$system );
756             }
757             }
758              
759 35         7027 return $graph;
760             }
761              
762             sub _neighbours_per_bond_type
763             {
764 90     90   125 my( $moiety, $atom ) = @_;
765 90         100 my %bond_types;
766 90         175 for my $neighbour ($moiety->neighbours($atom)) {
767 273         7053 my $bond_type;
768 273 100       3351 if( $moiety->has_edge_attribute( $atom, $neighbour, 'bond' ) ) {
769 190         31987 $bond_type = $moiety->get_edge_attribute( $atom, $neighbour, 'bond' );
770             } else {
771 83         12901 $bond_type = '';
772             }
773 273 100       29373 if( $atom->{number} > $neighbour->{number} ) {
774 84         185 $bond_type = toggle_cistrans $bond_type;
775             }
776 273         307 push @{$bond_types{$bond_type}}, $neighbour;
  273         594  
777             }
778 90         320 return %bond_types;
779             }
780              
781             1;
782              
783             __END__