File Coverage

blib/lib/Chemistry/OpenSMILES.pm
Criterion Covered Total %
statement 281 344 81.6
branch 144 202 71.2
condition 68 93 73.1
subroutine 37 47 78.7
pod 0 19 0.0
total 530 705 75.1


line stmt bran cond sub pod time code
1             package Chemistry::OpenSMILES;
2              
3             # ABSTRACT: OpenSMILES format reader and writer
4             our $VERSION = '0.12.3'; # VERSION
5              
6 44     44   94858 use strict;
  44         97  
  44         1886  
7 44     44   261 use warnings;
  44         112  
  44         2669  
8 44     44   993 use 5.0100;
  44         158  
9              
10 44     44   24880 use Chemistry::OpenSMILES::Stereo::Tables qw( @OH @TB );
  44         255  
  44         11492  
11 44     44   27886 use Graph::Traversal::BFS;
  44         206620  
  44         2273  
12 44     44   382 use List::Util qw( all any first max min none sum0 );
  44         84  
  44         332605  
13              
14             require Exporter;
15             our @ISA = qw( Exporter );
16             our @EXPORT_OK = qw(
17             %bond_order_to_symbol
18             %bond_symbol_to_order
19             %normal_valence
20             can_unsprout_hydrogen
21             clean_chiral_centers
22             is_aromatic
23             is_aromatic_bond
24             is_chiral
25             is_chiral_allenal
26             is_chiral_octahedral
27             is_chiral_planar
28             is_chiral_tetrahedral
29             is_chiral_trigonal_bipyramidal
30             is_cis_trans_bond
31             is_double_bond
32             is_ring_atom
33             is_ring_bond
34             is_single_bond
35             is_triple_bond
36             mirror
37             toggle_cistrans
38             valence
39             );
40              
41             sub is_chiral($);
42             sub is_chiral_planar($);
43             sub is_chiral_tetrahedral($);
44             sub mirror($);
45             sub toggle_cistrans($);
46              
47             our %normal_valence = (
48             B => [ 3 ],
49             C => [ 4 ],
50             N => [ 3, 5 ],
51             O => [ 2 ],
52             P => [ 3, 5 ],
53             S => [ 2, 4, 6 ],
54             F => [ 1 ],
55             Cl => [ 1 ],
56             Br => [ 1 ],
57             I => [ 1 ],
58             c => [ 3 ], # Not from OpenSMILES specification
59             );
60              
61             our %bond_order_to_symbol = (
62             1 => '-',
63             1.5 => ':',
64             2 => '=',
65             3 => '#',
66             4 => '$',
67             );
68              
69             our %bond_symbol_to_order = (
70             '-' => 1,
71             ':' => 1.5,
72             '=' => 2,
73             '#' => 3,
74             '$' => 4,
75             );
76              
77             sub can_unsprout_hydrogen
78             {
79 3687     3687 0 7397 my( $moiety, $atom ) = @_;
80              
81 3687 100       15759 return undef unless $atom->{symbol} eq 'H';
82 712 100   1424   5012 return '' if any { exists $atom->{$_} } qw( chirality isotope );
  1424         3652  
83 711 50       3217 return '' if $atom->{charge};
84 711 50       1863 return '' if $atom->{class};
85 711 100       2529 return '' unless $moiety->degree( $atom ) == 1;
86              
87 699         316430 my( $neighbour ) = $moiety->neighbours( $atom );
88 699 100       82053 return '' if $neighbour->{symbol} eq 'H';
89              
90             # Can unsprout only those H atoms with cis/trans bonds, where a substitution could be found.
91 695 100       2161 if( is_cis_trans_bond( $moiety, $atom, $neighbour ) ) {
92             my $n_cis_trans =
93 4         3047 grep { is_cis_trans_bond( $moiety, $neighbour, $_ ) }
  12         6560  
94             $moiety->neighbours( $neighbour );
95 4 50       2878 return '' if $n_cis_trans == 1;
96             }
97              
98             # TODO: Support other chiralities as well
99 695 100       225726 if( is_chiral $neighbour ) {
100 63 50       177 return '' unless is_chiral_tetrahedral $neighbour;
101 63 50       202 return '' unless $neighbour->{chirality_neighbours};
102 63 50       108 return '' unless @{$neighbour->{chirality_neighbours}} == 4;
  63         191  
103             }
104              
105 695         3198 return 1;
106             }
107              
108             # Removes chiral setting from allenal, square planar, tetrahedral and trigonal bipyramidal chiral centers if deemed unimportant.
109             # For allenal, tetrahedral and trigonal bipyramidal arrangements when not all the neighbours are distinct.
110             # For square planar arrangements this means situations when all neighbours are the same.
111             # Chiral centers with lone pairs are left untouched.
112             # Returns the affected atoms.
113             #
114             # TODO: check other chiral centers
115             sub clean_chiral_centers($$)
116             {
117 7     7 0 7002 my( $moiety, $color_sub ) = @_;
118              
119 7         16 my @affected;
120 7         36 for my $atom ($moiety->vertices) {
121 65 100 33     376 next unless is_chiral_allenal( $atom ) ||
      66        
      66        
122             is_chiral_planar( $atom ) ||
123             is_chiral_tetrahedral( $atom ) ||
124             is_chiral_trigonal_bipyramidal( $atom );
125              
126             # Find neighbours which constitute ring bonds with the atom in question
127 7         43 my @ring_neighbours = grep { is_ring_bond( $moiety, $atom, $_, scalar $moiety->edges ) }
  24         4478  
128             $moiety->neighbours( $atom );
129              
130 7 100       1938 my $hcount = exists $atom->{hcount} ? $atom->{hcount} : 0;
131 7         34 my @neighbours = $moiety->neighbours( $atom );
132 7 50       898 if( is_chiral_allenal( $atom ) ) {
133 0         0 @neighbours = grep { $_ != $atom }
134 0         0 map { $moiety->neighbours( $_ ) }
  0         0  
135             @neighbours;
136             }
137              
138 7 50       26 if( is_chiral_trigonal_bipyramidal( $atom ) ) {
139 0 0       0 next if @neighbours + $hcount != 5;
140             } else {
141 7 50       32 next if @neighbours + $hcount != 4;
142             }
143              
144 7         14 my %colors;
145 7         41 for (@neighbours, ( { symbol => 'H' } ) x $hcount) {
146 28         125 $colors{$color_sub->( $_ )}++;
147             }
148              
149 7 50       41 if( is_chiral_planar( $atom ) ) {
    50          
150             # Chiral planar center markers make sense even if only two types of atoms are there.
151 0 0       0 next if scalar keys %colors > 2;
152 0 0 0 0   0 next if scalar keys %colors == 2 && all { $_ == 2 } values %colors;
  0         0  
153             } elsif( is_chiral_trigonal_bipyramidal( $atom ) ) {
154 0 0       0 next if scalar keys %colors == 5;
155             } else {
156 7 100       36 next if scalar keys %colors == 4;
157             }
158              
159             # Special treatment for anomers
160 5 100       16 if( @ring_neighbours ) {
161 2 50       6 next unless is_chiral_tetrahedral( $atom );
162 2 50       12 next unless @ring_neighbours == 2;
163 0 0       0 next if $hcount == 1;
164 0 0       0 if( !$hcount ) {
165 0 0       0 my @non_ring_neighbours = grep { $_ != $ring_neighbours[0] &&
  0         0  
166             $_ != $ring_neighbours[1] }
167             @neighbours;
168 0 0       0 next unless $color_sub->( $non_ring_neighbours[0] ) eq
169             $color_sub->( $non_ring_neighbours[1] );
170             }
171             }
172              
173 3         7 delete $atom->{chirality};
174 3         14 push @affected, $atom;
175             }
176              
177 7         46 return @affected;
178             }
179              
180             sub is_aromatic($)
181             {
182 5076     5076 0 13335 my( $atom ) = @_;
183 5076         22582 return $atom->{symbol} ne ucfirst $atom->{symbol};
184             }
185              
186             sub is_aromatic_bond
187             {
188 899     899 0 2058 my( $moiety, $a, $b ) = @_;
189 899   100     20415 return $moiety->has_edge_attribute( $a, $b, 'bond' ) &&
190             $moiety->get_edge_attribute( $a, $b, 'bond' ) eq ':';
191             }
192              
193             sub is_chiral($)
194             {
195 7538     7538 0 16753 my( $what ) = @_;
196 7538 100       22776 if( ref $what eq 'HASH' ) { # Single atom
197 7533         28335 return exists $what->{chirality};
198             } else { # Graph representing moiety
199 5     16   37 return any { is_chiral( $_ ) } $what->vertices;
  16         245  
200             }
201             }
202              
203             sub is_chiral_allenal($)
204             {
205 677     677 0 1255 my( $what ) = @_;
206 677 50       1763 if( ref $what eq 'HASH' ) { # Single atom
207 677   100     3239 return $what->{chirality} && $what->{chirality} =~ /^\@AL[12]$/;
208             } else { # Graph representing moiety
209 0     0   0 return any { is_chiral_allenal( $_ ) } $what->vertices;
  0         0  
210             }
211             }
212              
213             sub is_chiral_planar($)
214             {
215 93     93 0 193 my( $what ) = @_;
216 93 50       213 if( ref $what eq 'HASH' ) { # Single atom
217 93   100     627 return $what->{chirality} && $what->{chirality} =~ /^\@SP[123]$/;
218             } else { # Graph representing moiety
219 0     0   0 return any { is_chiral_planar( $_ ) } $what->vertices;
  0         0  
220             }
221             }
222              
223             sub is_chiral_tetrahedral($)
224             {
225 845     845 0 1552 my( $what ) = @_;
226 845 100       1815 if( ref $what eq 'HASH' ) { # Single atom
227             # CAVEAT: will fail for allenal configurations of @/@@ in raw mode
228 840   100     4431 return $what->{chirality} && $what->{chirality} =~ /^@@?$/;
229             } else { # Graph representing moiety
230 5     19   35 return any { is_chiral_tetrahedral( $_ ) } $what->vertices;
  19         139  
231             }
232             }
233              
234             sub is_chiral_trigonal_bipyramidal($)
235             {
236 112     112 0 209 my( $what ) = @_;
237 112 50       233 if( ref $what eq 'HASH' ) { # Single atom
238 112   100     554 return $what->{chirality} && $what->{chirality} =~ /^\@TB([1-9]|1[0-9]|20)$/;
239             } else { # Graph representing moiety
240 0     0   0 return any { is_chiral_trigonal_bipyramidal( $_ ) } $what->vertices;
  0         0  
241             }
242             }
243              
244             sub is_chiral_octahedral($)
245             {
246 32     32 0 50 my( $what ) = @_;
247 32 50       140 if( ref $what eq 'HASH' ) { # Single atom
248 32   100     188 return $what->{chirality} && $what->{chirality} =~ /^\@OH([1-9]|[12][0-9]|30)$/;
249             } else { # Graph representing moiety
250 0     0   0 return any { is_chiral_octahedral( $_ ) } $what->vertices;
  0         0  
251             }
252             }
253              
254             sub is_cis_trans_bond
255             {
256 1594     1594 0 202192 my( $moiety, $a, $b ) = @_;
257 1594   100     42408 return $moiety->has_edge_attribute( $a, $b, 'bond' ) &&
258             $moiety->get_edge_attribute( $a, $b, 'bond' ) =~ /^[\\\/]$/;
259             }
260              
261             sub is_double_bond
262             {
263 1310     1310 0 14906 my( $moiety, $a, $b ) = @_;
264 1310   100     29561 return $moiety->has_edge_attribute( $a, $b, 'bond' ) &&
265             $moiety->get_edge_attribute( $a, $b, 'bond' ) eq '=';
266             }
267              
268             # An atom is deemed to be a ring atom if any of its bonds is a ring bond.
269             sub is_ring_atom
270             {
271 58     58 0 13806 my( $moiety, $atom, $max_length ) = @_;
272 58 100       239 return '' unless $moiety->degree( $atom ) > 1;
273 67     67   12611 return any { is_ring_bond( $moiety, $atom, $_, $max_length ) }
274 34         24428 $moiety->neighbours( $atom );
275             }
276              
277             # A bond is deemed to be a ring bond if there is an alternative path
278             # joining its atoms not including the bond in consideration and this
279             # alternative path is not longer than 7 bonds. This is based on
280             # O'Boyle (2012) saying that Open Babel SMILES writer does not output
281             # cis/trans markers for double bonds in rings of size 8 or less due to
282             # them implicilty being cis bonds.
283             #
284             # If maximum ring size is given negative, ring size is not limited.
285             sub is_ring_bond
286             {
287 149     149 0 14874 my( $moiety, $a, $b, $max_length ) = @_;
288 149 100       437 $max_length = 7 unless $max_length;
289              
290             # A couple of shortcuts to reduce the complexity
291 149 100   294   1003 return '' if any { $moiety->degree( $_ ) == 1 } ( $a, $b );
  294         107887  
292 91 100       61118 return '' if $moiety->vertices > $moiety->edges;
293              
294 64 100       6303 if( $max_length < 0 ) {
295             # Due to the issue in Graph, bridges() returns strings instead of real objects.
296             # Graph issue: https://github.com/graphviz-perl/Graph/issues/29
297 25         74 my %vertices_by_name = map { $_ => $_ } $moiety->vertices;
  600         2227  
298 333 100 100 333   1850 return none { ( $_->[0] == $a && $_->[1] == $b ) ||
      100        
299             ( $_->[0] == $b && $_->[1] == $a ) }
300 25         275 map { [ map { $vertices_by_name{$_} } @$_ ] } $moiety->bridges;
  375         4942  
  750         1973  
301             }
302              
303 39         164 my $copy = $moiety->copy;
304 39         155868 $copy->delete_edge( $a, $b );
305              
306 39         7874 my %distance = ( $a => 0 );
307             my $record_length = sub {
308             # Record number of bonds between $a and any other vertex
309 753     753   441018 my( $u, $v ) = @_;
310 753         1898 my @seen = grep { exists $distance{$_} } ( $u, $v );
  1506         5054  
311 753 50       2093 return '' if @seen != 1; # Can this be 0?
312              
313 753         1582 my $seen = shift @seen;
314 753         4343 my $unseen = first { !exists $distance{$_} } ( $u, $v );
  1506         3774  
315 753         4460 $distance{$unseen} = $distance{$seen} + 1;
316 39         274 };
317              
318             my $operations = {
319 39     39   2663 start => sub { $a },
320 39         239 tree_edge => $record_length,
321             };
322              
323 39         339 my $traversal = Graph::Traversal::BFS->new( $copy, %$operations );
324 39         14632 $traversal->bfs;
325              
326             # $distance{$b} is the distance in bonds. In 8-member rings adjacent
327             # ring atoms have distance of 7 bonds.
328 39   66     45751 return exists $distance{$b} && $distance{$b} <= $max_length;
329             }
330              
331             sub is_single_bond
332             {
333 305     305 0 596 my( $moiety, $a, $b ) = @_;
334 305   66     5457 return !$moiety->has_edge_attribute( $a, $b, 'bond' ) ||
335             $moiety->get_edge_attribute( $a, $b, 'bond' ) eq '-';
336             }
337              
338             sub is_triple_bond
339             {
340 0     0 0 0 my( $moiety, $a, $b ) = @_;
341 0   0     0 return $moiety->has_edge_attribute( $a, $b, 'bond' ) &&
342             $moiety->get_edge_attribute( $a, $b, 'bond' ) eq '#';
343             }
344              
345             sub mirror($)
346             {
347 30     30 0 46 my( $what ) = @_;
348 30 100       69 if( ref $what eq 'HASH' ) { # Single atom
349 25 100       55 if( is_chiral_tetrahedral( $what ) ) {
350 2 100       9 $what->{chirality} = $what->{chirality} eq '@' ? '@@' : '@';
351             }
352 25 100       43 if( is_chiral_allenal( $what ) ) {
353 1 50       5 $what->{chirality} = $what->{chirality} eq '@AL1' ? '@AL2' : '@AL1';
354             }
355             # Square planar centers are not affected by mirroring, doing nothing
356 25 50       45 if( is_chiral_trigonal_bipyramidal( $what ) ) {
357 0         0 my $number = substr $what->{chirality}, 3;
358 0         0 my $setting = $TB[$number-1];
359             my $opposite = first { $TB[$_]->{axis}[0] == $setting->{axis}[0] &&
360             $TB[$_]->{axis}[1] == $setting->{axis}[1] &&
361 0 0 0 0   0 $TB[$_]->{order} ne $setting->{order} }
362 0         0 0..$#TB;
363 0         0 $what->{chirality} = '@TB' . ($opposite + 1);
364             }
365 25 50       61 if( is_chiral_octahedral( $what ) ) {
366 0         0 my $number = substr $what->{chirality}, 3;
367 0         0 my $setting = $OH[$number-1];
368             my $opposite = first { $OH[$_]->{shape} eq $setting->{shape} &&
369             $OH[$_]->{axis}[0] == $setting->{axis}[0] &&
370             $OH[$_]->{axis}[1] == $setting->{axis}[1] &&
371 0 0 0 0   0 $OH[$_]->{order} ne $setting->{order} }
      0        
372 0         0 0..$#OH;
373 0         0 $what->{chirality} = '@OH' . ($opposite + 1);
374             }
375             } else {
376 5         17 for ($what->vertices) {
377 25         148 mirror( $_ );
378             }
379             }
380             }
381              
382             sub toggle_cistrans($)
383             {
384 190 100   190 0 1096 return $_[0] unless $_[0] =~ /^[\\\/]$/;
385 79 100       386 return $_[0] eq '/' ? '\\' : '/';
386             }
387              
388             # TODO: The actual unsprouting has to happen during print.
389             sub _unsprout_hydrogens($)
390             {
391 0     0   0 my( $moiety ) = @_;
392              
393 0         0 for my $atom ($moiety->vertices) {
394 0 0       0 next unless can_unsprout_hydrogen( $moiety, $atom );
395              
396 0         0 my( $neighbour ) = $moiety->neighbours( $atom );
397              
398             # TODO: Adjust other chiralities as well
399 0 0       0 if( is_chiral_tetrahedral $neighbour ) {
400 0     0   0 my $pos = first { $neighbour->{chirality_neighbours}[$_] == $atom } 0..3;
  0         0  
401 0         0 @{$neighbour->{chirality_neighbours}} =
402 0         0 grep { $_ != $atom } @{$neighbour->{chirality_neighbours}};
  0         0  
  0         0  
403 0 0       0 mirror $neighbour unless $pos % 2;
404             }
405              
406 0         0 $neighbour->{hcount}++;
407 0         0 $moiety->delete_vertex( $atom );
408             }
409             }
410              
411             sub valence($$)
412             {
413 456     456 0 1156 my( $moiety, $atom ) = @_;
414             return ($atom->{hcount} ? $atom->{hcount} : 0) +
415             sum0 map { exists $bond_symbol_to_order{$_}
416 1125 100       171801 ? $bond_symbol_to_order{$_}
417             : 1 }
418 456 50       1850 map { $moiety->has_edge_attribute( $atom, $_, 'bond' )
  1125 100       342589  
419             ? $moiety->get_edge_attribute( $atom, $_, 'bond' )
420             : 1 }
421             $moiety->neighbours( $atom );
422             }
423              
424             # CAVEAT: requires output from non-raw parsing due issue similar to GH#2
425             sub _validate($@)
426             {
427 31     31   4951 my( $moiety, $color_sub ) = @_;
428              
429             # Identify islands of allene systems
430 31         131 my $allenes = _allene_graph( $moiety );
431              
432 31     128   293 my $color_by_element = sub { $_[0]->{symbol} };
  128         421  
433              
434 31         155 for my $atom (sort { $a->{number} <=> $b->{number} } $moiety->vertices) {
  736         2398  
435              
436             # Warn about aromatic atoms with less than two aromatic bonds
437 580 100 100     220516 if( is_aromatic($atom) && $moiety->degree($atom) &&
      100        
438 231         153761 2 > grep { is_aromatic_bond( $moiety, $atom, $_ ) }
439             $moiety->neighbours( $atom ) ) {
440             warn sprintf 'aromatic atom %s(%d) has less than 2 aromatic bonds' . "\n",
441             $atom->{symbol},
442 1         389 $atom->{number};
443             }
444              
445 580 100 66     42676 if( is_chiral_allenal($atom) ) {
    100          
    100          
446 6 50       21 if( $moiety->degree($atom) != 2 ) {
447             warn sprintf 'tetrahedral chiral allenal setting for %s(%d) ' .
448             'has %d bonds while 2 are needed' . "\n",
449             $atom->{symbol},
450             $atom->{number},
451 0         0 $moiety->degree($atom);
452 0         0 next;
453             }
454 6 50       2804 if( !$allenes->has_vertex($atom) ) {
455             warn sprintf 'tetrahedral chiral allenal setting for %s(%d) ' .
456             'is not a part of any allenal system' . "\n",
457             $atom->{symbol},
458 0         0 $atom->{number};
459 0         0 next;
460             }
461 6 100   8   274 if( none { $allenes->has_edge_attribute( $atom, $_, 'allene' ) &&
  8 100       1639  
462             $allenes->get_edge_attribute( $atom, $_, 'allene' ) eq 'mid' }
463             $allenes->neighbours($atom) ) {
464             warn sprintf 'tetrahedral chiral allenal setting for %s(%d) ' .
465             'observed for an atom which is not a center of ' .
466             'an allenal system' . "\n",
467             $atom->{symbol},
468 1         423 $atom->{number};
469 1         14 next;
470             }
471 5 100       2714 next unless $color_sub;
472 3 50       16 next if is_ring_atom( $moiety, $atom, scalar $moiety->edges );
473              
474 3 100       233 my @ends = grep { $allenes->has_edge_attribute( $atom, $_, 'allene' ) &&
  8         3516  
475             $allenes->get_edge_attribute( $atom, $_, 'allene' ) eq 'mid' }
476             $allenes->neighbours($atom);
477 24 100       92 my @neighbours = grep { $_ ne $ends[0] && $_ ne $ends[1] }
478 12         259 map { @$_ }
479 18         1625 grep { !$allenes->has_edge( @$_ ) }
480 3         1360 map { $moiety->edges_at($_) } @ends;
  6         408  
481 3         30 my %colors = map { ($color_sub->( $_ ) => 1) } @neighbours;
  12         54  
482 3 100       62 if( scalar keys %colors != 4 ) {
483             # FIXME: Emits false positives for coordinating metals.
484             # Need to think of a heuristic to exclude them.
485             warn sprintf 'tetrahedral chiral allenal setting for ' .
486             '%s(%d) is not needed as not all 4 neighbours ' .
487             'are distinct' . "\n",
488             $atom->{symbol},
489 2         31 $atom->{number};
490             }
491             } elsif( is_chiral_tetrahedral($atom) ) {
492 8 100       36 if( $moiety->degree($atom) < 3 ) {
493             # TODO: there should be a strict mode to forbid lone pairs
494             warn sprintf 'tetrahedral chiral center %s(%d) has %d bonds ' .
495             'while at least 3 are required' . "\n",
496             $atom->{symbol},
497             $atom->{number},
498 1         315 $moiety->degree($atom);
499 1         310 next;
500             }
501 7 50       4898 if( $moiety->degree($atom) > 4 ) {
502             warn sprintf 'tetrahedral chiral center %s(%d) has %d bonds ' .
503             'while at most 4 are allowed' . "\n",
504             $atom->{symbol},
505             $atom->{number},
506 0         0 $moiety->degree($atom);
507 0         0 next;
508             }
509              
510 7 50       6400 next unless $color_sub;
511 7 100       45 next if is_ring_atom( $moiety, $atom, scalar $moiety->edges );
512              
513 3         885 my $has_lone_pair = $moiety->degree($atom) == 3;
514 3         1858 my %colors = map { ($color_sub->( $_ ) => 1) }
  12         389  
515             $moiety->neighbours($atom);
516 3 100       42 if( scalar keys %colors != 4 - $has_lone_pair ) {
517             warn sprintf 'tetrahedral chiral setting for %s(%d) ' .
518             'is not needed as not all 4 neighbours ' .
519             '(including possible lone pair) are distinct' . "\n",
520             $atom->{symbol},
521 1         18 $atom->{number};
522             }
523             } elsif( !is_chiral($atom) && $moiety->degree($atom) == 4 ) {
524             # Warn about unmarked tetrahedral chiral centers
525 62 100       46769 my %colors = map { $color_sub
  248         8192  
526             ? ($color_sub->($_) => 1)
527             : ($color_by_element->($_) => 1) }
528             $moiety->neighbours($atom);
529 62 100       496 if( scalar keys %colors == 4 ) {
530             warn sprintf 'atom %s(%d) has 4 distinct neighbours, ' .
531             'but does not have a chiral setting' . "\n",
532             $atom->{symbol},
533 4         69 $atom->{number};
534             }
535             }
536             }
537              
538 31         12156 for my $bond (sort { min( map { $_->{number} } @$a ) <=> min( map { $_->{number} } @$b ) ||
  3254         5865  
  3254         6578  
539 1627 50       4413 max( map { $_->{number} } @$a ) <=> max( map { $_->{number} } @$b ) }
  644         1164  
  644         1186  
540             $moiety->edges) {
541 571         45947 my( $A, $B ) = sort { $a->{number} <=> $b->{number} } @$bond;
  571         2125  
542 571 50       1783 if( $A eq $B ) {
543             warn sprintf 'atom %s(%d) has bond to itself' . "\n",
544             $A->{symbol},
545 0         0 $A->{number};
546 0         0 next;
547             }
548              
549 571 100       1387 if( is_double_bond( $moiety, @$bond ) ) {
    100          
550             # Test cis/trans bonds
551             # Detect conflicting cis/trans markers, see COD entry 1547257, r297409
552 51         28106 my $cis_trans_A = grep { is_cis_trans_bond( $moiety, $A, $_ ) }
  136         50832  
553             $moiety->neighbours($A);
554 51         23959 my $cis_trans_B = grep { is_cis_trans_bond( $moiety, $B, $_ ) }
  130         41759  
555             $moiety->neighbours($B);
556 51 100 100     24460 if( $cis_trans_A && $cis_trans_B ) {
    100 100        
557             # If any of the bond atoms lack cis/trans markers, it means that the other markers are from some other bond
558 12         38 for my $atom (@$bond) {
559 24         81 my %bond_types = _neighbours_per_bond_type( $moiety, $atom );
560 24         67 for ('/', '\\') {
561 48 100 100     173 if( $bond_types{$_} && @{$bond_types{$_}} > 1 ) {
  27         132  
562             warn sprintf 'atom %s(%d) has %d bonds of type \'%s\', ' .
563             'cis/trans definitions must not conflict' . "\n",
564             $atom->{symbol},
565             $atom->{number},
566 1         3 scalar @{$bond_types{$_}},
  1         11  
567             $_;
568             }
569             }
570             }
571             } elsif( !$allenes->has_edge( @$bond ) && # Allene systems are checked below
572             $cis_trans_A + $cis_trans_B == 1 ) {
573             # FIXME: Source of false-positives.
574             # Cis/trans bond is out of place if none of neighbouring double bonds have other cis/trans bonds.
575             # This has to include allenal systems.
576             warn sprintf 'double bond between atoms %s(%d) and %s(%d) ' .
577             'has only one cis/trans marker' . "\n",
578             $A->{symbol}, $A->{number},
579 3         160 $B->{symbol}, $B->{number};
580             }
581             } elsif( is_cis_trans_bond( $moiety, @$bond ) ) {
582             # Test if next to a double bond.
583             # FIXME: Yields false-positives for delocalised bonds,
584             # see COD entry 1501863.
585             # FIXME: What about triple bond? See COD entry 4103591.
586 33         18255 my %bond_types;
587 33         113 for my $atom (@$bond) {
588 66         163 my %bond_types_now = _neighbours_per_bond_type( $moiety, $atom );
589 66         189 for my $key (keys %bond_types_now) {
590 163         205 push @{$bond_types{$key}}, @{$bond_types_now{$key}};
  163         294  
  163         417  
591             }
592             }
593 33 100       130 if( !$bond_types{'='} ) {
594             warn sprintf 'cis/trans bond is defined between atoms ' .
595             '%s(%d) and %s(%d), but neither of them ' .
596             'is attached to a double bond' . "\n",
597             $A->{symbol},
598             $A->{number},
599             $B->{symbol},
600 2         29 $B->{number};
601             }
602             }
603              
604             # Warn about 2 aromatic atoms in a cycle not having an aromatic bond between them
605 571 100 100     156282 if( is_aromatic( $A ) &&
      100        
      66        
606             is_aromatic( $B ) &&
607             !is_aromatic_bond( $moiety, $A, $B ) &&
608             is_ring_bond( $moiety, $A, $B ) ) {
609             warn sprintf 'aromatic atoms %s(%d) and %s(%d) belong to same cycle, ' .
610             'but the bond between them is not aromatic' . "\n",
611             $A->{symbol},
612             $A->{number},
613             $B->{symbol},
614 1         27 $B->{number};
615             }
616             }
617              
618             # Check allene systems
619 31         608 for my $system (sort { min( map { $_->{number} } @$a ) <=>
  0         0  
620 0         0 min( map { $_->{number} } @$b ) }
  0         0  
621             $allenes->connected_components) {
622 9 100       23069 next if @$system % 2;
623              
624 4         22 my @ends = sort { $a->{number} <=> $b->{number} }
625 4         1460 map { @$_ }
626 4 100       45 grep { $allenes->has_edge_attribute( @$_, 'allene' ) &&
  16         10158  
627             $allenes->get_edge_attribute( @$_, 'allene' ) eq 'end' }
628             $allenes->subgraph($system)->edges;
629 24         9904 my $cis_trans_bonds = grep { is_cis_trans_bond( $moiety, @$_ ) }
630 4         43 map { $moiety->edges_at( $_ ) } @ends;
  8         592  
631 4 100       1086 if( $cis_trans_bonds == 1 ) {
632             warn sprintf 'allene system between atoms %s(%d) and %s(%d) ' .
633             'has only one cis/trans marker' . "\n",
634             $ends[0]->{symbol}, $ends[0]->{number},
635 1         16 $ends[1]->{symbol}, $ends[1]->{number};
636             }
637 4 100       27 next if $cis_trans_bonds;
638              
639 8 100       35 my @neighbours_at_ends = grep { $_ ne $ends[0] && $_ ne $ends[1] }
640 4         555 map { @$_ }
641 6         1733 grep { !is_double_bond( $moiety, @$_ ) }
642 1         4 map { $moiety->edges_at( $_ ) } @ends;
  2         123  
643 1 50       7 next unless @neighbours_at_ends == 4;
644             warn sprintf 'allene system between atoms %s(%d) and %s(%d) ' .
645             'has 4 neighbours, but does not have cis/trans ' .
646             'setting' . "\n",
647             $ends[0]->{symbol}, $ends[0]->{number},
648 1         20 $ends[1]->{symbol}, $ends[1]->{number};
649             }
650              
651             # Check for bridging aromatic bonds
652 31         3050 my $aromatic = $moiety->copy_graph;
653 494         8130 $aromatic->delete_edges( map { @$_ }
654 31         86608 grep { !is_aromatic_bond( $moiety, @$_ ) }
  571         178631  
655             $moiety->edges );
656              
657             # Due to the issue in Graph, bridges() returns strings instead of real objects.
658             # Graph issue: https://github.com/graphviz-perl/Graph/issues/29
659             # The code below works on buggy (< 0.9727) as well as fixed (>= 0.9727) versions.
660 31         73536 my %vertices_by_name = map { $_ => $_ } $aromatic->vertices;
  580         2478  
661 31         254 my @bridges = map { [ map { $vertices_by_name{$_} } @$_ ] } $aromatic->bridges;
  1         1549  
  2         80  
662 31         33047 for my $bridge (sort { min( map { $_->{number} } @$a ) <=> min( map { $_->{number} } @$b ) ||
  0         0  
  0         0  
663 0 0       0 max( map { $_->{number} } @$a ) <=> max( map { $_->{number} } @$b ) }
  0         0  
  0         0  
664             @bridges) {
665 1         8 my( $A, $B ) = sort { $a->{number} <=> $b->{number} } @$bridge;
  1         7  
666             warn sprintf 'aromatic bond between atoms %s(%d) and %s(%d) ' .
667             'is outside an aromatic ring' . "\n",
668 1         41 $A->{symbol}, $A->{number}, $B->{symbol}, $B->{number};
669             }
670              
671             # TODO: SP, TB, OH chiral centers
672             }
673              
674             sub _allene_graph
675             {
676 34     34   101 my( $moiety ) = @_;
677              
678 34         252 my $graph = $moiety->copy;
679 538         10874 $graph->delete_edges( map { @$_ }
680 34         104980 grep { !is_double_bond( $moiety, @$_ ) }
  597         224354  
681             $moiety->edges );
682 34         97726 $graph->delete_vertices( grep { !$graph->degree( $_ ) } $graph->vertices );
  609         173010  
683              
684 34         99083 for my $system ($graph->connected_components) {
685 39         69328 my @d1 = grep { $graph->degree( $_ ) == 1 } @$system;
  98         24273  
686 39         14829 my @d2 = grep { $graph->degree( $_ ) == 2 } @$system;
  98         23106  
687 39 100 66     13039 if (@d1 == 2 && @d2 && @d1 + @d2 == @$system ) {
      66        
688 12 100       54 if( @d2 % 2 ) {
689 6         50 my( $center ) = $graph->subgraph( $system )->center_vertices;
690 6         84359 $graph->set_edge_attribute( $center, $d1[0], 'allene', 'mid' );
691 6         2271 $graph->set_edge_attribute( $center, $d1[1], 'allene', 'mid' );
692             }
693 12         2216 $graph->set_edge_attribute( @d1, 'allene', 'end' );
694             } else {
695 27         111 $graph->delete_vertices( @$system );
696             }
697             }
698              
699 34         11700 return $graph;
700             }
701              
702             sub _neighbours_per_bond_type
703             {
704 90     90   207 my( $moiety, $atom ) = @_;
705 90         135 my %bond_types;
706 90         275 for my $neighbour ($moiety->neighbours($atom)) {
707 273         10664 my $bond_type;
708 273 100       5681 if( $moiety->has_edge_attribute( $atom, $neighbour, 'bond' ) ) {
709 190         51750 $bond_type = $moiety->get_edge_attribute( $atom, $neighbour, 'bond' );
710             } else {
711 83         21716 $bond_type = '';
712             }
713 273 100       45614 if( $atom->{number} > $neighbour->{number} ) {
714 84         250 $bond_type = toggle_cistrans $bond_type;
715             }
716 273         465 push @{$bond_types{$bond_type}}, $neighbour;
  273         956  
717             }
718 90         556 return %bond_types;
719             }
720              
721             1;
722              
723             __END__