File Coverage

blib/lib/Chemistry/OpenSMILES/Stereo.pm
Criterion Covered Total %
statement 186 215 86.5
branch 64 102 62.7
condition 18 51 35.2
subroutine 15 18 83.3
pod 0 6 0.0
total 283 392 72.1


line stmt bran cond sub pod time code
1             package Chemistry::OpenSMILES::Stereo;
2              
3 2     2   1154 use strict;
  2         8  
  2         62  
4 2     2   33 use warnings;
  2         4  
  2         178  
5              
6             # ABSTRACT: Stereochemistry handling routines
7             our $VERSION = '0.8.6'; # VERSION
8              
9             require Exporter;
10             our @ISA = qw( Exporter );
11             our @EXPORT_OK = qw(
12             chirality_to_pseudograph
13             cis_trans_to_pseudoedges
14             mark_all_double_bonds
15             mark_cis_trans
16             );
17              
18 2         127 use Chemistry::OpenSMILES qw(
19             is_cis_trans_bond
20             is_double_bond
21             is_ring_bond
22             is_single_bond
23             toggle_cistrans
24 2     2   14 );
  2         4  
25 2     2   955 use Chemistry::OpenSMILES::Writer qw( write_SMILES );
  2         5  
  2         159  
26 2     2   16 use Graph::Traversal::BFS;
  2         4  
  2         44  
27 2     2   10 use Graph::Undirected;
  2         4  
  2         46  
28 2     2   10 use List::Util qw( all any max min sum sum0 uniq );
  2         4  
  2         4889  
29              
30             sub mark_all_double_bonds
31             {
32 2     2 0 806 my( $graph, $setting_sub, $order_sub, $color_sub ) = @_;
33              
34             # By default, whenever there is a choice between atoms, the one with
35             # lowest position in the input SMILES is chosen:
36 2 50   6   12 $order_sub = sub { return $_[0]->{number} } unless $order_sub;
  6         31  
37              
38             # Select non-ring double bonds
39 2 100 66     9 my @double_bonds = grep { is_double_bond( $graph, @$_ ) &&
  15         2259  
40             !is_ring_bond( $graph, @$_ ) &&
41             !is_unimportant_double_bond( $graph, @$_, $color_sub ) }
42             $graph->edges;
43              
44             # Construct a double bond incidence graph. Vertices are double bonds
45             # and edges are between those double bonds that separated by a single
46             # single ('-') bond. Interestingly, incidence graph for SMILES C=C(C)=C
47             # is connected, but for C=C=C not. This is because allenal systems
48             # cannot be represented yet.
49 2         406 my $bond_graph = Graph::Undirected->new;
50 2         434 my %incident_double_bonds;
51 2         8 for my $bond (@double_bonds) {
52 1         14 $bond_graph->add_vertex( join '', sort @$bond );
53 1         39 push @{$incident_double_bonds{$bond->[0]}}, $bond;
  1         11  
54 1         3 push @{$incident_double_bonds{$bond->[1]}}, $bond;
  1         6  
55             }
56 2         9 for my $bond ($graph->edges) {
57 15 100       918 next unless is_single_bond( $graph, @$bond );
58 13         2696 my @adjacent_bonds;
59 13 100       38 if( $incident_double_bonds{$bond->[0]} ) {
60             push @adjacent_bonds,
61 3         14 @{$incident_double_bonds{$bond->[0]}};
  3         7  
62             }
63 13 100       31 if( $incident_double_bonds{$bond->[1]} ) {
64             push @adjacent_bonds,
65 1         3 @{$incident_double_bonds{$bond->[1]}};
  1         23  
66             }
67 13         30 for my $bond1 (@adjacent_bonds) {
68 4         11 for my $bond2 (@adjacent_bonds) {
69 4 50       13 next if $bond1 == $bond2;
70 0         0 $bond_graph->add_edge( join( '', sort @$bond1 ),
71             join( '', sort @$bond2 ) );
72             }
73             }
74             }
75              
76             # In principle, bond graph could be splitted into separate components
77             # to reduce the number of cycles needed by Morgan algorithm, but I do
78             # not think there is a failure case because of keeping them together.
79              
80             # Set up initial invariants
81 2         12 my %invariants;
82 2         6 for ($bond_graph->vertices) {
83 1         23 $invariants{$_} = $bond_graph->degree( $_ );
84             }
85 2         191 my %distinct_invariants = map { $_ => 1 } values %invariants;
  1         5  
86              
87             # Perform Morgan algorithm
88 2         2 while( 1 ) {
89 2         5 my %invariants_now;
90 2         5 for ($bond_graph->vertices) {
91 1         23 $invariants_now{$_} = sum0 map { $invariants{$_} }
  0         0  
92             $bond_graph->neighbours( $_ );
93             }
94              
95 2         89 my %distinct_invariants_now = map { $_ => 1 } values %invariants_now;
  1         5  
96 2 50       8 last if %distinct_invariants_now <= %distinct_invariants;
97              
98 0         0 %invariants = %invariants_now;
99 0         0 %distinct_invariants = %distinct_invariants_now;
100             }
101              
102             # Establish a deterministic order favouring bonds with higher invariants.
103             # If invariants are equal, order bonds by their atom numbers.
104 2         10 @double_bonds = sort { $invariants{join '', sort @$b} <=>
105             $invariants{join '', sort @$a} ||
106 0         0 (min map { $order_sub->($_) } @$a) <=>
107 0         0 (min map { $order_sub->($_) } @$b) ||
108 0         0 (max map { $order_sub->($_) } @$a) <=>
109 0 0 0     0 (max map { $order_sub->($_) } @$b) } @double_bonds;
  0         0  
110              
111 2         10 for (@double_bonds) {
112 1         5 mark_cis_trans( $graph, @$_, $setting_sub, $order_sub );
113             }
114             }
115              
116             # Requires double bonds in input. Does not check whether a bond belongs
117             # to a ring or not.
118             sub mark_cis_trans
119             {
120 1     1 0 5 my( $graph, $atom2, $atom3, $setting_sub, $order_sub ) = @_;
121              
122             # By default, whenever there is a choice between atoms, the one with
123             # lowest position in the input SMILES is chosen:
124 1 50   0   3 $order_sub = sub { return $_[0]->{number} } unless $order_sub;
  0         0  
125              
126 1         6 my @neighbours2 = $graph->neighbours( $atom2 );
127 1         134 my @neighbours3 = $graph->neighbours( $atom3 );
128 1 50 33     173 return if @neighbours2 < 2 || @neighbours3 < 2;
129              
130             # TODO: Currently we are choosing either a pair of
131             # neighbouring atoms which have no cis/trans markers or
132             # a pair of which a single atom has a cis/trans marker.
133             # The latter case allows to accommodate adjacent double
134             # bonds. However, there may be a situation where both
135             # atoms already have cis/trans markers, but could still
136             # be reconciled.
137              
138             my @cistrans_bonds2 =
139 1         18 grep { is_cis_trans_bond( $graph, $atom2, $_ ) } @neighbours2;
  3         590  
140             my @cistrans_bonds3 =
141 1         203 grep { is_cis_trans_bond( $graph, $atom3, $_ ) } @neighbours3;
  3         588  
142              
143 1 50       204 if( @cistrans_bonds2 + @cistrans_bonds3 > 1 ) {
144             warn 'cannot represent cis/trans bond between atoms ' .
145 0         0 join( ' and ', sort map { $_->{number} } $atom2, $atom3 ) .
  0         0  
146             ' as there are other cis/trans bonds nearby' . "\n";
147 0         0 return;
148             }
149              
150 1 0 33     17 if( (@neighbours2 == 2 && !@cistrans_bonds2 &&
      33        
      33        
      33        
      33        
151 0     0   0 !any { is_single_bond( $graph, $atom2, $_ ) } @neighbours2) ||
152             (@neighbours3 == 2 && !@cistrans_bonds3 &&
153 0     0   0 !any { is_single_bond( $graph, $atom3, $_ ) } @neighbours3) ) {
154             # Azide group (N=N#N) or conjugated allene-like systems (=C=)
155             warn 'atoms ' .
156 0         0 join( ' and ', sort map { $_->{number} } $atom2, $atom3 ) .
  0         0  
157             ' are part of conjugated double/triple bond system, thus ' .
158             'cis/trans setting of their bond is impossible to represent ' .
159             '(not supported yet)' . "\n";
160 0         0 return;
161             }
162              
163             # Making the $atom2 be the one which has a defined cis/trans bond.
164             # Also, a deterministic ordering of atoms in bond is achieved here.
165 1 50 33     24 if( @cistrans_bonds3 ||
      33        
166             (!@cistrans_bonds2 && $order_sub->($atom2) > $order_sub->($atom3)) ) {
167 0         0 ( $atom2, $atom3 ) = ( $atom3, $atom2 );
168 0         0 @neighbours2 = $graph->neighbours( $atom2 );
169 0         0 @neighbours3 = $graph->neighbours( $atom3 );
170              
171 0         0 @cistrans_bonds2 = @cistrans_bonds3;
172 0         0 @cistrans_bonds3 = ();
173             }
174              
175             # Establishing the canonical order
176 1         190 @neighbours2 = sort { $order_sub->($a) <=> $order_sub->($b) }
177 1         8 grep { is_single_bond( $graph, $atom2, $_ ) } @neighbours2;
  3         590  
178 1         190 @neighbours3 = sort { $order_sub->($a) <=> $order_sub->($b) }
179 1         3 grep { is_single_bond( $graph, $atom3, $_ ) } @neighbours3;
  3         613  
180              
181             # Check if there is a chance to have anything marked
182 1         2 my $bond_will_be_marked;
183 1         7 for my $atom1 (@cistrans_bonds2, @neighbours2) {
184 2         9 for my $atom4 (@neighbours3) {
185 2         8 my $setting = $setting_sub->( $atom1, $atom2, $atom3, $atom4 );
186 2 50       922 if( $setting ) {
187 2         5 $bond_will_be_marked = 1;
188 2         11 last;
189             }
190             }
191             }
192              
193 1 50       4 if( !$bond_will_be_marked ) {
194             warn 'cannot represent cis/trans bond between atoms ' .
195 0         0 join( ' and ', sort map { $_->{number} } $atom2, $atom3 ) .
  0         0  
196             ' as there are no eligible single bonds nearby' . "\n";
197 0         0 return;
198             }
199              
200             # If there is an atom with cis/trans bond, then this is this one
201 1 50       4 my( $first_atom ) = @cistrans_bonds2 ? @cistrans_bonds2 : @neighbours2;
202 1 50       4 if( !@cistrans_bonds2 ) {
203 1         5 $graph->set_edge_attribute( $first_atom, $atom2, 'bond', '/' );
204             }
205              
206 1         205 my $atom4_marked;
207 1         4 for my $atom4 (@neighbours3) {
208 2         4 my $atom1 = $first_atom;
209 2         5 my $setting = $setting_sub->( $atom1, $atom2, $atom3, $atom4 );
210 2 50       954 next unless $setting;
211 2         6 my $other = $graph->get_edge_attribute( $atom1, $atom2, 'bond' );
212 2 100       384 $other = toggle_cistrans $other if $setting eq 'cis';
213 2 50       7 $other = toggle_cistrans $other if $atom1->{number} > $atom2->{number};
214 2 50       6 $other = toggle_cistrans $other if $atom4->{number} < $atom3->{number};
215 2         8 $graph->set_edge_attribute( $atom3, $atom4, 'bond', $other );
216 2 100       409 $atom4_marked = $atom4 unless $atom4_marked;
217             }
218              
219 1         5 for my $atom1 (@neighbours2) {
220 2 100       10 next if $atom1 eq $first_atom; # Marked already
221 1         2 my $atom4 = $atom4_marked;
222 1         4 my $setting = $setting_sub->( $atom1, $atom2, $atom3, $atom4 );
223 1 50       524 next unless $setting;
224 1         4 my $other = $graph->get_edge_attribute( $atom3, $atom4, 'bond' );
225 1 50       198 $other = toggle_cistrans $other if $setting eq 'cis';
226 1 50       18 $other = toggle_cistrans $other if $atom1->{number} > $atom2->{number};
227 1 50       8 $other = toggle_cistrans $other if $atom4->{number} < $atom3->{number};
228 1         5 $graph->set_edge_attribute( $atom1, $atom2, 'bond', $other );
229             }
230             }
231              
232             # Store the tetrahedral chirality character as additional pseudo vertices
233             # and edges.
234             sub chirality_to_pseudograph
235             {
236 1     1 0 6 my( $moiety ) = @_;
237              
238 1         3 for my $atom ($moiety->vertices) {
239 11 100       52 next unless Chemistry::OpenSMILES::is_chiral_tetrahedral( $atom );
240 1         16 next unless @{$atom->{chirality_neighbours}} >= 3 &&
241 1 50 33     3 @{$atom->{chirality_neighbours}} <= 4;
  1         6  
242              
243 1         6 my @chirality_neighbours = @{$atom->{chirality_neighbours}};
  1         4  
244 1 50       4 if( @chirality_neighbours == 3 ) {
245 0         0 @chirality_neighbours = ( $chirality_neighbours[0],
246             {}, # marking the lone pair
247             @chirality_neighbours[1..2] );
248             }
249 1 50       12 if( $atom->{chirality} eq '@' ) {
250             # Reverse the order if counter-clockwise
251 1         14 @chirality_neighbours = ( $chirality_neighbours[0],
252             reverse @chirality_neighbours[1..3] );
253             }
254              
255 1         8 for my $i (0..3) {
256 4         11 my $neighbour = $chirality_neighbours[$i];
257 4         10 my @chirality_neighbours_now = @chirality_neighbours;
258            
259 4 100       30 if( $i % 2 ) {
260             # Reverse the order due to projected atom change
261 2         9 @chirality_neighbours_now = ( $chirality_neighbours_now[0],
262             reverse @chirality_neighbours_now[1..3] );
263             }
264              
265 4         8 my @other = grep { $_ != $neighbour } @chirality_neighbours_now;
  16         40  
266 4         14 for my $offset (0..2) {
267 12         20 my $connector = {};
268 12         47 $moiety->set_edge_attribute( $neighbour, $connector, 'chiral', 'from' );
269 12         5301 $moiety->set_edge_attribute( $atom, $connector, 'chiral', 'to' );
270              
271 12         4379 $moiety->set_edge_attribute( $connector, $other[0], 'chiral', 1 );
272 12         4441 $moiety->set_edge_attribute( $connector, $other[1], 'chiral', 2 );
273 12         4219 $moiety->set_edge_attribute( $connector, $other[2], 'chiral', 3 );
274              
275 12         4413 push @other, shift @other;
276             }
277             }
278             }
279             }
280              
281             sub cis_trans_to_pseudoedges
282             {
283 2     2 0 9887 my( $moiety ) = @_;
284              
285             # Select non-ring double bonds
286             my @double_bonds =
287 2 100 66     7 grep { is_double_bond( $moiety, @$_ ) &&
  15         3010  
288             !is_ring_bond( $moiety, @$_ ) &&
289             !is_unimportant_double_bond( $moiety, @$_ ) } $moiety->edges;
290              
291             # Connect cis/trans atoms in double bonds with pseudo-edges
292 2         571 for my $bond (@double_bonds) {
293 1         3 my( $atom2, $atom3 ) = @$bond;
294 1         5 my @atom2_neighbours = grep { !is_pseudoedge( $moiety, $atom2, $_ ) }
  3         487  
295             $moiety->neighbours( $atom2 );
296 1         213 my @atom3_neighbours = grep { !is_pseudoedge( $moiety, $atom3, $_ ) }
  3         497  
297             $moiety->neighbours( $atom3 );
298 1 50 33     210 next if @atom2_neighbours < 2 || @atom2_neighbours > 3 ||
      33        
      33        
299             @atom3_neighbours < 2 || @atom3_neighbours > 3;
300              
301 1         5 my( $atom1 ) = grep { is_cis_trans_bond( $moiety, $atom2, $_ ) }
  3         774  
302             @atom2_neighbours;
303 1         201 my( $atom4 ) = grep { is_cis_trans_bond( $moiety, $atom3, $_ ) }
  3         631  
304             @atom3_neighbours;
305 1 50 33     388 next unless $atom1 && $atom4;
306              
307 1 100       11 my( $atom1_para ) = grep { $_ != $atom1 && $_ != $atom3 } @atom2_neighbours;
  3         16  
308 1 100       7 my( $atom4_para ) = grep { $_ != $atom4 && $_ != $atom2 } @atom3_neighbours;
  3         16  
309              
310 1         4 my $is_cis = $moiety->get_edge_attribute( $atom1, $atom2, 'bond' ) ne
311             $moiety->get_edge_attribute( $atom3, $atom4, 'bond' );
312              
313 1 50       405 $is_cis = !$is_cis if $atom1->{number} > $atom2->{number};
314 1 50       5 $is_cis = !$is_cis if $atom3->{number} > $atom4->{number};
315              
316 1 50       7 $moiety->set_edge_attribute( $atom1, $atom4, 'pseudo',
317             $is_cis ? 'cis' : 'trans' );
318 1 50       373 if( $atom1_para ) {
319 1 50       11 $moiety->set_edge_attribute( $atom1_para, $atom4, 'pseudo',
320             $is_cis ? 'trans' : 'cis' );
321             }
322 1 50       390 if( $atom4_para ) {
323 1 50       7 $moiety->set_edge_attribute( $atom1, $atom4_para, 'pseudo',
324             $is_cis ? 'trans' : 'cis' );
325             }
326 1 50 33     381 if( $atom1_para && $atom4_para ) {
327 1 50       8 $moiety->set_edge_attribute( $atom1_para, $atom4_para, 'pseudo',
328             $is_cis ? 'cis' : 'trans' );
329             }
330             }
331              
332             # Unset cis/trans bond markers during second pass
333 2         403 for my $bond ($moiety->edges) {
334 19 100       3902 next unless is_cis_trans_bond( $moiety, @$bond );
335 5         1970 $moiety->delete_edge_attribute( @$bond, 'bond' );
336             }
337             }
338              
339             sub is_pseudoedge
340             {
341 6     6 0 13 my( $moiety, $a, $b ) = @_;
342 6         17 return $moiety->has_edge_attribute( $a, $b, 'pseudo' );
343             }
344              
345             # An "unimportant" double bond is one which has chemically identical atoms on one of its sides.
346             # If C<$color_sub> is given, it is used to determine chemical identity of atoms.
347             # If not, only leaf atoms are considered and compared.
348             sub is_unimportant_double_bond
349             {
350 4     4 0 272 my( $moiety, $a, $b, $color_sub ) = @_;
351 4         13 my @a_neighbours = grep { $_ != $b } $moiety->neighbours( $a );
  10         427  
352 4         14 my @b_neighbours = grep { $_ != $a } $moiety->neighbours( $b );
  12         407  
353              
354 4         14 for (\@a_neighbours, \@b_neighbours) {
355 8 100       1257 next unless @$_ == 2;
356              
357 6         8 my @representations;
358 6 50       15 if( $color_sub ) {
359 0         0 @representations = map { $color_sub->( $_ ) } @$_;
  0         0  
360             } else {
361 6 100   12   29 next if any { $moiety->degree( $_ ) != 1 } @$_;
  12         2129  
362 2         675 @representations = map { write_SMILES( $_ ) } @$_;
  4         16  
363             }
364 2 50       30 return 1 if uniq( @representations ) == 1;
365             }
366              
367 2         1201 return;
368             }
369              
370             1;