File Coverage

blib/lib/Chemistry/OpenSMILES/Stereo.pm
Criterion Covered Total %
statement 182 210 86.6
branch 62 102 60.7
condition 22 57 38.6
subroutine 16 19 84.2
pod 0 6 0.0
total 282 394 71.5


line stmt bran cond sub pod time code
1             package Chemistry::OpenSMILES::Stereo;
2              
3 1     1   617 use strict;
  1         3  
  1         31  
4 1     1   13 use warnings;
  1         2  
  1         86  
5              
6             # ABSTRACT: Stereochemistry handling routines
7             our $VERSION = '0.8.5'; # VERSION
8              
9             require Exporter;
10             our @ISA = qw( Exporter );
11             our @EXPORT_OK = qw(
12             chirality_to_pseudograph
13             cis_trans_to_pseudoedges
14             mark_all_double_bonds
15             mark_cis_trans
16             );
17              
18 1         82 use Chemistry::OpenSMILES qw(
19             is_cis_trans_bond
20             is_double_bond
21             is_ring_bond
22             is_single_bond
23             toggle_cistrans
24 1     1   6 );
  1         3  
25 1     1   466 use Chemistry::OpenSMILES::Writer qw( write_SMILES );
  1         2  
  1         63  
26 1     1   7 use Graph::Traversal::BFS;
  1         2  
  1         21  
27 1     1   5 use Graph::Undirected;
  1         3  
  1         23  
28 1     1   5 use List::Util qw( all any max min sum sum0 );
  1         2  
  1         2421  
29              
30             sub mark_all_double_bonds
31             {
32 2     2 0 846 my( $graph, $setting_sub, $order_sub ) = @_;
33              
34             # By default, whenever there is a choice between atoms, the one with
35             # lowest position in the input SMILES is chosen:
36 2 50   6   12 $order_sub = sub { return $_[0]->{number} } unless $order_sub;
  6         22  
37              
38             # Select non-ring double bonds
39 2 100 66     7 my @double_bonds = grep { is_double_bond( $graph, @$_ ) &&
  15         2191  
40             !is_ring_bond( $graph, @$_ ) &&
41             !is_unimportant_double_bond( $graph, @$_ ) }
42             $graph->edges;
43              
44             # Construct a double bond incidence graph. Vertices are double bonds
45             # and edges are between those double bonds that separated by a single
46             # single ('-') bond. Interestingly, incidence graph for SMILES C=C(C)=C
47             # is connected, but for C=C=C not. This is because allenal systems
48             # cannot be represented yet.
49 2         387 my $bond_graph = Graph::Undirected->new;
50 2         413 my %incident_double_bonds;
51 2         6 for my $bond (@double_bonds) {
52 1         9 $bond_graph->add_vertex( join '', sort @$bond );
53 1         45 push @{$incident_double_bonds{$bond->[0]}}, $bond;
  1         9  
54 1         1 push @{$incident_double_bonds{$bond->[1]}}, $bond;
  1         5  
55             }
56 2         9 for my $bond ($graph->edges) {
57 15 100       871 next unless is_single_bond( $graph, @$bond );
58 13         2537 my @adjacent_bonds;
59 13 100       36 if( $incident_double_bonds{$bond->[0]} ) {
60             push @adjacent_bonds,
61 3         10 @{$incident_double_bonds{$bond->[0]}};
  3         9  
62             }
63 13 100       30 if( $incident_double_bonds{$bond->[1]} ) {
64             push @adjacent_bonds,
65 1         2 @{$incident_double_bonds{$bond->[1]}};
  1         5  
66             }
67 13         30 for my $bond1 (@adjacent_bonds) {
68 4         7 for my $bond2 (@adjacent_bonds) {
69 4 50       13 next if $bond1 == $bond2;
70 0         0 $bond_graph->add_edge( join( '', sort @$bond1 ),
71             join( '', sort @$bond2 ) );
72             }
73             }
74             }
75              
76             # In principle, bond graph could be splitted into separate components
77             # to reduce the number of cycles needed by Morgan algorithm, but I do
78             # not think there is a failure case because of keeping them together.
79              
80             # Set up initial invariants
81 2         14 my %invariants;
82 2         6 for ($bond_graph->vertices) {
83 1         29 $invariants{$_} = $bond_graph->degree( $_ );
84             }
85 2         184 my %distinct_invariants = map { $_ => 1 } values %invariants;
  1         5  
86              
87             # Perform Morgan algorithm
88 2         6 while( 1 ) {
89 2         4 my %invariants_now;
90 2         10 for ($bond_graph->vertices) {
91 1         20 $invariants_now{$_} = sum0 map { $invariants{$_} }
  0         0  
92             $bond_graph->neighbours( $_ );
93             }
94              
95 2         85 my %distinct_invariants_now = map { $_ => 1 } values %invariants_now;
  1         4  
96 2 50       12 last if %distinct_invariants_now <= %distinct_invariants;
97              
98 0         0 %invariants = %invariants_now;
99 0         0 %distinct_invariants = %distinct_invariants_now;
100             }
101              
102             # Establish a deterministic order favouring bonds with higher invariants.
103             # If invariants are equal, order bonds by their atom numbers.
104 2         19 @double_bonds = sort { $invariants{join '', sort @$b} <=>
105             $invariants{join '', sort @$a} ||
106 0         0 (min map { $order_sub->($_) } @$a) <=>
107 0         0 (min map { $order_sub->($_) } @$b) ||
108 0         0 (max map { $order_sub->($_) } @$a) <=>
109 0 0 0     0 (max map { $order_sub->($_) } @$b) } @double_bonds;
  0         0  
110              
111 2         23 for (@double_bonds) {
112 1         9 mark_cis_trans( $graph, @$_, $setting_sub, $order_sub );
113             }
114             }
115              
116             # Requires double bonds in input. Does not check whether a bond belongs
117             # to a ring or not.
118             sub mark_cis_trans
119             {
120 1     1 0 4 my( $graph, $atom2, $atom3, $setting_sub, $order_sub ) = @_;
121              
122             # By default, whenever there is a choice between atoms, the one with
123             # lowest position in the input SMILES is chosen:
124 1 50   0   5 $order_sub = sub { return $_[0]->{number} } unless $order_sub;
  0         0  
125              
126 1         6 my @neighbours2 = $graph->neighbours( $atom2 );
127 1         116 my @neighbours3 = $graph->neighbours( $atom3 );
128 1 50 33     114 return if @neighbours2 < 2 || @neighbours3 < 2;
129              
130             # TODO: Currently we are choosing either a pair of
131             # neighbouring atoms which have no cis/trans markers or
132             # a pair of which a single atom has a cis/trans marker.
133             # The latter case allows to accommodate adjacent double
134             # bonds. However, there may be a situation where both
135             # atoms already have cis/trans markers, but could still
136             # be reconciled.
137              
138             my @cistrans_bonds2 =
139 1         7 grep { is_cis_trans_bond( $graph, $atom2, $_ ) } @neighbours2;
  3         581  
140             my @cistrans_bonds3 =
141 1         192 grep { is_cis_trans_bond( $graph, $atom3, $_ ) } @neighbours3;
  3         567  
142              
143 1 50       233 if( @cistrans_bonds2 + @cistrans_bonds3 > 1 ) {
144             warn 'cannot represent cis/trans bond between atoms ' .
145 0         0 join( ' and ', sort map { $_->{number} } $atom2, $atom3 ) .
  0         0  
146             ' as there are other cis/trans bonds nearby' . "\n";
147 0         0 return;
148             }
149              
150 1 0 33     15 if( (@neighbours2 == 2 && !@cistrans_bonds2 &&
      33        
      33        
      33        
      33        
151 0     0   0 !any { is_single_bond( $graph, $atom2, $_ ) } @neighbours2) ||
152             (@neighbours3 == 2 && !@cistrans_bonds3 &&
153 0     0   0 !any { is_single_bond( $graph, $atom3, $_ ) } @neighbours3) ) {
154             # Azide group (N=N#N) or conjugated allene-like systems (=C=)
155             warn 'atoms ' .
156 0         0 join( ' and ', sort map { $_->{number} } $atom2, $atom3 ) .
  0         0  
157             ' are part of conjugated double/triple bond system, thus ' .
158             'cis/trans setting of their bond is impossible to represent ' .
159             '(not supported yet)' . "\n";
160 0         0 return;
161             }
162              
163             # Making the $atom2 be the one which has a defined cis/trans bond.
164             # Also, a deterministic ordering of atoms in bond is achieved here.
165 1 50 33     10 if( @cistrans_bonds3 ||
      33        
166             (!@cistrans_bonds2 && $order_sub->($atom2) > $order_sub->($atom3)) ) {
167 0         0 ( $atom2, $atom3 ) = ( $atom3, $atom2 );
168 0         0 @neighbours2 = $graph->neighbours( $atom2 );
169 0         0 @neighbours3 = $graph->neighbours( $atom3 );
170              
171 0         0 @cistrans_bonds2 = @cistrans_bonds3;
172 0         0 @cistrans_bonds3 = ();
173             }
174              
175             # Establishing the canonical order
176 1         218 @neighbours2 = sort { $order_sub->($a) <=> $order_sub->($b) }
177 1         7 grep { is_single_bond( $graph, $atom2, $_ ) } @neighbours2;
  3         594  
178 1         192 @neighbours3 = sort { $order_sub->($a) <=> $order_sub->($b) }
179 1         5 grep { is_single_bond( $graph, $atom3, $_ ) } @neighbours3;
  3         568  
180              
181             # Check if there is a chance to have anything marked
182 1         3 my $bond_will_be_marked;
183 1         4 for my $atom1 (@cistrans_bonds2, @neighbours2) {
184 2         6 for my $atom4 (@neighbours3) {
185 2         5 my $setting = $setting_sub->( $atom1, $atom2, $atom3, $atom4 );
186 2 50       939 if( $setting ) {
187 2         5 $bond_will_be_marked = 1;
188 2         5 last;
189             }
190             }
191             }
192              
193 1 50       6 if( !$bond_will_be_marked ) {
194             warn 'cannot represent cis/trans bond between atoms ' .
195 0         0 join( ' and ', sort map { $_->{number} } $atom2, $atom3 ) .
  0         0  
196             ' as there are no eligible single bonds nearby' . "\n";
197 0         0 return;
198             }
199              
200             # If there is an atom with cis/trans bond, then this is this one
201 1 50       6 my( $first_atom ) = @cistrans_bonds2 ? @cistrans_bonds2 : @neighbours2;
202 1 50       4 if( !@cistrans_bonds2 ) {
203 1         5 $graph->set_edge_attribute( $first_atom, $atom2, 'bond', '/' );
204             }
205              
206 1         249 my $atom4_marked;
207 1         4 for my $atom4 (@neighbours3) {
208 2         5 my $atom1 = $first_atom;
209 2         8 my $setting = $setting_sub->( $atom1, $atom2, $atom3, $atom4 );
210 2 50       1010 next unless $setting;
211 2         7 my $other = $graph->get_edge_attribute( $atom1, $atom2, 'bond' );
212 2 100       398 $other = toggle_cistrans $other if $setting eq 'cis';
213 2 50       9 $other = toggle_cistrans $other if $atom1->{number} > $atom2->{number};
214 2 50       7 $other = toggle_cistrans $other if $atom4->{number} < $atom3->{number};
215 2         7 $graph->set_edge_attribute( $atom3, $atom4, 'bond', $other );
216 2 100       421 $atom4_marked = $atom4 unless $atom4_marked;
217             }
218              
219 1         10 for my $atom1 (@neighbours2) {
220 2 100       7 next if $atom1 eq $first_atom; # Marked already
221 1         3 my $atom4 = $atom4_marked;
222 1         5 my $setting = $setting_sub->( $atom1, $atom2, $atom3, $atom4 );
223 1 50       467 next unless $setting;
224 1         17 my $other = $graph->get_edge_attribute( $atom3, $atom4, 'bond' );
225 1 50       271 $other = toggle_cistrans $other if $setting eq 'cis';
226 1 50       8 $other = toggle_cistrans $other if $atom1->{number} > $atom2->{number};
227 1 50       9 $other = toggle_cistrans $other if $atom4->{number} < $atom3->{number};
228 1         43 $graph->set_edge_attribute( $atom1, $atom2, 'bond', $other );
229             }
230             }
231              
232             # Store the tetrahedral chirality character as additional pseudo vertices
233             # and edges.
234             sub chirality_to_pseudograph
235             {
236 1     1 0 10 my( $moiety ) = @_;
237              
238 1         4 for my $atom ($moiety->vertices) {
239 11 100       60 next unless Chemistry::OpenSMILES::is_chiral_tetrahedral( $atom );
240 1         17 next unless @{$atom->{chirality_neighbours}} >= 3 &&
241 1 50 33     3 @{$atom->{chirality_neighbours}} <= 4;
  1         16  
242              
243 1         6 my @chirality_neighbours = @{$atom->{chirality_neighbours}};
  1         5  
244 1 50       13 if( @chirality_neighbours == 3 ) {
245 0         0 @chirality_neighbours = ( $chirality_neighbours[0],
246             {}, # marking the lone pair
247             @chirality_neighbours[1..2] );
248             }
249 1 50       6 if( $atom->{chirality} eq '@' ) {
250             # Reverse the order if counter-clockwise
251 1         5 @chirality_neighbours = ( $chirality_neighbours[0],
252             reverse @chirality_neighbours[1..3] );
253             }
254              
255 1         5 for my $i (0..3) {
256 4         7 my $neighbour = $chirality_neighbours[$i];
257 4         10 my @chirality_neighbours_now = @chirality_neighbours;
258            
259 4 100       21 if( $i % 2 ) {
260             # Reverse the order due to projected atom change
261 2         8 @chirality_neighbours_now = ( $chirality_neighbours_now[0],
262             reverse @chirality_neighbours_now[1..3] );
263             }
264              
265 4         12 my @other = grep { $_ != $neighbour } @chirality_neighbours_now;
  16         35  
266 4         12 for my $offset (0..2) {
267 12         25 my $connector = {};
268 12         40 $moiety->set_edge_attribute( $neighbour, $connector, 'chiral', 'from' );
269 12         4832 $moiety->set_edge_attribute( $atom, $connector, 'chiral', 'to' );
270              
271 12         4313 $moiety->set_edge_attribute( $connector, $other[0], 'chiral', 1 );
272 12         4385 $moiety->set_edge_attribute( $connector, $other[1], 'chiral', 2 );
273 12         4302 $moiety->set_edge_attribute( $connector, $other[2], 'chiral', 3 );
274              
275 12         4380 push @other, shift @other;
276             }
277             }
278             }
279             }
280              
281             sub cis_trans_to_pseudoedges
282             {
283 2     2 0 9939 my( $moiety ) = @_;
284              
285             # Select non-ring double bonds
286             my @double_bonds =
287 2 100 66     10 grep { is_double_bond( $moiety, @$_ ) &&
  15         3096  
288             !is_ring_bond( $moiety, @$_ ) &&
289             !is_unimportant_double_bond( $moiety, @$_ ) } $moiety->edges;
290              
291             # Connect cis/trans atoms in double bonds with pseudo-edges
292 2         576 for my $bond (@double_bonds) {
293 1         3 my( $atom2, $atom3 ) = @$bond;
294 1         20 my @atom2_neighbours = grep { !is_pseudoedge( $moiety, $atom2, $_ ) }
  3         505  
295             $moiety->neighbours( $atom2 );
296 1         218 my @atom3_neighbours = grep { !is_pseudoedge( $moiety, $atom3, $_ ) }
  3         879  
297             $moiety->neighbours( $atom3 );
298 1 50 33     202 next if @atom2_neighbours < 2 || @atom2_neighbours > 3 ||
      33        
      33        
299             @atom3_neighbours < 2 || @atom3_neighbours > 3;
300              
301 1         3 my( $atom1 ) = grep { is_cis_trans_bond( $moiety, $atom2, $_ ) }
  3         635  
302             @atom2_neighbours;
303 1         425 my( $atom4 ) = grep { is_cis_trans_bond( $moiety, $atom3, $_ ) }
  3         586  
304             @atom3_neighbours;
305 1 50 33     394 next unless $atom1 && $atom4;
306              
307 1 100       8 my( $atom1_para ) = grep { $_ != $atom1 && $_ != $atom3 } @atom2_neighbours;
  3         13  
308 1 100       9 my( $atom4_para ) = grep { $_ != $atom4 && $_ != $atom2 } @atom3_neighbours;
  3         12  
309              
310 1         6 my $is_cis = $moiety->get_edge_attribute( $atom1, $atom2, 'bond' ) ne
311             $moiety->get_edge_attribute( $atom3, $atom4, 'bond' );
312              
313 1 50       408 $is_cis = !$is_cis if $atom1->{number} > $atom2->{number};
314 1 50       6 $is_cis = !$is_cis if $atom3->{number} > $atom4->{number};
315              
316 1 50       8 $moiety->set_edge_attribute( $atom1, $atom4, 'pseudo',
317             $is_cis ? 'cis' : 'trans' );
318 1 50       386 if( $atom1_para ) {
319 1 50       6 $moiety->set_edge_attribute( $atom1_para, $atom4, 'pseudo',
320             $is_cis ? 'trans' : 'cis' );
321             }
322 1 50       365 if( $atom4_para ) {
323 1 50       18 $moiety->set_edge_attribute( $atom1, $atom4_para, 'pseudo',
324             $is_cis ? 'trans' : 'cis' );
325             }
326 1 50 33     374 if( $atom1_para && $atom4_para ) {
327 1 50       8 $moiety->set_edge_attribute( $atom1_para, $atom4_para, 'pseudo',
328             $is_cis ? 'cis' : 'trans' );
329             }
330             }
331              
332             # Unset cis/trans bond markers during second pass
333 2         364 for my $bond ($moiety->edges) {
334 19 100       3905 next unless is_cis_trans_bond( $moiety, @$bond );
335 5         1962 $moiety->delete_edge_attribute( @$bond, 'bond' );
336             }
337             }
338              
339             sub is_pseudoedge
340             {
341 6     6 0 14 my( $moiety, $a, $b ) = @_;
342 6         14 return $moiety->has_edge_attribute( $a, $b, 'pseudo' );
343             }
344              
345             # An "unimportant" double bond is one which has leaf atoms on one of its
346             # sides and both of these atoms are identical.
347             sub is_unimportant_double_bond
348             {
349 4     4 0 16 my( $moiety, $a, $b ) = @_;
350 4         15 my @a_neighbours = grep { $_ != $b } $moiety->neighbours( $a );
  10         438  
351 4         12 my @b_neighbours = grep { $_ != $a } $moiety->neighbours( $b );
  12         416  
352              
353 4 50 66     37 if( @a_neighbours == 2 &&
354 4     4   788 all { $moiety->degree( $_ ) == 1 } @a_neighbours ) {
355 0 0       0 return 1 if write_SMILES( $a_neighbours[0] ) eq
356             write_SMILES( $a_neighbours[1] );
357             }
358              
359 4 100 66     1280 if( @b_neighbours == 2 &&
360 8     8   1421 all { $moiety->degree( $_ ) == 1 } @b_neighbours ) {
361 2 50       680 return 1 if write_SMILES( $b_neighbours[0] ) eq
362             write_SMILES( $b_neighbours[1] );
363             }
364              
365 2         1205 return;
366             }
367              
368             1;