File Coverage

blib/lib/Chemistry/OpenSMILES.pm
Criterion Covered Total %
statement 125 125 100.0
branch 49 54 90.7
condition 22 27 81.4
subroutine 22 22 100.0
pod 0 10 0.0
total 218 238 91.6


line stmt bran cond sub pod time code
1             package Chemistry::OpenSMILES;
2              
3 21     21   3160 use strict;
  21         82  
  21         622  
4 21     21   104 use warnings;
  21         46  
  21         486  
5 21     21   386 use 5.0100;
  21         80  
6              
7             # ABSTRACT: OpenSMILES format reader and writer
8             our $VERSION = '0.8.4'; # VERSION
9              
10             require Exporter;
11             our @ISA = qw( Exporter );
12             our @EXPORT_OK = qw(
13             clean_chiral_centers
14             is_aromatic
15             is_chiral
16             is_cis_trans_bond
17             is_double_bond
18             is_ring_bond
19             is_single_bond
20             mirror
21             %normal_valence
22             toggle_cistrans
23             );
24              
25 21     21   9711 use Graph::Traversal::BFS;
  21         73038  
  21         892  
26 21     21   152 use List::Util qw(any);
  21         50  
  21         42959  
27              
28             sub is_chiral($);
29             sub is_chiral_tetrahedral($);
30             sub mirror($);
31             sub toggle_cistrans($);
32              
33             our %normal_valence = (
34             B => [ 3 ],
35             C => [ 4 ],
36             N => [ 3, 5 ],
37             O => [ 2 ],
38             P => [ 3, 5 ],
39             S => [ 2, 4, 6 ],
40             F => [ 1 ],
41             Cl => [ 1 ],
42             Br => [ 1 ],
43             I => [ 1 ],
44             c => [ 3 ], # Not from OpenSMILES specification
45             );
46              
47             # Removes chiral setting from tetrahedral chiral centers with less than
48             # four distinct neighbours. Only tetrahedral chiral centers with four atoms
49             # are affected, thus three-atom centers (implying lone pairs) are left
50             # untouched. Returns the affected atoms.
51             #
52             # CAVEAT: disregards anomers
53             # TODO: check other chiral centers
54             sub clean_chiral_centers($$)
55             {
56 7     7 0 5656 my( $moiety, $color_sub ) = @_;
57              
58 7         13 my @affected;
59 7         23 for my $atom ($moiety->vertices) {
60 65 100       268 next unless is_chiral_tetrahedral( $atom );
61              
62 7 100       22 my $hcount = exists $atom->{hcount} ? $atom->{hcount} : 0;
63 7 50       55 next if $moiety->degree($atom) + $hcount != 4;
64              
65 7         4140 my %colors = map { ($color_sub->( $_ ) => 1) }
  28         794  
66             $moiety->neighbours($atom),
67             ( { symbol => 'H' } ) x $hcount;
68 7 100       54 next if scalar keys %colors == 4;
69 5         15 delete $atom->{chirality};
70 5         19 push @affected, $atom;
71             }
72 7         29 return @affected;
73             }
74              
75             sub is_aromatic($)
76             {
77 976     976 0 1971 my( $atom ) = @_;
78 976         4963 return $atom->{symbol} ne ucfirst $atom->{symbol};
79             }
80              
81             sub is_chiral($)
82             {
83 3594     3594 0 6361 my( $what ) = @_;
84 3594 100       7194 if( ref $what eq 'HASH' ) { # Single atom
85 3589         11699 return exists $what->{chirality};
86             } else { # Graph representing moiety
87 5     16   33 return any { is_chiral( $_ ) } $what->vertices;
  16         123  
88             }
89             }
90              
91             sub is_chiral_tetrahedral($)
92             {
93 266     266 0 515 my( $what ) = @_;
94 266 100       530 if( ref $what eq 'HASH' ) { # Single atom
95             # CAVEAT: will fail for allenal configurations of @/@@ in raw mode
96 261   100     1020 return $what->{chirality} && $what->{chirality} =~ /^@@?$/
97             } else { # Graph representing moiety
98 5     19   29 return any { is_chiral_tetrahedral( $_ ) } $what->vertices;
  19         149  
99             }
100             }
101              
102             sub is_cis_trans_bond
103             {
104 61     61 0 15430 my( $moiety, $a, $b ) = @_;
105 61   100     141 return $moiety->has_edge_attribute( $a, $b, 'bond' ) &&
106             $moiety->get_edge_attribute( $a, $b, 'bond' ) =~ /^[\\\/]$/;
107             }
108              
109             sub is_double_bond
110             {
111 64     64 0 4197 my( $moiety, $a, $b ) = @_;
112 64   100     144 return $moiety->has_edge_attribute( $a, $b, 'bond' ) &&
113             $moiety->get_edge_attribute( $a, $b, 'bond' ) eq '=';
114             }
115              
116             # A bond is deemed to be a ring bond if there is an alternative path
117             # joining its atoms not including the bond in consideration and this
118             # alternative path is not longer than 7 bonds. This is based on
119             # O'Boyle (2012) saying that Open Babel SMILES writer does not output
120             # cis/trans markers for double bonds in rings of size 8 or less due to
121             # them implicilty being cis bonds.
122             sub is_ring_bond
123             {
124 4     4 0 1594 my( $moiety, $a, $b, $max_length ) = @_;
125              
126 4 50       12 $max_length = 7 unless $max_length;
127              
128 4         10 my $copy = $moiety->copy;
129 4         8165 $copy->delete_edge( $a, $b );
130              
131 4         625 my %distance = ( $a => 0 );
132             my $record_length = sub {
133             # Record number of bonds between $a and any other vertex
134 12     12   4674 my( $u, $v ) = @_;
135 12         25 my @seen = grep { exists $distance{$_} } ( $u, $v );
  24         65  
136 12 50       33 return if @seen != 1; # Can this be 0?
137              
138 12         23 my $seen = shift @seen;
139 12         21 my( $unseen ) = grep { !exists $distance{$_} } ( $u, $v );
  24         57  
140 12         39 $distance{$unseen} = $distance{$seen} + 1;
141 4         21 };
142              
143             my $operations = {
144 4     4   188 start => sub { return $a },
145 4         19 tree_edge => $record_length,
146             };
147              
148 4         31 my $traversal = Graph::Traversal::BFS->new( $copy, %$operations );
149 4         1297 $traversal->bfs;
150              
151             # $distance{$b} is the distance in bonds. In 8-member rings adjacent
152             # ring atoms have distance of 7 bonds.
153 4   33     2463 return exists $distance{$b} && $distance{$b} <= $max_length;
154             }
155              
156             sub is_single_bond
157             {
158 117     117 0 236 my( $moiety, $a, $b ) = @_;
159 117   66     248 return !$moiety->has_edge_attribute( $a, $b, 'bond' ) ||
160             $moiety->get_edge_attribute( $a, $b, 'bond' ) eq '-';
161             }
162              
163             sub mirror($)
164             {
165 30     30 0 49 my( $what ) = @_;
166 30 100       63 if( ref $what eq 'HASH' ) { # Single atom
167             # FIXME: currently dealing only with tetrahedral chiral centers
168 25 100       36 if( is_chiral_tetrahedral( $what ) ) {
169 2 100       9 $what->{chirality} = $what->{chirality} eq '@' ? '@@' : '@';
170             }
171             } else {
172 5         16 for ($what->vertices) {
173 25         162 mirror( $_ );
174             }
175             }
176             }
177              
178             sub toggle_cistrans($)
179             {
180 24 100   24 0 104 return $_[0] eq '/' ? '\\' : '/';
181             }
182              
183             # CAVEAT: requires output from non-raw parsing due issue similar to GH#2
184             sub _validate($@)
185             {
186 13     13   2248 my( $moiety, $color_sub ) = @_;
187              
188 13         39 for my $atom (sort { $a->{number} <=> $b->{number} } $moiety->vertices) {
  244         732  
189             # TODO: AL chiral centers also have to be checked
190 141 100       37476 if( is_chiral_tetrahedral( $atom ) ) {
191 6 100 33     22 if( $moiety->degree($atom) < 3 ) {
    50          
192             # FIXME: there should be a strict mode to forbid lone pairs
193             # FIXME: tetrahedral allenes are false-positives
194             warn sprintf 'chiral center %s(%d) has %d bonds while ' .
195             'at least 3 is required' . "\n",
196             $atom->{symbol},
197             $atom->{number},
198 1         238 $moiety->degree($atom);
199             } elsif( $moiety->degree($atom) == 4 && $color_sub ) {
200 5         6060 my %colors = map { ($color_sub->( $_ ) => 1) }
  20         552  
201             $moiety->neighbours($atom);
202 5 100       45 if( scalar keys %colors != 4 ) {
203             # FIXME: anomers are false-positives, see COD entry
204             # 7111036
205             warn sprintf 'tetrahedral chiral setting for %s(%d) ' .
206             'is not needed as not all 4 neighbours ' .
207             'are distinct' . "\n",
208             $atom->{symbol},
209 3         36 $atom->{number};
210             }
211             }
212             }
213              
214             # Warn about unmarked tetrahedral chiral centers
215 141 100 100     539 if( !is_chiral( $atom ) && $moiety->degree( $atom ) == 4 ) {
216 24         14929 my $color_sub_local = $color_sub;
217 24 100       55 if( !$color_sub_local ) {
218 5     20   21 $color_sub_local = sub { return $_[0]->{symbol} };
  20         80  
219             }
220 24         97 my %colors = map { ($color_sub_local->( $_ ) => 1) }
  96         2705  
221             $moiety->neighbours($atom);
222 24 100       185 if( scalar keys %colors == 4 ) {
223             warn sprintf 'atom %s(%d) has 4 distinct neighbours, ' .
224             'but does not have a chiral setting' . "\n",
225             $atom->{symbol},
226 3         71 $atom->{number};
227             }
228             }
229             }
230              
231             # FIXME: establish deterministic order
232 13         4076 for my $bond ($moiety->edges) {
233 133         22744 my( $A, $B ) = sort { $a->{number} <=> $b->{number} } @$bond;
  133         350  
234 133 100       340 if( $A eq $B ) {
235             warn sprintf 'atom %s(%d) has bond to itself' . "\n",
236             $A->{symbol},
237 1         11 $A->{number};
238             }
239              
240 133 100       303 if( $moiety->has_edge_attribute( @$bond, 'bond' ) ) {
241 6         1167 my $bond_type = $moiety->get_edge_attribute( @$bond, 'bond' );
242 6 100       1198 if( $bond_type eq '=' ) {
    50          
243             # Test cis/trans bonds
244             # FIXME: Not sure how to check which definition belongs to
245             # which of the double bonds. See COD entry 1547257.
246 1         9 for my $atom (@$bond) {
247 2         15 my %bond_types = _neighbours_per_bond_type( $moiety,
248             $atom );
249 2         6 foreach ('/', '\\') {
250 4 100 100     35 if( $bond_types{$_} && @{$bond_types{$_}} > 1 ) {
  3         16  
251             warn sprintf 'atom %s(%d) has %d bonds of type \'%s\', ' .
252             'cis/trans definitions must not conflict' . "\n",
253             $atom->{symbol},
254             $atom->{number},
255 1         7 scalar @{$bond_types{$_}},
  1         12  
256             $_;
257             }
258             }
259             }
260             } elsif( $bond_type =~ /^[\\\/]$/ ) {
261             # Test if next to a double bond.
262             # FIXME: Yields false-positives for delocalised bonds,
263             # see COD entry 1501863.
264             # FIXME: What about triple bond? See COD entry 4103591.
265 5         10 my %bond_types;
266 5         10 for my $atom (@$bond) {
267 10         23 my %bond_types_now = _neighbours_per_bond_type( $moiety,
268             $atom );
269 10         24 for my $key (keys %bond_types_now) {
270 22         30 push @{$bond_types{$key}}, @{$bond_types_now{$key}};
  22         45  
  22         60  
271             }
272             }
273 5 100       21 if( !$bond_types{'='} ) {
274             warn sprintf 'cis/trans bond is defined between atoms ' .
275             '%s(%d) and %s(%d), but neither of them ' .
276             'is attached to a double bond' . "\n",
277             $A->{symbol},
278             $A->{number},
279             $B->{symbol},
280 1         14 $B->{number};
281             }
282             }
283             }
284             }
285              
286             # TODO: SP, TB, OH chiral centers
287             }
288              
289             sub _neighbours_per_bond_type
290             {
291 12     12   37 my( $moiety, $atom ) = @_;
292 12         18 my %bond_types;
293 12         38 for my $neighbour ($moiety->neighbours($atom)) {
294 38         1300 my $bond_type;
295 38 100       89 if( $moiety->has_edge_attribute( $atom, $neighbour, 'bond' ) ) {
296 24         4672 $bond_type = $moiety->get_edge_attribute( $atom, $neighbour, 'bond' );
297             } else {
298 14         3121 $bond_type = '';
299             }
300 38 100 100     4619 if( $bond_type =~ /^[\\\/]$/ &&
301             $atom->{number} > $neighbour->{number} ) {
302 7         19 $bond_type = toggle_cistrans $bond_type;
303             }
304 38         58 push @{$bond_types{$bond_type}}, $neighbour;
  38         110  
305             }
306 12         60 return %bond_types;
307             }
308              
309             1;
310              
311             __END__