line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Chemistry::File::SMILES; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
$VERSION = "0.47"; |
4
|
|
|
|
|
|
|
# $Id: SMILES.pm,v 1.16 2009/05/10 20:31:08 itubert Exp $ |
5
|
|
|
|
|
|
|
|
6
|
8
|
|
|
8
|
|
136676
|
use 5.006; |
|
8
|
|
|
|
|
30
|
|
|
8
|
|
|
|
|
604
|
|
7
|
8
|
|
|
8
|
|
50
|
use strict; |
|
8
|
|
|
|
|
19
|
|
|
8
|
|
|
|
|
775
|
|
8
|
8
|
|
|
8
|
|
46
|
use warnings; |
|
8
|
|
|
|
|
20
|
|
|
8
|
|
|
|
|
373
|
|
9
|
8
|
|
|
8
|
|
43
|
no warnings 'recursion'; |
|
8
|
|
|
|
|
21
|
|
|
8
|
|
|
|
|
584
|
|
10
|
8
|
|
|
8
|
|
47
|
use base "Chemistry::File"; |
|
8
|
|
|
|
|
15
|
|
|
8
|
|
|
|
|
41531
|
|
11
|
8
|
|
|
8
|
|
429306
|
use Chemistry::Mol; |
|
8
|
|
|
|
|
725461
|
|
|
8
|
|
|
|
|
1251
|
|
12
|
8
|
|
|
8
|
|
15014
|
use Chemistry::Bond::Find 'assign_bond_orders'; |
|
8
|
|
|
|
|
248080
|
|
|
8
|
|
|
|
|
1029
|
|
13
|
8
|
|
|
8
|
|
500
|
use List::Util 'first'; |
|
8
|
|
|
|
|
17
|
|
|
8
|
|
|
|
|
6518
|
|
14
|
8
|
|
|
8
|
|
54
|
use Carp; |
|
8
|
|
|
|
|
15
|
|
|
8
|
|
|
|
|
21312
|
|
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
=head1 NAME |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
Chemistry::File::SMILES - SMILES linear notation parser/writer |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
=head1 SYNOPSIS |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
#!/usr/bin/perl |
24
|
|
|
|
|
|
|
use Chemistry::File::SMILES; |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# parse a SMILES string |
27
|
|
|
|
|
|
|
my $s = 'C1CC1(=O)[O-]'; |
28
|
|
|
|
|
|
|
my $mol = Chemistry::Mol->parse($s, format => 'smiles'); |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
# print a SMILES string |
31
|
|
|
|
|
|
|
print $mol->print(format => 'smiles'); |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
# print a unique (canonical) SMILES string |
34
|
|
|
|
|
|
|
print $mol->print(format => 'smiles', unique => 1); |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
# parse a SMILES file |
37
|
|
|
|
|
|
|
my @mols = Chemistry::Mol->read("file.smi", format => 'smiles'); |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
# write a multiline SMILES file |
40
|
|
|
|
|
|
|
Chemistry::Mol->write("file.smi", mols => \@mols); |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
=head1 DESCRIPTION |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
This module parses a SMILES (Simplified Molecular Input Line Entry |
46
|
|
|
|
|
|
|
Specification) string. This is a File I/O driver for the PerlMol project. |
47
|
|
|
|
|
|
|
L<http://www.perlmol.org/>. It registers the 'smiles' format with |
48
|
|
|
|
|
|
|
Chemistry::Mol. |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
This parser interprets anything after whitespace as the molecule's name; |
51
|
|
|
|
|
|
|
for example, when the following SMILES string is parsed, $mol->name will be |
52
|
|
|
|
|
|
|
set to "Methyl chloride": |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
CCl Methyl chloride |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
The name is not included by default on output. However, if the C<name> option |
57
|
|
|
|
|
|
|
is defined, the name will be included after the SMILES string, separated by a |
58
|
|
|
|
|
|
|
tab. |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
print $mol->print(format => 'smiles', name => 1); |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
=head2 Multiline SMILES and SMILES files |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
A file or string can contain multiple molecules, one per line. |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
CCl Methyl chloride |
67
|
|
|
|
|
|
|
CO Methanol |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
Files with the extension '.smi' are assumed to have this format. |
70
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=head2 Atom Mapping Numbers |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
As an extension for reaction processing, SMILES strings may have atom mapping |
74
|
|
|
|
|
|
|
numbers, which are introduced after a colon in a bracketed atom. For example, |
75
|
|
|
|
|
|
|
[C:1]. The mapping number need not be unique. This module reads the mapping |
76
|
|
|
|
|
|
|
numbers and stores them as the name of the atom ($atom->name). |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
On output, atom names are not included by default. See the C<number> and |
79
|
|
|
|
|
|
|
C<auto_number> options below for ways of including them. |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
=head1 OPTIONS |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
The following options are supported in addition to the options mentioned for |
84
|
|
|
|
|
|
|
L<Chemistry::File>, such as C<mol_class>, C<format>, and C<fatal>. |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=over |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
=item aromatic |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
On output, detect aromatic atoms and bonds by means of the Chemistry::Ring |
91
|
|
|
|
|
|
|
module, and represent the organic aromatic atoms with lowercase symbols. |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
=item unique |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
When used on output, canonicalize the structure if it hasn't been canonicalized |
96
|
|
|
|
|
|
|
already and generate a unique SMILES string. This option implies "aromatic". |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
=item number |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
For atoms that have a defined name, print the name as the "atom number". For |
101
|
|
|
|
|
|
|
example, if an ethanol molecule has the name "42" for the oxygen atom and the |
102
|
|
|
|
|
|
|
other atoms have undefined names, the output would be: |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
CC[OH:42] |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
=item auto_number |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
When used on output, number all the atoms explicitly and sequentially. The |
109
|
|
|
|
|
|
|
output for ethanol would look something like this: |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
[CH3:1][CH2:2][OH:3] |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
=item name |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
Include the molecule name on output, as described in the previous section. |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
=item kekulize |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
When used on input, assign single or double bond orders to "aromatic" or |
120
|
|
|
|
|
|
|
otherwise unspecified bonds (i.e., generate the Kekule structure). If false, |
121
|
|
|
|
|
|
|
the bond orders will remain single. This option is true by default. This uses |
122
|
|
|
|
|
|
|
C<assign_bond_orders> from the L<Chemistry::Bond::Find> module. |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=back |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=cut |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
# INITIALIZATION
# Register this driver with Chemistry::Mol under the format name 'smiles'.
Chemistry::Mol->register_format('smiles');

# A single parser instance, created once at load time and used by read_mol().
my $Smiles_parser = __PACKAGE__->new_parser();
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
#=begin comment |
133
|
|
|
|
|
|
|
# |
134
|
|
|
|
|
|
|
#=over |
135
|
|
|
|
|
|
|
# |
136
|
|
|
|
|
|
|
#=cut |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
# Decide whether a file is in SMILES format. Only the file name is examined:
# every argument is handed on to name_is() and its verdict is returned.
sub file_is {
    my ($self, @args) = @_;
    return $self->name_is(@args);
}
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
# Return true (1) when $name looks like a SMILES file name, false (0)
# otherwise.
#
# The extension must be the last component of the name: ".smi" (or the long
# form ".smiles"), optionally followed by ".gz", compared case-insensitively.
# The previous unanchored /\.smi/ also accepted unrelated names such as
# "notes.smith.txt". An undefined name is simply "not a SMILES file" instead
# of triggering an uninitialized-value warning.
sub name_is {
    my ($self, $name) = @_;
    return 0 unless defined $name;
    return $name =~ m{ \. smi (?:les)? (?: \. gz )? \z }xi ? 1 : 0;
}
147
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
# Read the raw text of one molecule from $fh without parsing it. In this
# format one molecule is one line, so exactly one line is consumed and
# returned (undef at end of file), whatever the calling context.
sub slurp_mol {
    my ($self, $fh) = @_;
    my $record = <$fh>;
    return $record;
}
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
# Read one line from $fh and turn it into a molecule object.
#
# Options (passed as a flat key/value list after $fh):
#   mol_class - class used to build the molecule (default "Chemistry::Mol")
#   kekulize  - true by default; when true, assign_bond_orders() is run on
#               the finished molecule
# The whole option hash is also handed to the parser (which reads "fatal").
#
# Returns nothing at end of file. Otherwise returns a molecule; when the
# SMILES string cannot be parsed a warning is issued and a fresh, empty
# molecule is returned instead.
#
# The first whitespace-separated field is the SMILES string; the rest of the
# line, if any, becomes the molecule name.
sub read_mol {
    my ($self, $fh, %opts) = @_;
    %opts = (kekulize => 1, %opts);
    my $mol_class = $opts{mol_class} || "Chemistry::Mol";

    my $line = <$fh>;
    return unless defined $line;
    $line =~ tr/\r\n//d;
    my ($smiles, $name) = split " ", $line, 2;

    # A blank line yields no fields at all; treat it as an empty SMILES
    # string so the parser is never handed an undefined value.
    $smiles = '' unless defined $smiles;

    my $mol = $mol_class->new;
    unless ($Smiles_parser->parse($smiles, $mol, \%opts)) {
        warn "error parsing SMILES line '$line'\n";
        # discard whatever was built before the error
        $mol = $mol_class->new;
    }
    $mol->name($name);
    $self->add_implicit_hydrogens($mol);
    if ($opts{kekulize}) {
        assign_bond_orders($mol, method => "itub", use_coords => 0,
            scratch => 0, charges => 0);
    }
    $mol;
}
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
### The contents of the original Chemistry::Smiles module start below |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
# Element symbols allowed inside a bracketed atom. The lowercase (aromatic)
# symbols come first; the rest are in reverse alphabetical order so that
# every two-letter symbol is tried before the one-letter symbol it starts
# with (e.g. "Cl" before "C"). "*", "R" and "X" are accepted as wildcard
# symbols. Each symbol is listed once.
my $Symbol = qr/
    s|p|o|n|c|b|Zr|Zn|Yb|Y|Xe|W|V|U|Tm|Tl|Ti|Th|
    Te|Tc|Tb|Ta|Sr|Sn|Sm|Si|Sg|Se|Sc|Sb|S|Ru|Rn|Rh|Rf|Re|Rb|Ra|
    Pu|Pt|Pr|Po|Pm|Pd|Pb|Pa|P|Os|O|Np|No|Ni|Ne|Nd|Nb|Na|N|Mt|
    Mo|Mn|Mg|Md|Lu|Lr|Li|La|Kr|K|Ir|In|I|Hs|Ho|Hg|Hf|He|H|Ge|
    Gd|Ga|Fr|Fm|Fe|F|Eu|Es|Er|Dy|Ds|Db|Cu|Cs|Cr|Co|Cm|Cl|Cf|Ce|
    Cd|Ca|C|Br|Bk|Bi|Bh|Be|Ba|B|Au|At|As|Ar|Am|Al|Ag|Ac|\*|R|X
/x;

# Symbols that may be written without brackets.
my $Simple_symbol = qr/Br|Cl|B|C|N|O|P|S|F|I|H|s|p|o|n|c|b/;

# An optional bond symbol; matches the empty string when none is present.
my $Bond = qr/(?:[-=#:.\/\\])?/;
my $Simple_atom = qr/($Simple_symbol)/; #3
my $Complex_atom = qr/
    (?:
        \[                          #begin atom
        (\d*)                       #4 isotope
        ($Symbol)                   #5 symbol
        (\@{0,2})                   #6 chirality
        (?:(H\d*))?                 #7 H-count
        (\+{2,}|-{2,}|\+\d*|-\d*)?  #8 charge
        (?::(\d+))?                 #9 name
        \]                          #end atom
    )
/x;

# Zero or more ring-closure labels, each optionally preceded by a bond
# symbol. Deliberately free of capture groups so the group numbers noted in
# $Chain stay exact; the individual labels are pulled apart with $digits_re.
my $Digits = qr/(?:$Bond(?:\d|%\d\d))*/;

# One token of a SMILES string, anchored where the previous match ended:
# an atom with its leading bond and trailing ring closures, an opening or
# closing parenthesis, or (last alternative) any unrecognised remainder,
# which the parser reports as an error.
my $Chain = qr/
    \G(                                     #1
        (?:
            ($Bond)                         #2
            (?:$Simple_atom|$Complex_atom)  #3-9
            ($Digits)                       #10
        )
        |\(
        |\)
        |.+
    )
/x;

# A single ring-closure label: $1 is the bond symbol (possibly empty) and
# $2 the label, either one digit or "%" followed by two digits.
my $digits_re = qr/($Bond)(\%\d\d|\d)/;
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
# Map from the bond symbol found in the SMILES string to the bond order
# given to the new bond. add_bond() creates no bond at all when the looked-up
# order is false, which is how "." (disconnected) is handled.
#
# ":" (explicit aromatic bond) is accepted by the bond pattern and therefore
# needs an entry here; without one, add_bond() treated it like "." and
# silently dropped the bond. It is entered as order 1, the same as an
# unspecified bond.
my %type_to_order = (
    '-'  => 1,
    '='  => 2,
    '#'  => 3,
    '/'  => 1,
    '\\' => 1,
    ':'  => 1, # aromatic; order 1 is a placeholder, as for ''
    ''   => 1, # not strictly true
    '.'  => 0,
);
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
# Elements that may be written without brackets. For the uppercase symbols
# the value is the valence the hydrogen-count routines subtract from; the
# lowercase (aromatic) entries all carry the value 1.
my %ORGANIC_ELEMS = (
    Br => 1,
    Cl => 1,
    B  => 3,
    C  => 4,
    N  => 3,
    O  => 2,
    P  => 3,
    S  => 2,
    F  => 1,
    I  => 1,
    s  => 1,
    p  => 1,
    o  => 1,
    n  => 1,
    c  => 1,
    b  => 1,
);
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
#=item Chemistry::Smiles->new([add_atom => \&sub1, add_bond => \&sub2]) |
238
|
|
|
|
|
|
|
# |
239
|
|
|
|
|
|
|
#Create a SMILES parser. If the add_atom and add_bond subroutine references |
240
|
|
|
|
|
|
|
#are given, they will be called whenever an atom or a bond needs to be added |
241
|
|
|
|
|
|
|
#to the molecule. If they are not specified, default methods, which |
242
|
|
|
|
|
|
|
#create a Chemistry::Mol object, will be used. |
243
|
|
|
|
|
|
|
# |
244
|
|
|
|
|
|
|
#=cut |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
# Build a parser object. The optional add_atom / add_bond arguments are code
# references called whenever an atom or bond has to be created; each falls
# back to this module's own add_atom() / add_bond() when missing or false.
sub new_parser {
    my ($class, %opts) = @_;
    my %callbacks = (
        add_atom => $opts{add_atom} || \&add_atom,
        add_bond => $opts{add_bond} || \&add_bond,
    );
    return bless \%callbacks, $class;
}
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
#=item $obj->parse($string, $mol) |
256
|
|
|
|
|
|
|
# |
257
|
|
|
|
|
|
|
#Parse a Smiles $string. $mol is a "molecule state object". It can be anything; |
258
|
|
|
|
|
|
|
#the parser doesn't do anything with it except sending it as the first parameter |
259
|
|
|
|
|
|
|
#to the callback functions. If callback functions were not provided when |
260
|
|
|
|
|
|
|
#constructing the parser object, $mol must be a Chemistry::Mol object, because |
261
|
|
|
|
|
|
|
#that's what the default callback functions require. |
262
|
|
|
|
|
|
|
# |
263
|
|
|
|
|
|
|
#=cut |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
# Parse the SMILES string $s, reporting every atom and bond through the
# parser's add_atom / add_bond callbacks. $mol is passed through to those
# callbacks untouched.
#
# Returns $mol on success. On a parse error it croaks with the error message
# when $opts->{fatal} is true, and otherwise returns nothing.
#
# Changes from the previous version:
#  - the error is captured right after the eval, before any cleanup code
#    runs, so cleanup cannot clobber $@ and hide a failure;
#  - the ring-closure table is released together with the branch stack, so
#    atoms of unclosed rings are not held until the next parse;
#  - an undefined $s is treated as the empty string;
#  - only 'uninitialized' warnings are silenced where optional captures are
#    used, instead of all warnings.
sub parse {
    my ($self, $s, $mol, $opts) = @_;
    $s = '' unless defined $s;
    $self->{stack}  = [ undef ];
    $self->{digits} = {};

    my $ok = eval {
        while ($s =~ /$Chain/g) {
            my ($all, $bnd, $sym, $iso, $sym2, $chir, $hcnt, $chg, $name, $dig)
                = ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10);
            if ($all eq '(') {
                $self->start_branch();
            } elsif ($all eq ')') {
                $self->end_branch();
            } elsif ($sym) { # Simple atom
                no warnings 'uninitialized';
                my @digs = parse_digits($dig);
                $self->atom($mol, $bnd, '', $sym, '', undef, '', \@digs);
            } elsif ($sym2) { # Complex atom
                # isotope, H-count, charge and name are optional captures
                no warnings 'uninitialized';
                my @digs = parse_digits($dig);
                if ($hcnt eq 'H') {
                    $hcnt = 1;         # a bare "H" means one hydrogen
                } else {
                    $hcnt =~ s/H//;    # "H3" -> "3"
                }
                # "+", "--", ... carry no number: the magnitude is the
                # number of signs. Forms such as "+2" are left as written.
                unless ($chg =~ /\d/) {
                    $chg = ($chg =~ /-/) ? -length($chg) : length($chg);
                }
                $self->atom($mol, $bnd, $iso, $sym2, $chir, $hcnt || 0,
                    $chg || 0, \@digs, $name);
            } else {
                die "SMILES ERROR: '$all in $s'\n";
            }
        }
        1;
    };
    # Grab the error before anything else can run and overwrite $@.
    my $error = $ok ? '' : ($@ || "SMILES ERROR: unknown error\n");

    # clean up to avoid memory leak
    $self->{stack}  = undef;
    $self->{digits} = {};

    if ($error) {
        croak $error if $opts->{fatal};
        return;
    }
    $mol;
}
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
# Split the ring-closure part of an atom token into a list of hash
# references, one per closure: {bnd => bond symbol (possibly empty),
# dig => closure label}. A false argument (undef, '' or '0') produces an
# empty list.
sub parse_digits {
    my ($dig) = @_;
    my @closures;
    if ($dig) {
        while ($dig =~ /$digits_re/g) {
            push @closures, { bnd => $1, dig => $2 };
        }
    }
    @closures;
}
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
# Handle one atom token: create the atom through the add_atom callback, bond
# it to the atom on top of the branch stack (if there is one), process its
# ring-closure labels, and finally make it the new top of the stack.
#
# The first occurrence of a label records the atom and the bond symbol
# written there; the second occurrence creates the ring bond and frees the
# label. When both ends carry a bond symbol they must agree, otherwise the
# routine dies.
sub atom {
    my ($self, $mol, $bnd, $iso, $sym, $chir, $hcount, $chg, $digs, $name) = @_;

    my $new_atom = $self->{add_atom}($mol, $iso, $sym, $chir, $hcount, $chg, $name);

    my $previous = $self->{stack}[-1];
    $self->{add_bond}($mol, $bnd, $previous, $new_atom) if $previous;

    my $open = $self->{digits};
    for my $closure (@$digs) {
        my ($label, $bond_here) = @{$closure}{qw(dig bnd)};
        my $pending = $open->{$label};

        unless ($pending) {
            # first time this label is seen: remember where the ring opens
            $open->{$label} = { atom => $new_atom, bnd => $bond_here };
            next;
        }

        if ($bond_here && $pending->{bnd} && $bond_here ne $pending->{bnd}) {
            die "SMILES: Inconsistent ring closure\n";
        }
        $self->{add_bond}($mol, $bond_here || $pending->{bnd},
            $pending->{atom}, $new_atom);
        delete $open->{$label};
    }

    $self->{stack}[-1] = $new_atom;
}
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
#=back |
346
|
|
|
|
|
|
|
# |
347
|
|
|
|
|
|
|
#=head1 CALLBACK FUNCTIONS |
348
|
|
|
|
|
|
|
# |
349
|
|
|
|
|
|
|
#=over |
350
|
|
|
|
|
|
|
# |
351
|
|
|
|
|
|
|
#=item $atom = add_atom($mol, $iso, $sym, $chir, $hcount, $chg) |
352
|
|
|
|
|
|
|
# |
353
|
|
|
|
|
|
|
#Called by the parser whenever an atom is found. The first parameter is the |
354
|
|
|
|
|
|
|
#state object given to $obj->parse(). The other parameters are the isotope, |
355
|
|
|
|
|
|
|
#symbol, chirality, hydrogen count, and charge of the atom. Only the symbol is |
356
|
|
|
|
|
|
|
#guaranteed to be defined. Mnemonic: the parameters are given in the same order |
357
|
|
|
|
|
|
|
#that is used in a SMILES string (such as [18OH-]). This callback is expected to |
358
|
|
|
|
|
|
|
#return something that uniquely identifies the atom that was created (it might |
359
|
|
|
|
|
|
|
#be a number, a string, or an object). |
360
|
|
|
|
|
|
|
# |
361
|
|
|
|
|
|
|
#=cut |
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
# Default add_atom callback |
364
|
|
|
|
|
|
|
# Default add_atom callback: create the atom in $mol and copy over whatever
# the SMILES token specified. Returns the new atom object.
#
#   $iso    - isotope; when true it is stored both as the 'smiles/isotope'
#             attribute and as the atom's mass
#   $sym    - element symbol; a lowercase first letter marks the atom with
#             the 'smiles/aromatic' attribute
#   $chir   - chirality marker, stored as 'smiles/chirality' when true
#   $hcount - explicit hydrogen count, set whenever it is defined (0 counts)
#   $chg    - formal charge, set when non-zero
#   $name   - atom name, passed to the atom constructor
sub add_atom {
    my ($mol, $iso, $sym, $chir, $hcount, $chg, $name) = @_;

    my $atom = $mol->new_atom(symbol => ucfirst $sym, name => $name);

    if ($iso) {
        $atom->attr('smiles/isotope' => $iso);
        $atom->mass($iso);
    }
    $atom->attr('smiles/chirality' => $chir) if $chir;
    $atom->hydrogens($hcount)                if defined $hcount;
    $atom->formal_charge($chg)               if $chg;
    $atom->attr("smiles/aromatic", 1)        if $sym =~ /^[a-z]/;

    $atom;
}
377
|
|
|
|
|
|
|
|
378
|
|
|
|
|
|
|
#=item add_bond($mol, $type, $a1, $a2) |
379
|
|
|
|
|
|
|
# |
380
|
|
|
|
|
|
|
#Called by the parser whenever a bond needs to be created. The first parameter |
381
|
|
|
|
|
|
|
#is the state object given to $obj->parse(). The other parameters are the bond |
382
|
|
|
|
|
|
|
#type and the two atoms that need to be bonded. The atoms are identified using |
383
|
|
|
|
|
|
|
#the return values from the add_atom() callback. |
384
|
|
|
|
|
|
|
# |
385
|
|
|
|
|
|
|
#=back |
386
|
|
|
|
|
|
|
# |
387
|
|
|
|
|
|
|
#=end comment |
388
|
|
|
|
|
|
|
# |
389
|
|
|
|
|
|
|
#=cut |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
# Default add_bond callback |
392
|
|
|
|
|
|
|
# Default add_bond callback: create a bond of SMILES type $type between
# atoms $a1 and $a2 in $mol and record the type in the 'smiles/type'
# attribute. Returns the bond, or nothing when the type maps to no order or
# to order 0, in which case no bond is created.
sub add_bond {
    my ($mol, $type, $a1, $a2) = @_;

    my $order = $type_to_order{$type};
    return unless $order; # don't add bonds of order 0

    my $bond = $mol->new_bond(
        type  => $type,
        atoms => [$a1, $a2],
        order => $order,
    );
    $bond->attr("smiles/type" => $type);
    $bond;
}
399
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
# Called for "(": duplicate the top of the atom stack, so the branch grows
# from the same atom as the chain it leaves.
sub start_branch {
    my ($self) = @_;
    my $stack = $self->{stack};
    push @$stack, $stack->[-1];
}
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
# Called for ")": drop the top of the atom stack, returning to the atom the
# branch started from. The removed entry is returned.
sub end_branch {
    my ($self) = @_;
    my $stack = $self->{stack};
    pop @$stack;
}
411
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
# returns the number of hydrogens for an atom, assuming it has |
413
|
|
|
|
|
|
|
# no charge or radical (because those require an explicit H-count anyway) |
414
|
|
|
|
|
|
|
# Number of hydrogens implied for an unbracketed atom: the table valence of
# its element minus the valence already in use. An aromatic C or N gets one
# hydrogen less, and the result is never negative. An element missing from
# the table counts as valence 0 (the uninitialized warning is silenced).
sub calc_implicit_hydrogens {
    my ($self, $atom) = @_;
    no warnings 'uninitialized';

    my $missing = $ORGANIC_ELEMS{$atom->symbol} - $atom->valence;
    $missing -= 1
        if $atom->attr("smiles/aromatic") && $atom->symbol =~ /^[CN]$/;

    return $missing < 0 ? 0 : $missing;
}
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
# returns the number of hydrogens that an atom should have, |
426
|
|
|
|
|
|
|
# taking into account that it may or may not have a few hydrogens |
427
|
|
|
|
|
|
|
# defined already. This assumes that the atom is neutral and not radical |
428
|
|
|
|
|
|
|
# Number of hydrogens the atom would be given implicitly: table valence of
# the element, minus the valence in use, plus the hydrogens the atom already
# has (total_hydrogens). Never negative.
sub calc_implicit_hydrogens_2 {
    my ($self, $atom) = @_;

    my $expected = $ORGANIC_ELEMS{$atom->symbol}
                 - $atom->valence
                 + $atom->total_hydrogens;

    return $expected < 0 ? 0 : $expected;
}
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
# Give every atom of $mol that has no hydrogen count yet the count computed
# by calc_implicit_hydrogens(). Atoms with a defined count, including an
# explicit 0, are left alone.
sub add_implicit_hydrogens {
    my ($self, $mol) = @_;
    for my $atom ($mol->atoms) {
        next if defined $atom->hydrogens;
        $atom->hydrogens($self->calc_implicit_hydrogens($atom));
    }
}
446
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
##### SMILES WRITER ######## |
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
# Produce the SMILES text for one molecule ($mol_ref) or, when the "mols"
# option holds an array reference, for each molecule in that list, one per
# line.
#
# Options read here: mols, auto_number, number, unique, aromatic, name.
#   auto_number - name the atoms 1, 2, 3, ... and turn on "number"
#   unique      - canonicalize (unless the first atom already has a
#                 'canon/class' attribute), sort atoms by canonical class,
#                 and turn on "aromatic"
#   aromatic    - run Chemistry::Ring::aromatize_mol on the molecule
#   name        - append a tab and the molecule name
#
# Each molecule is cloned before it is modified. Disconnected fragments are
# joined with ".".
#
# Always returns a defined string: an empty molecule list gives '' (it used
# to give undef), and a molecule without a name contributes an empty name
# instead of an uninitialized-value warning.
sub write_string {
    my ($self, $mol_ref, %opts) = @_;

    my $eol;
    my @mols;
    if ($opts{mols}) {
        @mols = @{$opts{mols}};
        $eol = "\n";
    } else {
        @mols = $mol_ref;
        $eol = "";
    }

    my $smiles = '';
    for my $mol (@mols) {
        # work on a copy; hydrogens are folded into their heavy atoms
        $mol = $mol->clone;
        $mol->collapse_hydrogens;
        my @atoms = $mol->atoms;

        if (@atoms) {
            my $i;
            if ($opts{auto_number}) {
                $_->name(++$i) for @atoms;
                $opts{number} = 1;
            }
            if ($opts{unique}) {
                unless ($atoms[0]->attr("canon/class")) {
                    require Chemistry::Canonicalize;
                    Chemistry::Canonicalize::canonicalize($mol);
                }
                $opts{aromatic} = 1; # all unique smiles have to be aromatic
                @atoms = sort {
                    $a->attr("canon/class") <=> $b->attr("canon/class")
                } @atoms;
            }

            if ($opts{aromatic}) {
                require Chemistry::Ring;
                Chemistry::Ring::aromatize_mol($mol);
            }

            my $visited = {};
            my @s;
            for my $atom (@atoms) {
                # every unvisited atom starts a new disconnected fragment
                next if $visited->{$atom};
                my $ring_atoms = {};

                # first pass to find and number the ring bonds
                $self->find_ring_bonds($mol, \%opts, $atom, undef, {}, $ring_atoms);

                # second pass to actually generate the SMILES string
                push @s, $self->branch($mol, \%opts, $atom, undef, $visited, $ring_atoms);
            }
            $smiles .= join '.', @s;
        }

        if ($opts{name}) {
            my $name = $mol->name;
            $smiles .= "\t" . (defined $name ? $name : '');
        }
        $smiles .= $eol;
    }
    return $smiles;
}
512
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
# First pass of the writer: depth-first walk from $atom that counts, in
# %$ring_atoms, how many ring-closing bonds end at each atom. A bond closes
# a ring when it leads to an atom already seen in this walk; the count is
# kept on that earlier atom. %$visited is keyed by both atoms and bonds.
sub find_ring_bonds {
    my ($self, $mol, $opts, $atom, $from_bond, $visited, $ring_atoms) = @_;

    $visited->{$atom} = 1;
    for my $edge ($self->sorted_bonds_neighbors($atom, $opts)) {
        my ($bond, $neighbor) = @{$edge}{qw(bond to)};
        next if $visited->{$bond};
        $visited->{$bond} = 1;

        if ($visited->{$neighbor}) { # closed ring
            $ring_atoms->{$neighbor}++;
            next;
        }
        $self->find_ring_bonds($mol, $opts, $neighbor, $bond, $visited, $ring_atoms);
    }
}
530
|
|
|
|
|
|
|
|
531
|
|
|
|
|
|
|
# Second pass of the writer: return the SMILES text for $atom and everything
# reachable from it through bonds not yet visited.
#
# %$digits starts out as the ring-atom counts from find_ring_bonds(). When
# an atom with a count is written, that many ring labels are allocated and
# the count is replaced by the list of labels; each closing bond later takes
# the next label off the list and frees it for reuse. The 'used_digits' slot
# of the same hash tracks which labels are taken.
#
# Output order for one atom: incoming bond symbol, the atom, labels of rings
# opened here, labels of rings closed here, then the sub-branches. All but
# the last sub-branch are wrapped in parentheses.
sub branch {
    my ($self, $mol, $opts, $atom, $from_bond, $visited, $digits) = @_;

    my $smiles = '' . $self->bond_symbol($from_bond, $opts);
    $smiles .= $self->format_atom($atom, $opts);

    if (my $ring_count = $digits->{$atom}) { # opening a ring
        my @labels = map { $self->next_digit($digits) } 1 .. $ring_count;
        $digits->{$atom} = \@labels;
        $smiles .= join "", map { $_ < 10 ? $_ : "%$_" } @labels;
    }

    $visited->{$atom} = 1;
    my @edges = $self->sorted_bonds_neighbors($atom, $opts);

    # ring closures: unvisited bonds leading back to an atom already written
    for my $edge (@edges) {
        my ($bond, $neighbor) = @{$edge}{qw(bond to)};
        next if $visited->{$bond};
        next unless $visited->{$neighbor};

        my $label = shift @{ $digits->{$neighbor} };
        $smiles .= $self->bond_symbol($bond, $opts);
        $smiles .= $label < 10 ? $label : "%$label";
        $digits->{used_digits}[$label] = 0; # free for future use
        $visited->{$bond} = 1;
    }

    # sub-branches: each is held back one step, so that only the last one is
    # written without parentheses
    my $held_back = "";
    for my $edge (@edges) {
        my ($bond, $neighbor) = @{$edge}{qw(bond to)};
        next if $visited->{$bond};
        $visited->{$bond} = 1;
        next if $visited->{$neighbor};

        my $text = $self->branch($mol, $opts, $neighbor, $bond, $visited, $digits);
        $smiles .= "($held_back)" if $held_back;
        $held_back = $text;
    }
    $smiles .= "$held_back";
    $smiles;
}
580
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
# Reserve and return the smallest ring-closure label in 1..99 that is not
# marked as used in $digits->{used_digits}. Dies when all 99 are taken.
sub next_digit {
    my ($self, $digits) = @_;

    my $free = first { !$digits->{used_digits}[$_] } 1 .. 99;
    die "no more available smiles digits!" # shouldn't happen
        unless defined $free;

    $digits->{used_digits}[$free] = 1; # mark as used
    return $free;
}
591
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
# Return the atom's bonds_neighbors list (hashes with 'bond' and 'to'
# entries). With the "unique" option the list is ordered by the canonical
# class ('canon/class' attribute) of the neighbouring atom; otherwise it is
# returned in the order the atom reports it.
sub sorted_bonds_neighbors {
    my ($self, $atom, $opts) = @_;

    my @pairs = $atom->bonds_neighbors;
    @pairs = sort {
        $a->{to}->attr("canon/class") <=> $b->{to}->attr("canon/class")
    } @pairs if $opts->{unique};

    @pairs;
}
602
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
# Bond order -> symbol written to the SMILES string. Single bonds are
# written implicitly.
my %ORDER_TO_TYPE = (
    1 => '',
    2 => '=',
    3 => '#',
);

# Return the SMILES symbol for $bond.
#
# Returns the empty string when there is no bond (first atom of a fragment),
# when the "aromatic" option is on and the bond is aromatic, and - instead of
# the undef returned previously - when the bond order is undefined or has no
# entry in the table. Callers append the result to a string, so undef only
# produced warnings.
sub bond_symbol {
    my ($self, $bond, $opts) = @_;
    return '' unless $bond;
    return '' if $opts->{aromatic} && $bond->aromatic;

    my $order = $bond->order;
    return '' unless defined $order && exists $ORDER_TO_TYPE{$order};
    return $ORDER_TO_TYPE{$order};
}
613
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
# Return the SMILES text for one atom.
#
# The bare symbol (lowercase when the "aromatic" option is on and the atom is
# aromatic) is used unless one of these holds, in which case the bracketed
# form [isotope symbol H-count charge :number] is written:
#   - the element is not in the organic subset table,
#   - the atom has a formal charge,
#   - its hydrogen total differs from calc_implicit_hydrogens_2(),
#   - the "number" option is on and the atom has a defined name.
# Charges of magnitude 1 are written as a bare sign, larger ones as a signed
# number; one hydrogen is written as "H", more as "Hn".
sub format_atom {
    my ($self, $atom, $opts) = @_;

    my $symbol = $atom->symbol;
    $symbol = lc $symbol if $opts->{aromatic} && $atom->aromatic;

    # unless atom is "simple"...
    my $needs_brackets =
           !$ORGANIC_ELEMS{$atom->symbol}
        || $atom->formal_charge
        || $atom->total_hydrogens != $self->calc_implicit_hydrogens_2($atom)
        || ($opts->{number} && defined $atom->name);
    return $symbol unless $needs_brackets;

    # "complex atom"; bracketed
    my $h_count = $atom->hydrogens;
    my $charge  = $atom->formal_charge || '';
    my $iso     = $atom->attr("smiles/isotope") || '';
    my $number  = '';

    if ($charge) {
        $charge = abs($charge) > 1 ? sprintf("%+d", $charge)
                : $charge > 0      ? '+'
                :                    '-';
    }

    my $h_text = !$h_count    ? ''
               : $h_count > 1 ? "H$h_count"
               :                'H';

    $number = ':' . $atom->name if $opts->{number} and defined $atom->name;

    return "[$iso$symbol$h_text$charge$number]";
}
646
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
|
648
|
|
|
|
|
|
|
1; |
649
|
|
|
|
|
|
|
|
650
|
|
|
|
|
|
|
=head1 CAVEATS |
651
|
|
|
|
|
|
|
|
652
|
|
|
|
|
|
|
Stereochemistry is not supported! Stereochemical descriptors such as @, @@, /, |
653
|
|
|
|
|
|
|
and \ will be silently ignored on input, and will certainly not be produced on |
654
|
|
|
|
|
|
|
output. |
655
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
Reading branches that start before an atom, such as (OC)C, is not supported; such a string should be |
657
|
|
|
|
|
|
|
equivalent to C(OC) and COC, according to some variants of the SMILES |
658
|
|
|
|
|
|
|
specification. Many other tools don't implement this rule either. |
659
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
The kekulize option works by increasing the bond orders of atoms that don't |
661
|
|
|
|
|
|
|
have their usual valences satisfied. This may cause problems if you have atoms |
662
|
|
|
|
|
|
|
with explicitly low hydrogen counts. |
663
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
=head1 VERSION |
665
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
0.47 |
667
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
=head1 SEE ALSO |
669
|
|
|
|
|
|
|
|
670
|
|
|
|
|
|
|
L<Chemistry::Mol>, L<Chemistry::File> |
671
|
|
|
|
|
|
|
|
672
|
|
|
|
|
|
|
The SMILES Home Page at http://www.daylight.com/dayhtml/smiles/ |
673
|
|
|
|
|
|
|
|
674
|
|
|
|
|
|
|
The Daylight Theory Manual at |
675
|
|
|
|
|
|
|
http://www.daylight.com/dayhtml/doc/theory/theory.smiles.html |
676
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
The PerlMol website L<http://www.perlmol.org/> |
678
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
=head1 AUTHOR |
680
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
Ivan Tubert-Brohman E<lt>itub@cpan.orgE<gt> |
682
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
=head1 COPYRIGHT |
684
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
Copyright (c) 2009 Ivan Tubert-Brohman. All rights reserved. This program is |
686
|
|
|
|
|
|
|
free software; you can redistribute it and/or modify it under the same terms as |
687
|
|
|
|
|
|
|
Perl itself. |
688
|
|
|
|
|
|
|
|
689
|
|
|
|
|
|
|
=cut |
690
|
|
|
|
|
|
|
|