line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Chemistry::File::SLN; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
$VERSION = "0.11"; |
4
|
|
|
|
|
|
|
# $Id: SLN.pm,v 1.4 2005/03/29 16:38:06 itubert Exp $ |
5
|
|
|
|
|
|
|
|
6
|
2
|
|
|
2
|
|
77701
|
use 5.006; |
|
2
|
|
|
|
|
11
|
|
|
2
|
|
|
|
|
87
|
|
7
|
2
|
|
|
2
|
|
11
|
use strict; |
|
2
|
|
|
|
|
4
|
|
|
2
|
|
|
|
|
76
|
|
8
|
2
|
|
|
2
|
|
20
|
use warnings; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
80
|
|
9
|
2
|
|
|
2
|
|
12
|
use base "Chemistry::File"; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
2457
|
|
10
|
2
|
|
|
2
|
|
69272
|
use Chemistry::Mol; |
|
2
|
|
|
|
|
108173
|
|
|
2
|
|
|
|
|
236
|
|
11
|
2
|
|
|
2
|
|
7469
|
use Chemistry::File::SLN::Parser; |
|
2
|
|
|
|
|
7
|
|
|
2
|
|
|
|
|
79
|
|
12
|
2
|
|
|
2
|
|
2186
|
use Chemistry::Bond::Find 'assign_bond_orders'; |
|
2
|
|
|
|
|
32893
|
|
|
2
|
|
|
|
|
182
|
|
13
|
2
|
|
|
2
|
|
25
|
use List::Util qw(sum); |
|
2
|
|
|
|
|
6
|
|
|
2
|
|
|
|
|
3810
|
|
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 NAME |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
Chemistry::File::SLN - SLN linear notation parser/writer |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=head1 SYNOPSYS |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
#!/usr/bin/perl |
22
|
|
|
|
|
|
|
use Chemistry::File::SLN; |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
# parse a SLN string for benzene |
25
|
|
|
|
|
|
|
my $s = 'C[1]H:CH:CH:CH:CH:CH@1'; |
26
|
|
|
|
|
|
|
my $mol = Chemistry::Mol->parse($s, format => 'sln'); |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
# print a SLN string |
29
|
|
|
|
|
|
|
print $mol->print(format => 'sln'); |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# print a unique (canonical) SLN string |
32
|
|
|
|
|
|
|
print $mol->print(format => 'sln', unique => 1); |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
# parse a multiline SLN file |
35
|
|
|
|
|
|
|
my @mols = Chemistry::Mol->read("file.sln", format => 'sln'); |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
# write a multiline SLN file |
38
|
|
|
|
|
|
|
Chemistry::Mol->write("file.sln", mols => [@mols]); |
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
=head1 DESCRIPTION |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
This module parses a SLN (Sybyl Line Notation) string. This is a File I/O |
44
|
|
|
|
|
|
|
driver for the PerlMol project. L. It registers the |
45
|
|
|
|
|
|
|
'sln' format with Chemistry::Mol, and recognizes filenames ending in '.sln'. |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
Optional attributes for atoms, bonds, and molecules are stored as |
48
|
|
|
|
|
|
|
$atom->attr("sln/attr"), $bond->attr("sln/attr"), and $mol->attr("sln/attr"), |
49
|
|
|
|
|
|
|
respectively. Boolean attributes are stored with a value of 'TRUE'. That's the |
50
|
|
|
|
|
|
|
way boolean attributes are recognized when writing, so that they can be written |
51
|
|
|
|
|
|
|
in the shortened form. |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
$sln_attr->{backbone} = 1; |
54
|
|
|
|
|
|
|
# would be ouput as "C[backbone=1]" |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
$sln_attr->{backbone} = 'TRUE'; |
57
|
|
|
|
|
|
|
# would be ouput as "C[backbone]" |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
Also note that attribute names are normalized to lowercase on reading. |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
=head1 OPTIONS |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
The following options are available when reading: |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
=over |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=item kekulize |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
Assign bond orders for unsatisfied valences or for aromatic bonds. For example, |
70
|
|
|
|
|
|
|
benzene read as C[1]H:CH:CH:CH:CH:CH@1 will be converted internally to |
71
|
|
|
|
|
|
|
something like C[1]H=CHCH=CHCH=CH@1. This is needed if another format or |
72
|
|
|
|
|
|
|
module expects a Kekule representation without an aromatic bond type. |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
=back |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
The following options are available when writing: |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
=over |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
=item mols |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
If this option points to an array of molecules, these molecules will be |
83
|
|
|
|
|
|
|
written, one per line, as in the example in the SYNOPSYS. |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
=item aromatic |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
Detect aromaticity before writing. This will ensure that aromatic bond types |
88
|
|
|
|
|
|
|
are used instead of alternate single and double bonds. |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
=item unique |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
Canonicalize before writing, and produce a unique strucure. NOTE: this option |
93
|
|
|
|
|
|
|
does not guarantee a unique representation for molecules with bracketed |
94
|
|
|
|
|
|
|
attributes. |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
=item name |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
Include the name of the molecule ($mol->name) in the output string. |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=item coord3d, coords |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
Include the 3D coordinates of every atom in the molecule in the output string. |
103
|
|
|
|
|
|
|
C and C may be used interchangeably. |
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
=item attr |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
Output the atom, bond, and molecule attributes found in $mol->attr("sln/attr"), |
108
|
|
|
|
|
|
|
etc. |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
=back |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
=head1 CAVEATS |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
This version does not implement the full SLN specification. It supports |
115
|
|
|
|
|
|
|
simple structures and some attributes, but it does not support any of the |
116
|
|
|
|
|
|
|
following: |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=over |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=item Macro atoms |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=item Pattern matching options |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
=item Markush structures |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=item 2D Coordinates |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
=back |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
The SLN specification is vague on several points, and I don't have a reference |
131
|
|
|
|
|
|
|
implementation available, so I had to make several arbitrary decisions. Also, |
132
|
|
|
|
|
|
|
this version of this module has not been tested exhaustively, so please report |
133
|
|
|
|
|
|
|
any bugs that you find. |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
If the parser doesn't understand a string, it only says "syntax error", which |
136
|
|
|
|
|
|
|
may not be very helpful. |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
=cut |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
# INITIALIZATION |
141
|
|
|
|
|
|
|
Chemistry::Mol->register_format('sln'); |
142
|
|
|
|
|
|
|
my $Parser = Chemistry::File::SLN::Parser->new; |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
sub name_is { |
145
|
0
|
|
|
0
|
1
|
0
|
my ($self, $name) = @_; |
146
|
0
|
|
|
|
|
0
|
$name =~ /\.sln$/i; |
147
|
|
|
|
|
|
|
} |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
sub file_is { |
150
|
0
|
|
|
0
|
1
|
0
|
$_[0]->name_is($_[1]); |
151
|
|
|
|
|
|
|
} |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
sub parse_string { |
154
|
72
|
|
|
72
|
1
|
64579
|
my ($self, $string, %opts) = @_; |
155
|
|
|
|
|
|
|
|
156
|
72
|
|
|
|
|
516
|
my (@lines) = split /(?:\n|\r\n?)/, $string; |
157
|
72
|
|
|
|
|
132
|
my @mols; |
158
|
72
|
|
|
|
|
162
|
for my $line (@lines) { |
159
|
72
|
|
|
|
|
419
|
my $mol = $self->parse_single_line($line, %opts); |
160
|
72
|
50
|
|
|
|
755
|
return $mol unless wantarray; |
161
|
0
|
|
|
|
|
0
|
push @mols, $mol; |
162
|
|
|
|
|
|
|
} |
163
|
0
|
|
|
|
|
0
|
@mols; |
164
|
|
|
|
|
|
|
} |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
sub parse_single_line { |
167
|
72
|
|
|
72
|
0
|
225
|
my ($self, $string, %opts) = @_; |
168
|
|
|
|
|
|
|
|
169
|
72
|
|
50
|
|
|
305
|
my $mol_class = $opts{mol_class} || "Chemistry::Mol"; |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
|
172
|
|
|
|
|
|
|
# call the actual yapp-generated parser |
173
|
72
|
50
|
|
|
|
466
|
my $tree = $Parser->run($string) or return; |
174
|
|
|
|
|
|
|
#use Data::Dumper; print Dumper $tree; |
175
|
|
|
|
|
|
|
|
176
|
72
|
|
|
|
|
3980
|
my $mol = $mol_class->new; |
177
|
72
|
|
|
|
|
1898
|
my @nodes = @{$tree->{chain}}; |
|
72
|
|
|
|
|
356
|
|
178
|
72
|
|
|
|
|
127
|
my %closures; |
179
|
|
|
|
|
|
|
my $last_atom; |
180
|
0
|
|
|
|
|
0
|
my @stack; |
181
|
|
|
|
|
|
|
|
182
|
72
|
|
|
|
|
277
|
while (my $node = shift @nodes) { |
183
|
631
|
100
|
|
|
|
3198
|
if ($node eq '(') { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
184
|
79
|
|
|
|
|
422
|
push @stack, $last_atom; |
185
|
|
|
|
|
|
|
} elsif ($node eq ')') { |
186
|
79
|
|
|
|
|
249
|
$last_atom = pop @stack; |
187
|
|
|
|
|
|
|
} elsif($last_atom) { # bond |
188
|
401
|
|
|
|
|
1886
|
my $next = shift @nodes; |
189
|
401
|
100
|
|
|
|
900
|
if ($next->{closure}) { |
190
|
23
|
|
|
|
|
50
|
my $atom = $closures{$next->{closure}}; |
191
|
23
|
|
|
|
|
64
|
$self->compile_bond($mol, $node, $last_atom, $atom); |
192
|
|
|
|
|
|
|
} else { |
193
|
378
|
|
|
|
|
951
|
my $atom = $self->compile_atom($mol, $next, \%closures); |
194
|
378
|
|
|
|
|
1038
|
$self->compile_bond($mol, $node, $last_atom, $atom); |
195
|
378
|
|
|
|
|
2389
|
$last_atom = $atom; |
196
|
|
|
|
|
|
|
} |
197
|
|
|
|
|
|
|
} else { # first atom |
198
|
72
|
|
|
|
|
2054
|
$last_atom = $self->compile_atom($mol, $node, \%closures); |
199
|
|
|
|
|
|
|
} |
200
|
|
|
|
|
|
|
} |
201
|
72
|
50
|
|
|
|
437
|
if ($opts{kekulize}) { |
202
|
0
|
|
|
|
|
0
|
assign_bond_orders($mol, method => "itub", use_coords => 0, |
203
|
|
|
|
|
|
|
scratch => 0, charges => 0); |
204
|
|
|
|
|
|
|
} |
205
|
72
|
|
|
|
|
123
|
my @sln_attr; |
206
|
72
|
|
|
|
|
162
|
while (my ($attr, $value) = each %{$tree->{attr}}) { |
|
140
|
|
|
|
|
1048
|
|
207
|
68
|
100
|
|
|
|
203
|
if ($attr eq 'name') { |
|
|
50
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
208
|
66
|
|
|
|
|
275
|
$mol->name($value); |
209
|
|
|
|
|
|
|
} elsif ($attr eq 'type') { |
210
|
0
|
|
|
|
|
0
|
$mol->type($value); |
211
|
|
|
|
|
|
|
} elsif ($attr eq 'coord3d') { |
212
|
0
|
|
|
|
|
0
|
$self->read_coords($mol, $value); |
213
|
|
|
|
|
|
|
} else { |
214
|
2
|
|
|
|
|
7
|
push @sln_attr, $attr, $value; |
215
|
|
|
|
|
|
|
} |
216
|
|
|
|
|
|
|
} |
217
|
72
|
100
|
|
|
|
191
|
$mol->attr("sln/attr", {@sln_attr}) if @sln_attr; |
218
|
72
|
|
|
|
|
972
|
$mol; |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
} |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
sub compile_atom { |
223
|
450
|
|
|
450
|
0
|
734
|
my ($self, $mol, $node, $closures) = @_; |
224
|
450
|
|
|
|
|
2304
|
my $atom = $mol->new_atom( |
225
|
|
|
|
|
|
|
symbol => $node->{symbol}, |
226
|
|
|
|
|
|
|
hydrogens => $node->{hcount}, |
227
|
|
|
|
|
|
|
formal_charge => $node->{attr}{charge}, |
228
|
|
|
|
|
|
|
); |
229
|
450
|
|
|
|
|
45474
|
$atom->attr("sln/attr", $node->{attr}); |
230
|
450
|
|
|
|
|
6213
|
delete $node->{attr}{charge}; |
231
|
450
|
100
|
|
|
|
1255
|
$closures->{$node->{id}} = $atom if $node->{id}; |
232
|
450
|
|
|
|
|
1072
|
$atom; |
233
|
|
|
|
|
|
|
} |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
my %TYPE_TO_ORDER = ( |
236
|
|
|
|
|
|
|
'-' => 1, |
237
|
|
|
|
|
|
|
'=' => 2, |
238
|
|
|
|
|
|
|
'#' => 3, |
239
|
|
|
|
|
|
|
':' => 1, |
240
|
|
|
|
|
|
|
'.' => 0, |
241
|
|
|
|
|
|
|
); |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
sub compile_bond { |
244
|
401
|
|
|
401
|
0
|
863
|
my ($self, $mol, $node, $atom1, $atom2) = @_; |
245
|
401
|
|
|
|
|
812
|
my $order = $TYPE_TO_ORDER{$node->{type}}; |
246
|
401
|
100
|
|
|
|
969
|
if ($order) { |
247
|
396
|
|
|
|
|
1779
|
my $bond = $mol->new_bond( |
248
|
|
|
|
|
|
|
type => $node->{type}, |
249
|
|
|
|
|
|
|
atoms=>[$atom1, $atom2], |
250
|
|
|
|
|
|
|
order => $order, |
251
|
|
|
|
|
|
|
); |
252
|
396
|
|
|
|
|
50578
|
$bond->attr("sln/attr", $node->{attr}); |
253
|
396
|
100
|
|
|
|
5753
|
if ($node->{type} eq ':') { |
254
|
66
|
|
|
|
|
229
|
$_->aromatic(1) for ($atom1, $atom2, $bond); |
255
|
|
|
|
|
|
|
} |
256
|
|
|
|
|
|
|
} |
257
|
|
|
|
|
|
|
} |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
sub read_coords { |
260
|
0
|
|
|
0
|
0
|
0
|
my ($self, $mol, $coords_str) = @_; |
261
|
0
|
|
|
|
|
0
|
$coords_str =~ s/[()]//g; |
262
|
0
|
|
|
|
|
0
|
my (@coords) = split /,/, $coords_str; |
263
|
0
|
|
|
|
|
0
|
my $fh = $mol->formula_hash; |
264
|
0
|
|
|
|
|
0
|
my $n = sum(values %$fh); |
265
|
0
|
|
|
|
|
0
|
my $sprout = (@coords == 3*$n); |
266
|
0
|
|
|
|
|
0
|
for my $atom ($mol->atoms) { |
267
|
0
|
|
|
|
|
0
|
$atom->coords(splice @coords, 0, 3); |
268
|
0
|
0
|
|
|
|
0
|
if ($sprout) { |
269
|
0
|
|
|
|
|
0
|
for (1 .. $atom->implicit_hydrogens) { |
270
|
0
|
|
|
|
|
0
|
my $H = $mol->new_atom(symbol => 'H', |
271
|
|
|
|
|
|
|
coords => [splice @coords, 0, 3]); |
272
|
0
|
|
|
|
|
0
|
$mol->new_bond(atoms => [$atom, $H]); |
273
|
|
|
|
|
|
|
} |
274
|
0
|
|
|
|
|
0
|
$atom->implicit_hydrogens(0); |
275
|
|
|
|
|
|
|
} |
276
|
|
|
|
|
|
|
} |
277
|
|
|
|
|
|
|
} |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
########### WRITER ################# |
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
sub write_string { |
284
|
73
|
|
|
73
|
1
|
2405
|
my ($self, $mol_ref, %opts) = @_; |
285
|
|
|
|
|
|
|
|
286
|
73
|
|
|
|
|
145
|
my $eol; |
287
|
|
|
|
|
|
|
my @mols; |
288
|
73
|
50
|
|
|
|
222
|
if ($opts{mols}) { |
289
|
0
|
|
|
|
|
0
|
@mols = @{$opts{mols}}; |
|
0
|
|
|
|
|
0
|
|
290
|
0
|
|
|
|
|
0
|
$eol = "\n"; |
291
|
|
|
|
|
|
|
} else { |
292
|
73
|
|
|
|
|
136
|
@mols = $mol_ref; |
293
|
73
|
|
|
|
|
133
|
$eol = ""; |
294
|
|
|
|
|
|
|
} |
295
|
|
|
|
|
|
|
|
296
|
73
|
|
|
|
|
132
|
my $sln; |
297
|
73
|
|
|
|
|
152
|
for my $mol (@mols) { |
298
|
73
|
|
|
|
|
332
|
$sln .= $self->write_mol($mol, %opts) . $eol; |
299
|
|
|
|
|
|
|
} |
300
|
73
|
|
|
|
|
534
|
$sln; |
301
|
|
|
|
|
|
|
} |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
sub write_mol { |
304
|
73
|
|
|
73
|
1
|
267
|
my ($self, $mol, %opts) = @_; |
305
|
|
|
|
|
|
|
|
306
|
73
|
|
|
|
|
118
|
my $oldmol = $mol; |
307
|
73
|
|
|
|
|
316
|
$mol = $mol->clone; |
308
|
|
|
|
|
|
|
|
309
|
73
|
|
|
|
|
28999
|
my $sln = ''; |
310
|
73
|
|
|
|
|
108
|
my @id_log; |
311
|
73
|
50
|
|
|
|
348
|
if ($mol->atoms) { |
312
|
73
|
|
|
|
|
1023
|
my @atoms = $self->clean_mol($mol, %opts); |
313
|
|
|
|
|
|
|
|
314
|
73
|
|
|
|
|
191
|
my $visited = {}; |
315
|
73
|
|
|
|
|
94
|
my @s; |
316
|
73
|
|
|
|
|
166
|
for my $atom (@atoms) { |
317
|
453
|
100
|
|
|
|
3565
|
next if $visited->{$atom}; |
318
|
78
|
|
|
|
|
847
|
my $ring_atoms = {}; |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
# first pass to find and number the ring bonds |
321
|
78
|
|
|
|
|
356
|
$self->find_ring_bonds($mol, \%opts, $atom, undef, {}, $ring_atoms); |
322
|
|
|
|
|
|
|
|
323
|
|
|
|
|
|
|
# second pass to actually generate the sln string |
324
|
78
|
|
|
|
|
1432
|
push @s, $self->branch($mol, \%opts, $atom, undef, $visited, |
325
|
|
|
|
|
|
|
$ring_atoms, \@id_log); |
326
|
|
|
|
|
|
|
} |
327
|
73
|
|
|
|
|
892
|
$sln .= join '.', @s; |
328
|
|
|
|
|
|
|
} |
329
|
|
|
|
|
|
|
|
330
|
73
|
|
|
|
|
355
|
$sln .= $self->format_ctab_attr($mol, \%opts, $oldmol, \@id_log); |
331
|
|
|
|
|
|
|
} |
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
sub clean_mol { |
334
|
73
|
|
|
73
|
0
|
299
|
my ($self, $mol, %opts) = @_; |
335
|
|
|
|
|
|
|
|
336
|
73
|
|
|
|
|
313
|
$self->collapse_hydrogens($mol); |
337
|
73
|
|
|
|
|
832
|
my @atoms = $mol->atoms; |
338
|
73
|
50
|
|
|
|
694
|
if ($opts{unique}) { |
339
|
0
|
0
|
|
|
|
0
|
unless ($atoms[0]->attr("canon/class")) { |
340
|
0
|
|
|
|
|
0
|
require Chemistry::Canonicalize; |
341
|
0
|
|
|
|
|
0
|
Chemistry::Canonicalize::canonicalize($mol); |
342
|
|
|
|
|
|
|
} |
343
|
|
|
|
|
|
|
#$opts{aromatic} = 1; # all unique sln have to be aromatic |
344
|
0
|
|
|
|
|
0
|
@atoms = sort { |
345
|
0
|
|
|
|
|
0
|
$a->attr("canon/class") <=> $b->attr("canon/class") |
346
|
|
|
|
|
|
|
} @atoms; |
347
|
|
|
|
|
|
|
} |
348
|
|
|
|
|
|
|
|
349
|
73
|
50
|
|
|
|
244
|
if ($opts{aromatic}) { |
350
|
0
|
|
|
|
|
0
|
require Chemistry::Ring; |
351
|
0
|
|
|
|
|
0
|
Chemistry::Ring::aromatize_mol($mol); |
352
|
|
|
|
|
|
|
} |
353
|
73
|
|
|
|
|
348
|
@atoms; |
354
|
|
|
|
|
|
|
} |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
sub format_ctab_attr { |
357
|
73
|
|
|
73
|
0
|
160
|
my ($self, $mol, $opts, $oldmol, $id_log) = @_; |
358
|
|
|
|
|
|
|
|
359
|
73
|
|
|
|
|
141
|
my $sln = ''; |
360
|
73
|
50
|
66
|
|
|
306
|
if ($opts->{name} or $opts->{attr} or $opts->{coords} or $opts->{coord3d}) { |
|
|
|
66
|
|
|
|
|
|
|
|
33
|
|
|
|
|
361
|
2
|
|
|
2
|
|
19
|
no warnings 'uninitialized'; |
|
2
|
|
|
|
|
9
|
|
|
2
|
|
|
|
|
2245
|
|
362
|
71
|
|
|
|
|
84
|
my @attr; |
363
|
71
|
|
|
|
|
284
|
my $name = $mol->name; |
364
|
71
|
|
|
|
|
553
|
$name =~ s/[\r\n]//g; |
365
|
71
|
100
|
66
|
|
|
382
|
push @attr, 'name="' . $mol->name . '"' |
366
|
|
|
|
|
|
|
if $opts->{name} and length $mol->name; |
367
|
71
|
|
|
|
|
860
|
my @coords; |
368
|
71
|
50
|
33
|
|
|
387
|
if ($opts->{coord3d} or $opts->{coords}) { |
369
|
0
|
|
|
|
|
0
|
my @all_atoms = map { |
370
|
0
|
|
|
|
|
0
|
( |
371
|
|
|
|
|
|
|
$oldmol->by_id($_), |
372
|
0
|
|
|
|
|
0
|
grep {$_->symbol eq 'H'} |
373
|
|
|
|
|
|
|
$oldmol->by_id($_)->neighbors |
374
|
|
|
|
|
|
|
) |
375
|
|
|
|
|
|
|
} @$id_log; |
376
|
|
|
|
|
|
|
push @coords, sprintf("(%.3f,%.3f,%.3f)",$_->coords->array) |
377
|
0
|
|
|
|
|
0
|
for @all_atoms; |
378
|
0
|
|
|
|
|
0
|
push @attr, 'coord3d=' . join(',',@coords); |
379
|
|
|
|
|
|
|
} |
380
|
71
|
50
|
|
|
|
175
|
if ($opts->{attr}) { |
381
|
71
|
|
|
|
|
167
|
push @attr, $self->format_sln_attr($mol); |
382
|
|
|
|
|
|
|
} |
383
|
71
|
100
|
|
|
|
318
|
$sln .= '<' . join(';', @attr) . '>' if @attr; |
384
|
|
|
|
|
|
|
} |
385
|
73
|
|
|
|
|
3247
|
$sln; |
386
|
|
|
|
|
|
|
} |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
sub find_ring_bonds { |
390
|
453
|
|
|
453
|
0
|
852
|
my ($self, $mol, $opts, $atom, $from_bond, $visited, $ring_atoms) = @_; |
391
|
|
|
|
|
|
|
|
392
|
453
|
|
|
|
|
1043
|
$visited->{$atom} = 1; |
393
|
453
|
|
|
|
|
3426
|
for my $bn (sorted_bonds_neighbors($atom, $opts)) { |
394
|
796
|
|
|
|
|
3911
|
my $nei = $bn->{to}; |
395
|
796
|
|
|
|
|
1034
|
my $bond = $bn->{bond}; |
396
|
796
|
100
|
|
|
|
1776
|
next if $visited->{$bond}; |
397
|
398
|
|
|
|
|
3243
|
$visited->{$bond} = 1; |
398
|
398
|
100
|
|
|
|
3462
|
if ($visited->{$nei}) { # closed ring |
399
|
|
|
|
|
|
|
#print "closing ring\n"; |
400
|
23
|
|
|
|
|
173
|
$ring_atoms->{$nei}++; |
401
|
|
|
|
|
|
|
} else { |
402
|
375
|
|
|
|
|
3149
|
$self->find_ring_bonds($mol, $opts, $nei, |
403
|
|
|
|
|
|
|
$bond, $visited, $ring_atoms); |
404
|
|
|
|
|
|
|
} |
405
|
|
|
|
|
|
|
} |
406
|
|
|
|
|
|
|
} |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
sub branch { |
409
|
453
|
|
|
453
|
0
|
982
|
my ($self, $mol, $opts, $atom, $from_bond, $visited, $digits, $id_log) = @_; |
410
|
|
|
|
|
|
|
|
411
|
453
|
|
|
|
|
572
|
my $prev_branch = ""; |
412
|
453
|
|
|
|
|
479
|
my $sln; |
413
|
453
|
|
|
|
|
1100
|
$sln .= $self->format_bond($from_bond, $opts); |
414
|
453
|
|
|
|
|
567
|
my $digit; |
415
|
453
|
100
|
|
|
|
1111
|
if ($digits->{$atom}) { # opening a ring |
416
|
23
|
|
|
|
|
212
|
$digit = $self->next_digit($digits); |
417
|
23
|
|
|
|
|
70
|
$digits->{$atom} = $digit; |
418
|
|
|
|
|
|
|
} |
419
|
453
|
|
|
|
|
3972
|
$sln .= $self->format_atom($atom, $opts, $digit); |
420
|
453
|
|
|
|
|
1353
|
push @$id_log, $atom->id; |
421
|
|
|
|
|
|
|
|
422
|
453
|
|
|
|
|
2830
|
$visited->{$atom} = 1; |
423
|
453
|
|
|
|
|
3397
|
my @bns = sorted_bonds_neighbors($atom, $opts); |
424
|
|
|
|
|
|
|
|
425
|
453
|
|
|
|
|
752
|
for my $bn (@bns) { |
426
|
796
|
|
|
|
|
2917
|
my $nei = $bn->{to}; |
427
|
796
|
|
|
|
|
1077
|
my $bond = $bn->{bond}; |
428
|
796
|
100
|
|
|
|
1915
|
next if $visited->{$bond}; |
429
|
398
|
|
|
|
|
3262
|
$visited->{$bond} = 1; |
430
|
398
|
100
|
|
|
|
3049
|
if ($visited->{$nei}) { # closed a ring |
431
|
23
|
100
|
|
|
|
170
|
if ($prev_branch) { |
432
|
6
|
|
|
|
|
17
|
$sln .= "($prev_branch)"; |
433
|
|
|
|
|
|
|
} |
434
|
23
|
|
|
|
|
73
|
$prev_branch = $self->format_bond($bond, $opts) |
435
|
|
|
|
|
|
|
. '@' . $digits->{$nei}; |
436
|
23
|
|
|
|
|
199
|
$visited->{$bond} = 1; |
437
|
|
|
|
|
|
|
} else { |
438
|
375
|
|
|
|
|
3307
|
my $branch = $self->branch($mol, $opts, $nei, $bond, $visited, |
439
|
|
|
|
|
|
|
$digits, $id_log); |
440
|
375
|
100
|
|
|
|
856
|
if ($prev_branch) { |
441
|
73
|
|
|
|
|
148
|
$sln .= "($prev_branch)"; |
442
|
|
|
|
|
|
|
} |
443
|
375
|
|
|
|
|
830
|
$prev_branch = $branch; |
444
|
|
|
|
|
|
|
} |
445
|
|
|
|
|
|
|
} |
446
|
453
|
|
|
|
|
1949
|
$sln .= "$prev_branch"; |
447
|
453
|
|
|
|
|
1547
|
$sln; |
448
|
|
|
|
|
|
|
} |
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
sub next_digit { |
451
|
23
|
|
|
23
|
0
|
43
|
my ($self, $digits) = @_; |
452
|
23
|
|
|
|
|
54
|
++$digits->{used_digits}; |
453
|
|
|
|
|
|
|
} |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
sub collapse_hydrogens { |
456
|
73
|
|
|
73
|
0
|
114
|
my ($self, $mol) = @_; |
457
|
|
|
|
|
|
|
|
458
|
73
|
|
|
|
|
225
|
for my $atom (grep {$_->symbol eq 'H'} $mol->atoms) { |
|
453
|
|
|
|
|
3093
|
|
459
|
0
|
0
|
|
|
|
0
|
my ($neighbor) = $atom->neighbors or next; |
460
|
0
|
|
|
|
|
0
|
$atom->delete; |
461
|
0
|
|
|
|
|
0
|
my $h_count = $neighbor->hydrogens; |
462
|
0
|
|
|
|
|
0
|
$h_count++; |
463
|
0
|
|
|
|
|
0
|
$neighbor->hydrogens($h_count); |
464
|
|
|
|
|
|
|
} |
465
|
|
|
|
|
|
|
} |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
sub sorted_bonds_neighbors { |
468
|
906
|
|
|
906
|
0
|
1219
|
my ($atom, $opts) = @_; |
469
|
906
|
|
|
|
|
2543
|
my @bn = $atom->bonds_neighbors; |
470
|
906
|
50
|
|
|
|
14282
|
if ($opts->{unique}) { |
471
|
0
|
|
|
|
|
0
|
@bn = sort { |
472
|
0
|
|
|
|
|
0
|
$a->{to}->attr("canon/class") <=> $b->{to}->attr("canon/class") |
473
|
|
|
|
|
|
|
} @bn; |
474
|
|
|
|
|
|
|
} |
475
|
906
|
|
|
|
|
2132
|
@bn; |
476
|
|
|
|
|
|
|
} |
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
my %ORDER_TO_TYPE = ( |
479
|
|
|
|
|
|
|
1 => '', 2 => '=', 3 => '#', 4 => '', 0 => '.', |
480
|
|
|
|
|
|
|
); |
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
sub format_bond { |
483
|
476
|
|
|
476
|
0
|
686
|
my ($self, $bond, $opts) = @_; |
484
|
476
|
100
|
|
|
|
1183
|
return '' unless $bond; |
485
|
398
|
100
|
|
|
|
2600
|
my $s = $bond->aromatic ? ':' : $ORDER_TO_TYPE{$bond->order}; |
486
|
398
|
|
|
|
|
4383
|
my @attr; |
487
|
398
|
100
|
|
|
|
1432
|
@attr = $self->format_sln_attr($bond) if $opts->{attr}; |
488
|
398
|
100
|
|
|
|
1425
|
if (@attr) { |
489
|
1
|
|
|
|
|
5
|
$s .= '[' . join(";", @attr) . ']'; |
490
|
|
|
|
|
|
|
} |
491
|
398
|
|
|
|
|
875
|
$s; |
492
|
|
|
|
|
|
|
} |
493
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
sub format_atom { |
495
|
453
|
|
|
453
|
0
|
710
|
my ($self, $atom, $opts, $digit) = @_; |
496
|
453
|
|
|
|
|
506
|
my $s; |
497
|
2
|
|
|
2
|
|
15
|
no warnings 'uninitialized'; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
656
|
|
498
|
453
|
|
|
|
|
1106
|
my $h_count = $atom->hydrogens; |
499
|
453
|
|
|
|
|
2634
|
my $charge = $atom->formal_charge; |
500
|
453
|
|
|
|
|
2414
|
my $symbol = $atom->symbol; |
501
|
|
|
|
|
|
|
|
502
|
453
|
100
|
|
|
|
2819
|
$charge = $charge ? sprintf("%+d", $charge): ''; |
503
|
453
|
100
|
|
|
|
1187
|
$h_count = $h_count ? ($h_count > 1 ? "H$h_count" : 'H') : ''; |
|
|
100
|
|
|
|
|
|
504
|
|
|
|
|
|
|
|
505
|
453
|
|
|
|
|
538
|
$s = $symbol; |
506
|
453
|
|
|
|
|
442
|
my @attr; |
507
|
453
|
100
|
|
|
|
1621
|
@attr = $self->format_sln_attr($atom) if $opts->{attr}; |
508
|
453
|
100
|
100
|
|
|
2890
|
if ($charge or $digit or @attr) { |
|
|
|
100
|
|
|
|
|
509
|
27
|
|
|
|
|
48
|
$s .= '['; |
510
|
27
|
|
|
|
|
48
|
$s .= $digit; |
511
|
27
|
100
|
|
|
|
105
|
unshift @attr, $charge if $charge; |
512
|
27
|
100
|
|
|
|
68
|
if (@attr) { |
513
|
5
|
100
|
|
|
|
15
|
$s .= ':' if $digit; |
514
|
5
|
|
|
|
|
14
|
$s .= join ';', @attr; |
515
|
|
|
|
|
|
|
} |
516
|
27
|
|
|
|
|
40
|
$s .= ']'; |
517
|
|
|
|
|
|
|
} |
518
|
453
|
|
|
|
|
600
|
$s .= $h_count; |
519
|
453
|
|
|
|
|
1009
|
$s; |
520
|
|
|
|
|
|
|
} |
521
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
sub format_sln_attr { |
523
|
912
|
|
|
912
|
0
|
1339
|
my ($self, $obj) = @_; |
524
|
912
|
|
100
|
|
|
2429
|
my $sln_attr = $obj->attr("sln/attr") || {}; |
525
|
912
|
|
|
|
|
8071
|
my @attr; |
526
|
912
|
|
|
|
|
2829
|
for my $key (sort keys %$sln_attr) { |
527
|
6
|
|
|
|
|
11
|
my $val = $sln_attr->{$key}; |
528
|
6
|
100
|
|
|
|
32
|
push @attr, "$key" . ($val eq 'TRUE' ? "" : "=$val"); |
529
|
|
|
|
|
|
|
} |
530
|
912
|
|
|
|
|
2397
|
@attr; |
531
|
|
|
|
|
|
|
} |
532
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
1; |
534
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
=head1 VERSION |
536
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
0.11 |
538
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
=head1 SEE ALSO |
540
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
L, L, L |
542
|
|
|
|
|
|
|
|
543
|
|
|
|
|
|
|
The PerlMol website L |
544
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
Ash, S.; Cline, M. A.; Homer, R. W.; Hurst, T.; Smith, G. B., SYBYL Line |
546
|
|
|
|
|
|
|
Notation (SLN): A Versatile Language for Chemical Structure Representation. J. |
547
|
|
|
|
|
|
|
Chem. Inf. Comput. Sci; 1997; 37(1); 71-79. DOI: 10.1021/ci960109j |
548
|
|
|
|
|
|
|
(L) |
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
=head1 AUTHOR |
551
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
Ivan Tubert-Brohman Eitub@cpan.orgE |
553
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
=head1 COPYRIGHT |
555
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
Copyright (c) 2004 Ivan Tubert-Brohman. All rights reserved. This program is |
557
|
|
|
|
|
|
|
free software; you can redistribute it and/or modify it under the same terms as |
558
|
|
|
|
|
|
|
Perl itself. |
559
|
|
|
|
|
|
|
|
560
|
|
|
|
|
|
|
=cut |
561
|
|
|
|
|
|
|
|