line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Chemistry::File::MDLMol; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
our $VERSION = '0.24'; # VERSION |
4
|
|
|
|
|
|
|
# $Id$ |
5
|
|
|
|
|
|
|
|
6
|
3
|
|
|
3
|
|
147199
|
use base "Chemistry::File"; |
|
3
|
|
|
|
|
16
|
|
|
3
|
|
|
|
|
1169
|
|
7
|
3
|
|
|
3
|
|
31538
|
use Chemistry::Mol; |
|
3
|
|
|
|
|
62354
|
|
|
3
|
|
|
|
|
136
|
|
8
|
3
|
|
|
3
|
|
19
|
use Carp; |
|
3
|
|
|
|
|
5
|
|
|
3
|
|
|
|
|
128
|
|
9
|
3
|
|
|
3
|
|
14
|
use List::Util; |
|
3
|
|
|
|
|
8
|
|
|
3
|
|
|
|
|
114
|
|
10
|
3
|
|
|
3
|
|
15
|
use strict; |
|
3
|
|
|
|
|
4
|
|
|
3
|
|
|
|
|
49
|
|
11
|
3
|
|
|
3
|
|
12
|
use warnings; |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
1339
|
|
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
our $DEBUG = 0; |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 NAME |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
Chemistry::File::MDLMol - MDL molfile reader/writer |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=head1 SYNOPSIS |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
use Chemistry::File::MDLMol; |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
# read a molecule |
24
|
|
|
|
|
|
|
my $mol = Chemistry::Mol->read('myfile.mol'); |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# write a molecule |
27
|
|
|
|
|
|
|
$mol->write("myfile.mol"); |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
# use a molecule as a query for substructure matching |
30
|
|
|
|
|
|
|
use Chemistry::Pattern; |
31
|
|
|
|
|
|
|
use Chemistry::Ring; |
32
|
|
|
|
|
|
|
Chemistry::Ring::aromatize_mol($mol); |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
my $patt = Chemistry::Pattern->read('query.mol'); |
35
|
|
|
|
|
|
|
if ($patt->match($mol)) { |
36
|
|
|
|
|
|
|
print "it matches!\n"; |
37
|
|
|
|
|
|
|
} |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
=cut |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
Chemistry::Mol->register_format(mdl => __PACKAGE__); |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
=head1 DESCRIPTION |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
MDL Molfile (V2000) reader/writer. |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
This module automatically registers the 'mdl' format with Chemistry::Mol. |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
The first three lines of the molfile are stored as $mol->name, |
50
|
|
|
|
|
|
|
$mol->attr("mdlmol/line2"), and $mol->attr("mdlmol/comment"). |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
This version only reads and writes some of the information available in a |
53
|
|
|
|
|
|
|
molfile: it reads coordinates, atom and bond types, charges, isotopes, |
54
|
|
|
|
|
|
|
radicals, and atom lists. It does not read other things such as |
55
|
|
|
|
|
|
|
stereochemistry, 3d properties, etc. |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
This module is part of the PerlMol project, L. |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=head2 Query properties |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
The MDL molfile format supports query properties such as atom lists, and |
62
|
|
|
|
|
|
|
special bond types such as "single or double", "single or aromatic", "double or |
63
|
|
|
|
|
|
|
aromatic", "ring bond", or "any". These properties are supported by this module |
64
|
|
|
|
|
|
|
in conjunction with L. However, support for query properties |
65
|
|
|
|
|
|
|
is currently read-only, and the other properties listed in the specification |
66
|
|
|
|
|
|
|
are not supported yet. |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
So that atom and bond objects can use these special query options, the |
69
|
|
|
|
|
|
|
conditions are represented as Perl subroutines. The generated code can be |
70
|
|
|
|
|
|
|
read from the 'mdlmol/test_sub' attribute: |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
$atom->attr('mdlmol/test_sub'); |
73
|
|
|
|
|
|
|
$bond->attr('mdlmol/test_sub'); |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
This may be useful for debugging, such as when an atom doesn't seem to match as |
76
|
|
|
|
|
|
|
expected. |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
=head2 Aromatic Queries |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
To be able to search for aromatic substructures are represented by Kekule |
81
|
|
|
|
|
|
|
structures, molfiles that are read as patterns (with |
82
|
|
|
|
|
|
|
Cread) are aromatized automatically by using the |
83
|
|
|
|
|
|
|
L module. The default bond test from Chemistry::Pattern::Bond |
84
|
|
|
|
|
|
|
is overridden by one that checks the aromaticity in addition to the bond order. |
85
|
|
|
|
|
|
|
The test is, |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
$patt->aromatic ? $bond->aromatic |
88
|
|
|
|
|
|
|
: (!$bond->aromatic && $patt->order == $bond->order); |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
That is, aromatic pattern bonds match aromatic bonds, and aliphatic pattern |
91
|
|
|
|
|
|
|
bonds match aliphatic bonds with the same bond order. |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
=cut |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
# some constants, based on tables from the file format specification |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
my %OLD_CHARGE_MAP = ( |
100
|
|
|
|
|
|
|
1 => 3, |
101
|
|
|
|
|
|
|
2 => 2, |
102
|
|
|
|
|
|
|
3 => 1, |
103
|
|
|
|
|
|
|
4 => 0, |
104
|
|
|
|
|
|
|
5 => -1, |
105
|
|
|
|
|
|
|
6 => -2, |
106
|
|
|
|
|
|
|
7 => -3, |
107
|
|
|
|
|
|
|
); |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
my %BOND_TYPE_EXPR = ( |
110
|
|
|
|
|
|
|
4 => '($bond->aromatic)', |
111
|
|
|
|
|
|
|
5 => '($bond->order == 1 or $bond->order == 2)', |
112
|
|
|
|
|
|
|
6 => '($bond->order == 1 or $bond->aromatic)', |
113
|
|
|
|
|
|
|
7 => '($bond->order == 2 or $bond->aromatic)', |
114
|
|
|
|
|
|
|
8 => '(1)', # any bond |
115
|
|
|
|
|
|
|
); |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
my %BOND_TOPOLOGY_EXPR = ( |
118
|
|
|
|
|
|
|
1 => '@{$bond->attr("ring/rings")||[]}', |
119
|
|
|
|
|
|
|
2 => '! @{$bond->attr("ring/rings")||[]}', |
120
|
|
|
|
|
|
|
); |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
sub read_mol { |
124
|
66
|
|
|
66
|
1
|
6619
|
my ($self, $fh, %opts) = @_; |
125
|
66
|
100
|
|
|
|
246
|
return if $fh->eof; |
126
|
|
|
|
|
|
|
|
127
|
33
|
|
|
|
|
744
|
%opts = ( slurp => 1, %opts ); |
128
|
33
|
|
100
|
|
|
94
|
my $mol_class = $opts{mol_class} || "Chemistry::Mol"; |
129
|
33
|
|
33
|
|
|
201
|
my $atom_class = $opts{atom_class} || $mol_class->atom_class; |
130
|
33
|
|
33
|
|
|
202
|
my $bond_class = $opts{bond_class} || $mol_class->bond_class; |
131
|
33
|
|
|
|
|
117
|
local $_; |
132
|
|
|
|
|
|
|
|
133
|
33
|
|
|
|
|
79
|
my $mol = $mol_class->new(); |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
# header |
136
|
33
|
|
|
|
|
758
|
my $name = <$fh>; chomp $name; |
|
33
|
|
|
|
|
61
|
|
137
|
33
|
|
|
|
|
74
|
my $line2 = <$fh>; chomp $line2; |
|
33
|
|
|
|
|
43
|
|
138
|
33
|
|
|
|
|
48
|
my $comment = <$fh>; chomp $comment; |
|
33
|
|
|
|
|
54
|
|
139
|
33
|
|
|
|
|
105
|
$mol->name($name); |
140
|
33
|
|
|
|
|
358
|
$mol->attr("mdlmol/line2", $line2); |
141
|
33
|
|
|
|
|
418
|
$mol->attr("mdlmol/comment", $comment); |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
# counts line |
144
|
33
|
50
|
|
|
|
336
|
defined ($_ = <$fh>) or croak "unexpected end of file"; |
145
|
33
|
|
|
|
|
184
|
my ($na, $nb, undef, undef, $is_chiral) = unpack("A3A3A3A3A3", $_); |
146
|
|
|
|
|
|
|
|
147
|
33
|
|
|
|
|
138
|
$mol->attr("mdlmol/chiral", int $is_chiral); |
148
|
|
|
|
|
|
|
|
149
|
33
|
|
|
|
|
309
|
my %old_charges; |
150
|
|
|
|
|
|
|
my %old_radicals; |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
# atom block |
153
|
33
|
|
|
|
|
115
|
for my $i (1 .. $na) { # for each atom... |
154
|
439
|
50
|
|
|
|
32414
|
defined (my $line = <$fh>) or croak "unexpected end of file"; |
155
|
439
|
|
|
|
|
1632
|
my ($x, $y, $z, $symbol, $mass, $charge) |
156
|
|
|
|
|
|
|
= unpack("A10A10A10xA3A2A3", $line); |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
$old_charges{$i} = $OLD_CHARGE_MAP{$charge} |
159
|
439
|
50
|
|
|
|
869
|
if $OLD_CHARGE_MAP{$charge}; |
160
|
439
|
50
|
33
|
|
|
1311
|
$old_radicals{$i} = 2 |
161
|
|
|
|
|
|
|
if $charge && $charge == 4; |
162
|
439
|
|
|
|
|
505
|
my $mass_number; |
163
|
439
|
100
|
66
|
|
|
988
|
if( int $mass && eval { require Chemistry::Isotope } ) { |
|
1
|
50
|
|
|
|
10
|
|
164
|
1
|
|
|
|
|
5
|
my $abundance = |
165
|
|
|
|
|
|
|
Chemistry::Isotope::isotope_abundance($symbol); |
166
|
1
|
|
|
|
|
15
|
($mass_number) = sort { $abundance->{$b} cmp |
167
|
1
|
|
|
|
|
5
|
$abundance->{$a} } |
168
|
|
|
|
|
|
|
sort keys %$abundance; |
169
|
1
|
|
|
|
|
4
|
$mass_number += $mass; |
170
|
|
|
|
|
|
|
} elsif( int $mass ) { |
171
|
0
|
|
|
|
|
0
|
warn "no Chemistry::Isotope, cannot read mass number " . |
172
|
|
|
|
|
|
|
"from atom block\n"; |
173
|
|
|
|
|
|
|
} |
174
|
|
|
|
|
|
|
$mol->new_atom( |
175
|
439
|
|
|
|
|
1489
|
symbol => $symbol, |
176
|
|
|
|
|
|
|
coords => [$x*1, $y*1, $z*1], |
177
|
|
|
|
|
|
|
mass_number => $mass_number, |
178
|
|
|
|
|
|
|
); |
179
|
|
|
|
|
|
|
} |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
# bond block |
182
|
33
|
|
|
|
|
2431
|
for my $i (1 .. $nb) { # for each bond... |
183
|
3
|
|
|
3
|
|
21
|
no warnings 'numeric'; |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
1651
|
|
184
|
451
|
50
|
|
|
|
1265
|
defined ($_ = <$fh>) or croak "unexpected end of file"; |
185
|
|
|
|
|
|
|
my ($a1, $a2, $type, $stereo, $topology, $rxn) |
186
|
451
|
|
|
|
|
1515
|
= map {$_*1} unpack("A3A3A3A3x3A3A3", $_); |
|
2706
|
|
|
|
|
4225
|
|
187
|
451
|
100
|
|
|
|
1713
|
my $order = $type =~ /^[123]$/ ? $type : 1; |
188
|
451
|
|
|
|
|
986
|
my $bond = $mol->new_bond( |
189
|
|
|
|
|
|
|
type => $type, |
190
|
|
|
|
|
|
|
atoms => [$mol->atoms($a1,$a2)], |
191
|
|
|
|
|
|
|
order => $order, |
192
|
|
|
|
|
|
|
); |
193
|
451
|
100
|
|
|
|
49912
|
if ($mol->isa('Chemistry::Pattern')) { |
194
|
9
|
|
|
|
|
22
|
$self->bond_expr($bond, $i, $type, $topology); |
195
|
|
|
|
|
|
|
} |
196
|
|
|
|
|
|
|
} |
197
|
|
|
|
|
|
|
|
198
|
|
|
|
|
|
|
# properties block |
199
|
33
|
|
|
|
|
118
|
while (<$fh>) { |
200
|
46
|
100
|
66
|
|
|
477
|
if (/^M END/ or /^\$\$\$\$/) { |
|
|
100
|
|
|
|
|
|
201
|
33
|
|
|
|
|
53
|
last; |
202
|
|
|
|
|
|
|
} elsif (/^M (...)/) { # generic extended property handler |
203
|
12
|
100
|
100
|
|
|
58
|
if ($1 eq 'CHG' or $1 eq 'RAD'){ # XXX |
204
|
|
|
|
|
|
|
# clear old-style info |
205
|
10
|
|
|
|
|
21
|
%old_radicals = (); |
206
|
10
|
|
|
|
|
14
|
%old_charges = (); |
207
|
|
|
|
|
|
|
} |
208
|
|
|
|
|
|
|
|
209
|
12
|
|
|
|
|
29
|
my $method = "M_$1"; |
210
|
12
|
50
|
|
|
|
102
|
$self->$method($mol, $_) if $self->can($method); |
211
|
|
|
|
|
|
|
} |
212
|
|
|
|
|
|
|
} |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
# add old-style charges and radicals if they still apply |
215
|
33
|
|
|
|
|
100
|
while (my ($key, $val) = each %old_charges) { |
216
|
0
|
|
|
|
|
0
|
$mol->atoms($key)->formal_charge($val); |
217
|
|
|
|
|
|
|
} |
218
|
33
|
|
|
|
|
71
|
while (my ($key, $val) = each %old_radicals) { |
219
|
0
|
|
|
|
|
0
|
$mol->atoms($key)->formal_radical($val); |
220
|
|
|
|
|
|
|
} |
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
# make sure we get to the end of the file |
223
|
33
|
50
|
|
|
|
66
|
if ($opts{slurp}) { |
224
|
33
|
|
|
|
|
658
|
1 while <$fh>; |
225
|
|
|
|
|
|
|
} |
226
|
|
|
|
|
|
|
|
227
|
33
|
|
|
|
|
149
|
$mol->add_implicit_hydrogens; |
228
|
|
|
|
|
|
|
|
229
|
33
|
100
|
|
|
|
21499
|
if ($mol->isa('Chemistry::Pattern')) { |
230
|
2
|
|
|
|
|
500
|
require Chemistry::Ring; |
231
|
2
|
|
|
|
|
5852
|
Chemistry::Ring::aromatize_mol($mol); |
232
|
|
|
|
|
|
|
} |
233
|
|
|
|
|
|
|
|
234
|
33
|
|
|
|
|
4408
|
return $mol; |
235
|
|
|
|
|
|
|
} |
236
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
sub bond_expr { |
238
|
9
|
|
|
9
|
0
|
20
|
my ($self, $bond, $i, $type, $topology) = @_; |
239
|
9
|
|
|
|
|
11
|
my @bond_exprs; |
240
|
9
|
|
|
|
|
17
|
my $s = $BOND_TOPOLOGY_EXPR{$topology}; |
241
|
9
|
100
|
|
|
|
15
|
push @bond_exprs, $s if $s; |
242
|
9
|
|
|
|
|
14
|
$s = $BOND_TYPE_EXPR{$type}; |
243
|
9
|
100
|
|
|
|
16
|
push @bond_exprs, $s if $s; |
244
|
9
|
100
|
|
|
|
15
|
if (@bond_exprs) { |
245
|
1
|
|
|
|
|
4
|
my $expr = join " and ", @bond_exprs; |
246
|
1
|
|
|
|
|
5
|
my $sub_txt = <
|
247
|
|
|
|
|
|
|
sub { |
248
|
|
|
|
|
|
|
no warnings; |
249
|
|
|
|
|
|
|
my (\$patt, \$bond) = \@_; |
250
|
|
|
|
|
|
|
$expr; |
251
|
|
|
|
|
|
|
}; |
252
|
|
|
|
|
|
|
SUB |
253
|
1
|
50
|
|
|
|
4
|
print "MDLMol bond($i) sub: <<<<$sub_txt>>>\n" if $DEBUG; |
254
|
1
|
|
|
|
|
3
|
$bond->attr('mdlmol/test_sub' => $sub_txt); |
255
|
1
|
|
|
1
|
|
113
|
$bond->test_sub(eval $sub_txt); |
|
1
|
|
|
|
|
10
|
|
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
119
|
|
256
|
|
|
|
|
|
|
} else { # default bond sub |
257
|
8
|
|
|
|
|
19
|
$bond->test_sub(\&default_bond_test); |
258
|
|
|
|
|
|
|
} |
259
|
|
|
|
|
|
|
} |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
sub default_bond_test { |
262
|
3
|
|
|
3
|
|
23
|
no warnings; |
|
3
|
|
|
|
|
12
|
|
|
3
|
|
|
|
|
2083
|
|
263
|
138
|
|
|
138
|
0
|
41732
|
my ($patt, $bond) = @_; |
264
|
138
|
100
|
100
|
|
|
241
|
$patt->aromatic ? $bond->aromatic |
265
|
|
|
|
|
|
|
: (!$bond->aromatic && $patt->order == $bond->order); |
266
|
|
|
|
|
|
|
} |
267
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
sub M_CHG { |
270
|
6
|
|
|
6
|
0
|
20
|
my ($self, $mol, $line) = @_; |
271
|
6
|
|
|
|
|
49
|
my ($m, $type, $n, %data) = split " ", $line; |
272
|
6
|
|
|
|
|
41
|
while (my ($key, $val) = each %data) { |
273
|
10
|
|
|
|
|
86
|
$mol->atoms($key)->formal_charge($val); |
274
|
|
|
|
|
|
|
} |
275
|
|
|
|
|
|
|
} |
276
|
|
|
|
|
|
|
|
277
|
|
|
|
|
|
|
sub M_RAD { |
278
|
4
|
|
|
4
|
0
|
13
|
my ($self, $mol, $line) = @_; |
279
|
4
|
|
|
|
|
28
|
my ($m, $type, $n, %data) = split " ", $line; |
280
|
4
|
|
|
|
|
18
|
while (my ($key, $val) = each %data) { |
281
|
4
|
|
|
|
|
12
|
$mol->atoms($key)->formal_radical($val); |
282
|
|
|
|
|
|
|
} |
283
|
|
|
|
|
|
|
} |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
sub M_ISO { |
286
|
1
|
|
|
1
|
0
|
3
|
my ($self, $mol, $line) = @_; |
287
|
1
|
|
|
|
|
5
|
my ($m, $type, $n, %data) = split " ", $line; |
288
|
1
|
|
|
|
|
6
|
while (my ($key, $val) = each %data) { |
289
|
1
|
|
|
|
|
4
|
$mol->atoms($key)->mass_number($val); |
290
|
|
|
|
|
|
|
} |
291
|
|
|
|
|
|
|
} |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
sub M_ALS { |
294
|
1
|
|
|
1
|
0
|
5
|
my ($self, $mol, $line) = @_; |
295
|
|
|
|
|
|
|
|
296
|
1
|
50
|
|
|
|
6
|
return unless $line =~ /^M ALS (...)(...) (.)/; |
297
|
1
|
|
|
|
|
6
|
my ($n, $cnt, $exclude) = ($1, $2, $3); |
298
|
1
|
|
|
|
|
2
|
my @symbols; |
299
|
1
|
50
|
|
|
|
4
|
$exclude = $exclude =~ /^[Tt]$/ ? '!' : ''; |
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
# parse the symbols out of the atom list line |
302
|
1
|
|
|
|
|
5
|
for my $i (0 .. $cnt-1) { |
303
|
4
|
|
|
|
|
10
|
my $s = substr $_, 16+$i*4, 4; |
304
|
4
|
|
|
|
|
10
|
$s =~ s/\s+//g; |
305
|
4
|
|
|
|
|
9
|
push @symbols, $s; |
306
|
|
|
|
|
|
|
} |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
# save attr |
309
|
1
|
|
|
|
|
6
|
$mol->atoms($n)->attr('mdlmol/atom_list' => $_); |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
# create test sub |
312
|
1
|
50
|
|
|
|
28
|
if ($mol->isa('Chemistry::Pattern')) { |
313
|
1
|
|
|
|
|
4
|
my $sub_txt = <
|
314
|
|
|
|
|
|
|
sub{ |
315
|
|
|
|
|
|
|
my (\$patt, \$atom) = \@_; |
316
|
|
|
|
|
|
|
my \$sym = \$atom->symbol; |
317
|
|
|
|
|
|
|
$exclude (List::Util::first {\$sym eq \$_} \@symbols); |
318
|
|
|
|
|
|
|
}; |
319
|
|
|
|
|
|
|
SUB |
320
|
1
|
50
|
|
|
|
3
|
print "MDLMol atom($n) sub: <<<<$sub_txt>>>\n" if $DEBUG; |
321
|
1
|
50
|
|
|
|
3
|
print "MDLMol symbol list: (@symbols)\n" if $DEBUG; |
322
|
1
|
|
|
|
|
8
|
$mol->atoms($n)->attr('mdlmol/test_sub' => $sub_txt); |
323
|
1
|
|
|
|
|
20
|
$mol->atoms($n)->test_sub(eval $sub_txt); |
324
|
|
|
|
|
|
|
} |
325
|
|
|
|
|
|
|
} |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
sub name_is { |
328
|
0
|
|
|
0
|
1
|
0
|
my ($self, $fname) = @_; |
329
|
0
|
|
|
|
|
0
|
$fname =~ /\.mol$/i; |
330
|
|
|
|
|
|
|
} |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
sub file_is { |
334
|
17
|
|
|
17
|
1
|
40759
|
my ($self, $fname) = @_; |
335
|
17
|
|
|
|
|
82
|
$fname =~ /\.mol$/i; |
336
|
|
|
|
|
|
|
} |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
sub write_string { |
340
|
10
|
|
|
10
|
1
|
1700
|
my ($self, $mol, %opts) = @_; |
341
|
|
|
|
|
|
|
|
342
|
3
|
|
|
3
|
|
21
|
no warnings 'uninitialized'; |
|
3
|
|
|
|
|
12
|
|
|
3
|
|
|
|
|
1387
|
|
343
|
10
|
|
|
|
|
27
|
my $s = sprintf "%s\n perlmol \n\n", $mol->name; |
344
|
10
|
|
|
|
|
152
|
$s .= sprintf "%3i%3i%3i%3i%3i%3i%3i%3i%3i%3i%3i%6s\n", |
345
|
|
|
|
|
|
|
0+$mol->atoms, 0+$mol->bonds, |
346
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 999, "V2000"; # "counts" line |
347
|
|
|
|
|
|
|
|
348
|
10
|
|
|
|
|
136
|
my $i = 1; |
349
|
10
|
|
|
|
|
28
|
my %idx_map; |
350
|
|
|
|
|
|
|
my @charged_atoms; |
351
|
10
|
|
|
|
|
0
|
my @isotope_atoms; |
352
|
10
|
|
|
|
|
0
|
my @radical_atoms; |
353
|
10
|
|
|
|
|
20
|
for my $atom ($mol->atoms) { |
354
|
176
|
|
|
|
|
842
|
my ($x, $y, $z) = $atom->coords->array; |
355
|
|
|
|
|
|
|
|
356
|
176
|
|
|
|
|
1507
|
$s .= sprintf |
357
|
|
|
|
|
|
|
"%10.4f%10.4f%10.4f %-3s%2i%3i%3i%3i%3i%3i%3i%3i%3i%3i%3i%3i\n", |
358
|
|
|
|
|
|
|
$x, $y, $z, $atom->symbol, |
359
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0; |
360
|
176
|
100
|
|
|
|
1555
|
push @charged_atoms, $i if $atom->formal_charge; |
361
|
176
|
100
|
|
|
|
819
|
push @isotope_atoms, $i if $atom->mass_number; |
362
|
176
|
100
|
|
|
|
741
|
push @radical_atoms, $i if $atom->formal_radical; |
363
|
176
|
|
|
|
|
719
|
$idx_map{$atom->id} = $i++; |
364
|
|
|
|
|
|
|
} |
365
|
|
|
|
|
|
|
|
366
|
10
|
|
|
|
|
63
|
for my $bond ($mol->bonds) { |
367
|
184
|
|
|
|
|
1081
|
my ($a1, $a2) = map {$idx_map{$_->id}} $bond->atoms; |
|
368
|
|
|
|
|
1829
|
|
368
|
184
|
|
|
|
|
896
|
$s .= sprintf "%3i%3i%3i%3i%3i%3i%3i\n", |
369
|
|
|
|
|
|
|
$a1, $a2, $bond->order, |
370
|
|
|
|
|
|
|
0, 0, 0, 0; |
371
|
|
|
|
|
|
|
} |
372
|
|
|
|
|
|
|
|
373
|
10
|
|
|
|
|
84
|
while (@charged_atoms) { |
374
|
1
|
50
|
|
|
|
4
|
my $n = @charged_atoms > 8 ? 8 : @charged_atoms; |
375
|
1
|
|
|
|
|
4
|
$s .= "M CHG $n"; |
376
|
1
|
|
|
|
|
3
|
for my $key (splice @charged_atoms, 0, $n) { |
377
|
1
|
|
|
|
|
3
|
$s .= sprintf "%4d%4d", $key, $mol->atoms($key)->formal_charge; |
378
|
|
|
|
|
|
|
} |
379
|
1
|
|
|
|
|
15
|
$s .= "\n"; |
380
|
|
|
|
|
|
|
} |
381
|
10
|
|
|
|
|
21
|
while (@isotope_atoms) { |
382
|
1
|
50
|
|
|
|
72
|
my $n = @isotope_atoms > 8 ? 8 : @isotope_atoms; |
383
|
1
|
|
|
|
|
6
|
$s .= "M ISO $n"; |
384
|
1
|
|
|
|
|
4
|
for my $key (splice @isotope_atoms, 0, $n) { |
385
|
2
|
|
|
|
|
26
|
$s .= sprintf "%4d%4d", $key, $mol->atoms($key)->mass_number; |
386
|
|
|
|
|
|
|
} |
387
|
1
|
|
|
|
|
15
|
$s .= "\n"; |
388
|
|
|
|
|
|
|
} |
389
|
10
|
|
|
|
|
22
|
while (@radical_atoms) { |
390
|
1
|
50
|
|
|
|
3
|
my $n = @radical_atoms > 8 ? 8 : @radical_atoms; |
391
|
1
|
|
|
|
|
3
|
$s .= "M RAD $n"; |
392
|
1
|
|
|
|
|
3
|
for my $key (splice @radical_atoms, 0, $n) { |
393
|
1
|
|
|
|
|
3
|
$s .= sprintf "%4d%4d", $key, $mol->atoms($key)->formal_radical; |
394
|
|
|
|
|
|
|
} |
395
|
1
|
|
|
|
|
27
|
$s .= "\n"; |
396
|
|
|
|
|
|
|
} |
397
|
|
|
|
|
|
|
|
398
|
10
|
|
|
|
|
20
|
$s .= "M END\n"; |
399
|
10
|
|
|
|
|
102
|
$s; |
400
|
|
|
|
|
|
|
} |
401
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
1; |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
=head1 SOURCE CODE REPOSITORY |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
L |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
=head1 SEE ALSO |
411
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
L |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
The MDL file format specification. |
415
|
|
|
|
|
|
|
L, |
416
|
|
|
|
|
|
|
L, |
417
|
|
|
|
|
|
|
L, |
418
|
|
|
|
|
|
|
or Arthur Dalby et al., J. Chem. Inf. Comput. Sci, 1992, 32, 244-255. |
419
|
|
|
|
|
|
|
L. |
420
|
|
|
|
|
|
|
|
421
|
|
|
|
|
|
|
=head1 AUTHOR |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
Ivan Tubert-Brohman |
424
|
|
|
|
|
|
|
|
425
|
|
|
|
|
|
|
=head1 COPYRIGHT |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
Copyright (c) 2009 Ivan Tubert-Brohman. All rights reserved. This program is |
428
|
|
|
|
|
|
|
free software; you can redistribute it and/or modify it under the same terms as |
429
|
|
|
|
|
|
|
Perl itself. |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
=cut |
432
|
|
|
|
|
|
|
|