line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Chemistry::File::MDLMol; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
our $VERSION = '0.22'; # VERSION |
4
|
|
|
|
|
|
|
# $Id$ |
5
|
|
|
|
|
|
|
|
6
|
3
|
|
|
3
|
|
156708
|
use base "Chemistry::File"; |
|
3
|
|
|
|
|
18
|
|
|
3
|
|
|
|
|
1503
|
|
7
|
3
|
|
|
3
|
|
39441
|
use Chemistry::Mol; |
|
3
|
|
|
|
|
73271
|
|
|
3
|
|
|
|
|
231
|
|
8
|
3
|
|
|
3
|
|
35
|
use Carp; |
|
3
|
|
|
|
|
9
|
|
|
3
|
|
|
|
|
197
|
|
9
|
3
|
|
|
3
|
|
19
|
use List::Util; |
|
3
|
|
|
|
|
10
|
|
|
3
|
|
|
|
|
168
|
|
10
|
3
|
|
|
3
|
|
20
|
use strict; |
|
3
|
|
|
|
|
15
|
|
|
3
|
|
|
|
|
90
|
|
11
|
3
|
|
|
3
|
|
18
|
use warnings; |
|
3
|
|
|
|
|
7
|
|
|
3
|
|
|
|
|
1514
|
|
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
our $DEBUG = 0; |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
=head1 NAME |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
Chemistry::File::MDLMol - MDL molfile reader/writer |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
=head1 SYNOPSIS |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
use Chemistry::File::MDLMol; |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
# read a molecule |
24
|
|
|
|
|
|
|
my $mol = Chemistry::Mol->read('myfile.mol'); |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# write a molecule |
27
|
|
|
|
|
|
|
$mol->write("myfile.mol"); |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
# use a molecule as a query for substructure matching |
30
|
|
|
|
|
|
|
use Chemistry::Pattern; |
31
|
|
|
|
|
|
|
use Chemistry::Ring; |
32
|
|
|
|
|
|
|
Chemistry::Ring::aromatize_mol($mol); |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
my $patt = Chemistry::Pattern->read('query.mol'); |
35
|
|
|
|
|
|
|
if ($patt->match($mol)) { |
36
|
|
|
|
|
|
|
print "it matches!\n"; |
37
|
|
|
|
|
|
|
} |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
=cut |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
Chemistry::Mol->register_format(mdl => __PACKAGE__); |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
=head1 DESCRIPTION |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
MDL Molfile (V2000) reader/writer. |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
This module automatically registers the 'mdl' format with Chemistry::Mol. |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
The first three lines of the molfile are stored as $mol->name, |
50
|
|
|
|
|
|
|
$mol->attr("mdlmol/line2"), and $mol->attr("mdlmol/comment"). |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
This version only reads and writes some of the information available in a |
53
|
|
|
|
|
|
|
molfile: it reads coordinates, atom and bond types, charges, isotopes, |
54
|
|
|
|
|
|
|
radicals, and atom lists. It does not read other things such as |
55
|
|
|
|
|
|
|
stereochemistry, 3d properties, etc. |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
This module is part of the PerlMol project, L. |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=head2 Query properties |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
The MDL molfile format supports query properties such as atom lists, and |
62
|
|
|
|
|
|
|
special bond types such as "single or double", "single or aromatic", "double or |
63
|
|
|
|
|
|
|
aromatic", "ring bond", or "any". These properties are supported by this module |
64
|
|
|
|
|
|
|
in conjunction with L. However, support for query properies |
65
|
|
|
|
|
|
|
is currently read-only, and the other properties listed in the specification |
66
|
|
|
|
|
|
|
are not supported yet. |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
So that atom and bond objects can use these special query options, the |
69
|
|
|
|
|
|
|
conditions are represented as Perl subroutines. The generated code can be |
70
|
|
|
|
|
|
|
read from the 'mdlmol/test_sub' attribute: |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
$atom->attr('mdlmol/test_sub'); |
73
|
|
|
|
|
|
|
$bond->attr('mdlmol/test_sub'); |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
This may be useful for debugging, such as when an atom doesn't seem to match as |
76
|
|
|
|
|
|
|
expected. |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
=head2 Aromatic Queries |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
To be able to search for aromatic substructures are represented by Kekule |
81
|
|
|
|
|
|
|
structures, molfiles that are read as patterns (with |
82
|
|
|
|
|
|
|
Cread) are aromatized automatically by using the |
83
|
|
|
|
|
|
|
L module. The default bond test from Chemistry::Pattern::Bond |
84
|
|
|
|
|
|
|
is overriden by one that checks the aromaticity in addition to the bond order. |
85
|
|
|
|
|
|
|
The test is, |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
$patt->aromatic ? $bond->aromatic |
88
|
|
|
|
|
|
|
: (!$bond->aromatic && $patt->order == $bond->order); |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
That is, aromatic pattern bonds match aromatic bonds, and aliphatic pattern |
91
|
|
|
|
|
|
|
bonds match aliphatic bonds with the same bond order. |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
=cut |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
# some constants, based on tables from the file format specification |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
my %OLD_CHARGE_MAP = ( |
100
|
|
|
|
|
|
|
1 => 3, |
101
|
|
|
|
|
|
|
2 => 2, |
102
|
|
|
|
|
|
|
3 => 1, |
103
|
|
|
|
|
|
|
4 => 0, |
104
|
|
|
|
|
|
|
5 => -1, |
105
|
|
|
|
|
|
|
6 => -2, |
106
|
|
|
|
|
|
|
7 => -3, |
107
|
|
|
|
|
|
|
); |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
my %BOND_TYPE_EXPR = ( |
110
|
|
|
|
|
|
|
4 => '($bond->aromatic)', |
111
|
|
|
|
|
|
|
5 => '($bond->order == 1 or $bond->order == 2)', |
112
|
|
|
|
|
|
|
6 => '($bond->order == 1 or $bond->aromatic)', |
113
|
|
|
|
|
|
|
7 => '($bond->order == 2 or $bond->aromatic)', |
114
|
|
|
|
|
|
|
8 => '(1)', # any bond |
115
|
|
|
|
|
|
|
); |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
my %BOND_TOPOLOGY_EXPR = ( |
118
|
|
|
|
|
|
|
1 => '@{$bond->attr("ring/rings")||[]}', |
119
|
|
|
|
|
|
|
2 => '! @{$bond->attr("ring/rings")||[]}', |
120
|
|
|
|
|
|
|
); |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
sub read_mol { |
124
|
66
|
|
|
66
|
1
|
9221
|
my ($self, $fh, %opts) = @_; |
125
|
66
|
100
|
|
|
|
322
|
return if $fh->eof; |
126
|
|
|
|
|
|
|
|
127
|
33
|
|
|
|
|
786
|
%opts = ( slurp => 1, %opts ); |
128
|
33
|
|
100
|
|
|
140
|
my $mol_class = $opts{mol_class} || "Chemistry::Mol"; |
129
|
33
|
|
33
|
|
|
273
|
my $atom_class = $opts{atom_class} || $mol_class->atom_class; |
130
|
33
|
|
33
|
|
|
290
|
my $bond_class = $opts{bond_class} || $mol_class->bond_class; |
131
|
33
|
|
|
|
|
179
|
local $_; |
132
|
|
|
|
|
|
|
|
133
|
33
|
|
|
|
|
168
|
my $mol = $mol_class->new(); |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
# header |
136
|
33
|
|
|
|
|
1098
|
my $name = <$fh>; chomp $name; |
|
33
|
|
|
|
|
97
|
|
137
|
33
|
|
|
|
|
98
|
my $line2 = <$fh>; chomp $line2; |
|
33
|
|
|
|
|
68
|
|
138
|
33
|
|
|
|
|
69
|
my $comment = <$fh>; chomp $comment; |
|
33
|
|
|
|
|
59
|
|
139
|
33
|
|
|
|
|
151
|
$mol->name($name); |
140
|
33
|
|
|
|
|
464
|
$mol->attr("mdlmol/line2", $line2); |
141
|
33
|
|
|
|
|
569
|
$mol->attr("mdlmol/comment", $comment); |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
# counts line |
144
|
33
|
50
|
|
|
|
456
|
defined ($_ = <$fh>) or croak "unexpected end of file"; |
145
|
33
|
|
|
|
|
210
|
my ($na, $nb) = unpack("A3A3", $_); |
146
|
|
|
|
|
|
|
|
147
|
33
|
|
|
|
|
94
|
my %old_charges; |
148
|
|
|
|
|
|
|
my %old_radicals; |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
# atom block |
151
|
33
|
|
|
|
|
155
|
for my $i (1 .. $na) { # for each atom... |
152
|
439
|
50
|
|
|
|
39793
|
defined (my $line = <$fh>) or croak "unexpected end of file"; |
153
|
439
|
|
|
|
|
1935
|
my ($x, $y, $z, $symbol, $mass, $charge) |
154
|
|
|
|
|
|
|
= unpack("A10A10A10xA3A2A3", $line); |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
$old_charges{$i} = $OLD_CHARGE_MAP{$charge} |
157
|
439
|
50
|
|
|
|
1093
|
if $OLD_CHARGE_MAP{$charge}; |
158
|
439
|
50
|
33
|
|
|
1699
|
$old_radicals{$i} = 2 |
159
|
|
|
|
|
|
|
if $charge && $charge == 4; |
160
|
439
|
|
|
|
|
585
|
my $mass_number; |
161
|
439
|
100
|
66
|
|
|
1283
|
if( int $mass && eval { require Chemistry::Isotope } ) { |
|
1
|
50
|
|
|
|
14
|
|
162
|
1
|
|
|
|
|
6
|
my $abundance = |
163
|
|
|
|
|
|
|
Chemistry::Isotope::isotope_abundance($symbol); |
164
|
1
|
|
|
|
|
22
|
($mass_number) = sort { $abundance->{$b} cmp |
165
|
1
|
|
|
|
|
7
|
$abundance->{$a} } |
166
|
|
|
|
|
|
|
sort keys %$abundance; |
167
|
1
|
|
|
|
|
5
|
$mass_number += $mass; |
168
|
|
|
|
|
|
|
} elsif( int $mass ) { |
169
|
0
|
|
|
|
|
0
|
warn "no Chemistry::Isotope, cannot read mass number " . |
170
|
|
|
|
|
|
|
"from atom block\n"; |
171
|
|
|
|
|
|
|
} |
172
|
|
|
|
|
|
|
$mol->new_atom( |
173
|
439
|
|
|
|
|
1809
|
symbol => $symbol, |
174
|
|
|
|
|
|
|
coords => [$x*1, $y*1, $z*1], |
175
|
|
|
|
|
|
|
mass_number => $mass_number, |
176
|
|
|
|
|
|
|
); |
177
|
|
|
|
|
|
|
} |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
# bond block |
180
|
33
|
|
|
|
|
3095
|
for my $i (1 .. $nb) { # for each bond... |
181
|
3
|
|
|
3
|
|
26
|
no warnings 'numeric'; |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
2038
|
|
182
|
451
|
50
|
|
|
|
1550
|
defined ($_ = <$fh>) or croak "unexpected end of file"; |
183
|
|
|
|
|
|
|
my ($a1, $a2, $type, $stereo, $topology, $rxn) |
184
|
451
|
|
|
|
|
1832
|
= map {$_*1} unpack("A3A3A3A3x3A3A3", $_); |
|
2706
|
|
|
|
|
5233
|
|
185
|
451
|
100
|
|
|
|
2127
|
my $order = $type =~ /^[123]$/ ? $type : 1; |
186
|
451
|
|
|
|
|
1262
|
my $bond = $mol->new_bond( |
187
|
|
|
|
|
|
|
type => $type, |
188
|
|
|
|
|
|
|
atoms => [$mol->atoms($a1,$a2)], |
189
|
|
|
|
|
|
|
order => $order, |
190
|
|
|
|
|
|
|
); |
191
|
451
|
100
|
|
|
|
61260
|
if ($mol->isa('Chemistry::Pattern')) { |
192
|
9
|
|
|
|
|
23
|
$self->bond_expr($bond, $i, $type, $topology); |
193
|
|
|
|
|
|
|
} |
194
|
|
|
|
|
|
|
} |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
# properties block |
197
|
33
|
|
|
|
|
168
|
while (<$fh>) { |
198
|
46
|
100
|
66
|
|
|
597
|
if (/^M END/ or /^\$\$\$\$/) { |
|
|
100
|
|
|
|
|
|
199
|
33
|
|
|
|
|
91
|
last; |
200
|
|
|
|
|
|
|
} elsif (/^M (...)/) { # generic extended property handler |
201
|
12
|
100
|
100
|
|
|
104
|
if ($1 eq 'CHG' or $1 eq 'RAD'){ # XXX |
202
|
|
|
|
|
|
|
# clear old-style info |
203
|
10
|
|
|
|
|
34
|
%old_radicals = (); |
204
|
10
|
|
|
|
|
24
|
%old_charges = (); |
205
|
|
|
|
|
|
|
} |
206
|
|
|
|
|
|
|
|
207
|
12
|
|
|
|
|
40
|
my $method = "M_$1"; |
208
|
12
|
50
|
|
|
|
139
|
$self->$method($mol, $_) if $self->can($method); |
209
|
|
|
|
|
|
|
} |
210
|
|
|
|
|
|
|
} |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
# add old-style charges and radicals if they still apply |
213
|
33
|
|
|
|
|
149
|
while (my ($key, $val) = each %old_charges) { |
214
|
0
|
|
|
|
|
0
|
$mol->atoms($key)->formal_charge($val); |
215
|
|
|
|
|
|
|
} |
216
|
33
|
|
|
|
|
109
|
while (my ($key, $val) = each %old_radicals) { |
217
|
0
|
|
|
|
|
0
|
$mol->atoms($key)->formal_radical($val); |
218
|
|
|
|
|
|
|
} |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
# make sure we get to the end of the file |
221
|
33
|
50
|
|
|
|
98
|
if ($opts{slurp}) { |
222
|
33
|
|
|
|
|
819
|
1 while <$fh>; |
223
|
|
|
|
|
|
|
} |
224
|
|
|
|
|
|
|
|
225
|
33
|
|
|
|
|
189
|
$mol->add_implicit_hydrogens; |
226
|
|
|
|
|
|
|
|
227
|
33
|
100
|
|
|
|
26873
|
if ($mol->isa('Chemistry::Pattern')) { |
228
|
2
|
|
|
|
|
771
|
require Chemistry::Ring; |
229
|
2
|
|
|
|
|
6929
|
Chemistry::Ring::aromatize_mol($mol); |
230
|
|
|
|
|
|
|
} |
231
|
|
|
|
|
|
|
|
232
|
33
|
|
|
|
|
5585
|
return $mol; |
233
|
|
|
|
|
|
|
} |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
sub bond_expr { |
236
|
9
|
|
|
9
|
0
|
21
|
my ($self, $bond, $i, $type, $topology) = @_; |
237
|
9
|
|
|
|
|
17
|
my @bond_exprs; |
238
|
9
|
|
|
|
|
15
|
my $s = $BOND_TOPOLOGY_EXPR{$topology}; |
239
|
9
|
100
|
|
|
|
21
|
push @bond_exprs, $s if $s; |
240
|
9
|
|
|
|
|
16
|
$s = $BOND_TYPE_EXPR{$type}; |
241
|
9
|
100
|
|
|
|
17
|
push @bond_exprs, $s if $s; |
242
|
9
|
100
|
|
|
|
20
|
if (@bond_exprs) { |
243
|
1
|
|
|
|
|
5
|
my $expr = join " and ", @bond_exprs; |
244
|
1
|
|
|
|
|
4
|
my $sub_txt = <
|
245
|
|
|
|
|
|
|
sub { |
246
|
|
|
|
|
|
|
no warnings; |
247
|
|
|
|
|
|
|
my (\$patt, \$bond) = \@_; |
248
|
|
|
|
|
|
|
$expr; |
249
|
|
|
|
|
|
|
}; |
250
|
|
|
|
|
|
|
SUB |
251
|
1
|
50
|
|
|
|
5
|
print "MDLMol bond($i) sub: <<<<$sub_txt>>>\n" if $DEBUG; |
252
|
1
|
|
|
|
|
4
|
$bond->attr('mdlmol/test_sub' => $sub_txt); |
253
|
1
|
|
|
1
|
|
120
|
$bond->test_sub(eval $sub_txt); |
|
1
|
|
|
|
|
12
|
|
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
116
|
|
254
|
|
|
|
|
|
|
} else { # default bond sub |
255
|
8
|
|
|
|
|
22
|
$bond->test_sub(\&default_bond_test); |
256
|
|
|
|
|
|
|
} |
257
|
|
|
|
|
|
|
} |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
sub default_bond_test { |
260
|
3
|
|
|
3
|
|
27
|
no warnings; |
|
3
|
|
|
|
|
6
|
|
|
3
|
|
|
|
|
2393
|
|
261
|
138
|
|
|
138
|
0
|
53765
|
my ($patt, $bond) = @_; |
262
|
138
|
100
|
100
|
|
|
296
|
$patt->aromatic ? $bond->aromatic |
263
|
|
|
|
|
|
|
: (!$bond->aromatic && $patt->order == $bond->order); |
264
|
|
|
|
|
|
|
} |
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
sub M_CHG { |
268
|
6
|
|
|
6
|
0
|
25
|
my ($self, $mol, $line) = @_; |
269
|
6
|
|
|
|
|
57
|
my ($m, $type, $n, %data) = split " ", $line; |
270
|
6
|
|
|
|
|
42
|
while (my ($key, $val) = each %data) { |
271
|
10
|
|
|
|
|
110
|
$mol->atoms($key)->formal_charge($val); |
272
|
|
|
|
|
|
|
} |
273
|
|
|
|
|
|
|
} |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
sub M_RAD { |
276
|
4
|
|
|
4
|
0
|
17
|
my ($self, $mol, $line) = @_; |
277
|
4
|
|
|
|
|
22
|
my ($m, $type, $n, %data) = split " ", $line; |
278
|
4
|
|
|
|
|
23
|
while (my ($key, $val) = each %data) { |
279
|
4
|
|
|
|
|
15
|
$mol->atoms($key)->formal_radical($val); |
280
|
|
|
|
|
|
|
} |
281
|
|
|
|
|
|
|
} |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
sub M_ISO { |
284
|
1
|
|
|
1
|
0
|
4
|
my ($self, $mol, $line) = @_; |
285
|
1
|
|
|
|
|
7
|
my ($m, $type, $n, %data) = split " ", $line; |
286
|
1
|
|
|
|
|
7
|
while (my ($key, $val) = each %data) { |
287
|
1
|
|
|
|
|
8
|
$mol->atoms($key)->mass_number($val); |
288
|
|
|
|
|
|
|
} |
289
|
|
|
|
|
|
|
} |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
sub M_ALS { |
292
|
1
|
|
|
1
|
0
|
4
|
my ($self, $mol, $line) = @_; |
293
|
|
|
|
|
|
|
|
294
|
1
|
50
|
|
|
|
7
|
return unless $line =~ /^M ALS (...)(...) (.)/; |
295
|
1
|
|
|
|
|
6
|
my ($n, $cnt, $exclude) = ($1, $2, $3); |
296
|
1
|
|
|
|
|
2
|
my @symbols; |
297
|
1
|
50
|
|
|
|
5
|
$exclude = $exclude =~ /^[Tt]$/ ? '!' : ''; |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
# parse the symbols out of the atom list line |
300
|
1
|
|
|
|
|
5
|
for my $i (0 .. $cnt-1) { |
301
|
4
|
|
|
|
|
11
|
my $s = substr $_, 16+$i*4, 4; |
302
|
4
|
|
|
|
|
14
|
$s =~ s/\s+//g; |
303
|
4
|
|
|
|
|
10
|
push @symbols, $s; |
304
|
|
|
|
|
|
|
} |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
# save attr |
307
|
1
|
|
|
|
|
4
|
$mol->atoms($n)->attr('mdlmol/atom_list' => $_); |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
# create test sub |
310
|
1
|
50
|
|
|
|
32
|
if ($mol->isa('Chemistry::Pattern')) { |
311
|
1
|
|
|
|
|
5
|
my $sub_txt = <
|
312
|
|
|
|
|
|
|
sub{ |
313
|
|
|
|
|
|
|
my (\$patt, \$atom) = \@_; |
314
|
|
|
|
|
|
|
my \$sym = \$atom->symbol; |
315
|
|
|
|
|
|
|
$exclude (List::Util::first {\$sym eq \$_} \@symbols); |
316
|
|
|
|
|
|
|
}; |
317
|
|
|
|
|
|
|
SUB |
318
|
1
|
50
|
|
|
|
5
|
print "MDLMol atom($n) sub: <<<<$sub_txt>>>\n" if $DEBUG; |
319
|
1
|
50
|
|
|
|
5
|
print "MDLMol symbol list: (@symbols)\n" if $DEBUG; |
320
|
1
|
|
|
|
|
4
|
$mol->atoms($n)->attr('mdlmol/test_sub' => $sub_txt); |
321
|
1
|
|
|
|
|
26
|
$mol->atoms($n)->test_sub(eval $sub_txt); |
322
|
|
|
|
|
|
|
} |
323
|
|
|
|
|
|
|
} |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
sub name_is { |
326
|
0
|
|
|
0
|
1
|
0
|
my ($self, $fname) = @_; |
327
|
0
|
|
|
|
|
0
|
$fname =~ /\.mol$/i; |
328
|
|
|
|
|
|
|
} |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
sub file_is { |
332
|
17
|
|
|
17
|
1
|
55987
|
my ($self, $fname) = @_; |
333
|
17
|
|
|
|
|
123
|
$fname =~ /\.mol$/i; |
334
|
|
|
|
|
|
|
} |
335
|
|
|
|
|
|
|
|
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
sub write_string { |
338
|
10
|
|
|
10
|
1
|
2362
|
my ($self, $mol, %opts) = @_; |
339
|
|
|
|
|
|
|
|
340
|
3
|
|
|
3
|
|
112
|
no warnings 'uninitialized'; |
|
3
|
|
|
|
|
11
|
|
|
3
|
|
|
|
|
1581
|
|
341
|
10
|
|
|
|
|
41
|
my $s = sprintf "%s\n perlmol \n\n", $mol->name; |
342
|
10
|
|
|
|
|
118
|
$s .= sprintf "%3i%3i%3i%3i%3i%3i%3i%3i%3i%3i%3i%6s\n", |
343
|
|
|
|
|
|
|
0+$mol->atoms, 0+$mol->bonds, |
344
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 999, "V2000"; # "counts" line |
345
|
|
|
|
|
|
|
|
346
|
10
|
|
|
|
|
186
|
my $i = 1; |
347
|
10
|
|
|
|
|
39
|
my %idx_map; |
348
|
|
|
|
|
|
|
my @charged_atoms; |
349
|
10
|
|
|
|
|
0
|
my @isotope_atoms; |
350
|
10
|
|
|
|
|
0
|
my @radical_atoms; |
351
|
10
|
|
|
|
|
105
|
for my $atom ($mol->atoms) { |
352
|
176
|
|
|
|
|
1080
|
my ($x, $y, $z) = $atom->coords->array; |
353
|
|
|
|
|
|
|
|
354
|
176
|
|
|
|
|
2016
|
$s .= sprintf |
355
|
|
|
|
|
|
|
"%10.4f%10.4f%10.4f %-3s%2i%3i%3i%3i%3i%3i%3i%3i%3i%3i%3i%3i\n", |
356
|
|
|
|
|
|
|
$x, $y, $z, $atom->symbol, |
357
|
|
|
|
|
|
|
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0; |
358
|
176
|
100
|
|
|
|
1975
|
push @charged_atoms, $i if $atom->formal_charge; |
359
|
176
|
100
|
|
|
|
980
|
push @isotope_atoms, $i if $atom->mass_number; |
360
|
176
|
100
|
|
|
|
891
|
push @radical_atoms, $i if $atom->formal_radical; |
361
|
176
|
|
|
|
|
985
|
$idx_map{$atom->id} = $i++; |
362
|
|
|
|
|
|
|
} |
363
|
|
|
|
|
|
|
|
364
|
10
|
|
|
|
|
78
|
for my $bond ($mol->bonds) { |
365
|
184
|
|
|
|
|
1407
|
my ($a1, $a2) = map {$idx_map{$_->id}} $bond->atoms; |
|
368
|
|
|
|
|
2671
|
|
366
|
184
|
|
|
|
|
1860
|
$s .= sprintf "%3i%3i%3i%3i%3i%3i%3i\n", |
367
|
|
|
|
|
|
|
$a1, $a2, $bond->order, |
368
|
|
|
|
|
|
|
0, 0, 0, 0; |
369
|
|
|
|
|
|
|
} |
370
|
|
|
|
|
|
|
|
371
|
10
|
|
|
|
|
100
|
while (@charged_atoms) { |
372
|
1
|
50
|
|
|
|
5
|
my $n = @charged_atoms > 8 ? 8 : @charged_atoms; |
373
|
1
|
|
|
|
|
3
|
$s .= "M CHG $n"; |
374
|
1
|
|
|
|
|
5
|
for my $key (splice @charged_atoms, 0, $n) { |
375
|
1
|
|
|
|
|
5
|
$s .= sprintf "%4d%4d", $key, $mol->atoms($key)->formal_charge; |
376
|
|
|
|
|
|
|
} |
377
|
1
|
|
|
|
|
18
|
$s .= "\n"; |
378
|
|
|
|
|
|
|
} |
379
|
10
|
|
|
|
|
50
|
while (@isotope_atoms) { |
380
|
1
|
50
|
|
|
|
4
|
my $n = @isotope_atoms > 8 ? 8 : @isotope_atoms; |
381
|
1
|
|
|
|
|
5
|
$s .= "M ISO $n"; |
382
|
1
|
|
|
|
|
5
|
for my $key (splice @isotope_atoms, 0, $n) { |
383
|
2
|
|
|
|
|
24
|
$s .= sprintf "%4d%4d", $key, $mol->atoms($key)->mass_number; |
384
|
|
|
|
|
|
|
} |
385
|
1
|
|
|
|
|
17
|
$s .= "\n"; |
386
|
|
|
|
|
|
|
} |
387
|
10
|
|
|
|
|
33
|
while (@radical_atoms) { |
388
|
1
|
50
|
|
|
|
4
|
my $n = @radical_atoms > 8 ? 8 : @radical_atoms; |
389
|
1
|
|
|
|
|
6
|
$s .= "M RAD $n"; |
390
|
1
|
|
|
|
|
3
|
for my $key (splice @radical_atoms, 0, $n) { |
391
|
1
|
|
|
|
|
4
|
$s .= sprintf "%4d%4d", $key, $mol->atoms($key)->formal_radical; |
392
|
|
|
|
|
|
|
} |
393
|
1
|
|
|
|
|
19
|
$s .= "\n"; |
394
|
|
|
|
|
|
|
} |
395
|
|
|
|
|
|
|
|
396
|
10
|
|
|
|
|
20
|
$s .= "M END\n"; |
397
|
10
|
|
|
|
|
134
|
$s; |
398
|
|
|
|
|
|
|
} |
399
|
|
|
|
|
|
|
|
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
1; |
403
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
=head1 SOURCE CODE REPOSITORY |
405
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
L |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
=head1 SEE ALSO |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
L |
411
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
The MDL file format specification. |
413
|
|
|
|
|
|
|
L or |
414
|
|
|
|
|
|
|
Arthur Dalby et al., J. Chem. Inf. Comput. Sci, 1992, 32, 244-255. |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
=head1 AUTHOR |
417
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
Ivan Tubert-Brohman |
419
|
|
|
|
|
|
|
|
420
|
|
|
|
|
|
|
=head1 COPYRIGHT |
421
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
Copyright (c) 2009 Ivan Tubert-Brohman. All rights reserved. This program is |
423
|
|
|
|
|
|
|
free software; you can redistribute it and/or modify it under the same terms as |
424
|
|
|
|
|
|
|
Perl itself. |
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
=cut |
427
|
|
|
|
|
|
|
|