line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Chemistry::File::SDF; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
our $VERSION = '0.24'; # VERSION |
4
|
|
|
|
|
|
|
# $Id$ |
5
|
|
|
|
|
|
|
|
6
|
1
|
|
|
1
|
|
60400
|
use base "Chemistry::File"; |
|
1
|
|
|
|
|
11
|
|
|
1
|
|
|
|
|
508
|
|
7
|
1
|
|
|
1
|
|
15370
|
use Chemistry::Mol; |
|
1
|
|
|
|
|
29854
|
|
|
1
|
|
|
|
|
49
|
|
8
|
1
|
|
|
1
|
|
452
|
use Chemistry::File::MDLMol; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
7
|
|
9
|
1
|
|
|
1
|
|
27
|
use strict; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
31
|
|
10
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
725
|
|
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
=head1 NAME |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
Chemistry::File::SDF - MDL Structure Data File reader/writer |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
=head1 SYNOPSIS |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
use Chemistry::File::SDF; |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
# Simple interface (all at once) |
21
|
|
|
|
|
|
|
# read all the molecules in the file |
22
|
|
|
|
|
|
|
my @mols = Chemistry::Mol->read('myfile.sdf'); |
23
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
# assuming that the file includes a data item... |
25
|
|
|
|
|
|
|
print $mols[0]->attr("sdf/data")->{PKA}; |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
# write a bunch of molecules to an SDF file |
28
|
|
|
|
|
|
|
Chemistry::Mol->write('myfile.sdf', mols => \@mols); |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
# or write just one molecule |
31
|
|
|
|
|
|
|
$mol->write('myfile.sdf'); |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
# Low level interface (one at a time) |
35
|
|
|
|
|
|
|
# create reader |
36
|
|
|
|
|
|
|
my $reader = Chemistry::Mol->file('myfile.sdf'); |
37
|
|
|
|
|
|
|
$reader->open('<'); |
38
|
|
|
|
|
|
|
while (my $mol = $reader->read_mol($reader->fh)) { |
39
|
|
|
|
|
|
|
# do something with $mol |
40
|
|
|
|
|
|
|
} |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
=cut |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
Chemistry::Mol->register_format(sdf => __PACKAGE__); |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
=head1 DESCRIPTION |
47
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
MDL SDF (V2000) reader. |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
This module automatically registers the 'sdf' format with Chemistry::Mol. |
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
The parser returns a list of Chemistry::Mol objects. SDF data can be accessed |
53
|
|
|
|
|
|
|
by the $mol->attr method. Attribute names are stored as a hash ref at the |
54
|
|
|
|
|
|
|
"sdf/data" attribute, as shown in the synopsis. When a data item has a single |
55
|
|
|
|
|
|
|
line in the SDF file, the attribute is stored as a string; when there's more |
56
|
|
|
|
|
|
|
than one line, they are stored as an array reference. The rest of the |
57
|
|
|
|
|
|
|
information on the line that holds the field name is ignored. |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
This module is part of the PerlMol project, L. |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
=cut |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
sub slurp_mol { |
64
|
21
|
|
|
21
|
1
|
41
|
my ($self, $fh, %opts) = @_; |
65
|
21
|
100
|
|
|
|
49
|
return if $fh->eof; |
66
|
18
|
|
|
|
|
168
|
my $s; |
67
|
18
|
|
|
|
|
171
|
while (<$fh>) { |
68
|
1905
|
100
|
|
|
|
2608
|
last if /^\$\$\$\$/; |
69
|
1887
|
|
|
|
|
2953
|
$s .= $_; |
70
|
|
|
|
|
|
|
} |
71
|
18
|
|
|
|
|
435
|
$s =~ s/\r\n?/\n/g; # normalize EOL |
72
|
18
|
|
|
|
|
138
|
$s; |
73
|
|
|
|
|
|
|
} |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
sub skip_mol { |
76
|
8
|
|
|
8
|
1
|
1561
|
my ($self, $fh, %opts) = @_; |
77
|
8
|
100
|
|
|
|
20
|
return if $fh->eof; |
78
|
7
|
|
|
|
|
148
|
while (<$fh>) { |
79
|
771
|
100
|
|
|
|
1377
|
return 1 if /^\$\$\$\$/; |
80
|
|
|
|
|
|
|
} |
81
|
0
|
|
|
|
|
0
|
return 0; |
82
|
|
|
|
|
|
|
} |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
sub read_mol { |
85
|
21
|
|
|
21
|
1
|
1357
|
my ($self, $fh, %opts) = @_; |
86
|
21
|
100
|
|
|
|
51
|
my $s = $self->slurp_mol($fh, %opts) or return; |
87
|
18
|
|
|
|
|
128
|
my $mol = Chemistry::File::MDLMol->parse_string($s, %opts); |
88
|
18
|
|
|
|
|
788
|
$self->parse_data($mol, $s); |
89
|
18
|
|
|
|
|
260
|
$mol; |
90
|
|
|
|
|
|
|
} |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
sub parse_data { |
93
|
18
|
|
|
18
|
0
|
342
|
my ($self, $mol, $mol_string) = @_; |
94
|
18
|
|
|
|
|
228
|
my (@items) = split /\n>/, $mol_string; |
95
|
18
|
|
|
|
|
35
|
shift @items; # drop everything until first datum |
96
|
18
|
|
|
|
|
32
|
my %data_block; |
97
|
18
|
|
|
|
|
30
|
for my $item (@items) { |
98
|
347
|
|
|
|
|
717
|
my ($header, @data) = split /\n/, $item; |
99
|
347
|
|
|
|
|
980
|
my ($field_name) = $header =~ /<(.*?)>/g; |
100
|
347
|
50
|
|
|
|
559
|
warn "SDF: no field name\n", next unless $field_name; |
101
|
|
|
|
|
|
|
#$mol->attr("sdf/$field_name", @data == 1 ? $data[0] : \@data); |
102
|
347
|
50
|
|
|
|
781
|
$data_block{$field_name} = @data == 1 ? $data[0] : \@data; |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
} |
105
|
18
|
|
|
|
|
53
|
$mol->attr("sdf/data", \%data_block); |
106
|
|
|
|
|
|
|
} |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
sub write_string { |
109
|
2
|
|
|
2
|
1
|
2608
|
my ($self, $mol_ref, %opts) = @_; |
110
|
2
|
|
|
|
|
7
|
my @mols; |
111
|
2
|
|
|
|
|
4
|
my $ret = ''; |
112
|
|
|
|
|
|
|
|
113
|
2
|
50
|
|
|
|
6
|
if ($opts{mols}) { |
114
|
2
|
|
|
|
|
3
|
@mols = @{$opts{mols}}; |
|
2
|
|
|
|
|
6
|
|
115
|
|
|
|
|
|
|
} else { |
116
|
0
|
|
|
|
|
0
|
@mols = $mol_ref; |
117
|
|
|
|
|
|
|
} |
118
|
|
|
|
|
|
|
|
119
|
2
|
|
|
|
|
5
|
for my $mol (@mols) { |
120
|
9
|
|
|
|
|
31
|
$ret .= $mol->print(format => 'mdl'); |
121
|
9
|
|
|
|
|
29
|
$ret .= format_data($mol->attr('sdf/data')) . '$$$$'."\n"; |
122
|
|
|
|
|
|
|
} |
123
|
2
|
|
|
|
|
37
|
$ret; |
124
|
|
|
|
|
|
|
} |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
sub format_data { |
127
|
9
|
|
|
9
|
0
|
71
|
my ($data) = @_; |
128
|
9
|
|
|
|
|
11
|
my $ret = ''; |
129
|
9
|
50
|
|
|
|
18
|
return $ret unless $data; |
130
|
9
|
|
|
|
|
73
|
for my $field_name (sort keys %$data) { |
131
|
163
|
|
|
|
|
258
|
$ret .= "> <$field_name>\n"; |
132
|
163
|
|
|
|
|
186
|
my $value = $data->{$field_name}; |
133
|
163
|
50
|
|
|
|
199
|
if (ref $value) { |
134
|
0
|
|
|
|
|
0
|
$ret .= join "\n", @$value; |
135
|
|
|
|
|
|
|
} else { |
136
|
163
|
|
|
|
|
185
|
$ret .= "$value\n"; |
137
|
|
|
|
|
|
|
} |
138
|
163
|
|
|
|
|
197
|
$ret .= "\n"; |
139
|
|
|
|
|
|
|
} |
140
|
9
|
|
|
|
|
75
|
$ret; |
141
|
|
|
|
|
|
|
} |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
sub file_is { |
144
|
3
|
|
|
3
|
1
|
41
|
my ($self, $fname) = @_; |
145
|
|
|
|
|
|
|
|
146
|
3
|
50
|
|
|
|
21
|
return 1 if $fname =~ /\.sdf?$/i; |
147
|
0
|
|
|
|
|
|
return 0; |
148
|
|
|
|
|
|
|
} |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
sub name_is { |
151
|
0
|
|
|
0
|
1
|
|
my ($self, $fname) = @_; |
152
|
0
|
|
|
|
|
|
$fname =~ /\.sdf?$/i; |
153
|
|
|
|
|
|
|
} |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
sub string_is { |
156
|
0
|
|
|
0
|
1
|
|
my ($self, $s) = @_; |
157
|
0
|
0
|
|
|
|
|
/\$\$\$\$/ ? 1 : 0; |
158
|
|
|
|
|
|
|
} |
159
|
|
|
|
|
|
|
1; |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
=head1 CAVEATS |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
Note that by storing the SDF data as a hash, there can be only one field with |
164
|
|
|
|
|
|
|
a given name. The SDF format description is not entirely clear in this regard. |
165
|
|
|
|
|
|
|
Also note that SDF data field names are considered to be case-sensitive. |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
=head1 SOURCE CODE REPOSITORY |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
L |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
=head1 SEE ALSO |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
L |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
The MDL file format specification. |
176
|
|
|
|
|
|
|
L or |
177
|
|
|
|
|
|
|
Arthur Dalby et al., J. Chem. Inf. Comput. Sci, 1992, 32, 244-255. |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
=head1 AUTHOR |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
Ivan Tubert-Brohman |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
=head1 COPYRIGHT |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
Copyright (c) 2009 Ivan Tubert-Brohman. All rights reserved. This program is |
186
|
|
|
|
|
|
|
free software; you can redistribute it and/or modify it under the same terms as |
187
|
|
|
|
|
|
|
Perl itself. |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
=cut |
190
|
|
|
|
|
|
|
|