| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Chemistry::File; |
|
2
|
|
|
|
|
|
|
$VERSION = '0.37'; |
|
3
|
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
=head1 NAME |
|
5
|
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
Chemistry::File - Molecule file I/O base class |
|
7
|
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
9
|
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
# As a convenient interface for several mol readers: |
|
11
|
|
|
|
|
|
|
use Chemistry::File qw(PDB MDLMol); # load PDB and MDL modules |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
# or try to use every file I/O module installed in the system: |
|
14
|
|
|
|
|
|
|
use Chemistry::File ':auto'; |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
my $mol1 = Chemistry::Mol->read("file.pdb"); |
|
17
|
|
|
|
|
|
|
my $mol2 = Chemistry::Mol->read("file.mol"); |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
# as a base for a mol reader: |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
package Chemistry::File::Myfile; |
|
23
|
|
|
|
|
|
|
use base qw(Chemistry::File); |
|
24
|
|
|
|
|
|
|
Chemistry::Mol->register_type("myfile", __PACKAGE__); |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# override the read_mol method |
|
27
|
|
|
|
|
|
|
sub read_mol { |
|
28
|
|
|
|
|
|
|
my ($self, $fh, %opts) = @_; |
|
29
|
|
|
|
|
|
|
my $mol_class = $opts{mol_class} || "Chemistry::Mol"; |
|
30
|
|
|
|
|
|
|
my $mol = $mol_class->new; |
|
31
|
|
|
|
|
|
|
# ... do some stuff with $fh and $mol ... |
|
32
|
|
|
|
|
|
|
return $mol; |
|
33
|
|
|
|
|
|
|
} |
|
34
|
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# override the write_mol method |
|
36
|
|
|
|
|
|
|
sub write_mol { |
|
37
|
|
|
|
|
|
|
my ($self, $fh, $mol, %opts) = @_; |
|
38
|
|
|
|
|
|
|
print $fh $mol->name, "\n"; |
|
39
|
|
|
|
|
|
|
# ... do some stuff with $fh and $mol ... |
|
40
|
|
|
|
|
|
|
} |
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
The main use of this module is as a base class for other molecule file I/O |
|
45
|
|
|
|
|
|
|
modules (for example, Chemistry::File::PDB). Such modules should override and |
|
46
|
|
|
|
|
|
|
extend the Chemistry::File methods as needed. You only need to care about the |
|
47
|
|
|
|
|
|
|
methods here if you are writing a file I/O module or if you want a finer |
|
48
|
|
|
|
|
|
|
degree of control than what is offered by the simple read and write methods |
|
49
|
|
|
|
|
|
|
in the Chemistry::Mol class. |
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
From the user's point of view, this module can also be used as shorthand |
|
52
|
|
|
|
|
|
|
for using several Chemistry::File modules at the same time. |
|
53
|
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
use Chemistry::File qw(PDB MDLMol); |
|
55
|
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
is exactly equivalent to |
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
use Chemistry::File::PDB; |
|
59
|
|
|
|
|
|
|
use Chemistry::File::MDLMol; |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
If you use the :auto keyword, Chemistry::File will autodetect and load |
|
62
|
|
|
|
|
|
|
all the Chemistry::File::* modules installed in your system. |
|
63
|
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
use Chemistry::File ':auto'; |
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=head1 FILE I/O MODEL |
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
Before version 0.30, file I/O modules typically used only parse_string, |
|
69
|
|
|
|
|
|
|
write_string, parse_file, and write_file, and they were generally used as class |
|
70
|
|
|
|
|
|
|
methods. A file could contain one or more molecules and only be read or written |
|
71
|
|
|
|
|
|
|
whole; reading it would return every molecule on the file. This was problematic |
|
72
|
|
|
|
|
|
|
when dealing with large multi-molecule files (such as SDF files), because all |
|
73
|
|
|
|
|
|
|
the molecules would have to be loaded into memory at the same time. |
|
74
|
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
While version 0.30 retains backward compatibility with that simple model, it |
|
76
|
|
|
|
|
|
|
also allows a more flexible interface that allows reading one molecule at a |
|
77
|
|
|
|
|
|
|
time, skipping molecules, and reading and writing file-level information that |
|
78
|
|
|
|
|
|
|
is not associated with specific molecules. The following diagram shows the |
|
79
|
|
|
|
|
|
|
global structure of a file according to the new model: |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
+-----------+ |
|
82
|
|
|
|
|
|
|
| header | |
|
83
|
|
|
|
|
|
|
+-----------+ |
|
84
|
|
|
|
|
|
|
| molecule | |
|
85
|
|
|
|
|
|
|
+-----------+ |
|
86
|
|
|
|
|
|
|
| molecule | |
|
87
|
|
|
|
|
|
|
+-----------+ |
|
88
|
|
|
|
|
|
|
| ... | |
|
89
|
|
|
|
|
|
|
+-----------+ |
|
90
|
|
|
|
|
|
|
| footer | |
|
91
|
|
|
|
|
|
|
+-----------+ |
|
92
|
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
In cases where the header and the footer are empty, the model reduces to the |
|
94
|
|
|
|
|
|
|
pre-0.30 version. The low-level steps to read a file are the following: |
|
95
|
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
$file = Chemistry::File::MyFormat->new(file => 'xyz.mol'); |
|
97
|
|
|
|
|
|
|
$file->open('<'); |
|
98
|
|
|
|
|
|
|
$file->read_header; |
|
99
|
|
|
|
|
|
|
while (my $mol = $file->read_mol($file->fh, %opts)) { |
|
100
|
|
|
|
|
|
|
# do something with $mol... |
|
101
|
|
|
|
|
|
|
} |
|
102
|
|
|
|
|
|
|
$file->read_footer; |
|
103
|
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
The C<read> method does all the above automatically, and it stores all the |
|
105
|
|
|
|
|
|
|
molecules read in the mols property. |
|
106
|
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
=head1 STANDARD OPTIONS |
|
108
|
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
All the methods below include a list of options %opts at the end of the |
|
110
|
|
|
|
|
|
|
parameter list. Each class implementing this interface may have its own |
|
111
|
|
|
|
|
|
|
particular options. However, the following options should be recognized by all |
|
112
|
|
|
|
|
|
|
classes: |
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=over |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
=item mol_class |
|
117
|
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
A class or object with a C<new> method that constructs a molecule. This is |
|
119
|
|
|
|
|
|
|
needed when the user wants to specify a molecule subclass different from the |
|
120
|
|
|
|
|
|
|
default. When this option is not defined, the module may use Chemistry::Mol |
|
121
|
|
|
|
|
|
|
or whichever class is appropriate for that file format. |
|
122
|
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
=item format |
|
124
|
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
The name of the file format being used, as registered by |
|
126
|
|
|
|
|
|
|
Chemistry::Mol->register_format. |
|
127
|
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
=item fatal |
|
129
|
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
If true, parsing errors should throw an exception; if false, they should just |
|
131
|
|
|
|
|
|
|
try to recover if possible. True by default. |
|
132
|
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
=back |
|
134
|
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
=head1 CLASS METHODS |
|
136
|
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
The class methods in this class (or rather, its derived classes) are usually |
|
138
|
|
|
|
|
|
|
not called directly. Instead, use Chemistry::Mol->read, write, print, parse, |
|
139
|
|
|
|
|
|
|
and file. These methods also work if called as instance methods. |
|
140
|
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
=over |
|
142
|
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
=cut |
|
145
|
|
|
|
|
|
|
|
|
146
|
14
|
|
|
14
|
|
35030
|
use strict; |
|
|
14
|
|
|
|
|
30
|
|
|
|
14
|
|
|
|
|
636
|
|
|
147
|
14
|
|
|
14
|
|
76
|
use warnings; |
|
|
14
|
|
|
|
|
29
|
|
|
|
14
|
|
|
|
|
455
|
|
|
148
|
14
|
|
|
14
|
|
69
|
no warnings qw(uninitialized); |
|
|
14
|
|
|
|
|
33
|
|
|
|
14
|
|
|
|
|
665
|
|
|
149
|
14
|
|
|
14
|
|
75
|
use Carp; |
|
|
14
|
|
|
|
|
25
|
|
|
|
14
|
|
|
|
|
2253
|
|
|
150
|
14
|
|
|
14
|
|
28324
|
use FileHandle; |
|
|
14
|
|
|
|
|
312207
|
|
|
|
14
|
|
|
|
|
111
|
|
|
151
|
14
|
|
|
14
|
|
6847
|
use base qw(Chemistry::Obj); |
|
|
14
|
|
|
|
|
36
|
|
|
|
14
|
|
|
|
|
17817
|
|
|
152
|
|
|
|
|
|
|
# don't blame our problems on the Chemistry::Mol module ;-) |
|
153
|
|
|
|
|
|
|
our @CARP_NOT = qw(Chemistry::Mol); |
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
# This subroutine implements the :auto functionality |
|
156
|
|
|
|
|
|
|
# Implements the `use Chemistry::File LIST` interface.
#
# Each element of LIST is either the keyword ':auto', which loads every
# Chemistry/File/*.pm file found under the directories in @INC, or a module
# name suffix, which is loaded as "${pack}::$param" (so
# `use Chemistry::File qw(PDB)` loads Chemistry::File::PDB).
# Dies if any requested module fails to load.
sub import {
    my $pack = shift;
    for my $param (@_) {
        if ($param eq ':auto') {
            # bsd_glob() takes its argument as ONE pattern. The builtin glob
            # splits the pattern on whitespace, which broke :auto for @INC
            # directories whose path contains a space.
            require File::Glob;
            for my $dir (@INC) {
                next if ref $dir;    # @INC hooks are not directories
                for my $pmfile (File::Glob::bsd_glob("$dir/Chemistry/File/*.pm")) {
                    # Reduce the path to the relative form that require()
                    # searches for in @INC.
                    my ($pm) = $pmfile =~ m|(Chemistry/File/.*\.pm)$|
                        or next;
                    #warn "requiring $pm\n";
                    eval { require $pm };
                    die "Error in Chemistry::File: '$@'; pmfile='$pmfile'; pm='$pm'\n" if $@;
                }
            }
        } else {
            # String eval: $param is spliced directly into a `use` statement.
            eval "use ${pack}::$param";
            die "$@" if $@;
        }
    }
}
|
172
|
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
=item $class->parse_string($s, %options) |
|
174
|
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
Parse a string $s and return one or more molecule objects. This is an abstract |
|
176
|
|
|
|
|
|
|
method, so it should be provided by all derived classes. |
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
=cut |
|
179
|
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
# Parses the molecule(s) held in the string $s: a reference to the string is
# wrapped in a new file object, and read() is called on that object.
#
# When the private option _must_override is true (the default read_mol passes
# it), the method croaks instead, reporting that the class has no
# parse_string() of its own.
sub parse_string {
    my ($self, $s, %opts) = @_;

    croak "parse_string() is not implemented for " . (ref $self || $self)
        if $opts{_must_override};

    my $reader = $self->new(file => \$s, opts => \%opts);
    return $reader->read;
}
|
188
|
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
=item $class->write_string($mol, %options) |
|
191
|
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
Convert a molecule to a string. This is an abstract method, so it should be |
|
193
|
|
|
|
|
|
|
provided by all derived classes. |
|
194
|
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
=cut |
|
196
|
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
# Serializes $mol to a string: a new file object is created over a reference
# to an in-memory buffer, write() is called on it, and the buffer is returned.
#
# When the private option _must_override is true (the default write_mol passes
# it), the method croaks instead, reporting that the class has no
# write_string() of its own.
sub write_string {
    my ($self, $mol, %opts) = @_;

    croak "write_string() is not implemented for " . (ref $self || $self)
        if $opts{_must_override};

    my $buffer;
    my $writer = $self->new(file => \$buffer, mols => [$mol], opts => \%opts);
    $writer->write;
    return $buffer;
}
|
207
|
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
=item $class->parse_file($file, %options) |
|
209
|
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
Reads the file $file and returns one or more molecules. The default method |
|
211
|
|
|
|
|
|
|
slurps the whole file and then calls parse_string, but derived classes may |
|
212
|
|
|
|
|
|
|
choose to override it. $file can be a filehandle, a filename, or a scalar |
|
213
|
|
|
|
|
|
|
reference. See C<new> for details. |
|
214
|
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
=cut |
|
216
|
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
# Reads $file by building a file object around it and returning whatever that
# object's read() returns (evaluated in the caller's context).
sub parse_file {
    my ($self, $file, %opts) = @_;
    my $reader = $self->new(file => $file, opts => \%opts);
    return $reader->read;
}
|
221
|
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
=item $class->write_file($mol, $file, %options) |
|
223
|
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
Writes a file $file containing the molecule $mol. The default method calls |
|
225
|
|
|
|
|
|
|
write_string first and then saves the string to a file, but derived classes |
|
226
|
|
|
|
|
|
|
may choose to override it. $file can be either a filehandle or a filename. |
|
227
|
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
=cut |
|
229
|
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
# Writes the single molecule $mol to $file by building a file object that
# holds both and calling its write() method; write()'s result is returned.
sub write_file {
    my ($self, $mol, $file, %opts) = @_;
    my $writer = $self->new(file => $file, mols => [$mol], opts => \%opts);
    return $writer->write;
}
|
235
|
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
=item $class->name_is($fname, %options) |
|
237
|
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
Returns true if a filename is of the format corresponding to the class. |
|
239
|
|
|
|
|
|
|
It should look at the filename only, because it may be called with |
|
240
|
|
|
|
|
|
|
non-existent files. It is used to determine with which format to save a file. |
|
241
|
|
|
|
|
|
|
For example, the Chemistry::File::PDB returns true if the file ends in .pdb. |
|
242
|
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
=cut |
|
244
|
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
# Default filename test: the base class claims no filename, so this always
# returns 0 regardless of its arguments.
sub name_is {
    return 0;
}
|
248
|
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
=item $class->string_is($s, %options) |
|
250
|
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
Examines the string $s and returns true if it has the format of the class. |
|
252
|
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
=cut |
|
254
|
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
# Default content test: the base class claims no string, so this always
# returns 0 regardless of its arguments.
sub string_is {
    return 0;
}
|
258
|
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
=item $class->file_is($file, %options) |
|
260
|
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
Examines the file $file and returns true if it has the format of the class. |
|
262
|
|
|
|
|
|
|
The default method slurps the whole file and then calls string_is, but derived |
|
263
|
|
|
|
|
|
|
classes may choose to override it. |
|
264
|
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
=cut |
|
266
|
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
# Decides whether $file is in this class's format.
#
# The invocant is opened for reading and slurped inside an eval, so any
# exception from either step is swallowed. If that produced a true string,
# string_is() decides; otherwise, when $file is not a reference, name_is()
# decides. A reference $file with nothing slurped gives a false result.
#
# NOTE(review): open() and slurp() work on the invocant's own file, not on the
# $file argument. When this is called as a class method the content check
# presumably never succeeds and name_is() is what decides -- confirm before
# relying on string_is() being consulted.
sub file_is {
    my ($self, $file, %opts) = @_;

    my $content = eval {
        $self->open('<');
        $self->slurp;
    };

    return $self->string_is($content, %opts) if $content;
    return $self->name_is($file, %opts) unless ref $file;
    return !1;
}
|
280
|
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
=item $class->slurp |
|
282
|
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
Reads a file into a scalar. Automatic decompression of gzipped files is |
|
284
|
|
|
|
|
|
|
supported if the Compress::Zlib module is installed. Files ending in .gz are |
|
285
|
|
|
|
|
|
|
assumed to be compressed; otherwise it is possible to force decompression by |
|
286
|
|
|
|
|
|
|
passing the gzip => 1 option (or no decompression with gzip => 0). |
|
287
|
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
=cut |
|
289
|
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
# slurp a file into a scalar, with transparent decompression |
|
291
|
|
|
|
|
|
|
# Returns everything that remains to be read on $self->fh. The input record
# separator is undefined for the duration of the call, so one read yields the
# whole remainder. Any decompression has already been done by open().
sub slurp {
    my ($self) = @_;

    my $handle = $self->fh;
    local $/ = undef;
    return <$handle>;
}
|
298
|
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
=item $class->new(file => $file, opts => \%opts) |
|
300
|
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
Create a new file object. This method is usually called indirectly via |
|
302
|
|
|
|
|
|
|
the Chemistry::Mol->file method. $file may be a scalar with a filename, an |
|
303
|
|
|
|
|
|
|
open filehandle, or a reference to a scalar. If a reference to a scalar is |
|
304
|
|
|
|
|
|
|
used, the string contained in the scalar is used as an in-memory file. |
|
305
|
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
=cut |
|
307
|
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
# Constructor: delegates to the parent class constructor with the same
# arguments, then turns the "fatal" option on unless the caller supplied a
# value for it (even a false one).
sub new {
    my $class = shift;
    my $self  = $class->SUPER::new(@_);
    $self->{opts}{fatal} = 1 if !exists $self->{opts}{fatal};
    return $self;
}
|
313
|
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
Chemistry::Obj::accessor(qw(file fh opts mols mode)); |
|
315
|
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
=back |
|
317
|
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
=head1 INSTANCE METHODS |
|
319
|
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
=head2 Accessors |
|
321
|
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
Chemistry::File objects are derived from Chemistry::Obj and have the same |
|
323
|
|
|
|
|
|
|
properties (name, id, and type), as well as the following ones: |
|
324
|
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
=over |
|
326
|
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
=item file |
|
328
|
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
The "file" as described above under C<new>. |
|
330
|
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
=item fh |
|
332
|
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
The filehandle used for reading and writing molecules. It is opened by C<open>. |
|
334
|
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
=item opts |
|
336
|
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
A hashref containing the options that are passed through to the old-style class |
|
338
|
|
|
|
|
|
|
methods. They are also passed to the instance method to keep a similar |
|
339
|
|
|
|
|
|
|
interface, but they could access them via $self->opts anyway. |
|
340
|
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
=item mode |
|
342
|
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
'>' if the file is open for writing, '<' for reading, and false if not open. |
|
344
|
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
=item mols |
|
346
|
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
C<read> stores all the molecules that were read in this property as an array |
|
348
|
|
|
|
|
|
|
reference. C<write> gets the molecules to write from here. |
|
349
|
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
=back |
|
351
|
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
=head2 Abstract methods |
|
353
|
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
These methods should be overridden, because they don't really do much by |
|
355
|
|
|
|
|
|
|
default. |
|
356
|
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
=over |
|
358
|
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
=item $file->read_header |
|
360
|
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
Read whatever information is available in the file before the first molecule. |
|
362
|
|
|
|
|
|
|
Does nothing by default. |
|
363
|
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
=cut |
|
365
|
|
|
|
|
|
|
|
|
366
|
14
|
|
|
14
|
1
|
55
|
# Hook called before the first molecule is read; the base class does nothing
# and returns an empty list (undef in scalar context).
sub read_header {
    return;
}
|
367
|
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
=item $file->read_footer |
|
369
|
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
Read whatever information is available in the file after the last molecule. |
|
371
|
|
|
|
|
|
|
Does nothing by default. |
|
372
|
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
=cut |
|
374
|
|
|
|
|
|
|
|
|
375
|
14
|
|
|
14
|
1
|
26
|
# Hook called after the last molecule is read; the base class does nothing
# and returns an empty list (undef in scalar context).
sub read_footer {
    return;
}
|
376
|
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
=item $self->slurp_mol($fh) |
|
378
|
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
Reads from the input string until the end of the current molecule and returns |
|
380
|
|
|
|
|
|
|
the "slurped" string. It does not parse the string. It returns undefined if |
|
381
|
|
|
|
|
|
|
there are no more molecules in the file. This method should be overridden if |
|
382
|
|
|
|
|
|
|
needed; by default, it slurps until the end of the file. |
|
383
|
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
=cut |
|
385
|
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
# Default "read one molecule's text": returns everything left on $fh, since
# the base class knows no molecule delimiter.
sub slurp_mol {
    my ($self, $fh) = @_;

    local $/ = undef;    # no record separator: one read returns the remainder
    return <$fh>;
}
|
390
|
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
=item $self->skip_mol($fh) |
|
392
|
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
Similar to slurp_mol, but it doesn't need to return anything except true or |
|
394
|
|
|
|
|
|
|
false. It should also be overridden if needed; by default, it just calls |
|
395
|
|
|
|
|
|
|
slurp_mol. |
|
396
|
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
=cut |
|
398
|
|
|
|
|
|
|
|
|
399
|
0
|
|
|
0
|
1
|
0
|
# Default skip: forwards all arguments to slurp_mol() and returns its result.
sub skip_mol {
    my ($self, @args) = @_;
    return $self->slurp_mol(@args);
}
|
400
|
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
=item $file->read_mol($fh, %opts) |
|
402
|
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
Read the next molecule in the input stream. It returns false if there are no |
|
404
|
|
|
|
|
|
|
more molecules in the file. This method should be overridden by derived |
|
405
|
|
|
|
|
|
|
classes; otherwise it will call slurp_mol and parse_string (for backwards |
|
406
|
|
|
|
|
|
|
compatibility; it is recommended to override read_mol directly in new modules). |
|
407
|
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
Note: some old file I/O modules (written before the 0.30 interface) may return |
|
409
|
|
|
|
|
|
|
more than one molecule anyway, so it is recommended to call read_mol in list |
|
410
|
|
|
|
|
|
|
context to be safe: |
|
411
|
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
($mol) = $file->read_mol($fh, %opts); |
|
413
|
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
=cut |
|
415
|
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
# Backward-compatible default: obtains the next molecule's text from
# slurp_mol() and passes it to parse_string(). Returns nothing when the text
# is undefined or empty.
#
# _must_override is added to the options so that the base-class parse_string()
# croaks instead of calling read(), which would call this method again.
sub read_mol {
    my ($self, $fh, %opts) = @_;

    my $text = $self->slurp_mol($fh);
    if (!defined $text or !length $text) {
        return;
    }
    return $self->parse_string($text, %opts, _must_override => 1);
}
|
422
|
|
|
|
|
|
|
=item $file->write_header |
|
423
|
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
Write whatever information is needed before the first molecule. |
|
425
|
|
|
|
|
|
|
Does nothing by default. |
|
426
|
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
=cut |
|
428
|
|
|
|
|
|
|
|
|
429
|
6
|
|
|
6
|
0
|
13
|
# Hook called before the first molecule is written; the base class does
# nothing and returns an empty list (undef in scalar context).
sub write_header {
    return;
}
|
430
|
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
=item $file->write_footer |
|
432
|
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
Write whatever information is needed after the last molecule. |
|
434
|
|
|
|
|
|
|
Does nothing by default. |
|
435
|
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
=cut |
|
437
|
|
|
|
|
|
|
|
|
438
|
6
|
|
|
6
|
1
|
10
|
# Hook called after the last molecule is written; the base class does nothing
# and returns an empty list (undef in scalar context).
sub write_footer {
    return;
}
|
439
|
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
=item $self->write_mol($fh, $mol, %opts) |
|
441
|
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
Write one molecule to $fh. By default and for backward compatibility, it just |
|
443
|
|
|
|
|
|
|
calls C<write_string> and prints its return value to $self->fh. New classes |
|
444
|
|
|
|
|
|
|
should override it. |
|
445
|
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
=cut |
|
447
|
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
# Backward-compatible default: asks write_string() for the text of $mol and
# prints it to $fh, returning print's result.
#
# _must_override is added to the options so that the base-class write_string()
# croaks instead of calling write(), which would call this method again.
sub write_mol {
    my ($self, $fh, $mol, %opts) = @_;

    # write_string() is called in list context, exactly as it would be when
    # placed directly in print's argument list.
    my @text = $self->write_string($mol, %opts, _must_override => 1);
    return print {$fh} @text;
}
|
452
|
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
########################## OTHER ################################## |
|
454
|
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
=back |
|
456
|
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
=head2 Other methods |
|
458
|
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
=over |
|
460
|
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
=item $self->open($mode) |
|
462
|
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
Opens the file (held in $self->file) for reading by default, or for writing if |
|
464
|
|
|
|
|
|
|
$mode eq '>'. This method sets $self->fh transparently regardless of whether |
|
465
|
|
|
|
|
|
|
$self->file is a filename (compressed or not), a scalar reference, or a |
|
466
|
|
|
|
|
|
|
filehandle. |
|
467
|
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
=cut |
|
469
|
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
# Opens $self->file in $mode ('<' by default, '>' for writing), stores the
# resulting handle in $self->fh and the mode in $self->mode, and returns $self.
#
# $self->file may be:
#   * a scalar reference -- opened as an in-memory file (through IO::String on
#     perls older than 5.8);
#   * any other reference -- taken to be an already open filehandle and used
#     as is;
#   * a filename -- opened directly, or routed through a temporary file when
#     gzip handling applies.
#
# Gzip handling applies when the gzip option is true, or when it is undefined
# and the filename ends in ".gz". For reading, the file is decompressed into
# the temporary file, which is then rewound; for writing, the temporary file
# is left empty here and close() compresses it to the real file.
#
# Croaks if no file was supplied, if gzip is requested for a reference, if
# Compress::Zlib is needed but unavailable, or if any open/read step fails.
sub open {
    my ($self, $mode) = @_;
    my $fh;
    $mode ||= '<';
    $self->mode($mode);
    my $file = $self->file;
    croak "Chemistry::File::open: no file supplied" unless defined $file;

    if (ref $file eq 'SCALAR') {
        croak "decompression only supported for files" if $self->{opts}{gzip};
        if ($] >= 5.008) {
            # CORE:: makes explicit that this is the builtin, not this method.
            # The result is checked so a failed open is reported here rather
            # than surfacing later as an unreadable handle.
            CORE::open($fh, $mode, $file)
                or croak "Could not open in-memory file: $!";
        } else {
            require IO::String;
            $fh = IO::String->new($$file);
        }
    } elsif (ref $file) {
        croak "decompression only supported for files" if $self->{opts}{gzip};
        $fh = $file;
    } elsif ($self->{opts}{gzip}
        or !defined $self->{opts}{gzip} and $file =~ /\.gz$/)
    {
        # The dot is escaped so that only a real ".gz" suffix selects gzip
        # handling; an unescaped dot matched any name ending in "gz".
        eval { require Compress::Zlib }    # Carp
            or croak "Compress::Zlib not installed!";
        require File::Temp;

        $fh = File::Temp::tempfile();
        $self->{opts}{gzip} ||= 1;    # remembered so close() knows to compress
        unless ($mode eq '>') {
            my $gz = Compress::Zlib::gzopen($file, "rb")
                or croak "Cannot open compressed $file: "
                . "$Compress::Zlib::gzerrno\n";

            my $buffer;
            print $fh $buffer while $gz->gzread($buffer) > 0;

            if ($Compress::Zlib::gzerrno != Compress::Zlib::Z_STREAM_END()) {
                croak "Error reading from $file: $Compress::Zlib::gzerrno"
                    . ($Compress::Zlib::gzerrno+0) . "\n";
            }
            $gz->gzclose();
            seek $fh, 0, 0;
        }
    } else {
        # NOTE(review): the mode and the name are joined into one string, so
        # the name is presumably interpreted with two-argument open rules;
        # left unchanged for backward compatibility.
        $fh = FileHandle->new("$mode$file")
            or croak "Could not open file $file: $!";
    }
    $self->fh($fh);
    $self;
}
|
520
|
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
=item $self->close |
|
522
|
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
Close the file. For regular files this just closes the filehandle, but for |
|
524
|
|
|
|
|
|
|
gzipped files it does some additional postprocessing. This method is called |
|
525
|
|
|
|
|
|
|
automatically on object destruction, so it is not mandatory to call it |
|
526
|
|
|
|
|
|
|
explicitly. |
|
527
|
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
=cut |
|
529
|
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
# Closes the file.
#
# If a handle is open for writing and the gzip option is set, the handle
# (the temporary file created by open()) is rewound and its contents are
# written, line by line, to the real file through Compress::Zlib. The gzip
# option value is used as the compression level, with 1 mapped to 6.
# Afterwards, if a mode is set, the handle is closed and the mode is reset to
# the empty string, which makes further calls no-ops.
#
# Croaks if compression is requested for a reference, or if opening or
# writing the compressed file, or closing the handle, fails.
sub close {
    my ($self) = @_;
    my $handle = $self->fh;

    if ($handle and $self->mode eq '>' and $self->{opts}{gzip}) {
        my $gz_level = $self->{opts}{gzip} || 6;
        $gz_level = 6 if $gz_level == 1;

        my $target = $self->file;
        croak "compression only supported for files" if ref $target;

        seek $handle, 0, 0;
        my $gz = Compress::Zlib::gzopen($target, "wb$gz_level")
            or croak "Cannot open $target $Compress::Zlib::gzerrno\n";
        while (defined(my $line = <$handle>)) {
            $gz->gzwrite($line)
                or croak "error writing: $Compress::Zlib::gzerrno\n";
        }
        $gz->gzclose;
    }

    if ($self->mode) {
        if ($handle) {
            $handle->close or croak "$!";
        }
        $self->mode('');
    }
}
|
556
|
|
|
|
|
|
|
|
|
557
|
22
|
|
|
22
|
|
1231
|
# Destructor: makes sure the file is closed when the object goes away.
sub DESTROY {
    my ($self) = @_;
    $self->close;
}
|
558
|
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
=item $file->read |
|
560
|
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
Read the whole file. This calls open, read_header, read_mol until there are no |
|
562
|
|
|
|
|
|
|
more molecules left, read_footer, and close. Returns a list of molecules if |
|
563
|
|
|
|
|
|
|
called in list context, or the first molecule in scalar context. |
|
564
|
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
=cut |
|
566
|
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
# Reads the whole file: open for reading, read_header, read_mol repeatedly
# until it returns an empty list, read_footer, close.
#
# Every molecule returned by read_mol is collected; the same array is also
# installed as $self->mols before reading starts. Returns all the molecules
# in list context, or only the first one otherwise.
sub read {
    my ($self) = @_;

    $self->open('<');
    $self->read_header;

    my @collected;
    $self->mols(\@collected);    # same array, so mols sees every push below
    while (1) {
        # list context: a single read_mol call may return several molecules
        my @batch = $self->read_mol($self->fh, %{ $self->{opts} });
        last unless @batch;
        push @collected, @batch;
    }

    $self->read_footer;
    $self->close;
    return wantarray ? @collected : $collected[0];
}
|
580
|
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
=item $self->write |
|
582
|
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
Write all the molecules in $self->mols. It just calls open, write_header, |
|
584
|
|
|
|
|
|
|
write_mol (per each molecule), write_footer, and close. |
|
585
|
|
|
|
|
|
|
|
|
586
|
|
|
|
|
|
|
=cut |
|
587
|
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
# Writes every molecule in $self->mols: open for writing, write_header, one
# write_mol call per molecule (each given the current $self->fh and the
# stored options), write_footer, close. Returns whatever close() returns.
sub write {
    my ($self) = @_;

    $self->open('>');
    $self->write_header;

    foreach my $mol (@{ $self->mols }) {
        my %opts = %{ $self->{opts} };
        $self->write_mol($self->fh, $mol, %opts);
    }

    $self->write_footer;
    return $self->close;
}
|
598
|
|
|
|
|
|
|
|
|
599
|
|
|
|
|
|
|
1; |
|
600
|
|
|
|
|
|
|
|
|
601
|
|
|
|
|
|
|
=back |
|
602
|
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
=head1 CAVEATS |
|
604
|
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
The :auto feature may not be entirely portable, but it is known to work under |
|
606
|
|
|
|
|
|
|
Unix and Windows (either Cygwin or ActiveState). |
|
607
|
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
=head1 VERSION |
|
609
|
|
|
|
|
|
|
|
|
610
|
|
|
|
|
|
|
0.37 |
|
611
|
|
|
|
|
|
|
|
|
612
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
613
|
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
L<Chemistry::Mol> |
|
615
|
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
The PerlMol website L<http://www.perlmol.org/> |
|
617
|
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
=head1 AUTHOR |
|
619
|
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
Ivan Tubert-Brohman |
|
621
|
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
=head1 COPYRIGHT |
|
623
|
|
|
|
|
|
|
|
|
624
|
|
|
|
|
|
|
Copyright (c) 2005 Ivan Tubert-Brohman. All rights reserved. This program is |
|
625
|
|
|
|
|
|
|
free software; you can redistribute it and/or modify it under the same terms as |
|
626
|
|
|
|
|
|
|
Perl itself. |
|
627
|
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
=cut |
|
629
|
|
|
|
|
|
|
|