line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Chemistry::File; |
2
|
|
|
|
|
|
|
$VERSION = '0.37'; |
3
|
|
|
|
|
|
|
|
4
|
|
|
|
|
|
|
=head1 NAME |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
Chemistry::File - Molecule file I/O base class |
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
=head1 SYNOPSIS |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
# As a convenient interface for several mol readers: |
11
|
|
|
|
|
|
|
use Chemistry::File qw(PDB MDLMol); # load PDB and MDL modules |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
# or try to use every file I/O module installed in the system: |
14
|
|
|
|
|
|
|
use Chemistry::File ':auto'; |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
my $mol1 = Chemistry::Mol->read("file.pdb"); |
17
|
|
|
|
|
|
|
my $mol2 = Chemistry::Mol->read("file.mol"); |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
# as a base for a mol reader: |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
package Chemistry::File::Myfile; |
23
|
|
|
|
|
|
|
use base qw(Chemistry::File); |
24
|
|
|
|
|
|
|
Chemistry::Mol->register_type("myfile", __PACKAGE__); |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# override the read_mol method |
27
|
|
|
|
|
|
|
sub read_mol { |
28
|
|
|
|
|
|
|
my ($self, $fh, %opts) = @_; |
29
|
|
|
|
|
|
|
my $mol_class = $opts{mol_class} || "Chemistry::Mol"; |
30
|
|
|
|
|
|
|
my $mol = $mol_class->new; |
31
|
|
|
|
|
|
|
# ... do some stuff with $fh and $mol ... |
32
|
|
|
|
|
|
|
return $mol; |
33
|
|
|
|
|
|
|
} |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# override the write_mol method |
36
|
|
|
|
|
|
|
sub write_mol { |
37
|
|
|
|
|
|
|
my ($self, $fh, $mol, %opts) = @_; |
38
|
|
|
|
|
|
|
print $fh $mol->name, "\n"; |
39
|
|
|
|
|
|
|
# ... do some stuff with $fh and $mol ... |
40
|
|
|
|
|
|
|
} |
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
=head1 DESCRIPTION |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
The main use of this module is as a base class for other molecule file I/O |
45
|
|
|
|
|
|
|
modules (for example, Chemistry::File::PDB). Such modules should override and |
46
|
|
|
|
|
|
|
extend the Chemistry::File methods as needed. You only need to care about the |
47
|
|
|
|
|
|
|
methods here if you are writing a file I/O module or if you want a finer |
48
|
|
|
|
|
|
|
degree of control than what is offered by the simple read and write methods |
49
|
|
|
|
|
|
|
in the Chemistry::Mol class. |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
From the user's point of view, this module can also be used as shorthand |
52
|
|
|
|
|
|
|
for using several Chemistry::File modules at the same time. |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
use Chemistry::File qw(PDB MDLMol); |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
is exactly equivalent to |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
use Chemistry::File::PDB; |
59
|
|
|
|
|
|
|
use Chemistry::File::MDLMol; |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
If you use the :auto keyword, Chemistry::File will autodetect and load |
62
|
|
|
|
|
|
|
all the Chemistry::File::* modules installed in your system. |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
use Chemistry::File ':auto'; |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=head1 FILE I/O MODEL |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
Before version 0.30, file I/O modules typically used only parse_string, |
69
|
|
|
|
|
|
|
write_string, parse_file, and write_file, and they were generally used as class |
70
|
|
|
|
|
|
|
methods. A file could contain one or more molecules and only be read or written |
71
|
|
|
|
|
|
|
whole; reading it would return every molecule on the file. This was problematic |
72
|
|
|
|
|
|
|
when dealing with large multi-molecule files (such as SDF files), because all |
73
|
|
|
|
|
|
|
the molecules would have to be loaded into memory at the same time. |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
While version 0.30 retains backward compatibility with that simple model, it |
76
|
|
|
|
|
|
|
also allows a more flexible interface that allows reading one molecule at a |
77
|
|
|
|
|
|
|
time, skipping molecules, and reading and writing file-level information that |
78
|
|
|
|
|
|
|
is not associated with specific molecules. The following diagram shows the |
79
|
|
|
|
|
|
|
global structure of a file according to the new model: |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
+-----------+ |
82
|
|
|
|
|
|
|
| header | |
83
|
|
|
|
|
|
|
+-----------+ |
84
|
|
|
|
|
|
|
| molecule | |
85
|
|
|
|
|
|
|
+-----------+ |
86
|
|
|
|
|
|
|
| molecule | |
87
|
|
|
|
|
|
|
+-----------+ |
88
|
|
|
|
|
|
|
| ... | |
89
|
|
|
|
|
|
|
+-----------+ |
90
|
|
|
|
|
|
|
| footer | |
91
|
|
|
|
|
|
|
+-----------+ |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
In cases where the header and the footer are empty, the model reduces to the |
94
|
|
|
|
|
|
|
pre-0.30 version. The low-level steps to read a file are the following: |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
$file = Chemistry::File::MyFormat->new(file => 'xyz.mol'); |
97
|
|
|
|
|
|
|
$file->open('<'); |
98
|
|
|
|
|
|
|
$file->read_header; |
99
|
|
|
|
|
|
|
while (my $mol = $file->read_mol($file->fh, %opts)) { |
100
|
|
|
|
|
|
|
# do something with $mol... |
101
|
|
|
|
|
|
|
} |
102
|
|
|
|
|
|
|
$file->read_footer; |
103
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
The C<read> method does all the above automatically, and it stores all the |
105
|
|
|
|
|
|
|
molecules read in the mols property. |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
=head1 STANDARD OPTIONS |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
All the methods below include a list of options %opts at the end of the |
110
|
|
|
|
|
|
|
parameter list. Each class implementing this interface may have its own |
111
|
|
|
|
|
|
|
particular options. However, the following options should be recognized by all |
112
|
|
|
|
|
|
|
classes: |
113
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
=over |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
=item mol_class |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
A class or object with a C<new> method that constructs a molecule. This is |
119
|
|
|
|
|
|
|
needed when the user wants to specify a molecule subclass different from the |
120
|
|
|
|
|
|
|
default. When this option is not defined, the module may use Chemistry::Mol |
121
|
|
|
|
|
|
|
or whichever class is appropriate for that file format. |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
=item format |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
The name of the file format being used, as registered by |
126
|
|
|
|
|
|
|
Chemistry::Mol->register_format. |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
=item fatal |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
If true, parsing errors should throw an exception; if false, they should just |
131
|
|
|
|
|
|
|
try to recover if possible. True by default. |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
=back |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
=head1 CLASS METHODS |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
The class methods in this class (or rather, its derived classes) are usually |
138
|
|
|
|
|
|
|
not called directly. Instead, use Chemistry::Mol->read, write, print, parse, |
139
|
|
|
|
|
|
|
and file. These methods also work if called as instance methods. |
140
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
=over |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
=cut |
145
|
|
|
|
|
|
|
|
146
|
14
|
|
|
14
|
|
35030
|
use strict; |
|
14
|
|
|
|
|
30
|
|
|
14
|
|
|
|
|
636
|
|
147
|
14
|
|
|
14
|
|
76
|
use warnings; |
|
14
|
|
|
|
|
29
|
|
|
14
|
|
|
|
|
455
|
|
148
|
14
|
|
|
14
|
|
69
|
no warnings qw(uninitialized); |
|
14
|
|
|
|
|
33
|
|
|
14
|
|
|
|
|
665
|
|
149
|
14
|
|
|
14
|
|
75
|
use Carp; |
|
14
|
|
|
|
|
25
|
|
|
14
|
|
|
|
|
2253
|
|
150
|
14
|
|
|
14
|
|
28324
|
use FileHandle; |
|
14
|
|
|
|
|
312207
|
|
|
14
|
|
|
|
|
111
|
|
151
|
14
|
|
|
14
|
|
6847
|
use base qw(Chemistry::Obj); |
|
14
|
|
|
|
|
36
|
|
|
14
|
|
|
|
|
17817
|
|
152
|
|
|
|
|
|
|
# don't blame our problems on the Chemistry::Mol module ;-) |
153
|
|
|
|
|
|
|
our @CARP_NOT = qw(Chemistry::Mol); |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
# This subroutine implements the :auto functionality |
156
|
|
|
|
|
|
|
# Implements "use Chemistry::File LIST".
#
# Each element of LIST is either the literal ':auto', which requires every
# Chemistry/File/*.pm file found under the directories in @INC, or the last
# part of a module name, which is loaded as "${pack}::$param" (so 'PDB'
# loads Chemistry::File::PDB when $pack is Chemistry::File).
#
# Dies if a module fails to load. Croaks if a parameter other than ':auto'
# is not a syntactically plain module name.
sub import {
    my ($pack, @params) = @_;
    for my $param (@params) {
        if ($param eq ':auto') {
            for my $pmfile (map { glob "$_/Chemistry/File/*.pm" } @INC) {
                # require wants the path relative to the @INC directory
                my ($pm) = $pmfile =~ m|(Chemistry/File/.*\.pm)$|;
                #warn "requiring $pm\n";
                eval { require $pm };
                die "Error in Chemistry::File: '$@'; pmfile='$pmfile'; pm='$pm'\n" if $@;
            }
        } else {
            # The name is interpolated into a string eval below, so refuse
            # anything that is not a plain (possibly nested) module name
            # instead of compiling it as arbitrary Perl code.
            croak "Chemistry::File: invalid module name '$param'"
                unless $param =~ /\A\w+(?:::\w+)*\z/;
            eval "use ${pack}::$param";
            die "$@" if $@;
        }
    }
}
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
=item $class->parse_string($s, %options) |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
Parse a string $s and return one or more molecule objects. This is an abstract |
176
|
|
|
|
|
|
|
method, so it should be provided by all derived classes. |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
=cut |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
# Parse the string $s by treating it as an in-memory file: a new file object
# is created for it and the result of its read method is returned. When the
# _must_override option is true (read_mol passes it), croak instead, naming
# the class that did not provide its own implementation.
sub parse_string {
    my ($invocant, $text, %options) = @_;

    my $class = ref($invocant) || $invocant;
    croak "parse_string() is not implemented for $class"
        if $options{_must_override};

    return $invocant->new(file => \$text, opts => \%options)->read;
}
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
|
190
|
|
|
|
|
|
|
=item $class->write_string($mol, %options) |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
Convert a molecule to a string. This is an abstract method, so it should be |
193
|
|
|
|
|
|
|
provided by all derived classes. |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
=cut |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
# Convert the molecule $mol to a string: a new file object is created whose
# "file" is a reference to a local scalar, its write method is called, and
# the scalar is returned. When the _must_override option is true (write_mol
# passes it), croak instead, naming the class that did not provide its own
# implementation.
sub write_string {
    my ($invocant, $mol, %options) = @_;

    my $class = ref($invocant) || $invocant;
    croak "write_string() is not implemented for $class"
        if $options{_must_override};

    my $buffer;
    $invocant->new(
        file => \$buffer,
        mols => [$mol],
        opts => \%options,
    )->write;
    return $buffer;
}
207
|
|
|
|
|
|
|
|
208
|
|
|
|
|
|
|
=item $class->parse_file($file, %options) |
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
Reads the file $file and returns one or more molecules. The default method |
211
|
|
|
|
|
|
|
slurps the whole file and then calls parse_string, but derived classes may |
212
|
|
|
|
|
|
|
choose to override it. $file can be a filehandle, a filename, or a scalar |
213
|
|
|
|
|
|
|
reference. See C<new> for details. |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
=cut |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
# Read $file by creating a new file object for it and returning whatever
# that object's read method returns.
sub parse_file {
    my ($invocant, $file, %options) = @_;
    my @ctor_args = (file => $file, opts => \%options);
    return $invocant->new(@ctor_args)->read;
}
221
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
=item $class->write_file($mol, $file, %options) |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
Writes a file $file containing the molecule $mol. The default method calls |
225
|
|
|
|
|
|
|
write_string first and then saves the string to a file, but derived classes |
226
|
|
|
|
|
|
|
may choose to override it. $file can be either a filehandle or a filename. |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
=cut |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
# Write the molecule $mol to $file by creating a new file object holding
# that single molecule and calling its write method.
sub write_file {
    my ($invocant, $mol, $file, %options) = @_;
    my @ctor_args = (file => $file, mols => [$mol], opts => \%options);
    return $invocant->new(@ctor_args)->write;
}
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
=item $class->name_is($fname, %options) |
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
Returns true if a filename is of the format corresponding to the class. |
239
|
|
|
|
|
|
|
It should look at the filename only, because it may be called with |
240
|
|
|
|
|
|
|
non-existent files. It is used to determine with which format to save a file. |
241
|
|
|
|
|
|
|
For example, the Chemistry::File::PDB returns true if the file ends in .pdb. |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
=cut |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
# Default filename test: always returns 0, whatever arguments are given.
sub name_is {
    return 0;
}
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
=item $class->string_is($s, %options) |
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
Examines the string $s and returns true if it has the format of the class. |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
=cut |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
# Default content test: always returns 0, whatever arguments are given.
sub string_is {
    return 0;
}
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
=item $class->file_is($file, %options) |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
Examines the file $file and returns true if it has the format of the class. |
262
|
|
|
|
|
|
|
The default method slurps the whole file and then calls string_is, but derived |
263
|
|
|
|
|
|
|
classes may choose to override it. |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
=cut |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
# Decide whether $file has the format handled by this class.
#
# The contents are slurped and handed to string_is; if that does not give a
# true answer and $file is a plain filename, name_is is consulted instead.
#
# When called as a class method, a temporary file object is created for
# $file so that it can actually be opened (the class itself has no file to
# open). When called on an object, that object's own file is opened, as
# before. Errors while opening or slurping are swallowed on purpose: the
# file may not exist yet, in which case only the name can be examined.
#
# Returns the true value from string_is or name_is, or a false value.
sub file_is {
    my ($self, $file, %opts) = @_;

    my $s;
    # Never sniff a caller-supplied filehandle from a class-method call:
    # slurping would consume it, and destroying the temporary object would
    # close it, leaving nothing for the real read.
    if (ref $self or !ref $file or ref $file eq 'SCALAR') {
        $s = eval {
            # opts are copied because new() adds a default "fatal" entry
            my $reader = ref $self
                ? $self
                : $self->new(file => $file, opts => {%opts});
            $reader->open('<');
            $reader->slurp;
        };
    }

    if ($s) {
        my $matches = $self->string_is($s, %opts);
        return $matches if $matches;
    }
    # Classes that only implement name_is must keep working, so the name is
    # still consulted when the contents were not recognized.
    return $self->name_is($file, %opts) unless ref $file;
    return;
}
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
=item $class->slurp |
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
Reads a file into a scalar. Automatic decompression of gzipped files is |
284
|
|
|
|
|
|
|
supported if the Compress::Zlib module is installed. Files ending in .gz are |
285
|
|
|
|
|
|
|
assumed to be compressed; otherwise it is possible to force decompression by |
286
|
|
|
|
|
|
|
passing the gzip => 1 option (or no decompression with gzip => 0). |
287
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
=cut |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
# slurp a file into a scalar, with transparent decompression |
291
|
|
|
|
|
|
|
# Return everything that can still be read from the object's filehandle
# ($self->fh), with the input record separator undefined for the read.
sub slurp {
    my ($self) = @_;

    my $handle = $self->fh;
    local $/ = undef;
    return <$handle>;
}
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
=item $class->new(file => $file, opts => \%opts) |
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
Create a new file object. This method is usually called indirectly via |
302
|
|
|
|
|
|
|
the Chemistry::Mol->file method. $file may be a scalar with a filename, an |
303
|
|
|
|
|
|
|
open filehandle, or a reference to a scalar. If a reference to a scalar is |
304
|
|
|
|
|
|
|
used, the string contained in the scalar is used as an in-memory file. |
305
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
=cut |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
# Constructor: builds the object with the parent class constructor, passing
# all arguments through, then turns the "fatal" option on unless the opts
# hash already has a "fatal" key (whatever its value).
sub new {
    my ($class, @args) = @_;

    my $self = $class->SUPER::new(@args);
    if (!exists $self->{opts}{fatal}) {
        $self->{opts}{fatal} = 1;
    }
    return $self;
}
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
Chemistry::Obj::accessor(qw(file fh opts mols mode)); |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
=back |
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
=head1 INSTANCE METHODS |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
=head2 Accessors |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
Chemistry::File objects are derived from Chemistry::Obj and have the same |
323
|
|
|
|
|
|
|
properties (name, id, and type), as well as the following ones: |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
=over |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
=item file |
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
The "file" as described above under C<new>. |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
=item fh |
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
The filehandle used for reading and writing molecules. It is opened by C<open>. |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
=item opts |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
A hashref containing the options that are passed through to the old-style class |
338
|
|
|
|
|
|
|
methods. They are also passed to the instance method to keep a similar |
339
|
|
|
|
|
|
|
interface, but they could access them via $self->opts anyway. |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
=item mode |
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
'>' if the file is open for writing, '<' for reading, and false if not open. |
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
=item mols |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
C<read> stores all the molecules that were read in this property as an array |
348
|
|
|
|
|
|
|
reference. C<write> gets the molecules to write from here. |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
=back |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
=head2 Abstract methods |
353
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
These methods should be overridden, because they don't really do much by |
355
|
|
|
|
|
|
|
default. |
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
=over |
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
=item $file->read_header |
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
Read whatever information is available in the file before the first molecule. |
362
|
|
|
|
|
|
|
Does nothing by default. |
363
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
=cut |
365
|
|
|
|
|
|
|
|
366
|
14
|
|
|
14
|
1
|
55
|
# Hook called by read() before the first molecule; does nothing here.
sub read_header {
    return;
}
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
=item $file->read_footer |
369
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
Read whatever information is available in the file after the last molecule. |
371
|
|
|
|
|
|
|
Does nothing by default. |
372
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
=cut |
374
|
|
|
|
|
|
|
|
375
|
14
|
|
|
14
|
1
|
26
|
# Hook called by read() after the last molecule; does nothing here.
sub read_footer {
    return;
}
376
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
=item $self->slurp_mol($fh) |
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
Reads from the input string until the end of the current molecule and returns |
380
|
|
|
|
|
|
|
the "slurped" string. It does not parse the string. It returns undefined if |
381
|
|
|
|
|
|
|
there are no more molecules in the file. This method should be overridden if |
382
|
|
|
|
|
|
|
needed; by default, it slurps until the end of the file. |
383
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
=cut |
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
# Default "one molecule" reader: returns everything that can still be read
# from the filehandle $fh, with the input record separator undefined.
sub slurp_mol {
    my (undef, $handle) = @_;

    local $/ = undef;
    return <$handle>;
}
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
=item $self->skip_mol($fh) |
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
Similar to slurp_mol, but it doesn't need to return anything except true or |
394
|
|
|
|
|
|
|
false. It should also be overridden if needed; by default, it just calls |
395
|
|
|
|
|
|
|
slurp_mol. |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
=cut |
398
|
|
|
|
|
|
|
|
399
|
0
|
|
|
0
|
1
|
0
|
# Skip one molecule: by default this simply calls slurp_mol with the same
# arguments and returns its result.
sub skip_mol {
    my ($self, @args) = @_;
    return $self->slurp_mol(@args);
}
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
=item $file->read_mol($fh, %opts) |
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
Read the next molecule in the input stream. It returns false if there are no |
404
|
|
|
|
|
|
|
more molecules in the file. This method should be overridden by derived |
405
|
|
|
|
|
|
|
classes; otherwise it will call slurp_mol and parse_string (for backwards |
406
|
|
|
|
|
|
|
compatibility; it is recommended to override read_mol directly in new modules). |
407
|
|
|
|
|
|
|
|
408
|
|
|
|
|
|
|
Note: some old file I/O modules (written before the 0.30 interface) may return |
409
|
|
|
|
|
|
|
more than one molecule anyway, so it is recommended to call read_mol in list |
410
|
|
|
|
|
|
|
context to be safe: |
411
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
($mol) = $file->read_mol($fh, %opts); |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
=cut |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
# Default molecule reader, kept for old-style modules: slurps the next
# molecule as a string with slurp_mol and hands it to parse_string with the
# _must_override option set. Returns nothing when slurp_mol gives back an
# undefined or empty string.
sub read_mol {
    my ($self, $fh, %opts) = @_;

    my $chunk = $self->slurp_mol($fh);
    if (!defined $chunk or !length $chunk) {
        return;
    }
    return $self->parse_string($chunk, %opts, _must_override => 1);
}
422
|
|
|
|
|
|
|
=item $file->write_header |
423
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
Write whatever information is needed before the first molecule. |
425
|
|
|
|
|
|
|
Does nothing by default. |
426
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
=cut |
428
|
|
|
|
|
|
|
|
429
|
6
|
|
|
6
|
0
|
13
|
# Hook called by write() before the first molecule; does nothing here.
sub write_header {
    return;
}
430
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
=item $file->write_footer |
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
Write whatever information is needed after the last molecule. |
434
|
|
|
|
|
|
|
Does nothing by default. |
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
=cut |
437
|
|
|
|
|
|
|
|
438
|
6
|
|
|
6
|
1
|
10
|
# Hook called by write() after the last molecule; does nothing here.
sub write_footer {
    return;
}
439
|
|
|
|
|
|
|
|
440
|
|
|
|
|
|
|
=item $self->write_mol($fh, $mol, %opts) |
441
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
Write one molecule to $fh. By default and for backward compatibility, it just |
443
|
|
|
|
|
|
|
calls C<write_string> and prints its return value to $fh. New classes |
444
|
|
|
|
|
|
|
should override it. |
445
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
=cut |
447
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
# Default molecule writer, kept for old-style modules: calls write_string
# with the _must_override option set and prints whatever it returns to the
# filehandle $fh. Returns the result of print.
sub write_mol {
    my ($self, $fh, $mol, %opts) = @_;

    # list context, exactly as when the call sits in print's argument list
    my @pieces = $self->write_string($mol, %opts, _must_override => 1);
    return print {$fh} @pieces;
}
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
########################## OTHER ################################## |
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
=back |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
=head2 Other methods |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
=over |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
=item $self->open($mode) |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
Opens the file (held in $self->file) for reading by default, or for writing if |
464
|
|
|
|
|
|
|
$mode eq '>'. This method sets $self->fh transparently regardless of whether |
465
|
|
|
|
|
|
|
$self->file is a filename (compressed or not), a scalar reference, or a |
466
|
|
|
|
|
|
|
filehandle. |
467
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
=cut |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
# Open the file held in $self->file and store the handle in $self->fh.
#
# $mode is '<' (the default when false) or '>', and is recorded in
# $self->mode before anything is opened. Depending on what $self->file is:
#
#   reference to a scalar - opened as an in-memory file (IO::String is used
#                           on perls older than 5.8);
#   any other reference   - taken to be an already open filehandle and used
#                           as is;
#   filename, gzipped     - a temporary file is used as the handle; for
#                           reading, the file is first decompressed into it
#                           (for writing, close() does the compression);
#   any other filename    - opened with FileHandle.
#
# A filename counts as gzipped when the gzip option is true, or when the
# option is undefined and the name ends in ".gz". In that case the gzip
# option is set to 1 if it was not already true, so that close() knows
# about it.
#
# Returns $self. Croaks if no file was supplied, if gzip is requested for
# anything but a filename, if Compress::Zlib is missing, or if the file
# cannot be opened or decompressed.
sub open {
    my ($self, $mode) = @_;
    my $fh;
    $mode ||= '<';
    $self->mode($mode);
    my $file = $self->file;
    croak "Chemistry::File::open: no file supplied" unless defined $file;
    if (ref $file eq 'SCALAR') {
        croak "decompression only supported for files" if $self->{opts}{gzip};
        if ($] >= 5.008) {
            # CORE:: because this package defines its own open(); a failure
            # here used to go unnoticed and leave an unusable handle behind
            CORE::open($fh, $mode, $file)
                or croak "Could not open in-memory file: $!";
        } else {
            require IO::String;
            $fh = IO::String->new($$file);
        }
    } elsif (ref $file) {
        croak "decompression only supported for files" if $self->{opts}{gzip};
        $fh = $file;
    } elsif ($self->{opts}{gzip}
        or !defined $self->{opts}{gzip} and $file =~ /\.gz$/)
    {
        eval { require Compress::Zlib } # Carp
            or croak "Compress::Zlib not installed!";
        require File::Temp;

        # all reading and writing goes through an uncompressed temp file
        $fh = File::Temp::tempfile();
        $self->{opts}{gzip} ||= 1;
        unless ($mode eq '>') {
            my $gz = Compress::Zlib::gzopen($file, "rb")
                or croak "Cannot open compressed $file: "
                    . "$Compress::Zlib::gzerrno\n";

            my $buffer;
            print $fh $buffer while $gz->gzread($buffer) > 0;

            if ($Compress::Zlib::gzerrno != Compress::Zlib::Z_STREAM_END()) {
                croak "Error reading from $file: $Compress::Zlib::gzerrno"
                    . ($Compress::Zlib::gzerrno+0) . "\n";
            }
            $gz->gzclose();
            # rewind so that readers start at the top of the plain text
            seek $fh, 0, 0;
        }
    } else {
        # NOTE(review): mode and name are joined into one string, so
        # characters that are special to a two-argument style open would be
        # interpreted; confirm whether callers rely on that before changing.
        $fh = FileHandle->new("$mode$file")
            or croak "Could not open file $file: $!";
    }
    $self->fh($fh);
    $self;
}
520
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
=item $self->close |
522
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
Close the file. For regular files this just closes the filehandle, but for |
524
|
|
|
|
|
|
|
gzipped files it does some additional postprocessing. This method is called |
525
|
|
|
|
|
|
|
automatically on object destruction, so it is not mandatory to call it |
526
|
|
|
|
|
|
|
explicitly. |
527
|
|
|
|
|
|
|
|
528
|
|
|
|
|
|
|
=cut |
529
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
# Close the file.
#
# If the object was opened for writing ('>') with the gzip option set, the
# handle is the temporary file created by open(): it is rewound and its
# contents are compressed into the real file ($self->file) first. A gzip
# option of 1 means "default level" and is mapped to level 6; any other
# true value is used as the compression level.
#
# Then, if a mode is set, the handle (if any) is closed and the mode is
# reset to '', which makes further calls do nothing.
#
# Croaks if compression is requested for anything but a filename, or if the
# compressed file cannot be opened, written or closed, or if closing the
# handle fails.
sub close {
    my ($self) = @_;
    my $fh = $self->fh;
    if ($fh and $self->mode eq '>' and $self->{opts}{gzip}) {
        my $level = $self->{opts}{gzip} || 6;
        $level = 6 if $level == 1;
        my $file = $self->file;
        if (ref $file) {
            croak "compression only supported for files";
        } else {
            seek $fh, 0, 0;
            my $gz = Compress::Zlib::gzopen($file, "wb$level")
                or croak "Cannot open $file $Compress::Zlib::gzerrno\n";
            while (my $line = <$fh>) {
                $gz->gzwrite($line)
                    or croak "error writing: $Compress::Zlib::gzerrno\n";
            }
            # gzclose flushes pending data and returns 0 on success;
            # ignoring it would hide a failed final write
            $gz->gzclose == 0
                or croak "error closing $file: $Compress::Zlib::gzerrno\n";
        }
    }
    if ($self->mode) {
        if ($fh) { $fh->close or croak "$!" };
        $self->mode('');
    }
}
556
|
|
|
|
|
|
|
|
557
|
22
|
|
|
22
|
|
1231
|
# Destructor: closes the file when the object goes away.
sub DESTROY {
    my ($self) = @_;
    return $self->close;
}
558
|
|
|
|
|
|
|
|
559
|
|
|
|
|
|
|
=item $file->read |
560
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
Read the whole file. This calls open, read_header, read_mol until there are no |
562
|
|
|
|
|
|
|
more molecules left, read_footer, and close. Returns a list of molecules if |
563
|
|
|
|
|
|
|
called in list context, or the first molecule in scalar context. |
564
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
=cut |
566
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
# Read the whole file: open it for reading, call read_header, call read_mol
# (in list context, passing the handle and the stored options) until it
# returns nothing, then call read_footer and close.
#
# The molecules are collected in an array whose reference is stored in
# $self->mols before reading starts. Returns all the molecules in list
# context, or the first one in scalar context.
sub read {
    my ($self) = @_;

    $self->open('<');
    $self->read_header;

    my @all_mols;
    $self->mols(\@all_mols);
    while (1) {
        my @batch = $self->read_mol($self->fh, %{ $self->{opts} });
        last unless @batch;
        push @all_mols, @batch;
    }

    $self->read_footer;
    $self->close;

    return @all_mols if wantarray;
    return $all_mols[0];
}
580
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
=item $self->write |
582
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
Write all the molecules in $self->mols. It just calls open, write_header, |
584
|
|
|
|
|
|
|
write_mol (per each molecule), write_footer, and close. |
585
|
|
|
|
|
|
|
|
586
|
|
|
|
|
|
|
=cut |
587
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
# Write the whole file: open it for writing, call write_header, call
# write_mol for each molecule in $self->mols (passing the handle and the
# stored options), then call write_footer and close. Returns whatever
# close returns.
sub write {
    my ($self) = @_;

    $self->open('>');
    $self->write_header;

    foreach my $mol (@{ $self->mols }) {
        $self->write_mol($self->fh, $mol, %{ $self->{opts} });
    }

    $self->write_footer;
    return $self->close;
}
598
|
|
|
|
|
|
|
|
599
|
|
|
|
|
|
|
1; |
600
|
|
|
|
|
|
|
|
601
|
|
|
|
|
|
|
=back |
602
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
=head1 CAVEATS |
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
The :auto feature may not be entirely portable, but it is known to work under |
606
|
|
|
|
|
|
|
Unix and Windows (either Cygwin or ActiveState). |
607
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
=head1 VERSION |
609
|
|
|
|
|
|
|
|
610
|
|
|
|
|
|
|
0.37 |
611
|
|
|
|
|
|
|
|
612
|
|
|
|
|
|
|
=head1 SEE ALSO |
613
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
L<Chemistry::Mol> |
615
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
The PerlMol website L<http://www.perlmol.org/> |
617
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
=head1 AUTHOR |
619
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
Ivan Tubert-Brohman |
621
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
=head1 COPYRIGHT |
623
|
|
|
|
|
|
|
|
624
|
|
|
|
|
|
|
Copyright (c) 2005 Ivan Tubert-Brohman. All rights reserved. This program is |
625
|
|
|
|
|
|
|
free software; you can redistribute it and/or modify it under the same terms as |
626
|
|
|
|
|
|
|
Perl itself. |
627
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
=cut |
629
|
|
|
|
|
|
|
|