line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Chemistry::File; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
our $VERSION = '0.39'; # VERSION |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=head1 NAME |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
Chemistry::File - Molecule file I/O base class |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=head1 SYNOPSIS |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
# As a convenient interface for several mol readers: |
12
|
|
|
|
|
|
|
use Chemistry::File qw(PDB MDLMol); # load PDB and MDL modules |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
# or try to use every file I/O module installed in the system: |
15
|
|
|
|
|
|
|
use Chemistry::File ':auto'; |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
my $mol1 = Chemistry::Mol->read("file.pdb"); |
18
|
|
|
|
|
|
|
my $mol2 = Chemistry::Mol->read("file.mol"); |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
# as a base for a mol reader: |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
package Chemistry::File::Myfile; |
24
|
|
|
|
|
|
|
use base qw(Chemistry::File); |
25
|
|
|
|
|
|
|
use Chemistry::Mol; |
26
|
|
|
|
|
|
|
Chemistry::Mol->register_format("myfile", __PACKAGE__); |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
# override the read_mol method |
29
|
|
|
|
|
|
|
sub read_mol { |
30
|
|
|
|
|
|
|
my ($self, $fh, %opts) = @_; |
31
|
|
|
|
|
|
|
my $mol_class = $opts{mol_class} || "Chemistry::Mol"; |
32
|
|
|
|
|
|
|
my $mol = $mol_class->new; |
33
|
|
|
|
|
|
|
# ... do some stuff with $fh and $mol ... |
34
|
|
|
|
|
|
|
return $mol; |
35
|
|
|
|
|
|
|
} |
36
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
# override the write_mol method |
38
|
|
|
|
|
|
|
sub write_mol { |
39
|
|
|
|
|
|
|
my ($self, $fh, $mol, %opts) = @_; |
40
|
|
|
|
|
|
|
print $fh $mol->name, "\n"; |
41
|
|
|
|
|
|
|
# ... do some stuff with $fh and $mol ... |
42
|
|
|
|
|
|
|
} |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
=head1 DESCRIPTION |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
The main use of this module is as a base class for other molecule file I/O |
47
|
|
|
|
|
|
|
modules (for example, Chemistry::File::PDB). Such modules should override and |
48
|
|
|
|
|
|
|
extend the Chemistry::File methods as needed. You only need to care about the |
49
|
|
|
|
|
|
|
methods here if you are writing a file I/O module or if you want a finer |
50
|
|
|
|
|
|
|
degree of control than what is offered by the simple read and write methods |
51
|
|
|
|
|
|
|
in the Chemistry::Mol class. |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
From the user's point of view, this module can also be used as shorthand |
54
|
|
|
|
|
|
|
for using several Chemistry::File modules at the same time. |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
use Chemistry::File qw(PDB MDLMol); |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
is exactly equivalent to |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
use Chemistry::File::PDB; |
61
|
|
|
|
|
|
|
use Chemistry::File::MDLMol; |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
If you use the :auto keyword, Chemistry::File will autodetect and load |
64
|
|
|
|
|
|
|
all the Chemistry::File::* modules installed in your system. |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
use Chemistry::File ':auto'; |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
=head1 FILE I/O MODEL |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
Before version 0.30, file I/O modules typically used only parse_string, |
71
|
|
|
|
|
|
|
write_string, parse_file, and write_file, and they were generally used as class |
72
|
|
|
|
|
|
|
methods. A file could contain one or more molecules and only be read or written |
73
|
|
|
|
|
|
|
whole; reading it would return every molecule on the file. This was problematic |
74
|
|
|
|
|
|
|
when dealing with large multi-molecule files (such as SDF files), because all |
75
|
|
|
|
|
|
|
the molecules would have to be loaded into memory at the same time. |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
While version 0.30 retains backward compatibility with that simple model, it |
78
|
|
|
|
|
|
|
also allows a more flexible interface that allows reading one molecule at a |
79
|
|
|
|
|
|
|
time, skipping molecules, and reading and writing file-level information that |
80
|
|
|
|
|
|
|
is not associated with specific molecules. The following diagram shows the |
81
|
|
|
|
|
|
|
global structure of a file according to the new model: |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
+-----------+ |
84
|
|
|
|
|
|
|
| header | |
85
|
|
|
|
|
|
|
+-----------+ |
86
|
|
|
|
|
|
|
| molecule | |
87
|
|
|
|
|
|
|
+-----------+ |
88
|
|
|
|
|
|
|
| molecule | |
89
|
|
|
|
|
|
|
+-----------+ |
90
|
|
|
|
|
|
|
| ... | |
91
|
|
|
|
|
|
|
+-----------+ |
92
|
|
|
|
|
|
|
| footer | |
93
|
|
|
|
|
|
|
+-----------+ |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
In cases where the header and the footer are empty, the model reduces to the |
96
|
|
|
|
|
|
|
pre-0.30 version. The low-level steps to read a file are the following: |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
$file = Chemistry::File::MyFormat->new(file => 'xyz.mol'); |
99
|
|
|
|
|
|
|
$file->open('<'); |
100
|
|
|
|
|
|
|
$file->read_header; |
101
|
|
|
|
|
|
|
while (my $mol = $file->read_mol($file->fh, %opts)) { |
102
|
|
|
|
|
|
|
# do something with $mol... |
103
|
|
|
|
|
|
|
} |
104
|
|
|
|
|
|
|
$file->read_footer; |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
The C<read> method does all the above automatically, and it stores all the |
107
|
|
|
|
|
|
|
molecules read in the mols property. |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
=head1 STANDARD OPTIONS |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
All the methods below include a list of options %opts at the end of the |
112
|
|
|
|
|
|
|
parameter list. Each class implementing this interface may have its own |
113
|
|
|
|
|
|
|
particular options. However, the following options should be recognized by all |
114
|
|
|
|
|
|
|
classes: |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
=over |
117
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
=item mol_class |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
A class or object with a C<new> method that constructs a molecule. This is |
121
|
|
|
|
|
|
|
needed when the user wants to specify a molecule subclass different from the |
122
|
|
|
|
|
|
|
default. When this option is not defined, the module may use Chemistry::Mol |
123
|
|
|
|
|
|
|
or whichever class is appropriate for that file format. |
124
|
|
|
|
|
|
|
|
125
|
|
|
|
|
|
|
=item format |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
The name of the file format being used, as registered by |
128
|
|
|
|
|
|
|
Chemistry::Mol->register_format. |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=item fatal |
131
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
If true, parsing errors should throw an exception; if false, they should just |
133
|
|
|
|
|
|
|
try to recover if possible. True by default. |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
=back |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
=head1 CLASS METHODS |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
The class methods in this class (or rather, its derived classes) are usually |
140
|
|
|
|
|
|
|
not called directly. Instead, use Chemistry::Mol->read, write, print, parse, |
141
|
|
|
|
|
|
|
and file. These methods also work if called as instance methods. |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
=over |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
=cut |
147
|
|
|
|
|
|
|
|
148
|
13
|
|
|
13
|
|
63275
|
use strict; |
|
13
|
|
|
|
|
33
|
|
|
13
|
|
|
|
|
337
|
|
149
|
13
|
|
|
13
|
|
57
|
use warnings; |
|
13
|
|
|
|
|
23
|
|
|
13
|
|
|
|
|
314
|
|
150
|
13
|
|
|
13
|
|
51
|
no warnings qw(uninitialized); |
|
13
|
|
|
|
|
19
|
|
|
13
|
|
|
|
|
431
|
|
151
|
13
|
|
|
13
|
|
60
|
use Carp; |
|
13
|
|
|
|
|
19
|
|
|
13
|
|
|
|
|
754
|
|
152
|
13
|
|
|
13
|
|
5080
|
use FileHandle; |
|
13
|
|
|
|
|
112928
|
|
|
13
|
|
|
|
|
70
|
|
153
|
13
|
|
|
13
|
|
3708
|
use base qw(Chemistry::Obj); |
|
13
|
|
|
|
|
21
|
|
|
13
|
|
|
|
|
11345
|
|
154
|
|
|
|
|
|
|
# don't blame our problems in the Chemistry::Mol module ;-) |
155
|
|
|
|
|
|
|
our @CARP_NOT = qw(Chemistry::Mol); |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
# This subroutine implements the :auto functionality |
158
|
|
|
|
|
|
|
# Implements "use Chemistry::File LIST". Each element of LIST is either the
# keyword ':auto' (load every Chemistry/File/*.pm found under @INC) or the
# trailing part of a module name below $pack (e.g. 'PDB' loads
# Chemistry::File::PDB). Dies if a module cannot be loaded; croaks if an
# argument is not a syntactically valid module name.
sub import {
    my ($pack, @params) = @_;
    for my $param (@params) {
        if ($param eq ':auto') {
            # Only plain directory entries can be globbed; @INC may also
            # hold hook references, which are skipped.
            for my $dir (grep { !ref } @INC) {
                for my $pmfile (glob "$dir/Chemistry/File/*.pm") {
                    my ($pm) = $pmfile =~ m|(Chemistry/File/.*\.pm)$|;
                    next unless defined $pm;
                    #warn "requiring $pm\n";
                    eval { require $pm };
                    die "Error in Chemistry::File: '$@'; pmfile='$pmfile'; pm='$pm'\n" if $@;
                }
            }
        } else {
            # Validate before loading: the name used to be interpolated
            # into a string eval, so arbitrary text would have been run as
            # Perl code.
            croak "Invalid module name '$param' in import list of $pack"
                unless $param =~ /\A\w+(?:::\w+)*\z/;
            my $module = "${pack}::$param";
            (my $path = "$module.pm") =~ s{::}{/}g;
            # Same effect as "use $module": load it, then call its import.
            eval { require $path; $module->import };
            die "$@" if $@;
        }
    }
}
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
=item $class->parse_string($s, %options) |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
Parse a string $s and return one or more molecule objects. This is an abstract |
178
|
|
|
|
|
|
|
method, so it should be provided by all derived classes. |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
=cut |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
# Parse the string $s by treating it as an in-memory file and reading it.
# Returns whatever read() returns in the caller's context.
sub parse_string {
    my ($self, $s, %opts) = @_;

    # read_mol() passes _must_override when it falls back to this method;
    # seeing the flag here means the subclass overrode neither method.
    croak "parse_string() is not implemented for " . (ref $self || $self)
        if $opts{_must_override};

    my $reader = $self->new(file => \$s, opts => \%opts);
    return $reader->read;
}
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
=item $class->write_string($mol, %options) |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
Convert a molecule to a string. This is an abstract method, so it should be |
195
|
|
|
|
|
|
|
provided by all derived classes. |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
=cut |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
# Serialize $mol by writing it to an in-memory file and returning the
# resulting string.
sub write_string {
    my ($self, $mol, %opts) = @_;

    # write_mol() passes _must_override when it falls back to this method;
    # seeing the flag here means the subclass overrode neither method.
    croak "write_string() is not implemented for " . (ref $self || $self)
        if $opts{_must_override};

    my $buffer;
    my $writer = $self->new(file => \$buffer, mols => [$mol], opts => \%opts);
    $writer->write;
    return $buffer;
}
209
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
=item $class->parse_file($file, %options) |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
Reads the file $file and returns one or more molecules. The default method |
213
|
|
|
|
|
|
|
slurps the whole file and then calls parse_string, but derived classes may |
214
|
|
|
|
|
|
|
choose to override it. $file can be a filehandle, a filename, or a scalar |
215
|
|
|
|
|
|
|
reference. See C<new> for details. |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
=cut |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
# Build a file object for $file and read every molecule from it.
# Returns whatever read() returns in the caller's context.
sub parse_file {
    my ($self, $file, %opts) = @_;
    my $reader = $self->new(file => $file, opts => \%opts);
    return $reader->read;
}
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
=item $class->write_file($mol, $file, %options) |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
Writes a file $file containing the molecule $mol. The default method calls |
227
|
|
|
|
|
|
|
write_string first and then saves the string to a file, but derived classes |
228
|
|
|
|
|
|
|
may choose to override it. $file can be either a filehandle or a filename. |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
=cut |
231
|
|
|
|
|
|
|
|
232
|
|
|
|
|
|
|
# Build a file object for $file holding the single molecule $mol and write it.
sub write_file {
    my ($self, $mol, $file, %opts) = @_;
    my $writer = $self->new(file => $file, mols => [$mol], opts => \%opts);
    return $writer->write;
}
237
|
|
|
|
|
|
|
|
238
|
|
|
|
|
|
|
=item $class->name_is($fname, %options) |
239
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
Returns true if a filename is of the format corresponding to the class. |
241
|
|
|
|
|
|
|
It should look at the filename only, because it may be called with |
242
|
|
|
|
|
|
|
non-existent files. It is used to determine with which format to save a file. |
243
|
|
|
|
|
|
|
For example, the Chemistry::File::PDB returns true if the file ends in .pdb. |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
=cut |
246
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
# Default filename test: the base class recognizes no filename.
# Subclasses override this to claim their own extensions.
sub name_is {
    return 0;
}
250
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
=item $class->string_is($s, %options) |
252
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
Examines the string $s and returns true if it has the format of the class. |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
=cut |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
# Default content test: the base class recognizes no string.
# Subclasses override this to sniff their own format.
sub string_is {
    return 0;
}
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
=item $class->file_is($file, %options) |
262
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
Examines the file $file and returns true if it has the format of the class. |
264
|
|
|
|
|
|
|
The default method slurps the whole file and then calls string_is, but derived |
265
|
|
|
|
|
|
|
classes may choose to override it. |
266
|
|
|
|
|
|
|
|
267
|
|
|
|
|
|
|
=cut |
268
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
# Decide whether $file is in this class's format.
#
# The contents are tested with string_is() when they can be read without
# side effects, i.e. when $file is a filename or a scalar reference. If
# that does not give a positive answer and $file is a plain filename, the
# name alone is tested with name_is(), so this also works for files that
# do not exist yet. Returns a true value on a match, false otherwise.
sub file_is {
    my ($self, $file, %opts) = @_;

    my $contents;
    # A caller-supplied filehandle is never read here: slurping it would
    # consume the data the caller still intends to parse.
    if (defined $file and (!ref $file or ref $file eq 'SCALAR')) {
        # open() and slurp() need an object that holds $file; calling them
        # on the invocant (usually a class name) always failed, so the
        # contents were never examined.
        $contents = eval {
            my $probe = $self->new(file => $file, opts => {%opts});
            $probe->open('<');
            my $text = $probe->slurp;
            $probe->close;
            $text;
        };
    }

    if (defined $contents and length $contents) {
        my $verdict = $self->string_is($contents, %opts);
        return $verdict if $verdict;
    }

    # Unreadable, missing or unrecognized content: judge by name only.
    return $self->name_is($file, %opts) unless ref $file;
    return 0;
}
282
|
|
|
|
|
|
|
|
283
|
|
|
|
|
|
|
=item $class->slurp |
284
|
|
|
|
|
|
|
|
285
|
|
|
|
|
|
|
Reads a file into a scalar. Automatic decompression of gzipped files is |
286
|
|
|
|
|
|
|
supported if the Compress::Zlib module is installed. Files ending in .gz are |
287
|
|
|
|
|
|
|
assumed to be compressed; otherwise it is possible to force decompression by |
288
|
|
|
|
|
|
|
passing the gzip => 1 option (or no decompression with gzip => 0). |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
=cut |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
# slurp a file into a scalar, with transparent decompression |
293
|
|
|
|
|
|
|
# Return everything that remains to be read from the object's filehandle.
sub slurp {
    my ($self) = @_;

    my $handle = $self->fh;
    local $/ = undef;    # no record separator: one read returns the rest
    return readline $handle;
}
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
=item $class->new(file => $file, opts => \%opts) |
302
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
Create a new file object. This method is usually called indirectly via |
304
|
|
|
|
|
|
|
the Chemistry::Mol->file method. $file may be a scalar with a filename, an |
305
|
|
|
|
|
|
|
open filehandle, or a reference to a scalar. If a reference to a scalar is |
306
|
|
|
|
|
|
|
used, the string contained in the scalar is used as an in-memory file. |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
=cut |
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
# Construct the object through the parent class, then make parse errors
# fatal unless the caller supplied an explicit 'fatal' option.
sub new {
    my ($class, @args) = @_;
    my $self = $class->SUPER::new(@args);
    if (!exists $self->{opts}{fatal}) {
        $self->{opts}{fatal} = 1;
    }
    return $self;
}
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
Chemistry::Obj::accessor(qw(file fh opts mols mode)); |
317
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
=back |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
=head1 INSTANCE METHODS |
321
|
|
|
|
|
|
|
|
322
|
|
|
|
|
|
|
=head2 Accessors |
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
Chemistry::File objects are derived from Chemistry::Obj and have the same |
325
|
|
|
|
|
|
|
properties (name, id, and type), as well as the following ones: |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
=over |
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
=item file |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
The "file" as described above under C<new>. |
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
=item fh |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
The filehandle used for reading and writing molecules. It is opened by C<open>. |
336
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
=item opts |
338
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
A hashref containing the options that are passed through to the old-style class |
340
|
|
|
|
|
|
|
methods. They are also passed to the instance method to keep a similar |
341
|
|
|
|
|
|
|
interface, but they could access them via $self->opts anyway. |
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
=item mode |
344
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
'>' if the file is open for writing, '<' for reading, and false if not open. |
346
|
|
|
|
|
|
|
|
347
|
|
|
|
|
|
|
=item mols |
348
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
C<read> stores all the molecules that were read in this property as an array |
350
|
|
|
|
|
|
|
reference. C<write> gets the molecules to write from here. |
351
|
|
|
|
|
|
|
|
352
|
|
|
|
|
|
|
=back |
353
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
=head2 Abstract methods |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
These methods should be overridden, because they don't really do much by |
357
|
|
|
|
|
|
|
default. |
358
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
=over |
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
=item $file->read_header |
362
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
Read whatever information is available in the file before the first molecule. |
364
|
|
|
|
|
|
|
Does nothing by default. |
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
=cut |
367
|
|
|
|
|
|
|
|
368
|
|
|
|
14
|
1
|
|
# Hook for subclasses; the base class has no header to read.
sub read_header {
    return;
}
369
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
=item $file->read_footer |
371
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
Read whatever information is available in the file after the last molecule. |
373
|
|
|
|
|
|
|
Does nothing by default. |
374
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
=cut |
376
|
|
|
|
|
|
|
|
377
|
|
|
|
14
|
1
|
|
# Hook for subclasses; the base class has no footer to read.
sub read_footer {
    return;
}
378
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
=item $self->slurp_mol($fh) |
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
Reads from the input string until the end of the current molecule and returns |
382
|
|
|
|
|
|
|
the "slurped" string. It does not parse the string. It returns undefined if |
383
|
|
|
|
|
|
|
there are no more molecules in the file. This method should be overridden if |
384
|
|
|
|
|
|
|
needed; by default, it slurps until the end of the file. |
385
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
=cut |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
# Default "one molecule" reader: return everything left on $fh.
sub slurp_mol {
    my ($self, $fh) = @_;
    local $/ = undef;    # no record separator: one read returns the rest
    return readline $fh;
}
392
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
=item $self->skip_mol($fh) |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
Similar to slurp_mol, but it doesn't need to return anything except true or |
396
|
|
|
|
|
|
|
false. It should also be overridden if needed; by default, it just calls |
397
|
|
|
|
|
|
|
slurp_mol. |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
=cut |
400
|
|
|
|
|
|
|
|
401
|
0
|
|
|
0
|
1
|
0
|
# Default skip: read the molecule text with slurp_mol and pass its result on.
sub skip_mol {
    my ($self, @args) = @_;
    return $self->slurp_mol(@args);
}
402
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
=item $file->read_mol($fh, %opts) |
404
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
Read the next molecule in the input stream. It returns false if there are no |
406
|
|
|
|
|
|
|
more molecules in the file. This method should be overridden by derived |
407
|
|
|
|
|
|
|
classes; otherwise it will call slurp_mol and parse_string (for backwards |
408
|
|
|
|
|
|
|
compatibility; it is recommended to override read_mol directly in new modules). |
409
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
Note: some old file I/O modules (written before the 0.30 interface) may return |
411
|
|
|
|
|
|
|
more than one molecule anyway, so it is recommended to call read_mol in list |
412
|
|
|
|
|
|
|
context to be safe: |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
($mol) = $file->read_mol($fh, %opts); |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
=cut |
417
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
# Backward-compatible default: slurp the next molecule's text and hand it to
# parse_string. Returns nothing when no text is left.
sub read_mol {
    my ($self, $fh, %opts) = @_;

    my $chunk = $self->slurp_mol($fh);
    return if !defined $chunk or !length $chunk;

    # _must_override makes the base parse_string croak instead of calling
    # back into this method.
    return $self->parse_string($chunk, %opts, _must_override => 1);
}
424
|
|
|
|
|
|
|
=item $file->write_header |
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
Write whatever information is needed before the first molecule. |
427
|
|
|
|
|
|
|
Does nothing by default. |
428
|
|
|
|
|
|
|
|
429
|
|
|
|
|
|
|
=cut |
430
|
|
|
|
|
|
|
|
431
|
|
|
|
6
|
0
|
|
# Hook for subclasses; the base class writes no header.
sub write_header {
    return;
}
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
=item $file->write_footer |
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
Write whatever information is needed after the last molecule. |
436
|
|
|
|
|
|
|
Does nothing by default. |
437
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
=cut |
439
|
|
|
|
|
|
|
|
440
|
|
|
|
6
|
1
|
|
# Hook for subclasses; the base class writes no footer.
sub write_footer {
    return;
}
441
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
=item $self->write_mol($fh, $mol, %opts) |
443
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
Write one molecule to $fh. By default and for backward compatibility, it just |
445
|
|
|
|
|
|
|
calls C<write_string> and prints its return value to $self->fh. New classes |
446
|
|
|
|
|
|
|
should override it. |
447
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
=cut |
449
|
|
|
|
|
|
|
|
450
|
|
|
|
|
|
|
# Backward-compatible default: render $mol with write_string and print the
# result to $fh. Returns the result of print.
sub write_mol {
    my ($self, $fh, $mol, %opts) = @_;

    # _must_override makes the base write_string croak instead of calling
    # back into this method.
    my @pieces = $self->write_string($mol, %opts, _must_override => 1);
    return print {$fh} @pieces;
}
454
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
########################## OTHER ################################## |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
=back |
458
|
|
|
|
|
|
|
|
459
|
|
|
|
|
|
|
=head2 Other methods |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
=over |
462
|
|
|
|
|
|
|
|
463
|
|
|
|
|
|
|
=item $self->open($mode) |
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
Opens the file (held in $self->file) for reading by default, or for writing if |
466
|
|
|
|
|
|
|
$mode eq '>'. This method sets $self->fh transparently regardless of whether |
467
|
|
|
|
|
|
|
$self->file is a filename (compressed or not), a scalar reference, or a |
468
|
|
|
|
|
|
|
filehandle. |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
=cut |
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
# Open $self->file in $mode ('<' by default) and store the resulting handle
# in $self->fh. Returns $self.
#
# $self->file may be:
#   * a scalar reference - opened as an in-memory file;
#   * any other reference - taken to be an already open filehandle;
#   * a filename - opened directly, or through a temporary file when gzip
#     handling is active (opts->{gzip} true, or opts->{gzip} undefined and
#     the name ends in ".gz").
#
# Croaks if no file was supplied, if gzip is requested for anything but a
# filename, or if the file cannot be opened or decompressed.
sub open {
    my ($self, $mode) = @_;
    my $fh;
    $mode ||= '<';
    $self->mode($mode);
    my $file = $self->file;
    croak "Chemistry::File::open: no file supplied" unless defined $file;
    if (ref $file eq 'SCALAR') {
        croak "decompression only supported for files" if $self->{opts}{gzip};
        if ($] >= 5.008) {
            # In-memory filehandles are built in from perl 5.8 on.
            open $fh, $mode, $file
                or croak "Could not open in-memory file: $!";
        } else {
            require IO::String;
            $fh = IO::String->new($$file);
        }
    } elsif (ref $file) {
        croak "decompression only supported for files" if $self->{opts}{gzip};
        $fh = $file;
    } elsif ($self->{opts}{gzip}
        or !defined $self->{opts}{gzip} and $file =~ /\.gz$/)
    {
        eval { require Compress::Zlib } # Carp
            or croak "Compress::Zlib not installed!";
        require File::Temp;

        # All I/O goes through an uncompressed temporary file. For reading
        # it is filled here; for writing, close() compresses it afterwards.
        $fh = File::Temp::tempfile();
        $self->{opts}{gzip} ||= 1;
        unless ($mode eq '>') {
            my $gz = Compress::Zlib::gzopen($file, "rb")
                or croak "Cannot open compressed $file: "
                    . "$Compress::Zlib::gzerrno\n";

            my $buffer;
            while ($gz->gzread($buffer) > 0) {
                print {$fh} $buffer
                    or croak "Error writing temporary file for $file: $!";
            }

            if ($Compress::Zlib::gzerrno != Compress::Zlib::Z_STREAM_END()) {
                croak "Error reading from $file: $Compress::Zlib::gzerrno"
                    . ($Compress::Zlib::gzerrno+0) . "\n";
            }
            $gz->gzclose();
            seek $fh, 0, 0;    # rewind so readers start at the beginning
        }
    } else {
        # NOTE(review): mode and name are joined into one string, so the
        # filename can carry open() magic such as '-'; kept as is because
        # callers may rely on it - confirm before switching to a
        # separate-mode open.
        $fh = FileHandle->new("$mode$file")
            or croak "Could not open file $file: $!";
    }
    $self->fh($fh);
    return $self;
}
522
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
=item $self->close |
524
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
Close the file. For regular files this just closes the filehandle, but for |
526
|
|
|
|
|
|
|
gzipped files it does some additional postprocessing. This method is called |
527
|
|
|
|
|
|
|
automatically on object destruction, so it is not mandatory to call it |
528
|
|
|
|
|
|
|
explicitly. |
529
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
=cut |
531
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
# Close the file. When the object was opened for writing with gzip enabled,
# the handle's contents are first compressed into the destination file.
# Resets the mode to '' afterwards.
sub close {
    my ($self) = @_;
    my $handle = $self->fh;

    my $must_compress =
        ($handle and $self->mode eq '>' and $self->{opts}{gzip});
    if ($must_compress) {
        # A gzip option of 1 only means "compress"; use level 6 for it.
        my $level = $self->{opts}{gzip} || 6;
        if ($level == 1) {
            $level = 6;
        }

        my $file = $self->file;
        croak "compression only supported for files" if ref $file;

        seek $handle, 0, 0;
        my $gz = Compress::Zlib::gzopen($file, "wb$level")
            or croak "Cannot open $file $Compress::Zlib::gzerrno\n";
        while (defined(my $line = <$handle>)) {
            $gz->gzwrite($line)
                or croak "error writing: $Compress::Zlib::gzerrno\n";
        }
        $gz->gzclose;
    }

    # Left as the final statement, without an explicit return, so the
    # value handed back to callers stays the same.
    if ($self->mode) {
        if ($handle) { $handle->close or croak "$!" };
        $self->mode('');
    }
}
558
|
|
|
|
|
|
|
|
559
|
22
|
|
|
22
|
|
755
|
# Close the file when the object is destroyed.
sub DESTROY {
    my ($self) = @_;
    $self->close;
}
560
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
=item $file->read |
562
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
Read the whole file. This calls open, read_header, read_mol until there are no |
564
|
|
|
|
|
|
|
more molecules left, read_footer, and close. Returns a list of molecules if |
565
|
|
|
|
|
|
|
called in list context, or the first molecule in scalar context. |
566
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
=cut |
568
|
|
|
|
|
|
|
|
569
|
|
|
|
|
|
|
# Read the whole file: open, header, every molecule, footer, close.
# The molecules are stored in $self->mols. Returns all of them in list
# context, otherwise the first one.
sub read {
    my ($self) = @_;

    $self->open('<');
    $self->read_header;

    my @collected;
    $self->mols(\@collected);
    while (1) {
        # List context: read_mol may return more than one molecule.
        my @batch = $self->read_mol($self->fh, %{ $self->{opts} });
        last unless @batch;
        push @collected, @batch;
    }

    $self->read_footer;
    $self->close;

    return @collected if wantarray;
    return $collected[0];
}
582
|
|
|
|
|
|
|
|
583
|
|
|
|
|
|
|
=item $self->write |
584
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
Write all the molecules in $self->mols. It just calls open, write_header, |
586
|
|
|
|
|
|
|
write_mol (per each molecule), write_footer, and close. |
587
|
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
=cut |
589
|
|
|
|
|
|
|
|
590
|
|
|
|
|
|
|
# Write the whole file: open, header, each molecule in $self->mols, footer,
# close. When no molecules have been set, only the header and footer are
# written. Returns the result of close.
sub write {
    my ($self) = @_;

    $self->open('>');
    $self->write_header;

    # Tolerate an unset mols property instead of dying on the dereference
    # after the output file has already been opened.
    for my $mol (@{ $self->mols || [] }) {
        $self->write_mol($self->fh, $mol, %{ $self->{opts} });
    }

    $self->write_footer;
    return $self->close;
}
600
|
|
|
|
|
|
|
|
601
|
|
|
|
|
|
|
1; |
602
|
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
=back |
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
=head1 CAVEATS |
606
|
|
|
|
|
|
|
|
607
|
|
|
|
|
|
|
The :auto feature may not be entirely portable, but it is known to work under |
608
|
|
|
|
|
|
|
Unix and Windows (either Cygwin or ActiveState). |
609
|
|
|
|
|
|
|
|
610
|
|
|
|
|
|
|
=head1 SOURCE CODE REPOSITORY |
611
|
|
|
|
|
|
|
|
612
|
|
|
|
|
|
|
L |
613
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
=head1 SEE ALSO |
615
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
L |
617
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
=head1 AUTHOR |
619
|
|
|
|
|
|
|
|
620
|
|
|
|
|
|
|
Ivan Tubert-Brohman |
621
|
|
|
|
|
|
|
|
622
|
|
|
|
|
|
|
=head1 COPYRIGHT |
623
|
|
|
|
|
|
|
|
624
|
|
|
|
|
|
|
Copyright (c) 2005 Ivan Tubert-Brohman. All rights reserved. This program is |
625
|
|
|
|
|
|
|
free software; you can redistribute it and/or modify it under the same terms as |
626
|
|
|
|
|
|
|
Perl itself. |
627
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
=cut |
629
|
|
|
|
|
|
|
|