line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Chemistry::Mol; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
our $VERSION = '0.39'; # VERSION |
4
|
|
|
|
|
|
|
# $Id$ |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
=head1 NAME |
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
Chemistry::Mol - Molecule object toolkit |
9
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
=head1 SYNOPSIS |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
use Chemistry::Mol; |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
$mol = Chemistry::Mol->new(id => "mol_id", name => "my molecule"); |
15
|
|
|
|
|
|
|
$c = $mol->new_atom(symbol => "C", coords => [0,0,0]); |
16
|
|
|
|
|
|
|
$o = $mol->new_atom(symbol => "O", coords => [0,0,1.23]); |
17
|
|
|
|
|
|
|
$mol->new_bond(atoms => [$c, $o], order => 3); |
18
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
print $mol->print; |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
=head1 DESCRIPTION |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
This package, along with Chemistry::Atom and Chemistry::Bond, includes basic |
24
|
|
|
|
|
|
|
objects and methods to describe molecules. |
25
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
The core methods try not to enforce a particular convention. This means that |
27
|
|
|
|
|
|
|
only a minimal set of attributes is provided by default, and some attributes |
28
|
|
|
|
|
|
|
have very loosely defined meaning. This is because each program and file type |
29
|
|
|
|
|
|
|
has different idea of what each concept (such as bond and atom type) means. |
30
|
|
|
|
|
|
|
Bonds are defined as a list of atoms (typically two) with an arbitrary type. |
31
|
|
|
|
|
|
|
Atoms are defined by a symbol and a Z, and may have 3D and internal coordinates |
32
|
|
|
|
|
|
|
(2D coming soon). |
33
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
=cut |
35
|
|
|
|
|
|
|
|
36
|
16
|
|
|
16
|
|
300916
|
use 5.006; |
|
16
|
|
|
|
|
87
|
|
37
|
16
|
|
|
16
|
|
75
|
use strict; |
|
16
|
|
|
|
|
24
|
|
|
16
|
|
|
|
|
322
|
|
38
|
16
|
|
|
16
|
|
69
|
use warnings; |
|
16
|
|
|
|
|
174
|
|
|
16
|
|
|
|
|
497
|
|
39
|
16
|
|
|
16
|
|
6491
|
use Chemistry::Atom; |
|
16
|
|
|
|
|
51
|
|
|
16
|
|
|
|
|
931
|
|
40
|
16
|
|
|
16
|
|
7547
|
use Chemistry::Bond; |
|
16
|
|
|
|
|
37
|
|
|
16
|
|
|
|
|
384
|
|
41
|
16
|
|
|
16
|
|
90
|
use Carp; |
|
16
|
|
|
|
|
27
|
|
|
16
|
|
|
|
|
846
|
|
42
|
16
|
|
|
16
|
|
79
|
use base qw(Chemistry::Obj Exporter); |
|
16
|
|
|
|
|
33
|
|
|
16
|
|
|
|
|
1624
|
|
43
|
16
|
|
|
16
|
|
9169
|
use Storable 'dclone'; |
|
16
|
|
|
|
|
45497
|
|
|
16
|
|
|
|
|
19500
|
|
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
our @EXPORT_OK = qw(read_mol); |
46
|
|
|
|
|
|
|
our @EXPORT = (); |
47
|
|
|
|
|
|
|
our %EXPORT_TAGS = ( |
48
|
|
|
|
|
|
|
all => [@EXPORT, @EXPORT_OK], |
49
|
|
|
|
|
|
|
); |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
our $clone_backend = 'Storable'; |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
my %FILE_FORMATS = (); |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
=head1 METHODS |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
See also L for generic attributes. |
58
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
=over 4 |
60
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
=item Chemistry::Mol->new(name => value, ...) |
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
Create a new Mol object with the specified attributes. |
64
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
$mol = Chemistry::Mol->new(id => 'm123', name => 'my mol') |
66
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
is the same as |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
Chemistry::Mol->new() |
70
|
|
|
|
|
|
|
$mol->id('m123') |
71
|
|
|
|
|
|
|
$mol->name('my mol') |
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
=cut |
74
|
|
|
|
|
|
|
|
75
|
|
|
|
|
|
|
sub new { |
76
|
26
|
|
|
26
|
1
|
1689
|
my $class = shift; |
77
|
26
|
|
|
|
|
48
|
my %args = @_; |
78
|
26
|
|
66
|
|
|
61
|
my $self = bless { |
79
|
|
|
|
|
|
|
id => $class->nextID, |
80
|
|
|
|
|
|
|
byId => {}, |
81
|
|
|
|
|
|
|
atoms => [], |
82
|
|
|
|
|
|
|
bonds => [], |
83
|
|
|
|
|
|
|
name => "", |
84
|
|
|
|
|
|
|
}, ref $class || $class; |
85
|
26
|
|
|
|
|
78
|
$self->$_($args{$_}) for (keys %args); |
86
|
26
|
|
|
|
|
78
|
return $self; |
87
|
|
|
|
|
|
|
} |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
my $N = 0; # molecule ID counter |
90
|
27
|
|
|
27
|
0
|
216
|
sub nextID { "mol".++$N; } |
91
|
0
|
|
|
0
|
0
|
0
|
sub reset_id { $N = 0; } |
92
|
0
|
|
|
0
|
0
|
0
|
sub next_id { $N = $_[1] } |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
=item $mol->add_atom($atom, ...) |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
Add one or more Atom objects to the molecule. Returns the last atom added. |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
=cut |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
sub add_atom { |
101
|
552
|
|
|
552
|
1
|
1419
|
my $self = shift; |
102
|
552
|
|
|
|
|
681
|
for my $atom (@_){ |
103
|
|
|
|
|
|
|
#if ($self->by_id($atom->id)) { |
104
|
|
|
|
|
|
|
#croak "Duplicate ID when adding atom '$atom' to mol '$self'"; |
105
|
|
|
|
|
|
|
#} |
106
|
553
|
|
|
|
|
557
|
push @{$self->{atoms}}, $atom; |
|
553
|
|
|
|
|
1028
|
|
107
|
553
|
|
|
|
|
941
|
$self->{byId}{$atom->id} = $atom; |
108
|
553
|
|
|
|
|
1042
|
$atom->parent($self); |
109
|
|
|
|
|
|
|
} |
110
|
552
|
|
|
|
|
1132
|
$_[-1]; |
111
|
|
|
|
|
|
|
} |
112
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
sub add_atom_np { |
114
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
115
|
0
|
|
|
|
|
0
|
for my $atom (@_){ |
116
|
0
|
|
|
|
|
0
|
push @{$self->{atoms}}, $atom; |
|
0
|
|
|
|
|
0
|
|
117
|
0
|
|
|
|
|
0
|
$self->{byId}{$atom->id} = $atom; |
118
|
|
|
|
|
|
|
} |
119
|
0
|
|
|
|
|
0
|
$_[-1]; |
120
|
|
|
|
|
|
|
} |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=item $mol->atom_class |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
Returns the atom class that a molecule or molecule class expects to use by |
125
|
|
|
|
|
|
|
default. L objects return "Chemistry::Atom", but subclasses |
126
|
|
|
|
|
|
|
will likely override this method. |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
=cut |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
sub atom_class { |
131
|
14
|
|
|
14
|
1
|
46
|
"Chemistry::Atom"; |
132
|
|
|
|
|
|
|
} |
133
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
=item $mol->new_atom(name => value, ...) |
135
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
Shorthand for C<< $mol->add_atom($mol->atom_class->new(name => value, ...)) >>. |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
=cut |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
sub new_atom { |
141
|
14
|
|
|
14
|
1
|
61
|
my $self = shift; |
142
|
14
|
|
|
|
|
34
|
$self->add_atom($self->atom_class->new(@_)); |
143
|
|
|
|
|
|
|
} |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
=item $mol->delete_atom($atom, ...) |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
Deletes an atom from the molecule. It automatically deletes all the bonds in |
148
|
|
|
|
|
|
|
which the atom participates as well. $atom should be a Chemistry::Atom |
149
|
|
|
|
|
|
|
reference. This method also accepts the atom index, but this use is deprecated |
150
|
|
|
|
|
|
|
(and buggy if multiple indices are given, unless they are in descending order). |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
=cut |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
sub delete_atom { |
155
|
2
|
|
|
2
|
1
|
4
|
my $self = shift; |
156
|
2
|
|
|
|
|
5
|
for my $i (@_) { |
157
|
2
|
|
|
|
|
4
|
my ($atom); |
158
|
2
|
100
|
|
|
|
6
|
if (ref $i) { |
159
|
1
|
|
|
|
|
2
|
$atom = $i; |
160
|
|
|
|
|
|
|
} else { |
161
|
1
|
50
|
|
|
|
4
|
$atom = $self->atoms($i) |
162
|
|
|
|
|
|
|
or croak "$self->delete_atom: no such atom $i\n"; |
163
|
|
|
|
|
|
|
} |
164
|
2
|
|
|
|
|
8
|
$atom->delete($i); |
165
|
|
|
|
|
|
|
} |
166
|
|
|
|
|
|
|
} |
167
|
|
|
|
|
|
|
|
168
|
|
|
|
|
|
|
# takes an atom ref to delete and optionally the atom index |
169
|
|
|
|
|
|
|
# 1) deletes bonds that belonged to atom |
170
|
|
|
|
|
|
|
# 2) deletes atom |
171
|
|
|
|
|
|
|
sub _delete_atom { |
172
|
7
|
|
|
7
|
|
20
|
my ($self, $atom) = @_; |
173
|
7
|
50
|
|
|
|
19
|
my $index = $self->get_atom_index($atom) |
174
|
|
|
|
|
|
|
or croak "$self->delete_atom: no such atom $atom\n"; |
175
|
7
|
|
|
|
|
71
|
my $id = $atom->id; |
176
|
7
|
|
|
|
|
40
|
$self->delete_bond($atom->bonds); |
177
|
7
|
|
|
|
|
39
|
delete $self->{byId}{$id}; |
178
|
7
|
|
|
|
|
22
|
splice @{$self->{atoms}}, $index - 1, 1; |
|
7
|
|
|
|
|
28
|
|
179
|
|
|
|
|
|
|
} |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
=item $mol->add_bond($bond, ...) |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
Add one or more Bond objects to the molecule. Returns the last bond added. |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
=cut |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
sub add_bond { |
188
|
25
|
|
|
25
|
1
|
38
|
my $self = shift; |
189
|
25
|
|
|
|
|
40
|
for my $bond (@_){ |
190
|
|
|
|
|
|
|
#if ($self->by_id($bond->id)) { |
191
|
|
|
|
|
|
|
#croak "Duplicate ID when adding bond '$bond' to mol '$self'"; |
192
|
|
|
|
|
|
|
#} |
193
|
25
|
|
|
|
|
30
|
push @{$self->{bonds}}, $bond; |
|
25
|
|
|
|
|
47
|
|
194
|
25
|
|
|
|
|
61
|
$self->{byId}{$bond->id} = $bond; |
195
|
25
|
100
|
|
|
|
56
|
if ($bond->{deleted}) { |
196
|
1
|
|
|
|
|
2
|
$_->add_bond($bond) for $bond->atoms; |
197
|
1
|
|
|
|
|
3
|
$bond->{deleted} = 0; |
198
|
|
|
|
|
|
|
} |
199
|
25
|
|
|
|
|
57
|
$bond->parent($self); |
200
|
|
|
|
|
|
|
} |
201
|
25
|
|
|
|
|
64
|
$_[-1]; |
202
|
|
|
|
|
|
|
} |
203
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
sub add_bond_np { |
205
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
206
|
0
|
|
|
|
|
0
|
for my $bond (@_){ |
207
|
0
|
|
|
|
|
0
|
push @{$self->{bonds}}, $bond; |
|
0
|
|
|
|
|
0
|
|
208
|
0
|
|
|
|
|
0
|
$self->{byId}{$bond->id} = $bond; |
209
|
|
|
|
|
|
|
} |
210
|
0
|
|
|
|
|
0
|
$_[-1]; |
211
|
|
|
|
|
|
|
} |
212
|
|
|
|
|
|
|
|
213
|
|
|
|
|
|
|
=item $mol->bond_class |
214
|
|
|
|
|
|
|
|
215
|
|
|
|
|
|
|
Returns the bond class that a molecule or molecule class expects to use by |
216
|
|
|
|
|
|
|
default. L objects return "Chemistry::Bond", but subclasses |
217
|
|
|
|
|
|
|
will likely override this method. |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
=cut |
220
|
|
|
|
|
|
|
|
221
|
|
|
|
|
|
|
sub bond_class { |
222
|
8
|
|
|
8
|
1
|
55
|
"Chemistry::Bond"; |
223
|
|
|
|
|
|
|
} |
224
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
=item $mol->new_bond(name => value, ...) |
226
|
|
|
|
|
|
|
|
227
|
|
|
|
|
|
|
Shorthand for C<< $mol->add_bond($mol->bond_class->new(name => value, ...)) >>. |
228
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
=cut |
230
|
|
|
|
|
|
|
|
231
|
|
|
|
|
|
|
sub new_bond { |
232
|
8
|
|
|
8
|
1
|
28
|
my $self = shift; |
233
|
8
|
|
|
|
|
28
|
$self->add_bond($self->bond_class->new(@_)); |
234
|
|
|
|
|
|
|
} |
235
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
sub get_bond_index { |
237
|
14
|
|
|
14
|
0
|
29
|
my ($self, $bond) = @_; |
238
|
14
|
|
|
|
|
18
|
my $i; |
239
|
14
|
|
|
|
|
31
|
for ($self->bonds) { |
240
|
42
|
|
|
|
|
50
|
++$i; |
241
|
42
|
100
|
|
|
|
74
|
return $i if ($_ eq $bond); |
242
|
|
|
|
|
|
|
} |
243
|
0
|
|
|
|
|
0
|
undef; |
244
|
|
|
|
|
|
|
} |
245
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
sub get_atom_index { |
247
|
7
|
|
|
7
|
0
|
12
|
my ($self, $atom) = @_; |
248
|
7
|
|
|
|
|
11
|
my $i; |
249
|
7
|
|
|
|
|
16
|
for ($self->atoms) { |
250
|
12
|
|
|
|
|
19
|
++$i; |
251
|
12
|
100
|
|
|
|
32
|
return $i if ($_ eq $atom); |
252
|
|
|
|
|
|
|
} |
253
|
0
|
|
|
|
|
0
|
undef; |
254
|
|
|
|
|
|
|
} |
255
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
=item $mol->delete_bond($bond, ...) |
257
|
|
|
|
|
|
|
|
258
|
|
|
|
|
|
|
Deletes a bond from the molecule. $bond should be a L object. |
259
|
|
|
|
|
|
|
|
260
|
|
|
|
|
|
|
=cut |
261
|
|
|
|
|
|
|
|
262
|
|
|
|
|
|
|
# mol deletes bond |
263
|
|
|
|
|
|
|
# bond tells atoms involved to forget about it |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
sub delete_bond { |
266
|
7
|
|
|
7
|
1
|
14
|
my $self = shift; |
267
|
7
|
|
|
|
|
15
|
for my $i (@_){ |
268
|
11
|
|
|
|
|
13
|
my ($bond); |
269
|
11
|
50
|
|
|
|
24
|
if (ref $i) { |
270
|
11
|
|
|
|
|
18
|
$bond = $i; |
271
|
|
|
|
|
|
|
} else { |
272
|
0
|
0
|
|
|
|
0
|
$bond = $self->bonds($i) |
273
|
|
|
|
|
|
|
or croak "$self->delete_bond($i): no such bond $i\n"; |
274
|
|
|
|
|
|
|
} |
275
|
11
|
|
|
|
|
29
|
$bond->delete; |
276
|
|
|
|
|
|
|
} |
277
|
|
|
|
|
|
|
} |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
sub _delete_bond { |
280
|
14
|
|
|
14
|
|
29
|
my ($self, $bond) = @_; |
281
|
14
|
50
|
|
|
|
35
|
my $index = $self->get_bond_index($bond) |
282
|
|
|
|
|
|
|
#or croak "$self->delete_bond: no such bond $bond\n"; |
283
|
|
|
|
|
|
|
or return; |
284
|
14
|
|
|
|
|
42
|
my $id = $bond->id; |
285
|
14
|
|
|
|
|
30
|
delete $self->{byId}{$id}; |
286
|
14
|
|
|
|
|
22
|
splice @{$self->{bonds}}, $index - 1, 1; |
|
14
|
|
|
|
|
38
|
|
287
|
14
|
|
|
|
|
37
|
$bond->delete_atoms; |
288
|
|
|
|
|
|
|
} |
289
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
=item $mol->by_id($id) |
291
|
|
|
|
|
|
|
|
292
|
|
|
|
|
|
|
Return the atom or bond object with the corresponding id. |
293
|
|
|
|
|
|
|
|
294
|
|
|
|
|
|
|
=cut |
295
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
sub by_id { |
297
|
3
|
|
|
3
|
1
|
6
|
my $self = shift; |
298
|
3
|
|
|
|
|
7
|
my ($id) = @_; |
299
|
3
|
|
|
|
|
14
|
$self->{byId}{$id}; |
300
|
|
|
|
|
|
|
} |
301
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
sub _change_id { |
303
|
4
|
|
|
4
|
|
9
|
my ($self, $old_id, $new_id) = @_; |
304
|
4
|
|
|
|
|
7
|
my $ref = $self->{byId}{$old_id}; |
305
|
4
|
|
|
|
|
9
|
$self->{byId}{$new_id} = $ref; |
306
|
4
|
|
|
|
|
10
|
delete $self->{byId}{$old_id}; |
307
|
|
|
|
|
|
|
} |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
=item $mol->atoms($n1, ...) |
310
|
|
|
|
|
|
|
|
311
|
|
|
|
|
|
|
Returns the atoms with the given indices, or all by default. |
312
|
|
|
|
|
|
|
Indices start from one, not from zero. |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
=cut |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
sub atoms { |
317
|
90
|
|
|
90
|
1
|
8230
|
my $self = shift; |
318
|
90
|
100
|
|
|
|
214
|
if (@_) { |
319
|
18
|
|
|
|
|
48
|
my @ats = map {$_ - 1} @_; |
|
24
|
|
|
|
|
68
|
|
320
|
18
|
|
|
|
|
32
|
@{$self->{atoms}}[@ats]; |
|
18
|
|
|
|
|
84
|
|
321
|
|
|
|
|
|
|
} else { |
322
|
72
|
|
|
|
|
88
|
@{$self->{atoms}}; |
|
72
|
|
|
|
|
383
|
|
323
|
|
|
|
|
|
|
} |
324
|
|
|
|
|
|
|
} |
325
|
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
=item $mol->atoms_by_name($name) |
327
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
Returns the atoms with the given name (treated as an anchored regular |
329
|
|
|
|
|
|
|
expression). |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
=cut |
332
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
sub atoms_by_name { |
334
|
1
|
|
|
1
|
1
|
3
|
my $self = shift; |
335
|
1
|
|
|
|
|
28
|
my $re = qr/^$_[0]$/; |
336
|
16
|
|
|
16
|
|
179
|
no warnings; |
|
16
|
|
|
|
|
36
|
|
|
16
|
|
|
|
|
7112
|
|
337
|
1
|
|
|
|
|
5
|
my @ret = grep {$_->name =~ $re} $self->atoms; |
|
2
|
|
|
|
|
8
|
|
338
|
1
|
50
|
|
|
|
8
|
wantarray ? @ret : $ret[0]; |
339
|
|
|
|
|
|
|
} |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
=item $mol->sort_atoms($sub_ref) |
342
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
Sort the atoms in the molecule by using the comparison function given in |
344
|
|
|
|
|
|
|
$sub_ref. This function should take two atoms as parameters and return -1, 0, |
345
|
|
|
|
|
|
|
or 1 depending on whether the first atom should go before, same, or after the |
346
|
|
|
|
|
|
|
second atom. For example, to sort by atomic number, you could use the |
347
|
|
|
|
|
|
|
following: |
348
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
$mol->sort_atoms( sub { $_[0]->Z <=> $_[1]->Z } ); |
350
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
Note that the atoms are passed as parameters and not as the package variables |
352
|
|
|
|
|
|
|
$a and $b like the core sort function does. This is because $mol->sort will |
353
|
|
|
|
|
|
|
likely be called from another package and we don't want to play with another |
354
|
|
|
|
|
|
|
package's symbol table. |
355
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
=cut |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
sub sort_atoms { |
359
|
0
|
|
|
0
|
1
|
0
|
my ($self, $sub) = @_; |
360
|
0
|
|
|
|
|
0
|
my @a = $self->atoms; |
361
|
0
|
|
|
|
|
0
|
@a = sort { $sub->($a,$b) } @a; |
|
0
|
|
|
|
|
0
|
|
362
|
0
|
|
|
|
|
0
|
$self->{atoms} = \@a; |
363
|
0
|
|
|
|
|
0
|
$self; |
364
|
|
|
|
|
|
|
} |
365
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
=item $mol->bonds($n1, ...) |
367
|
|
|
|
|
|
|
|
368
|
|
|
|
|
|
|
Returns the bonds with the given indices, or all by default. |
369
|
|
|
|
|
|
|
Indices start from one, not from zero. |
370
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
=cut |
372
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
sub bonds { |
374
|
49
|
|
|
49
|
1
|
648
|
my $self = shift; |
375
|
49
|
100
|
|
|
|
113
|
if (@_) { |
376
|
6
|
|
|
|
|
15
|
my @bonds = map {$_ - 1} @_; |
|
6
|
|
|
|
|
24
|
|
377
|
6
|
|
|
|
|
12
|
@{$self->{bonds}}[@bonds]; |
|
6
|
|
|
|
|
33
|
|
378
|
|
|
|
|
|
|
} else { |
379
|
43
|
|
|
|
|
54
|
@{$self->{bonds}}; |
|
43
|
|
|
|
|
155
|
|
380
|
|
|
|
|
|
|
} |
381
|
|
|
|
|
|
|
} |
382
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
=item $mol->print(option => value...) |
384
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
Convert the molecule to a string representation. If no options are given, |
386
|
|
|
|
|
|
|
a default YAML-like format is used (this may change in the future). Otherwise, |
387
|
|
|
|
|
|
|
the format should be specified by using the C option. |
388
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
=cut |
390
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
sub print { |
392
|
19
|
|
|
19
|
1
|
349
|
my $self = shift; |
393
|
19
|
|
|
|
|
42
|
my (%opts) = @_; |
394
|
19
|
|
|
|
|
21
|
my $ret; |
395
|
19
|
|
|
|
|
25
|
local $" = ""; #" |
396
|
|
|
|
|
|
|
|
397
|
19
|
50
|
|
|
|
39
|
if ($opts{format}) { |
398
|
19
|
|
|
|
|
41
|
return $self->formats($opts{format})->write_string($self, %opts); |
399
|
|
|
|
|
|
|
} |
400
|
|
|
|
|
|
|
# else use default printout |
401
|
0
|
|
|
|
|
0
|
$ret = <
|
402
|
|
|
|
|
|
|
$self->{id}: |
403
|
|
|
|
|
|
|
name: $self->{name} |
404
|
|
|
|
|
|
|
END |
405
|
0
|
|
|
|
|
0
|
$ret .= " attr:\n"; |
406
|
0
|
|
|
|
|
0
|
$ret .= $self->print_attr(2); |
407
|
0
|
|
|
|
|
0
|
$ret .= " atoms:\n"; |
408
|
0
|
|
|
|
|
0
|
for my $a (@{$self->{atoms}}) { $ret .= $a->print(2) } |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
409
|
0
|
|
|
|
|
0
|
$ret .= " bonds:\n"; |
410
|
0
|
|
|
|
|
0
|
for my $b (@{$self->{bonds}}) { $ret .= $b->print(2) } |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
411
|
0
|
|
|
|
|
0
|
$ret; |
412
|
|
|
|
|
|
|
} |
413
|
|
|
|
|
|
|
|
414
|
|
|
|
|
|
|
=item $s = $mol->sprintf($format) |
415
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
Format interesting molecular information in a concise way, as specified by |
417
|
|
|
|
|
|
|
a printf-like format. |
418
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
%n - name |
420
|
|
|
|
|
|
|
%f - formula |
421
|
|
|
|
|
|
|
%f{formula with format} - (note: right braces within |
422
|
|
|
|
|
|
|
the format should be escaped with a backslash) |
423
|
|
|
|
|
|
|
%s - SMILES representation |
424
|
|
|
|
|
|
|
%S - canonical SMILES representation |
425
|
|
|
|
|
|
|
%m - mass |
426
|
|
|
|
|
|
|
%8.3m - mass, formatted as %8.3f with core sprintf |
427
|
|
|
|
|
|
|
%q - formal charge |
428
|
|
|
|
|
|
|
%a - atom count |
429
|
|
|
|
|
|
|
%b - bond count |
430
|
|
|
|
|
|
|
%t - type |
431
|
|
|
|
|
|
|
%i - id |
432
|
|
|
|
|
|
|
%% - % |
433
|
|
|
|
|
|
|
|
434
|
|
|
|
|
|
|
For example, if you want just about everything: |
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
$mol->sprintf("%s - %n (%f). %a atoms, %b bonds; " |
437
|
|
|
|
|
|
|
. "mass=%m; charge =%q; type=%t; id=%i"); |
438
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
Note that you have to C |
440
|
|
|
|
|
|
|
C<%S> on C<< $mol->sprintf >>. |
441
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
=cut |
443
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
sub sprintf { |
445
|
0
|
|
|
0
|
1
|
0
|
my ($mol, $format) = @_; |
446
|
16
|
|
|
16
|
|
116
|
no warnings 'uninitialized'; # don't care if some properties are undefined |
|
16
|
|
|
|
|
35
|
|
|
16
|
|
|
|
|
34072
|
|
447
|
0
|
|
0
|
|
|
0
|
$format ||= "%f"; |
448
|
0
|
|
|
|
|
0
|
$format =~ s/%%/\\%/g; # escape %% with a \ |
449
|
0
|
|
|
|
|
0
|
$format =~ s/(?formula($1)/eg; # %f{} |
|
0
|
|
|
|
|
0
|
|
450
|
0
|
|
|
|
|
0
|
$format =~ s/(?formula/eg; # %f |
|
0
|
|
|
|
|
0
|
|
451
|
0
|
|
|
|
|
0
|
$format =~ s/(?print(format=>'smiles')/eg; # %s |
|
0
|
|
|
|
|
0
|
|
452
|
0
|
|
|
|
|
0
|
$format =~ s/(?print(format=>'smiles', unique => 1)/eg; # %s |
|
0
|
|
|
|
|
0
|
|
453
|
0
|
|
|
|
|
0
|
$format =~ s/(?name/eg; # %n |
|
0
|
|
|
|
|
0
|
|
454
|
0
|
|
|
|
|
0
|
$format =~ s/(?
|
455
|
0
|
0
|
|
|
|
0
|
$1 ? sprintf "%$1f", $mol->mass : $mol->mass/eg; # %m |
456
|
0
|
|
|
|
|
0
|
$format =~ s/(?charge/eg; # %q |
|
0
|
|
|
|
|
0
|
|
457
|
0
|
|
|
|
|
0
|
$format =~ s/(?atoms/eg; # %a |
|
0
|
|
|
|
|
0
|
|
458
|
0
|
|
|
|
|
0
|
$format =~ s/(?bonds/eg; # %b |
|
0
|
|
|
|
|
0
|
|
459
|
0
|
|
|
|
|
0
|
$format =~ s/(?type/eg; # %t |
|
0
|
|
|
|
|
0
|
|
460
|
0
|
|
|
|
|
0
|
$format =~ s/(?id/eg; # %i |
|
0
|
|
|
|
|
0
|
|
461
|
0
|
|
|
|
|
0
|
$format =~ s/\\(.)/$1/g; # other \ escapes |
462
|
0
|
|
|
|
|
0
|
$format; |
463
|
|
|
|
|
|
|
} |
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
=item $mol->printf($format) |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
Same as C<< $mol->sprintf >>, but prints to standard output automatically. |
468
|
|
|
|
|
|
|
Used for quick and dirty molecular information dumping. |
469
|
|
|
|
|
|
|
|
470
|
|
|
|
|
|
|
=cut |
471
|
|
|
|
|
|
|
|
472
|
|
|
|
|
|
|
sub printf { |
473
|
0
|
|
|
0
|
1
|
0
|
my ($mol, $format) = @_; |
474
|
0
|
|
|
|
|
0
|
print $mol->sprintf($format); |
475
|
|
|
|
|
|
|
} |
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
=item Chemistry::Mol->parse($string, option => value...) |
478
|
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
Parse the molecule encoded in C<$string>. The format should be specified |
480
|
|
|
|
|
|
|
with the the C option; otherwise, it will be guessed. |
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
=cut |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
sub parse { |
485
|
14
|
|
|
14
|
1
|
8053
|
my $self = shift; |
486
|
14
|
|
|
|
|
21
|
my $s = shift; |
487
|
14
|
|
|
|
|
38
|
my %opts = (mol_class => $self, @_); |
488
|
|
|
|
|
|
|
|
489
|
14
|
50
|
|
|
|
33
|
if ($opts{format}) { |
490
|
14
|
|
|
|
|
29
|
return $self->formats($opts{format})->parse_string($s, %opts); |
491
|
|
|
|
|
|
|
} else { |
492
|
0
|
|
|
|
|
0
|
croak "Parse does not support autodetection yet.", |
493
|
|
|
|
|
|
|
"Please specify a format."; |
494
|
|
|
|
|
|
|
} |
495
|
0
|
|
|
|
|
0
|
return; |
496
|
|
|
|
|
|
|
} |
497
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
=item Chemistry::Mol->read($fname, option => value ...) |
499
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
Read a file and return a list of Mol objects, or croaks if there was a problem. |
501
|
|
|
|
|
|
|
The type of file will be guessed if not specified via the C option. |
502
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
Note that only registered file readers will be used. Readers may be registered |
504
|
|
|
|
|
|
|
using C; modules that include readers (such as |
505
|
|
|
|
|
|
|
L) usually register them automatically when they are |
506
|
|
|
|
|
|
|
loaded. |
507
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
Automatic decompression of gzipped files is supported if the L |
509
|
|
|
|
|
|
|
module is installed. Files ending in .gz are assumed to be compressed; |
510
|
|
|
|
|
|
|
otherwise it is possible to force decompression by passing the gzip => 1 |
511
|
|
|
|
|
|
|
option (or no decompression with gzip => 0). |
512
|
|
|
|
|
|
|
|
513
|
|
|
|
|
|
|
=cut |
514
|
|
|
|
|
|
|
|
515
|
|
|
|
|
|
|
sub read_mol { # for backwards compatibility |
516
|
0
|
|
|
0
|
0
|
0
|
my ($fname, $type) = shift; |
517
|
0
|
|
|
|
|
0
|
__PACKAGE__->read($fname, format => $type); |
518
|
|
|
|
|
|
|
} |
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
sub read { |
521
|
11
|
|
|
11
|
1
|
51683
|
my $self = shift; |
522
|
11
|
|
|
|
|
24
|
my $fname = shift; |
523
|
11
|
|
|
|
|
43
|
my %opts = (mol_class => $self, @_); |
524
|
|
|
|
|
|
|
|
525
|
11
|
100
|
|
|
|
42
|
if ($opts{format}) { |
526
|
3
|
|
|
|
|
12
|
return $self->formats($opts{format})->parse_file($fname, %opts); |
527
|
|
|
|
|
|
|
} else { # guess format |
528
|
8
|
|
|
|
|
31
|
for my $type ($self->formats) { |
529
|
8
|
50
|
|
|
|
22
|
if ($self->formats($type)->file_is($fname)) { |
530
|
8
|
|
|
|
|
26
|
return $self->formats($type)->parse_file($fname, %opts); |
531
|
|
|
|
|
|
|
} |
532
|
|
|
|
|
|
|
} |
533
|
|
|
|
|
|
|
} |
534
|
0
|
|
|
|
|
0
|
croak "Couldn't guess format of file '$fname'"; |
535
|
|
|
|
|
|
|
} |
536
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
=item $mol->write($fname, option => value ...) |
538
|
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
Write a molecule file, or croak if there was a problem. The type of file will |
540
|
|
|
|
|
|
|
be guessed if not specified via the C option. |
541
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
Note that only registered file formats will be used. |
543
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
Automatic gzip compression is supported if the IO::Zlib module is installed. |
545
|
|
|
|
|
|
|
Files ending in .gz are assumed to be compressed; otherwise it is possible to |
546
|
|
|
|
|
|
|
force compression by passing the gzip => 1 option (or no compression with gzip |
547
|
|
|
|
|
|
|
=> 0). Specific compression levels between 2 (fastest) and 9 (most compressed) |
548
|
|
|
|
|
|
|
may also be used (e.g., gzip => 9). |
549
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
=cut |
551
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
sub write { |
553
|
3
|
|
|
3
|
1
|
572
|
my ($self, $fname, %opts) = (@_); |
554
|
|
|
|
|
|
|
|
555
|
3
|
100
|
|
|
|
11
|
if ($opts{format}) { |
556
|
2
|
|
|
|
|
7
|
return $self->formats($opts{format})->write_file(@_); |
557
|
|
|
|
|
|
|
} else { # guess format |
558
|
1
|
|
|
|
|
4
|
for my $type ($self->formats) { |
559
|
1
|
50
|
|
|
|
3
|
if ($self->formats($type)->name_is($fname)) { |
560
|
1
|
|
|
|
|
5
|
return $self->formats($type)->write_file(@_); |
561
|
|
|
|
|
|
|
} |
562
|
|
|
|
|
|
|
} |
563
|
|
|
|
|
|
|
} |
564
|
0
|
|
|
|
|
0
|
croak "Couldn't guess format for writing file '$fname'"; |
565
|
|
|
|
|
|
|
} |
566
|
|
|
|
|
|
|
|
567
|
|
|
|
|
|
|
=item Chemistry::Mol->file($file, option => value ...) |
568
|
|
|
|
|
|
|
|
569
|
|
|
|
|
|
|
Create a L-derived object for reading or writing to a file. |
570
|
|
|
|
|
|
|
The object can then be used to read the molecules or other information in the |
571
|
|
|
|
|
|
|
file. |
572
|
|
|
|
|
|
|
|
573
|
|
|
|
|
|
|
This has more flexibility than calling C<< Chemistry::Mol->read >> when |
574
|
|
|
|
|
|
|
dealing with multi-molecule files or files that have higher structure or that |
575
|
|
|
|
|
|
|
have information that does not belong to the molecules themselves. For |
576
|
|
|
|
|
|
|
example, a reaction file may have a list of molecules, but also general |
577
|
|
|
|
|
|
|
information like the reaction name, yield, etc. as well as the classification |
578
|
|
|
|
|
|
|
of the molecules as reactants or products. The exact information that is |
579
|
|
|
|
|
|
|
available will depend on the file reader class that is being used. The |
580
|
|
|
|
|
|
|
following is a hypothetical example for reading MDL rxnfiles. |
581
|
|
|
|
|
|
|
|
582
|
|
|
|
|
|
|
# assuming this module existed... |
583
|
|
|
|
|
|
|
use Chemistry::File::Rxn; |
584
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
my $rxn = Chemistry::Mol->file('test.rxn'); |
586
|
|
|
|
|
|
|
$rxn->read; |
587
|
|
|
|
|
|
|
$name = $rxn->name; |
588
|
|
|
|
|
|
|
@reactants = $rxn->reactants; # mol objects |
589
|
|
|
|
|
|
|
@products = $rxn->products; |
590
|
|
|
|
|
|
|
$yield = $rxn->yield; # a number |
591
|
|
|
|
|
|
|
|
592
|
|
|
|
|
|
|
Note that only registered file readers will be used. Readers may be registered |
593
|
|
|
|
|
|
|
using register_format(); modules that include readers (such as |
594
|
|
|
|
|
|
|
Chemistry::File::PDB) usually register them automatically. |
595
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
=cut |
597
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
sub file { |
599
|
1
|
|
|
1
|
1
|
4
|
my ($self, $file, %opts) = @_; |
600
|
1
|
|
|
|
|
6
|
%opts = (mol_class => $self, %opts); |
601
|
|
|
|
|
|
|
|
602
|
1
|
50
|
|
|
|
5
|
if ($opts{format}) { |
603
|
0
|
|
|
|
|
0
|
return $self->formats($opts{format})->new(file => $file, |
604
|
|
|
|
|
|
|
opts => \%opts); |
605
|
|
|
|
|
|
|
} else { # guess format |
606
|
1
|
|
|
|
|
5
|
for my $type ($self->formats) { |
607
|
1
|
50
|
|
|
|
5
|
if ($self->formats($type)->file_is($file)) { |
608
|
1
|
|
|
|
|
15
|
return $self->formats($type)->new(file => $file, |
609
|
|
|
|
|
|
|
opts => \%opts); |
610
|
|
|
|
|
|
|
} |
611
|
|
|
|
|
|
|
} |
612
|
|
|
|
|
|
|
} |
613
|
0
|
|
|
|
|
0
|
croak "Couldn't guess format of file '$file'"; |
614
|
|
|
|
|
|
|
} |
615
|
|
|
|
|
|
|
|
616
|
|
|
|
|
|
|
=item Chemistry::Mol->register_format($name, $ref) |
617
|
|
|
|
|
|
|
|
618
|
|
|
|
|
|
|
Register a file type. The identifier $name must be unique. $ref is either a |
619
|
|
|
|
|
|
|
class name (a package) or an object that complies with the L |
620
|
|
|
|
|
|
|
interface (e.g., a subclass of Chemistry::File). If $ref is omitted, the |
621
|
|
|
|
|
|
|
calling package is used automatically. More than one format can be registered |
622
|
|
|
|
|
|
|
at a time, but then $ref must be included for each format (e.g., |
623
|
|
|
|
|
|
|
Chemistry::Mol->register_format(format1 => "package1", format2 => package2). |
624
|
|
|
|
|
|
|
|
625
|
|
|
|
|
|
|
The typical user doesn't have to care about this function. It is used |
626
|
|
|
|
|
|
|
automatically by molecule file I/O modules. |
627
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
=cut |
629
|
|
|
|
|
|
|
|
630
|
|
|
|
|
|
|
sub register_format { |
631
|
14
|
|
|
14
|
1
|
36
|
my $class = shift; |
632
|
14
|
100
|
|
|
|
83
|
if (@_ == 1) { |
633
|
3
|
|
|
|
|
9
|
$FILE_FORMATS{$_[0]} = caller; |
634
|
3
|
|
|
|
|
8
|
return; |
635
|
|
|
|
|
|
|
} |
636
|
11
|
|
|
|
|
67
|
my %opts = @_; |
637
|
11
|
|
|
|
|
90
|
$FILE_FORMATS{$_} = $opts{$_} for keys %opts; |
638
|
|
|
|
|
|
|
} |
639
|
|
|
|
|
|
|
|
640
|
|
|
|
|
|
|
=item Chemistry::Mol->formats |
641
|
|
|
|
|
|
|
|
642
|
|
|
|
|
|
|
Returns a list of the file formats that have been installed by |
643
|
|
|
|
|
|
|
register_format() |
644
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
=cut |
646
|
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
sub formats { |
648
|
68
|
|
|
68
|
1
|
95
|
my $self = shift; |
649
|
68
|
100
|
|
|
|
131
|
if (@_) { |
650
|
58
|
|
|
|
|
95
|
my ($type) = @_; |
651
|
58
|
|
|
|
|
91
|
my $file_class = $FILE_FORMATS{$type}; |
652
|
58
|
50
|
|
|
|
117
|
unless ($file_class) { |
653
|
0
|
|
|
|
|
0
|
croak "No class installed for type '$type'"; |
654
|
|
|
|
|
|
|
} |
655
|
58
|
|
|
|
|
368
|
return $file_class; |
656
|
|
|
|
|
|
|
} else { |
657
|
10
|
|
|
|
|
61
|
return sort keys %FILE_FORMATS; |
658
|
|
|
|
|
|
|
} |
659
|
|
|
|
|
|
|
} |
660
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
=item $mol->mass |
662
|
|
|
|
|
|
|
|
663
|
|
|
|
|
|
|
Return the molar mass. This is just the sum of the masses of the atoms. See |
664
|
|
|
|
|
|
|
L::mass for details such as the handling of isotopes. |
665
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
=cut |
667
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
sub mass { |
669
|
2
|
|
|
2
|
1
|
9
|
my ($self) = @_; |
670
|
2
|
|
|
|
|
5
|
my $mass = 0; |
671
|
2
|
|
|
|
|
5
|
for my $atom ($self->atoms) { |
672
|
6
|
|
|
|
|
12
|
$mass += $atom->mass; |
673
|
|
|
|
|
|
|
} |
674
|
2
|
|
|
|
|
13
|
$mass; |
675
|
|
|
|
|
|
|
} |
676
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
=item $mol->charge |
678
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
Return the charge of the molecule. By default it returns the sum of the formal |
680
|
|
|
|
|
|
|
charges of the atoms. However, it is possible to set an arbitrary charge by |
681
|
|
|
|
|
|
|
calling C<< $mol->charge($new_charge) >> |
682
|
|
|
|
|
|
|
|
683
|
|
|
|
|
|
|
=cut |
684
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
sub charge { |
686
|
0
|
|
|
0
|
1
|
0
|
my ($self) = shift; |
687
|
0
|
0
|
|
|
|
0
|
if (@_) { |
688
|
0
|
|
|
|
|
0
|
$self->{charge} = shift; |
689
|
0
|
|
|
|
|
0
|
$self; |
690
|
|
|
|
|
|
|
} else { |
691
|
0
|
0
|
|
|
|
0
|
return $self->{charge} if defined $self->{charge}; |
692
|
0
|
|
|
|
|
0
|
my $charge = 0; |
693
|
0
|
|
0
|
|
|
0
|
$charge += $_->formal_charge || 0 for $self->atoms; |
694
|
0
|
|
|
|
|
0
|
$charge; |
695
|
|
|
|
|
|
|
} |
696
|
|
|
|
|
|
|
} |
697
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
=item $mol->formula_hash |
699
|
|
|
|
|
|
|
|
700
|
|
|
|
|
|
|
Returns a hash reference describing the molecular formula. For methane it would |
701
|
|
|
|
|
|
|
return { C => 1, H => 4 }. |
702
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
=cut |
704
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
sub formula_hash { |
706
|
17
|
|
|
17
|
1
|
25
|
my ($self) = @_; |
707
|
17
|
|
|
|
|
26
|
my $formula = {}; |
708
|
17
|
|
|
|
|
26
|
for my $atom ($self->atoms) { |
709
|
538
|
|
|
|
|
782
|
$formula->{$atom->symbol}++; |
710
|
538
|
50
|
|
|
|
756
|
$formula->{H} += $atom->hydrogens if $atom->hydrogens; |
711
|
|
|
|
|
|
|
} |
712
|
17
|
|
|
|
|
37
|
$formula; |
713
|
|
|
|
|
|
|
} |
714
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
=item $mol->formula($format) |
716
|
|
|
|
|
|
|
|
717
|
|
|
|
|
|
|
Returns a string with the formula. The format can be specified as a printf-like |
718
|
|
|
|
|
|
|
string with the control sequences specified in the L |
719
|
|
|
|
|
|
|
documentation. |
720
|
|
|
|
|
|
|
|
721
|
|
|
|
|
|
|
=cut |
722
|
|
|
|
|
|
|
|
723
|
|
|
|
|
|
|
sub formula { |
724
|
5
|
|
|
5
|
1
|
1562
|
my ($self, $format) = @_; |
725
|
5
|
|
|
|
|
406
|
require Chemistry::File::Formula; |
726
|
5
|
|
|
|
|
15
|
$self->print(format => "formula", formula_format => $format); |
727
|
|
|
|
|
|
|
} |
728
|
|
|
|
|
|
|
|
729
|
|
|
|
|
|
|
=item my $mol2 = $mol->clone; |
730
|
|
|
|
|
|
|
|
731
|
|
|
|
|
|
|
Makes a copy of a molecule. Note that this is a B copy; if your molecule |
732
|
|
|
|
|
|
|
has a pointer to the rest of the universe, the entire universe will be cloned! |
733
|
|
|
|
|
|
|
|
734
|
|
|
|
|
|
|
By default, clone() uses L to copy the Perl data structure. L |
735
|
|
|
|
|
|
|
can be used instead by setting variable C<$Chemistry::Mol::clone_backend> to |
736
|
|
|
|
|
|
|
C (default is C). The documentation of Storable claims L |
737
|
|
|
|
|
|
|
is less memory-intensive. |
738
|
|
|
|
|
|
|
|
739
|
|
|
|
|
|
|
=cut |
740
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
sub clone { |
742
|
9
|
|
|
9
|
1
|
754
|
my ($self) = @_; |
743
|
9
|
|
|
|
|
17
|
my $clone; |
744
|
9
|
100
|
|
|
|
26
|
if ($clone_backend eq "Storable") { |
|
|
50
|
|
|
|
|
|
745
|
8
|
|
|
|
|
1309
|
$clone = dclone $self; |
746
|
8
|
50
|
|
|
|
116
|
$clone->_weaken if Storable->VERSION < 2.14; |
747
|
|
|
|
|
|
|
} elsif ($clone_backend eq "Clone") { |
748
|
1
|
|
|
|
|
383
|
require Clone; |
749
|
1
|
|
|
|
|
2150
|
$clone = Clone::clone $self; |
750
|
|
|
|
|
|
|
} else { |
751
|
0
|
|
|
|
|
0
|
croak "Unknown clone backend '$clone_backend'"; |
752
|
|
|
|
|
|
|
} |
753
|
9
|
|
|
|
|
38
|
$clone; |
754
|
|
|
|
|
|
|
} |
755
|
|
|
|
|
|
|
|
756
|
|
|
|
|
|
|
=item my $mol2 = $mol->safe_clone; |
757
|
|
|
|
|
|
|
|
758
|
|
|
|
|
|
|
Like clone, it makes a deep copy of a molecule. The difference is that the copy |
759
|
|
|
|
|
|
|
is not "exact" in that new molecule and its atoms and bonds get assigned new |
760
|
|
|
|
|
|
|
IDs. This makes it safe to combine cloned molecules. For example, this is an |
761
|
|
|
|
|
|
|
error: |
762
|
|
|
|
|
|
|
|
763
|
|
|
|
|
|
|
# XXX don't try this at home! |
764
|
|
|
|
|
|
|
my $mol2 = Chemistry::Mol->combine($mol1, $mol1); |
765
|
|
|
|
|
|
|
# the atoms in $mol1 will clash |
766
|
|
|
|
|
|
|
|
767
|
|
|
|
|
|
|
But this is ok: |
768
|
|
|
|
|
|
|
|
769
|
|
|
|
|
|
|
# the "safe clone" of $mol1 will have new IDs |
770
|
|
|
|
|
|
|
my $mol2 = Chemistry::Mol->combine($mol1, $mol1->safe_clone); |
771
|
|
|
|
|
|
|
|
772
|
|
|
|
|
|
|
=cut |
773
|
|
|
|
|
|
|
|
774
|
|
|
|
|
|
|
sub safe_clone { |
775
|
1
|
|
|
1
|
1
|
2
|
my ($mol) = @_; |
776
|
1
|
|
|
|
|
5
|
my $clone = $mol->clone; |
777
|
1
|
|
|
|
|
3
|
for ($clone, $clone->atoms, $clone->bonds) { |
778
|
4
|
|
|
|
|
13
|
$_->id($_->nextID); |
779
|
|
|
|
|
|
|
} |
780
|
1
|
|
|
|
|
21
|
$clone; |
781
|
|
|
|
|
|
|
} |
782
|
|
|
|
|
|
|
|
783
|
|
|
|
|
|
|
sub _weaken { |
784
|
14
|
|
|
14
|
|
45
|
my ($self) = @_; |
785
|
14
|
|
|
|
|
59
|
for ($self->atoms, $self->bonds) { |
786
|
196
|
|
|
|
|
358
|
$_->_weaken; |
787
|
|
|
|
|
|
|
} |
788
|
14
|
|
|
|
|
33
|
$self; |
789
|
|
|
|
|
|
|
} |
790
|
|
|
|
|
|
|
|
791
|
|
|
|
|
|
|
=item ($distance, $atom_here, $atom_there) = $mol->distance($obj) |
792
|
|
|
|
|
|
|
|
793
|
|
|
|
|
|
|
Returns the minimum distance to $obj, which can be an atom, a molecule, or a |
794
|
|
|
|
|
|
|
vector. In scalar context it returns only the distance; in list context it |
795
|
|
|
|
|
|
|
also returns the atoms involved. The current implementation for calculating |
796
|
|
|
|
|
|
|
the minimum distance between two molecules compares every possible pair of |
797
|
|
|
|
|
|
|
atoms, so it's not efficient for large molecules. |
798
|
|
|
|
|
|
|
|
799
|
|
|
|
|
|
|
=cut |
800
|
|
|
|
|
|
|
|
801
|
|
|
|
|
|
|
sub distance { |
802
|
0
|
|
|
0
|
1
|
0
|
my ($self, $other) = @_; |
803
|
0
|
0
|
|
|
|
0
|
if ($other->isa("Chemistry::Mol")) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
804
|
0
|
|
|
|
|
0
|
my @atoms = $self->atoms; |
805
|
0
|
0
|
|
|
|
0
|
my $atom = shift @atoms or return; # need at least one atom |
806
|
0
|
|
|
|
|
0
|
my $closest_here = $atom; |
807
|
0
|
|
|
|
|
0
|
my ($min_length, $closest_there) = $atom->distance($other); |
808
|
0
|
|
|
|
|
0
|
for $atom (@atoms) { |
809
|
0
|
|
|
|
|
0
|
my ($d, $o) = $atom->distance($other); |
810
|
0
|
0
|
|
|
|
0
|
if ($d < $min_length) { |
811
|
0
|
|
|
|
|
0
|
($min_length, $closest_there, $closest_here) = ($d, $o, $atom); |
812
|
|
|
|
|
|
|
} |
813
|
|
|
|
|
|
|
} |
814
|
|
|
|
|
|
|
return wantarray ? |
815
|
0
|
0
|
|
|
|
0
|
($min_length, $closest_here, $closest_there) : $min_length; |
816
|
|
|
|
|
|
|
} elsif ($other->isa("Chemistry::Atom")) { |
817
|
0
|
|
|
|
|
0
|
return $other->distance($self); |
818
|
|
|
|
|
|
|
} elsif ($other->isa("Math::VectorReal")) { |
819
|
0
|
|
|
|
|
0
|
return Chemistry::Atom->new(coords => $other)->distance($self); |
820
|
|
|
|
|
|
|
} |
821
|
|
|
|
|
|
|
} |
822
|
|
|
|
|
|
|
|
823
|
|
|
|
|
|
|
=item my $bigmol = Chemistry::Mol->combine($mol1, $mol2, ...) |
824
|
|
|
|
|
|
|
|
825
|
|
|
|
|
|
|
=item $mol1->combine($mol2, $mol3, ...) |
826
|
|
|
|
|
|
|
|
827
|
|
|
|
|
|
|
Combines several molecules in one bigger molecule. If called as a class method, |
828
|
|
|
|
|
|
|
as in the first example, it returns a new combined molecule without altering |
829
|
|
|
|
|
|
|
any of the parameters. If called as an instance method, as in the second |
830
|
|
|
|
|
|
|
example, all molecules are combined into $mol1 (but $mol2, $mol3, ...) are not |
831
|
|
|
|
|
|
|
altered. B: Make sure you don't combine molecules which contain atoms |
832
|
|
|
|
|
|
|
with duplicate IDs (for example, if they were cloned). |
833
|
|
|
|
|
|
|
|
834
|
|
|
|
|
|
|
=cut |
835
|
|
|
|
|
|
|
|
836
|
|
|
|
|
|
|
# joins several molecules into one |
837
|
|
|
|
|
|
|
sub combine { |
838
|
2
|
|
|
2
|
1
|
519
|
my ($self, @others) = @_; |
839
|
2
|
|
|
|
|
4
|
my $mol; |
840
|
2
|
100
|
|
|
|
5
|
if (ref $self) { |
841
|
1
|
|
|
|
|
3
|
$mol = $self; |
842
|
|
|
|
|
|
|
} else { |
843
|
1
|
|
|
|
|
3
|
$mol = $self->new; |
844
|
|
|
|
|
|
|
} |
845
|
2
|
|
|
|
|
8
|
for my $other (@others) { |
846
|
3
|
|
|
|
|
7
|
my $mol2 = $other->clone; |
847
|
3
|
|
|
|
|
7
|
for my $atom ($mol2->atoms) { |
848
|
12
|
|
|
|
|
18
|
$mol->add_atom($atom); |
849
|
|
|
|
|
|
|
} |
850
|
3
|
|
|
|
|
6
|
for my $bond ($mol2->bonds) { |
851
|
9
|
|
|
|
|
15
|
$mol->add_bond($bond); |
852
|
|
|
|
|
|
|
} |
853
|
|
|
|
|
|
|
} |
854
|
2
|
|
|
|
|
7
|
$mol; |
855
|
|
|
|
|
|
|
} |
856
|
|
|
|
|
|
|
|
857
|
|
|
|
|
|
|
=item my @mols = $mol->separate |
858
|
|
|
|
|
|
|
|
859
|
|
|
|
|
|
|
Separates a molecule into "connected fragments". The original object is not |
860
|
|
|
|
|
|
|
modified; the fragments are clones of the original ones. Example: if you have |
861
|
|
|
|
|
|
|
ethane (H3CCH3) and you delete the C-C bond, you have two CH3 radicals within |
862
|
|
|
|
|
|
|
one molecule object ($mol). When you call $mol->separate you get two molecules, |
863
|
|
|
|
|
|
|
each one with a CH3. |
864
|
|
|
|
|
|
|
|
865
|
|
|
|
|
|
|
=cut |
866
|
|
|
|
|
|
|
|
867
|
|
|
|
|
|
|
# splits a molecule into connected fragments |
868
|
|
|
|
|
|
|
# returns a list of molecules. Does not touch the original copy. |
869
|
|
|
|
|
|
|
sub separate { |
870
|
1
|
|
|
1
|
1
|
546
|
my ($self) = @_; |
871
|
1
|
|
|
|
|
12
|
$self = $self->clone; |
872
|
1
|
|
|
|
|
2
|
$self->{_paint_tab} = {}; |
873
|
1
|
|
|
|
|
2
|
my $color = 0; |
874
|
1
|
|
|
|
|
3
|
for my $atom ($self->atoms) { |
875
|
8
|
100
|
|
|
|
17
|
next if defined $self->{_paint_tab}{$atom->id}; |
876
|
2
|
|
|
|
|
6
|
$self->_paint($atom, $color++); |
877
|
|
|
|
|
|
|
} |
878
|
1
|
|
|
|
|
4
|
my @mols; |
879
|
1
|
|
|
|
|
6
|
push @mols, $self->new for (1 .. $color); |
880
|
1
|
|
|
|
|
6
|
for my $atom ($self->atoms) { |
881
|
8
|
|
|
|
|
16
|
$mols[$self->{_paint_tab}{$atom->id}]->add_atom($atom); |
882
|
|
|
|
|
|
|
} |
883
|
1
|
|
|
|
|
2
|
for my $bond ($self->bonds) { |
884
|
6
|
|
|
|
|
9
|
$mols[$self->{_paint_tab}{$bond->id}]->add_bond($bond); |
885
|
|
|
|
|
|
|
} |
886
|
1
|
|
|
|
|
8
|
@mols; |
887
|
|
|
|
|
|
|
} |
888
|
|
|
|
|
|
|
|
889
|
|
|
|
|
|
|
# this method fills the _paint_tab attribute for every atom connected |
890
|
|
|
|
|
|
|
# to the given start atom $atom with $color. Used for separating |
891
|
|
|
|
|
|
|
# connected fragments. Uses a depth-first search |
892
|
|
|
|
|
|
|
sub _paint { |
893
|
14
|
|
|
14
|
|
20
|
my ($self, $atom, $color) = @_; |
894
|
14
|
100
|
|
|
|
47
|
return if defined $self->{_paint_tab}{$atom->id}; |
895
|
8
|
|
|
|
|
19
|
$self->{_paint_tab}{$atom->id} = $color; |
896
|
8
|
|
|
|
|
16
|
$self->{_paint_tab}{$_->id} = $color for ($atom->bonds); |
897
|
8
|
|
|
|
|
16
|
for my $neighbor ($atom->neighbors) { |
898
|
12
|
|
|
|
|
20
|
$self->_paint($neighbor, $color); |
899
|
|
|
|
|
|
|
} |
900
|
|
|
|
|
|
|
} |
901
|
|
|
|
|
|
|
|
902
|
|
|
|
|
|
|
=item $mol->sprout_hydrogens |
903
|
|
|
|
|
|
|
|
904
|
|
|
|
|
|
|
Convert all the implicit hydrogen atoms in the molecule to explicit atoms. |
905
|
|
|
|
|
|
|
It does B generate coordinates for the atoms. |
906
|
|
|
|
|
|
|
|
907
|
|
|
|
|
|
|
=cut |
908
|
|
|
|
|
|
|
|
909
|
|
|
|
|
|
|
sub sprout_hydrogens { |
910
|
1
|
|
|
1
|
1
|
3
|
my ($self) = @_; |
911
|
1
|
|
|
|
|
4
|
$_->sprout_hydrogens for $self->atoms; |
912
|
|
|
|
|
|
|
} |
913
|
|
|
|
|
|
|
|
914
|
|
|
|
|
|
|
=item $mol->collapse_hydrogens |
915
|
|
|
|
|
|
|
|
916
|
|
|
|
|
|
|
Convert all the explicit hydrogen atoms in the molecule to implicit hydrogens. |
917
|
|
|
|
|
|
|
(Exception: hydrogen atoms that are adjacent to a hydrogen atom are not |
918
|
|
|
|
|
|
|
collapsed.) |
919
|
|
|
|
|
|
|
|
920
|
|
|
|
|
|
|
=cut |
921
|
|
|
|
|
|
|
|
922
|
|
|
|
|
|
|
sub collapse_hydrogens { |
923
|
1
|
|
|
1
|
1
|
4
|
my ($self) = @_; |
924
|
1
|
|
|
|
|
4
|
for my $atom (grep { $_->symbol ne 'H' } $self->atoms) { |
|
3
|
|
|
|
|
11
|
|
925
|
1
|
|
|
|
|
14
|
$atom->collapse_hydrogens; |
926
|
|
|
|
|
|
|
} |
927
|
|
|
|
|
|
|
} |
928
|
|
|
|
|
|
|
|
929
|
|
|
|
|
|
|
=item $mol->add_implicit_hydrogens |
930
|
|
|
|
|
|
|
|
931
|
|
|
|
|
|
|
Use heuristics to figure out how many implicit hydrogens should each atom in |
932
|
|
|
|
|
|
|
the molecule have to satisfy its normal "organic" valence. |
933
|
|
|
|
|
|
|
|
934
|
|
|
|
|
|
|
=cut |
935
|
|
|
|
|
|
|
|
936
|
|
|
|
|
|
|
sub add_implicit_hydrogens { |
937
|
1
|
|
|
1
|
1
|
5
|
my ($self) = @_; |
938
|
1
|
|
|
|
|
3
|
$_->add_implicit_hydrogens for $self->atoms; |
939
|
|
|
|
|
|
|
} |
940
|
|
|
|
|
|
|
|
941
|
|
|
|
|
|
|
|
942
|
|
|
|
|
|
|
my %DESCRIPTORS = (); |
943
|
|
|
|
|
|
|
|
944
|
|
|
|
|
|
|
=item Chemistry::Mol->register_descriptor($name => $sub_ref) |
945
|
|
|
|
|
|
|
|
946
|
|
|
|
|
|
|
Adds a callback that can be used to add functionality to the molecule class |
947
|
|
|
|
|
|
|
(originally meant to add custom molecule descriptors.) A descriptor is a |
948
|
|
|
|
|
|
|
function that takes a molecule object as its only argument and returns a value |
949
|
|
|
|
|
|
|
or values. For example, to add a descriptor function that computes the number |
950
|
|
|
|
|
|
|
of atoms: |
951
|
|
|
|
|
|
|
|
952
|
|
|
|
|
|
|
Chemistry::Mol->register_descriptor( |
953
|
|
|
|
|
|
|
number_of_atoms => sub { |
954
|
|
|
|
|
|
|
my $mol = shift; |
955
|
|
|
|
|
|
|
return scalar $mol->atoms; |
956
|
|
|
|
|
|
|
} |
957
|
|
|
|
|
|
|
); |
958
|
|
|
|
|
|
|
|
959
|
|
|
|
|
|
|
The descriptor is accessed by name via the C instance method: |
960
|
|
|
|
|
|
|
|
961
|
|
|
|
|
|
|
my $n = $mol->descriptor('number_of_atoms'); |
962
|
|
|
|
|
|
|
|
963
|
|
|
|
|
|
|
=cut |
964
|
|
|
|
|
|
|
|
965
|
|
|
|
|
|
|
sub register_descriptor { |
966
|
1
|
|
|
1
|
1
|
450
|
my ($self, %opts) = @_; |
967
|
1
|
|
|
|
|
7
|
$DESCRIPTORS{$_} = $opts{$_} for keys %opts; |
968
|
|
|
|
|
|
|
} |
969
|
|
|
|
|
|
|
|
970
|
|
|
|
|
|
|
=item my $value = $mol->descriptor($descriptor_name) |
971
|
|
|
|
|
|
|
|
972
|
|
|
|
|
|
|
Calls a previously registered descriptor function giving it $mol as an |
973
|
|
|
|
|
|
|
argument, as shown above for C. |
974
|
|
|
|
|
|
|
|
975
|
|
|
|
|
|
|
=cut |
976
|
|
|
|
|
|
|
|
977
|
|
|
|
|
|
|
sub descriptor { |
978
|
1
|
|
|
1
|
1
|
7
|
my ($self, $descriptor) = @_; |
979
|
1
|
50
|
|
|
|
4
|
my $sub = $DESCRIPTORS{$descriptor} |
980
|
|
|
|
|
|
|
or croak "unknown descriptor '$descriptor'"; |
981
|
1
|
|
|
|
|
5
|
return $sub->($self); |
982
|
|
|
|
|
|
|
} |
983
|
|
|
|
|
|
|
|
984
|
|
|
|
|
|
|
1; |
985
|
|
|
|
|
|
|
|
986
|
|
|
|
|
|
|
=back |
987
|
|
|
|
|
|
|
|
988
|
|
|
|
|
|
|
=head1 SOURCE CODE REPOSITORY |
989
|
|
|
|
|
|
|
|
990
|
|
|
|
|
|
|
L |
991
|
|
|
|
|
|
|
|
992
|
|
|
|
|
|
|
=head1 SEE ALSO |
993
|
|
|
|
|
|
|
|
994
|
|
|
|
|
|
|
L, L, L, |
995
|
|
|
|
|
|
|
L |
996
|
|
|
|
|
|
|
|
997
|
|
|
|
|
|
|
=head1 AUTHOR |
998
|
|
|
|
|
|
|
|
999
|
|
|
|
|
|
|
Ivan Tubert-Brohman Eitub@cpan.orgE |
1000
|
|
|
|
|
|
|
|
1001
|
|
|
|
|
|
|
=head1 COPYRIGHT |
1002
|
|
|
|
|
|
|
|
1003
|
|
|
|
|
|
|
Copyright (c) 2005 Ivan Tubert-Brohman. All rights reserved. This program is |
1004
|
|
|
|
|
|
|
free software; you can redistribute it and/or modify it under the same terms as |
1005
|
|
|
|
|
|
|
Perl itself. |
1006
|
|
|
|
|
|
|
|
1007
|
|
|
|
|
|
|
=cut |
1008
|
|
|
|
|
|
|
|