line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Chemistry::Mol; |
2
|
|
|
|
|
|
|
$VERSION = '0.37'; |
3
|
|
|
|
|
|
|
# $Id: Mol.pm,v 1.50 2009/05/10 19:37:58 itubert Exp $ |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=head1 NAME |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
Chemistry::Mol - Molecule object toolkit |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=head1 SYNOPSIS |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
use Chemistry::Mol; |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
$mol = Chemistry::Mol->new(id => "mol_id", name => "my molecule"); |
14
|
|
|
|
|
|
|
$c = $mol->new_atom(symbol => "C", coords => [0,0,0]); |
15
|
|
|
|
|
|
|
$o = $mol->new_atom(symbol => "O", coords => [0,0,1.23]); |
16
|
|
|
|
|
|
|
$mol->new_bond(atoms => [$c, $o], order => 3); |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
print $mol->print; |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
=head1 DESCRIPTION |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
This package, along with Chemistry::Atom and Chemistry::Bond, includes basic |
23
|
|
|
|
|
|
|
objects and methods to describe molecules. |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
The core methods try not to enforce a particular convention. This means that |
26
|
|
|
|
|
|
|
only a minimal set of attributes is provided by default, and some attributes |
27
|
|
|
|
|
|
|
have very loosely defined meaning. This is because each program and file type |
28
|
|
|
|
|
|
|
has different idea of what each concept (such as bond and atom type) means. |
29
|
|
|
|
|
|
|
Bonds are defined as a list of atoms (typically two) with an arbitrary type. |
30
|
|
|
|
|
|
|
Atoms are defined by a symbol and a Z, and may have 3D and internal coordinates |
31
|
|
|
|
|
|
|
(2D coming soon). |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
=cut |
34
|
|
|
|
|
|
|
|
35
|
17
|
|
|
17
|
|
208055
|
use 5.006; |
|
17
|
|
|
|
|
65
|
|
|
17
|
|
|
|
|
717
|
|
36
|
17
|
|
|
17
|
|
119
|
use strict; |
|
17
|
|
|
|
|
30
|
|
|
17
|
|
|
|
|
586
|
|
37
|
17
|
|
|
17
|
|
113
|
use warnings; |
|
17
|
|
|
|
|
35
|
|
|
17
|
|
|
|
|
553
|
|
38
|
17
|
|
|
17
|
|
14771
|
use Chemistry::Atom; |
|
17
|
|
|
|
|
52
|
|
|
17
|
|
|
|
|
1226
|
|
39
|
17
|
|
|
17
|
|
17087
|
use Chemistry::Bond; |
|
17
|
|
|
|
|
47
|
|
|
17
|
|
|
|
|
489
|
|
40
|
17
|
|
|
17
|
|
293
|
use Carp; |
|
17
|
|
|
|
|
35
|
|
|
17
|
|
|
|
|
1493
|
|
41
|
17
|
|
|
17
|
|
93
|
use base qw(Chemistry::Obj Exporter); |
|
17
|
|
|
|
|
30
|
|
|
17
|
|
|
|
|
4017
|
|
42
|
17
|
|
|
17
|
|
47881
|
use Storable 'dclone'; |
|
17
|
|
|
|
|
98601
|
|
|
17
|
|
|
|
|
47434
|
|
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
our @EXPORT_OK = qw(read_mol); |
45
|
|
|
|
|
|
|
our @EXPORT = (); |
46
|
|
|
|
|
|
|
our %EXPORT_TAGS = ( |
47
|
|
|
|
|
|
|
all => [@EXPORT, @EXPORT_OK], |
48
|
|
|
|
|
|
|
); |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
my %FILE_FORMATS = (); |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
=head1 METHODS |
55
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
See also L for generic attributes. |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
=over 4 |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
=item Chemistry::Mol->new(name => value, ...) |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
Create a new Mol object with the specified attributes. |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
$mol = Chemistry::Mol->new(id => 'm123', name => 'my mol') |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
is the same as |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
Chemistry::Mol->new() |
69
|
|
|
|
|
|
|
$mol->id('m123') |
70
|
|
|
|
|
|
|
$mol->name('my mol') |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
=cut |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
sub new { |
75
|
26
|
|
|
26
|
1
|
1565
|
my $class = shift; |
76
|
26
|
|
|
|
|
61
|
my %args = @_; |
77
|
26
|
|
66
|
|
|
104
|
my $self = bless { |
78
|
|
|
|
|
|
|
id => $class->nextID, |
79
|
|
|
|
|
|
|
byId => {}, |
80
|
|
|
|
|
|
|
atoms => [], |
81
|
|
|
|
|
|
|
bonds => [], |
82
|
|
|
|
|
|
|
name => "", |
83
|
|
|
|
|
|
|
}, ref $class || $class; |
84
|
26
|
|
|
|
|
137
|
$self->$_($args{$_}) for (keys %args); |
85
|
26
|
|
|
|
|
92
|
return $self; |
86
|
|
|
|
|
|
|
} |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
my $N = 0; # molecule ID counter |
89
|
27
|
|
|
27
|
0
|
368
|
sub nextID { "mol".++$N; } |
90
|
0
|
|
|
0
|
0
|
0
|
sub reset_id { $N = 0; } |
91
|
0
|
|
|
0
|
0
|
0
|
sub next_id { $N = $_[1] } |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
=item $mol->add_atom($atom, ...) |
94
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
Add one or more Atom objects to the molecule. Returns the last atom added. |
96
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
=cut |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
sub add_atom { |
100
|
552
|
|
|
552
|
1
|
1747
|
my $self = shift; |
101
|
552
|
|
|
|
|
848
|
for my $atom (@_){ |
102
|
|
|
|
|
|
|
#if ($self->by_id($atom->id)) { |
103
|
|
|
|
|
|
|
#croak "Duplicate ID when adding atom '$atom' to mol '$self'"; |
104
|
|
|
|
|
|
|
#} |
105
|
553
|
|
|
|
|
757
|
push @{$self->{atoms}}, $atom; |
|
553
|
|
|
|
|
2199
|
|
106
|
553
|
|
|
|
|
2728
|
$self->{byId}{$atom->id} = $atom; |
107
|
553
|
|
|
|
|
2628
|
$atom->parent($self); |
108
|
|
|
|
|
|
|
} |
109
|
552
|
|
|
|
|
2588
|
$_[-1]; |
110
|
|
|
|
|
|
|
} |
111
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
sub add_atom_np { |
113
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
114
|
0
|
|
|
|
|
0
|
for my $atom (@_){ |
115
|
0
|
|
|
|
|
0
|
push @{$self->{atoms}}, $atom; |
|
0
|
|
|
|
|
0
|
|
116
|
0
|
|
|
|
|
0
|
$self->{byId}{$atom->id} = $atom; |
117
|
|
|
|
|
|
|
} |
118
|
0
|
|
|
|
|
0
|
$_[-1]; |
119
|
|
|
|
|
|
|
} |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
=item $mol->atom_class |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
Returns the atom class that a molecule or molecule class expects to use by |
124
|
|
|
|
|
|
|
default. L objects return "Chemistry::Atom", but subclasses |
125
|
|
|
|
|
|
|
will likely override this method. |
126
|
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
=cut |
128
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
sub atom_class { |
130
|
14
|
|
|
14
|
1
|
63
|
"Chemistry::Atom"; |
131
|
|
|
|
|
|
|
} |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
=item $mol->new_atom(name => value, ...) |
134
|
|
|
|
|
|
|
|
135
|
|
|
|
|
|
|
Shorthand for C<< $mol->add_atom($mol->atom_class->new(name => value, ...)) >>. |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
=cut |
138
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
sub new_atom { |
140
|
14
|
|
|
14
|
1
|
80
|
my $self = shift; |
141
|
14
|
|
|
|
|
36
|
$self->add_atom($self->atom_class->new(@_)); |
142
|
|
|
|
|
|
|
} |
143
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
=item $mol->delete_atom($atom, ...) |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
Deletes an atom from the molecule. It automatically deletes all the bonds in |
147
|
|
|
|
|
|
|
which the atom participates as well. $atom should be a Chemistry::Atom |
148
|
|
|
|
|
|
|
reference. This method also accepts the atom index, but this use is deprecated |
149
|
|
|
|
|
|
|
(and buggy if multiple indices are given, unless they are in descending order). |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
=cut |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
sub delete_atom { |
154
|
2
|
|
|
2
|
1
|
5
|
my $self = shift; |
155
|
2
|
|
|
|
|
6
|
for my $i (@_) { |
156
|
2
|
|
|
|
|
4
|
my ($atom); |
157
|
2
|
100
|
|
|
|
7
|
if (ref $i) { |
158
|
1
|
|
|
|
|
3
|
$atom = $i; |
159
|
|
|
|
|
|
|
} else { |
160
|
1
|
50
|
|
|
|
4
|
$atom = $self->atoms($i) |
161
|
|
|
|
|
|
|
or croak "$self->delete_atom: no such atom $i\n"; |
162
|
|
|
|
|
|
|
} |
163
|
2
|
|
|
|
|
10
|
$atom->delete($i); |
164
|
|
|
|
|
|
|
} |
165
|
|
|
|
|
|
|
} |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
# takes an atom ref to delete and optionally the atom index |
168
|
|
|
|
|
|
|
# 1) deletes bonds that belonged to atom |
169
|
|
|
|
|
|
|
# 2) deletes atom |
170
|
|
|
|
|
|
|
sub _delete_atom { |
171
|
7
|
|
|
7
|
|
13
|
my ($self, $atom) = @_; |
172
|
7
|
50
|
|
|
|
32
|
my $index = $self->get_atom_index($atom) |
173
|
|
|
|
|
|
|
or croak "$self->delete_atom: no such atom $atom\n"; |
174
|
7
|
|
|
|
|
39
|
my $id = $atom->id; |
175
|
7
|
|
|
|
|
31
|
$self->delete_bond($atom->bonds); |
176
|
7
|
|
|
|
|
56
|
delete $self->{byId}{$id}; |
177
|
7
|
|
|
|
|
25
|
splice @{$self->{atoms}}, $index - 1, 1; |
|
7
|
|
|
|
|
41
|
|
178
|
|
|
|
|
|
|
} |
179
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
=item $mol->add_bond($bond, ...) |
181
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
Add one or more Bond objects to the molecule. Returns the last bond added. |
183
|
|
|
|
|
|
|
|
184
|
|
|
|
|
|
|
=cut |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
sub add_bond { |
187
|
25
|
|
|
25
|
1
|
45
|
my $self = shift; |
188
|
25
|
|
|
|
|
43
|
for my $bond (@_){ |
189
|
|
|
|
|
|
|
#if ($self->by_id($bond->id)) { |
190
|
|
|
|
|
|
|
#croak "Duplicate ID when adding bond '$bond' to mol '$self'"; |
191
|
|
|
|
|
|
|
#} |
192
|
25
|
|
|
|
|
34
|
push @{$self->{bonds}}, $bond; |
|
25
|
|
|
|
|
55
|
|
193
|
25
|
|
|
|
|
107
|
$self->{byId}{$bond->id} = $bond; |
194
|
25
|
100
|
|
|
|
106
|
if ($bond->{deleted}) { |
195
|
1
|
|
|
|
|
4
|
$_->add_bond($bond) for $bond->atoms; |
196
|
1
|
|
|
|
|
3
|
$bond->{deleted} = 0; |
197
|
|
|
|
|
|
|
} |
198
|
25
|
|
|
|
|
88
|
$bond->parent($self); |
199
|
|
|
|
|
|
|
} |
200
|
25
|
|
|
|
|
71
|
$_[-1]; |
201
|
|
|
|
|
|
|
} |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
sub add_bond_np { |
204
|
0
|
|
|
0
|
0
|
0
|
my $self = shift; |
205
|
0
|
|
|
|
|
0
|
for my $bond (@_){ |
206
|
0
|
|
|
|
|
0
|
push @{$self->{bonds}}, $bond; |
|
0
|
|
|
|
|
0
|
|
207
|
0
|
|
|
|
|
0
|
$self->{byId}{$bond->id} = $bond; |
208
|
|
|
|
|
|
|
} |
209
|
0
|
|
|
|
|
0
|
$_[-1]; |
210
|
|
|
|
|
|
|
} |
211
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
=item $mol->bond_class |
213
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
Returns the bond class that a molecule or molecule class expects to use by |
215
|
|
|
|
|
|
|
default. L objects return "Chemistry::Bond", but subclasses |
216
|
|
|
|
|
|
|
will likely override this method. |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
=cut |
219
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
sub bond_class { |
221
|
8
|
|
|
8
|
1
|
122
|
"Chemistry::Bond"; |
222
|
|
|
|
|
|
|
} |
223
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
=item $mol->new_bond(name => value, ...) |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
Shorthand for C<< $mol->add_bond($mol->bond_class->new(name => value, ...)) >>. |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
=cut |
229
|
|
|
|
|
|
|
|
230
|
|
|
|
|
|
|
sub new_bond { |
231
|
8
|
|
|
8
|
1
|
22
|
my $self = shift; |
232
|
8
|
|
|
|
|
29
|
$self->add_bond($self->bond_class->new(@_)); |
233
|
|
|
|
|
|
|
} |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
sub get_bond_index { |
236
|
14
|
|
|
14
|
0
|
23
|
my ($self, $bond) = @_; |
237
|
14
|
|
|
|
|
43
|
my $i; |
238
|
14
|
|
|
|
|
37
|
for ($self->bonds) { |
239
|
42
|
|
|
|
|
59
|
++$i; |
240
|
42
|
100
|
|
|
|
99
|
return $i if ($_ eq $bond); |
241
|
|
|
|
|
|
|
} |
242
|
0
|
|
|
|
|
0
|
undef; |
243
|
|
|
|
|
|
|
} |
244
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
sub get_atom_index { |
246
|
7
|
|
|
7
|
0
|
14
|
my ($self, $atom) = @_; |
247
|
7
|
|
|
|
|
12
|
my $i; |
248
|
7
|
|
|
|
|
21
|
for ($self->atoms) { |
249
|
12
|
|
|
|
|
20
|
++$i; |
250
|
12
|
100
|
|
|
|
41
|
return $i if ($_ eq $atom); |
251
|
|
|
|
|
|
|
} |
252
|
0
|
|
|
|
|
0
|
undef; |
253
|
|
|
|
|
|
|
} |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
=item $mol->delete_bond($bond, ...) |
256
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
Deletes a bond from the molecule. $bond should be a L object. |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
=cut |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
# mol deletes bond |
262
|
|
|
|
|
|
|
# bond tells atoms involved to forget about it |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
sub delete_bond { |
265
|
7
|
|
|
7
|
1
|
18
|
my $self = shift; |
266
|
7
|
|
|
|
|
19
|
for my $i (@_){ |
267
|
11
|
|
|
|
|
17
|
my ($bond); |
268
|
11
|
50
|
|
|
|
31
|
if (ref $i) { |
269
|
11
|
|
|
|
|
15
|
$bond = $i; |
270
|
|
|
|
|
|
|
} else { |
271
|
0
|
0
|
|
|
|
0
|
$bond = $self->bonds($i) |
272
|
|
|
|
|
|
|
or croak "$self->delete_bond($i): no such bond $i\n"; |
273
|
|
|
|
|
|
|
} |
274
|
11
|
|
|
|
|
125
|
$bond->delete; |
275
|
|
|
|
|
|
|
} |
276
|
|
|
|
|
|
|
} |
277
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
sub _delete_bond { |
279
|
14
|
|
|
14
|
|
23
|
my ($self, $bond) = @_; |
280
|
14
|
50
|
|
|
|
44
|
my $index = $self->get_bond_index($bond) |
281
|
|
|
|
|
|
|
#or croak "$self->delete_bond: no such bond $bond\n"; |
282
|
|
|
|
|
|
|
or return; |
283
|
14
|
|
|
|
|
63
|
my $id = $bond->id; |
284
|
14
|
|
|
|
|
39
|
delete $self->{byId}{$id}; |
285
|
14
|
|
|
|
|
24
|
splice @{$self->{bonds}}, $index - 1, 1; |
|
14
|
|
|
|
|
39
|
|
286
|
14
|
|
|
|
|
64
|
$bond->delete_atoms; |
287
|
|
|
|
|
|
|
} |
288
|
|
|
|
|
|
|
|
289
|
|
|
|
|
|
|
=item $mol->by_id($id) |
290
|
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
Return the atom or bond object with the corresponding id. |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
=cut |
294
|
|
|
|
|
|
|
|
295
|
|
|
|
|
|
|
sub by_id { |
296
|
3
|
|
|
3
|
1
|
7
|
my $self = shift; |
297
|
3
|
|
|
|
|
6
|
my ($id) = @_; |
298
|
3
|
|
|
|
|
20
|
$self->{byId}{$id}; |
299
|
|
|
|
|
|
|
} |
300
|
|
|
|
|
|
|
|
301
|
|
|
|
|
|
|
sub _change_id { |
302
|
4
|
|
|
4
|
|
118
|
my ($self, $old_id, $new_id) = @_; |
303
|
4
|
|
|
|
|
11
|
my $ref = $self->{byId}{$old_id}; |
304
|
4
|
|
|
|
|
13
|
$self->{byId}{$new_id} = $ref; |
305
|
4
|
|
|
|
|
18
|
delete $self->{byId}{$old_id}; |
306
|
|
|
|
|
|
|
} |
307
|
|
|
|
|
|
|
|
308
|
|
|
|
|
|
|
=item $mol->atoms($n1, ...) |
309
|
|
|
|
|
|
|
|
310
|
|
|
|
|
|
|
Returns the atoms with the given indices, or all by default. |
311
|
|
|
|
|
|
|
Indices start from one, not from zero. |
312
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
=cut |
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
sub atoms { |
316
|
90
|
|
|
90
|
1
|
4773
|
my $self = shift; |
317
|
90
|
100
|
|
|
|
266
|
if (@_) { |
318
|
18
|
|
|
|
|
41
|
my @ats = map {$_ - 1} @_; |
|
24
|
|
|
|
|
76
|
|
319
|
18
|
|
|
|
|
34
|
@{$self->{atoms}}[@ats]; |
|
18
|
|
|
|
|
121
|
|
320
|
|
|
|
|
|
|
} else { |
321
|
72
|
|
|
|
|
92
|
@{$self->{atoms}}; |
|
72
|
|
|
|
|
453
|
|
322
|
|
|
|
|
|
|
} |
323
|
|
|
|
|
|
|
} |
324
|
|
|
|
|
|
|
|
325
|
|
|
|
|
|
|
=item $mol->atoms_by_name($name) |
326
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
Returns the atoms with the given name (treated as an anchored regular |
328
|
|
|
|
|
|
|
expression). |
329
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
=cut |
331
|
|
|
|
|
|
|
|
332
|
|
|
|
|
|
|
sub atoms_by_name { |
333
|
1
|
|
|
1
|
1
|
3
|
my $self = shift; |
334
|
1
|
|
|
|
|
20
|
my $re = qr/^$_[0]$/; |
335
|
17
|
|
|
17
|
|
226
|
no warnings; |
|
17
|
|
|
|
|
43
|
|
|
17
|
|
|
|
|
10043
|
|
336
|
1
|
|
|
|
|
5
|
my @ret = grep {$_->name =~ $re} $self->atoms; |
|
2
|
|
|
|
|
8
|
|
337
|
1
|
50
|
|
|
|
10
|
wantarray ? @ret : $ret[0]; |
338
|
|
|
|
|
|
|
} |
339
|
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
=item $mol->sort_atoms($sub_ref) |
341
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
Sort the atoms in the molecule by using the comparison function given in |
343
|
|
|
|
|
|
|
$sub_ref. This function should take two atoms as parameters and return -1, 0, |
344
|
|
|
|
|
|
|
or 1 depending on whether the first atom should go before, same, or after the |
345
|
|
|
|
|
|
|
second atom. For example, to sort by atomic number, you could use the |
346
|
|
|
|
|
|
|
following: |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
$mol->sort_atoms( sub { $_[0]->Z <=> $_[1]->Z } ); |
349
|
|
|
|
|
|
|
|
350
|
|
|
|
|
|
|
Note that the atoms are passed as parameters and not as the package variables |
351
|
|
|
|
|
|
|
$a and $b like the core sort function does. This is because $mol->sort will |
352
|
|
|
|
|
|
|
likely be called from another package and we don't want to play with another |
353
|
|
|
|
|
|
|
package's symbol table. |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
=cut |
356
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
sub sort_atoms { |
358
|
0
|
|
|
0
|
1
|
0
|
my ($self, $sub) = @_; |
359
|
0
|
|
|
|
|
0
|
my @a = $self->atoms; |
360
|
0
|
|
|
|
|
0
|
@a = sort { $sub->($a,$b) } @a; |
|
0
|
|
|
|
|
0
|
|
361
|
0
|
|
|
|
|
0
|
$self->{atoms} = \@a; |
362
|
0
|
|
|
|
|
0
|
$self; |
363
|
|
|
|
|
|
|
} |
364
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
=item $mol->bonds($n1, ...) |
366
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
Returns the bonds with the given indices, or all by default. |
368
|
|
|
|
|
|
|
Indices start from one, not from zero. |
369
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
=cut |
371
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
sub bonds { |
373
|
49
|
|
|
49
|
1
|
531
|
my $self = shift; |
374
|
49
|
100
|
|
|
|
507
|
if (@_) { |
375
|
6
|
|
|
|
|
16
|
my @bonds = map {$_ - 1} @_; |
|
6
|
|
|
|
|
24
|
|
376
|
6
|
|
|
|
|
14
|
@{$self->{bonds}}[@bonds]; |
|
6
|
|
|
|
|
44
|
|
377
|
|
|
|
|
|
|
} else { |
378
|
43
|
|
|
|
|
60
|
@{$self->{bonds}}; |
|
43
|
|
|
|
|
181
|
|
379
|
|
|
|
|
|
|
} |
380
|
|
|
|
|
|
|
} |
381
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
=item $mol->print(option => value...) |
383
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
Convert the molecule to a string representation. If no options are given, |
385
|
|
|
|
|
|
|
a default YAML-like format is used (this may change in the future). Otherwise, |
386
|
|
|
|
|
|
|
the format should be specified by using the C option. |
387
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
=cut |
389
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
sub print { |
391
|
19
|
|
|
19
|
1
|
145
|
my $self = shift; |
392
|
19
|
|
|
|
|
58
|
my (%opts) = @_; |
393
|
19
|
|
|
|
|
26
|
my $ret; |
394
|
19
|
|
|
|
|
31
|
local $" = ""; #" |
395
|
|
|
|
|
|
|
|
396
|
19
|
50
|
|
|
|
126
|
if ($opts{format}) { |
397
|
19
|
|
|
|
|
57
|
return $self->formats($opts{format})->write_string($self, %opts); |
398
|
|
|
|
|
|
|
} |
399
|
|
|
|
|
|
|
# else use default printout |
400
|
0
|
|
|
|
|
0
|
$ret = <
|
401
|
|
|
|
|
|
|
$self->{id}: |
402
|
|
|
|
|
|
|
name: $self->{name} |
403
|
|
|
|
|
|
|
END |
404
|
0
|
|
|
|
|
0
|
$ret .= " attr:\n"; |
405
|
0
|
|
|
|
|
0
|
$ret .= $self->print_attr(2); |
406
|
0
|
|
|
|
|
0
|
$ret .= " atoms:\n"; |
407
|
0
|
|
|
|
|
0
|
for my $a (@{$self->{atoms}}) { $ret .= $a->print(2) } |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
408
|
0
|
|
|
|
|
0
|
$ret .= " bonds:\n"; |
409
|
0
|
|
|
|
|
0
|
for my $b (@{$self->{bonds}}) { $ret .= $b->print(2) } |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
410
|
0
|
|
|
|
|
0
|
$ret; |
411
|
|
|
|
|
|
|
} |
412
|
|
|
|
|
|
|
|
413
|
|
|
|
|
|
|
=item $s = $mol->sprintf($format) |
414
|
|
|
|
|
|
|
|
415
|
|
|
|
|
|
|
Format interesting molecular information in a concise way, as specified by |
416
|
|
|
|
|
|
|
a printf-like format. |
417
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
%n - name |
419
|
|
|
|
|
|
|
%f - formula |
420
|
|
|
|
|
|
|
%f{formula with format} - (note: right braces within |
421
|
|
|
|
|
|
|
the format should be escaped with a backslash) |
422
|
|
|
|
|
|
|
%s - SMILES representation |
423
|
|
|
|
|
|
|
%S - canonical SMILES representation |
424
|
|
|
|
|
|
|
%m - mass |
425
|
|
|
|
|
|
|
%8.3m - mass, formatted as %8.3f with core sprintf |
426
|
|
|
|
|
|
|
%q - formal charge |
427
|
|
|
|
|
|
|
%a - atom count |
428
|
|
|
|
|
|
|
%b - bond count |
429
|
|
|
|
|
|
|
%t - type |
430
|
|
|
|
|
|
|
%i - id |
431
|
|
|
|
|
|
|
%% - % |
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
For example, if you want just about everything: |
434
|
|
|
|
|
|
|
|
435
|
|
|
|
|
|
|
$mol->sprintf("%s - %n (%f). %a atoms, %b bonds; " |
436
|
|
|
|
|
|
|
. "mass=%m; charge =%q; type=%t; id=%i"); |
437
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
Note that you have to C |
439
|
|
|
|
|
|
|
C<%S> on C<< $mol->sprintf >>. |
440
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
=cut |
442
|
|
|
|
|
|
|
|
443
|
|
|
|
|
|
|
sub sprintf { |
444
|
0
|
|
|
0
|
1
|
0
|
my ($mol, $format) = @_; |
445
|
17
|
|
|
17
|
|
125
|
no warnings 'uninitialized'; # don't care if some properties are undefined |
|
17
|
|
|
|
|
37
|
|
|
17
|
|
|
|
|
63099
|
|
446
|
0
|
|
0
|
|
|
0
|
$format ||= "%f"; |
447
|
0
|
|
|
|
|
0
|
$format =~ s/%%/\\%/g; # escape %% with a \ |
448
|
0
|
|
|
|
|
0
|
$format =~ s/(?formula($1)/eg; # %f{} |
|
0
|
|
|
|
|
0
|
|
449
|
0
|
|
|
|
|
0
|
$format =~ s/(?formula/eg; # %f |
|
0
|
|
|
|
|
0
|
|
450
|
0
|
|
|
|
|
0
|
$format =~ s/(?print(format=>'smiles')/eg; # %s |
|
0
|
|
|
|
|
0
|
|
451
|
0
|
|
|
|
|
0
|
$format =~ s/(?print(format=>'smiles', unique => 1)/eg; # %s |
|
0
|
|
|
|
|
0
|
|
452
|
0
|
|
|
|
|
0
|
$format =~ s/(?name/eg; # %n |
|
0
|
|
|
|
|
0
|
|
453
|
0
|
|
|
|
|
0
|
$format =~ s/(?
|
454
|
0
|
0
|
|
|
|
0
|
$1 ? sprintf "%$1f", $mol->mass : $mol->mass/eg; # %m |
455
|
0
|
|
|
|
|
0
|
$format =~ s/(?charge/eg; # %q |
|
0
|
|
|
|
|
0
|
|
456
|
0
|
|
|
|
|
0
|
$format =~ s/(?atoms/eg; # %a |
|
0
|
|
|
|
|
0
|
|
457
|
0
|
|
|
|
|
0
|
$format =~ s/(?bonds/eg; # %b |
|
0
|
|
|
|
|
0
|
|
458
|
0
|
|
|
|
|
0
|
$format =~ s/(?type/eg; # %t |
|
0
|
|
|
|
|
0
|
|
459
|
0
|
|
|
|
|
0
|
$format =~ s/(?id/eg; # %i |
|
0
|
|
|
|
|
0
|
|
460
|
0
|
|
|
|
|
0
|
$format =~ s/\\(.)/$1/g; # other \ escapes |
461
|
0
|
|
|
|
|
0
|
$format; |
462
|
|
|
|
|
|
|
} |
463
|
|
|
|
|
|
|
|
464
|
|
|
|
|
|
|
=item $mol->printf($format) |
465
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
Same as C<< $mol->sprintf >>, but prints to standard output automatically. |
467
|
|
|
|
|
|
|
Used for quick and dirty molecular information dumping. |
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
=cut |
470
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
sub printf { |
472
|
0
|
|
|
0
|
1
|
0
|
my ($mol, $format) = @_; |
473
|
0
|
|
|
|
|
0
|
print $mol->sprintf($format); |
474
|
|
|
|
|
|
|
} |
475
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
=item Chemistry::Mol->parse($string, option => value...) |
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
Parse the molecule encoded in C<$string>. The format should be specified |
479
|
|
|
|
|
|
|
with the the C option; otherwise, it will be guessed. |
480
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
=cut |
482
|
|
|
|
|
|
|
|
483
|
|
|
|
|
|
|
sub parse { |
484
|
14
|
|
|
14
|
1
|
13839
|
my $self = shift; |
485
|
14
|
|
|
|
|
26
|
my $s = shift; |
486
|
14
|
|
|
|
|
62
|
my %opts = (mol_class => $self, @_); |
487
|
|
|
|
|
|
|
|
488
|
14
|
50
|
|
|
|
41
|
if ($opts{format}) { |
489
|
14
|
|
|
|
|
54
|
return $self->formats($opts{format})->parse_string($s, %opts); |
490
|
|
|
|
|
|
|
} else { |
491
|
0
|
|
|
|
|
0
|
croak "Parse does not support autodetection yet.", |
492
|
|
|
|
|
|
|
"Please specify a format."; |
493
|
|
|
|
|
|
|
} |
494
|
0
|
|
|
|
|
0
|
return; |
495
|
|
|
|
|
|
|
} |
496
|
|
|
|
|
|
|
|
497
|
|
|
|
|
|
|
=item Chemistry::Mol->read($fname, option => value ...) |
498
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
Read a file and return a list of Mol objects, or croaks if there was a problem. |
500
|
|
|
|
|
|
|
The type of file will be guessed if not specified via the C option. |
501
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
Note that only registered file readers will be used. Readers may be registered |
503
|
|
|
|
|
|
|
using C; modules that include readers (such as |
504
|
|
|
|
|
|
|
L) usually register them automatically when they are |
505
|
|
|
|
|
|
|
loaded. |
506
|
|
|
|
|
|
|
|
507
|
|
|
|
|
|
|
Automatic decompression of gzipped files is supported if the L |
508
|
|
|
|
|
|
|
module is installed. Files ending in .gz are assumed to be compressed; |
509
|
|
|
|
|
|
|
otherwise it is possible to force decompression by passing the gzip => 1 |
510
|
|
|
|
|
|
|
option (or no decompression with gzip => 0). |
511
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
=cut |
513
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
sub read_mol { # for backwards compatibility |
515
|
0
|
|
|
0
|
0
|
0
|
my ($fname, $type) = shift; |
516
|
0
|
|
|
|
|
0
|
__PACKAGE__->read($fname, format => $type); |
517
|
|
|
|
|
|
|
} |
518
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
sub read { |
520
|
11
|
|
|
11
|
1
|
90902
|
my $self = shift; |
521
|
11
|
|
|
|
|
27
|
my $fname = shift; |
522
|
11
|
|
|
|
|
57
|
my %opts = (mol_class => $self, @_); |
523
|
|
|
|
|
|
|
|
524
|
11
|
100
|
|
|
|
50
|
if ($opts{format}) { |
525
|
3
|
|
|
|
|
18
|
return $self->formats($opts{format})->parse_file($fname, %opts); |
526
|
|
|
|
|
|
|
} else { # guess format |
527
|
8
|
|
|
|
|
41
|
for my $type ($self->formats) { |
528
|
8
|
50
|
|
|
|
30
|
if ($self->formats($type)->file_is($fname)) { |
529
|
8
|
|
|
|
|
31
|
return $self->formats($type)->parse_file($fname, %opts); |
530
|
|
|
|
|
|
|
} |
531
|
|
|
|
|
|
|
} |
532
|
|
|
|
|
|
|
} |
533
|
0
|
|
|
|
|
0
|
croak "Couldn't guess format of file '$fname'"; |
534
|
|
|
|
|
|
|
} |
535
|
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
=item $mol->write($fname, option => value ...) |
537
|
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
Write a molecule file, or croak if there was a problem. The type of file will |
539
|
|
|
|
|
|
|
be guessed if not specified via the C option. |
540
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
Note that only registered file formats will be used. |
542
|
|
|
|
|
|
|
|
543
|
|
|
|
|
|
|
Automatic gzip compression is supported if the IO::Zlib module is installed. |
544
|
|
|
|
|
|
|
Files ending in .gz are assumed to be compressed; otherwise it is possible to |
545
|
|
|
|
|
|
|
force compression by passing the gzip => 1 option (or no compression with gzip |
546
|
|
|
|
|
|
|
=> 0). Specific compression levels between 2 (fastest) and 9 (most compressed) |
547
|
|
|
|
|
|
|
may also be used (e.g., gzip => 9). |
548
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
=cut |
550
|
|
|
|
|
|
|
|
551
|
|
|
|
|
|
|
sub write { |
552
|
3
|
|
|
3
|
1
|
840
|
my ($self, $fname, %opts) = (@_); |
553
|
|
|
|
|
|
|
|
554
|
3
|
100
|
|
|
|
15
|
if ($opts{format}) { |
555
|
2
|
|
|
|
|
12
|
return $self->formats($opts{format})->write_file(@_); |
556
|
|
|
|
|
|
|
} else { # guess format |
557
|
1
|
|
|
|
|
5
|
for my $type ($self->formats) { |
558
|
1
|
50
|
|
|
|
14
|
if ($self->formats($type)->name_is($fname)) { |
559
|
1
|
|
|
|
|
6
|
return $self->formats($type)->write_file(@_); |
560
|
|
|
|
|
|
|
} |
561
|
|
|
|
|
|
|
} |
562
|
|
|
|
|
|
|
} |
563
|
0
|
|
|
|
|
0
|
croak "Couldn't guess format for writing file '$fname'"; |
564
|
|
|
|
|
|
|
} |
565
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
=item Chemistry::Mol->file($file, option => value ...) |
567
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
Create a L-derived object for reading or writing to a file. |
569
|
|
|
|
|
|
|
The object can then be used to read the molecules or other information in the |
570
|
|
|
|
|
|
|
file. |
571
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
This has more flexibility than calling C<< Chemistry::Mol->read >> when |
573
|
|
|
|
|
|
|
dealing with multi-molecule files or files that have higher structure or that |
574
|
|
|
|
|
|
|
have information that does not belong to the molecules themselves. For |
575
|
|
|
|
|
|
|
example, a reaction file may have a list of molecules, but also general |
576
|
|
|
|
|
|
|
information like the reaction name, yield, etc. as well as the classification |
577
|
|
|
|
|
|
|
of the molecules as reactants or products. The exact information that is |
578
|
|
|
|
|
|
|
available will depend on the file reader class that is being used. The |
579
|
|
|
|
|
|
|
following is a hypothetical example for reading MDL rxnfiles. |
580
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
# assuming this module existed... |
582
|
|
|
|
|
|
|
use Chemistry::File::Rxn; |
583
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
my $rxn = Chemistry::Mol->file('test.rxn'); |
585
|
|
|
|
|
|
|
$rxn->read; |
586
|
|
|
|
|
|
|
$name = $rxn->name; |
587
|
|
|
|
|
|
|
@reactants = $rxn->reactants; # mol objects |
588
|
|
|
|
|
|
|
@products = $rxn->products; |
589
|
|
|
|
|
|
|
$yield = $rxn->yield; # a number |
590
|
|
|
|
|
|
|
|
591
|
|
|
|
|
|
|
Note that only registered file readers will be used. Readers may be registered |
592
|
|
|
|
|
|
|
using register_format(); modules that include readers (such as |
593
|
|
|
|
|
|
|
Chemistry::File::PDB) usually register them automatically. |
594
|
|
|
|
|
|
|
|
595
|
|
|
|
|
|
|
=cut |
596
|
|
|
|
|
|
|
|
597
|
|
|
|
|
|
|
sub file { |
598
|
1
|
|
|
1
|
1
|
4
|
my ($self, $file, %opts) = @_; |
599
|
1
|
|
|
|
|
6
|
%opts = (mol_class => $self, %opts); |
600
|
|
|
|
|
|
|
|
601
|
1
|
50
|
|
|
|
9
|
if ($opts{format}) { |
602
|
0
|
|
|
|
|
0
|
return $self->formats($opts{format})->new(file => $file, |
603
|
|
|
|
|
|
|
opts => \%opts); |
604
|
|
|
|
|
|
|
} else { # guess format |
605
|
1
|
|
|
|
|
5
|
for my $type ($self->formats) { |
606
|
1
|
50
|
|
|
|
5
|
if ($self->formats($type)->file_is($file)) { |
607
|
1
|
|
|
|
|
4
|
return $self->formats($type)->new(file => $file, |
608
|
|
|
|
|
|
|
opts => \%opts); |
609
|
|
|
|
|
|
|
} |
610
|
|
|
|
|
|
|
} |
611
|
|
|
|
|
|
|
} |
612
|
0
|
|
|
|
|
0
|
croak "Couldn't guess format of file '$file'"; |
613
|
|
|
|
|
|
|
} |
614
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
=item Chemistry::Mol->register_format($name, $ref) |
616
|
|
|
|
|
|
|
|
617
|
|
|
|
|
|
|
Register a file type. The identifier $name must be unique. $ref is either a |
618
|
|
|
|
|
|
|
class name (a package) or an object that complies with the L |
619
|
|
|
|
|
|
|
interface (e.g., a subclass of Chemistry::File). If $ref is omitted, the |
620
|
|
|
|
|
|
|
calling package is used automatically. More than one format can be registered |
621
|
|
|
|
|
|
|
at a time, but then $ref must be included for each format (e.g., |
622
|
|
|
|
|
|
|
Chemistry::Mol->register_format(format1 => "package1", format2 => package2). |
623
|
|
|
|
|
|
|
|
624
|
|
|
|
|
|
|
The typical user doesn't have to care about this function. It is used |
625
|
|
|
|
|
|
|
automatically by molecule file I/O modules. |
626
|
|
|
|
|
|
|
|
627
|
|
|
|
|
|
|
=cut |
628
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
sub register_format { |
630
|
15
|
|
|
15
|
1
|
21529
|
my $class = shift; |
631
|
15
|
100
|
|
|
|
112
|
if (@_ == 1) { |
632
|
4
|
|
|
|
|
20
|
$FILE_FORMATS{$_[0]} = caller; |
633
|
4
|
|
|
|
|
18
|
return; |
634
|
|
|
|
|
|
|
} |
635
|
11
|
|
|
|
|
51
|
my %opts = @_; |
636
|
11
|
|
|
|
|
105
|
$FILE_FORMATS{$_} = $opts{$_} for keys %opts; |
637
|
|
|
|
|
|
|
} |
638
|
|
|
|
|
|
|
|
639
|
|
|
|
|
|
|
=item Chemistry::Mol->formats |
640
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
Returns a list of the file formats that have been installed by |
642
|
|
|
|
|
|
|
register_format() |
643
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
=cut |
645
|
|
|
|
|
|
|
|
646
|
|
|
|
|
|
|
sub formats { |
647
|
68
|
|
|
68
|
1
|
123
|
my $self = shift; |
648
|
68
|
100
|
|
|
|
172
|
if (@_) { |
649
|
58
|
|
|
|
|
87
|
my ($type) = @_; |
650
|
58
|
|
|
|
|
117
|
my $file_class = $FILE_FORMATS{$type}; |
651
|
58
|
50
|
|
|
|
179
|
unless ($file_class) { |
652
|
0
|
|
|
|
|
0
|
croak "No class installed for type '$type'"; |
653
|
|
|
|
|
|
|
} |
654
|
58
|
|
|
|
|
554
|
return $file_class; |
655
|
|
|
|
|
|
|
} else { |
656
|
10
|
|
|
|
|
68
|
return sort keys %FILE_FORMATS; |
657
|
|
|
|
|
|
|
} |
658
|
|
|
|
|
|
|
} |
659
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
=item $mol->mass |
661
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
Return the molar mass. This is just the sum of the masses of the atoms. See |
663
|
|
|
|
|
|
|
L::mass for details such as the handling of isotopes. |
664
|
|
|
|
|
|
|
|
665
|
|
|
|
|
|
|
=cut |
666
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
sub mass { |
668
|
2
|
|
|
2
|
1
|
8
|
my ($self) = @_; |
669
|
2
|
|
|
|
|
4
|
my $mass = 0; |
670
|
2
|
|
|
|
|
6
|
for my $atom ($self->atoms) { |
671
|
6
|
|
|
|
|
17
|
$mass += $atom->mass; |
672
|
|
|
|
|
|
|
} |
673
|
2
|
|
|
|
|
14
|
$mass; |
674
|
|
|
|
|
|
|
} |
675
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
=item $mol->charge |
677
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
Return the charge of the molecule. By default it returns the sum of the formal |
679
|
|
|
|
|
|
|
charges of the atoms. However, it is possible to set an arbitrary charge by |
680
|
|
|
|
|
|
|
calling C<< $mol->charge($new_charge) >> |
681
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
=cut |
683
|
|
|
|
|
|
|
|
684
|
|
|
|
|
|
|
sub charge { |
685
|
0
|
|
|
0
|
1
|
0
|
my ($self) = shift; |
686
|
0
|
0
|
|
|
|
0
|
if (@_) { |
687
|
0
|
|
|
|
|
0
|
$self->{charge} = shift; |
688
|
0
|
|
|
|
|
0
|
$self; |
689
|
|
|
|
|
|
|
} else { |
690
|
0
|
0
|
|
|
|
0
|
return $self->{charge} if defined $self->{charge}; |
691
|
0
|
|
|
|
|
0
|
my $charge = 0; |
692
|
0
|
|
0
|
|
|
0
|
$charge += $_->formal_charge || 0 for $self->atoms; |
693
|
0
|
|
|
|
|
0
|
$charge; |
694
|
|
|
|
|
|
|
} |
695
|
|
|
|
|
|
|
} |
696
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
=item $mol->formula_hash |
698
|
|
|
|
|
|
|
|
699
|
|
|
|
|
|
|
Returns a hash reference describing the molecular formula. For methane it would |
700
|
|
|
|
|
|
|
return { C => 1, H => 4 }. |
701
|
|
|
|
|
|
|
|
702
|
|
|
|
|
|
|
=cut |
703
|
|
|
|
|
|
|
|
704
|
|
|
|
|
|
|
sub formula_hash { |
705
|
17
|
|
|
17
|
1
|
73
|
my ($self) = @_; |
706
|
17
|
|
|
|
|
26
|
my $formula = {}; |
707
|
17
|
|
|
|
|
56
|
for my $atom ($self->atoms) { |
708
|
538
|
|
|
|
|
1444
|
$formula->{$atom->symbol}++; |
709
|
538
|
50
|
|
|
|
1601
|
$formula->{H} += $atom->hydrogens if $atom->hydrogens; |
710
|
|
|
|
|
|
|
} |
711
|
17
|
|
|
|
|
214
|
$formula; |
712
|
|
|
|
|
|
|
} |
713
|
|
|
|
|
|
|
|
714
|
|
|
|
|
|
|
=item $mol->formula($format) |
715
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
Returns a string with the formula. The format can be specified as a printf-like |
717
|
|
|
|
|
|
|
string with the control sequences specified in the L |
718
|
|
|
|
|
|
|
documentation. |
719
|
|
|
|
|
|
|
|
720
|
|
|
|
|
|
|
=cut |
721
|
|
|
|
|
|
|
|
722
|
|
|
|
|
|
|
sub formula { |
723
|
5
|
|
|
5
|
1
|
1547
|
my ($self, $format) = @_; |
724
|
5
|
|
|
|
|
1072
|
require Chemistry::File::Formula; |
725
|
5
|
|
|
|
|
22
|
$self->print(format => "formula", formula_format => $format); |
726
|
|
|
|
|
|
|
} |
727
|
|
|
|
|
|
|
|
728
|
|
|
|
|
|
|
=item my $mol2 = $mol->clone; |
729
|
|
|
|
|
|
|
|
730
|
|
|
|
|
|
|
Makes a copy of a molecule. Note that this is a B copy; if your molecule |
731
|
|
|
|
|
|
|
has a pointer to the rest of the universe, the entire universe will be cloned! |
732
|
|
|
|
|
|
|
|
733
|
|
|
|
|
|
|
=cut |
734
|
|
|
|
|
|
|
|
735
|
|
|
|
|
|
|
sub clone { |
736
|
8
|
|
|
8
|
1
|
15
|
my ($self) = @_; |
737
|
8
|
|
|
|
|
2328
|
my $clone = dclone $self; |
738
|
8
|
50
|
|
|
|
155
|
$clone->_weaken if Storable->VERSION < 2.14; |
739
|
8
|
|
|
|
|
33
|
$clone; |
740
|
|
|
|
|
|
|
} |
741
|
|
|
|
|
|
|
|
742
|
|
|
|
|
|
|
=item my $mol2 = $mol->safe_clone; |
743
|
|
|
|
|
|
|
|
744
|
|
|
|
|
|
|
Like clone, it makes a deep copy of a molecule. The difference is that the copy |
745
|
|
|
|
|
|
|
is not "exact" in that new molecule and its atoms and bonds get assigned new |
746
|
|
|
|
|
|
|
IDs. This makes it safe to combine cloned molecules. For example, this is an |
747
|
|
|
|
|
|
|
error: |
748
|
|
|
|
|
|
|
|
749
|
|
|
|
|
|
|
# XXX don't try this at home! |
750
|
|
|
|
|
|
|
my $mol2 = Chemistry::Mol->combine($mol1, $mol1); |
751
|
|
|
|
|
|
|
# the atoms in $mol1 will clash |
752
|
|
|
|
|
|
|
|
753
|
|
|
|
|
|
|
But this is ok: |
754
|
|
|
|
|
|
|
|
755
|
|
|
|
|
|
|
# the "safe clone" of $mol1 will have new IDs |
756
|
|
|
|
|
|
|
my $mol2 = Chemistry::Mol->combine($mol1, $mol1->safe_clone); |
757
|
|
|
|
|
|
|
|
758
|
|
|
|
|
|
|
=cut |
759
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
sub safe_clone { |
761
|
1
|
|
|
1
|
1
|
2
|
my ($mol) = @_; |
762
|
1
|
|
|
|
|
3
|
my $clone = $mol->clone; |
763
|
1
|
|
|
|
|
4
|
for ($clone, $clone->atoms, $clone->bonds) { |
764
|
4
|
|
|
|
|
17
|
$_->id($_->nextID); |
765
|
|
|
|
|
|
|
} |
766
|
1
|
|
|
|
|
3
|
$clone; |
767
|
|
|
|
|
|
|
} |
768
|
|
|
|
|
|
|
|
769
|
|
|
|
|
|
|
sub _weaken { |
770
|
14
|
|
|
14
|
|
36
|
my ($self) = @_; |
771
|
14
|
|
|
|
|
70
|
for ($self->atoms, $self->bonds) { |
772
|
196
|
|
|
|
|
581
|
$_->_weaken; |
773
|
|
|
|
|
|
|
} |
774
|
14
|
|
|
|
|
51
|
$self; |
775
|
|
|
|
|
|
|
} |
776
|
|
|
|
|
|
|
|
777
|
|
|
|
|
|
|
=item ($distance, $atom_here, $atom_there) = $mol->distance($obj) |
778
|
|
|
|
|
|
|
|
779
|
|
|
|
|
|
|
Returns the minimum distance to $obj, which can be an atom, a molecule, or a |
780
|
|
|
|
|
|
|
vector. In scalar context it returns only the distance; in list context it |
781
|
|
|
|
|
|
|
also returns the atoms involved. The current implementation for calculating |
782
|
|
|
|
|
|
|
the minimum distance between two molecules compares every possible pair of |
783
|
|
|
|
|
|
|
atoms, so it's not efficient for large molecules. |
784
|
|
|
|
|
|
|
|
785
|
|
|
|
|
|
|
=cut |
786
|
|
|
|
|
|
|
|
787
|
|
|
|
|
|
|
sub distance { |
788
|
0
|
|
|
0
|
1
|
0
|
my ($self, $other) = @_; |
789
|
0
|
0
|
|
|
|
0
|
if ($other->isa("Chemistry::Mol")) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
790
|
0
|
|
|
|
|
0
|
my @atoms = $self->atoms; |
791
|
0
|
0
|
|
|
|
0
|
my $atom = shift @atoms or return; # need at least one atom |
792
|
0
|
|
|
|
|
0
|
my $closest_here = $atom; |
793
|
0
|
|
|
|
|
0
|
my ($min_length, $closest_there) = $atom->distance($other); |
794
|
0
|
|
|
|
|
0
|
for $atom (@atoms) { |
795
|
0
|
|
|
|
|
0
|
my ($d, $o) = $atom->distance($other); |
796
|
0
|
0
|
|
|
|
0
|
if ($d < $min_length) { |
797
|
0
|
|
|
|
|
0
|
($min_length, $closest_there, $closest_here) = ($d, $o, $atom); |
798
|
|
|
|
|
|
|
} |
799
|
|
|
|
|
|
|
} |
800
|
|
|
|
|
|
|
return wantarray ? |
801
|
0
|
0
|
|
|
|
0
|
($min_length, $closest_here, $closest_there) : $min_length; |
802
|
|
|
|
|
|
|
} elsif ($other->isa("Chemistry::Atom")) { |
803
|
0
|
|
|
|
|
0
|
return $other->distance($self); |
804
|
|
|
|
|
|
|
} elsif ($other->isa("Math::VectorReal")) { |
805
|
0
|
|
|
|
|
0
|
return Chemistry::Atom->new(coords => $other)->distance($self); |
806
|
|
|
|
|
|
|
} |
807
|
|
|
|
|
|
|
} |
808
|
|
|
|
|
|
|
|
809
|
|
|
|
|
|
|
=item my $bigmol = Chemistry::Mol->combine($mol1, $mol2, ...) |
810
|
|
|
|
|
|
|
|
811
|
|
|
|
|
|
|
=item $mol1->combine($mol2, $mol3, ...) |
812
|
|
|
|
|
|
|
|
813
|
|
|
|
|
|
|
Combines several molecules in one bigger molecule. If called as a class method, |
814
|
|
|
|
|
|
|
as in the first example, it returns a new combined molecule without altering |
815
|
|
|
|
|
|
|
any of the parameters. If called as an instance method, as in the second |
816
|
|
|
|
|
|
|
example, all molecules are combined into $mol1 (but $mol2, $mol3, ...) are not |
817
|
|
|
|
|
|
|
altered. B: Make sure you don't combine molecules which contain atoms |
818
|
|
|
|
|
|
|
with duplicate IDs (for example, if they were cloned). |
819
|
|
|
|
|
|
|
|
820
|
|
|
|
|
|
|
=cut |
821
|
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
# joins several molecules into one |
823
|
|
|
|
|
|
|
sub combine { |
824
|
2
|
|
|
2
|
1
|
455
|
my ($self, @others) = @_; |
825
|
2
|
|
|
|
|
4
|
my $mol; |
826
|
2
|
100
|
|
|
|
6
|
if (ref $self) { |
827
|
1
|
|
|
|
|
3
|
$mol = $self; |
828
|
|
|
|
|
|
|
} else { |
829
|
1
|
|
|
|
|
5
|
$mol = $self->new; |
830
|
|
|
|
|
|
|
} |
831
|
2
|
|
|
|
|
5
|
for my $other (@others) { |
832
|
3
|
|
|
|
|
9
|
my $mol2 = $other->clone; |
833
|
3
|
|
|
|
|
8
|
for my $atom ($mol2->atoms) { |
834
|
12
|
|
|
|
|
59
|
$mol->add_atom($atom); |
835
|
|
|
|
|
|
|
} |
836
|
3
|
|
|
|
|
10
|
for my $bond ($mol2->bonds) { |
837
|
9
|
|
|
|
|
24
|
$mol->add_bond($bond); |
838
|
|
|
|
|
|
|
} |
839
|
|
|
|
|
|
|
} |
840
|
2
|
|
|
|
|
7
|
$mol; |
841
|
|
|
|
|
|
|
} |
842
|
|
|
|
|
|
|
|
843
|
|
|
|
|
|
|
=item my @mols = $mol->separate |
844
|
|
|
|
|
|
|
|
845
|
|
|
|
|
|
|
Separates a molecule into "connected fragments". The original object is not |
846
|
|
|
|
|
|
|
modified; the fragments are clones of the original ones. Example: if you have |
847
|
|
|
|
|
|
|
ethane (H3CCH3) and you delete the C-C bond, you have two CH3 radicals within |
848
|
|
|
|
|
|
|
one molecule object ($mol). When you call $mol->separate you get two molecules, |
849
|
|
|
|
|
|
|
each one with a CH3. |
850
|
|
|
|
|
|
|
|
851
|
|
|
|
|
|
|
=cut |
852
|
|
|
|
|
|
|
|
853
|
|
|
|
|
|
|
# splits a molecule into connected fragments |
854
|
|
|
|
|
|
|
# returns a list of molecules. Does not touch the original copy. |
855
|
|
|
|
|
|
|
sub separate { |
856
|
1
|
|
|
1
|
1
|
946
|
my ($self) = @_; |
857
|
1
|
|
|
|
|
5
|
$self = $self->clone; |
858
|
1
|
|
|
|
|
4
|
$self->{_paint_tab} = {}; |
859
|
1
|
|
|
|
|
3
|
my $color = 0; |
860
|
1
|
|
|
|
|
6
|
for my $atom ($self->atoms) { |
861
|
8
|
100
|
|
|
|
28
|
next if defined $self->{_paint_tab}{$atom->id}; |
862
|
2
|
|
|
|
|
10
|
$self->_paint($atom, $color++); |
863
|
|
|
|
|
|
|
} |
864
|
1
|
|
|
|
|
3
|
my @mols; |
865
|
1
|
|
|
|
|
6
|
push @mols, $self->new for (1 .. $color); |
866
|
1
|
|
|
|
|
4
|
for my $atom ($self->atoms) { |
867
|
8
|
|
|
|
|
25
|
$mols[$self->{_paint_tab}{$atom->id}]->add_atom($atom); |
868
|
|
|
|
|
|
|
} |
869
|
1
|
|
|
|
|
5
|
for my $bond ($self->bonds) { |
870
|
6
|
|
|
|
|
18
|
$mols[$self->{_paint_tab}{$bond->id}]->add_bond($bond); |
871
|
|
|
|
|
|
|
} |
872
|
1
|
|
|
|
|
19
|
@mols; |
873
|
|
|
|
|
|
|
} |
874
|
|
|
|
|
|
|
|
875
|
|
|
|
|
|
|
# this method fills the _paint_tab attribute for every atom connected |
876
|
|
|
|
|
|
|
# to the given start atom $atom with $color. Used for separating |
877
|
|
|
|
|
|
|
# connected fragments. Uses a depth-first search |
878
|
|
|
|
|
|
|
sub _paint { |
879
|
14
|
|
|
14
|
|
22
|
my ($self, $atom, $color) = @_; |
880
|
14
|
100
|
|
|
|
34
|
return if defined $self->{_paint_tab}{$atom->id}; |
881
|
8
|
|
|
|
|
22
|
$self->{_paint_tab}{$atom->id} = $color; |
882
|
8
|
|
|
|
|
21
|
$self->{_paint_tab}{$_->id} = $color for ($atom->bonds); |
883
|
8
|
|
|
|
|
23
|
for my $neighbor ($atom->neighbors) { |
884
|
12
|
|
|
|
|
25
|
$self->_paint($neighbor, $color); |
885
|
|
|
|
|
|
|
} |
886
|
|
|
|
|
|
|
} |
887
|
|
|
|
|
|
|
|
888
|
|
|
|
|
|
|
=item $mol->sprout_hydrogens |
889
|
|
|
|
|
|
|
|
890
|
|
|
|
|
|
|
Convert all the implicit hydrogen atoms in the molecule to explicit atoms. |
891
|
|
|
|
|
|
|
It does B generate coordinates for the atoms. |
892
|
|
|
|
|
|
|
|
893
|
|
|
|
|
|
|
=cut |
894
|
|
|
|
|
|
|
|
895
|
|
|
|
|
|
|
sub sprout_hydrogens { |
896
|
1
|
|
|
1
|
1
|
2
|
my ($self) = @_; |
897
|
1
|
|
|
|
|
4
|
$_->sprout_hydrogens for $self->atoms; |
898
|
|
|
|
|
|
|
} |
899
|
|
|
|
|
|
|
|
900
|
|
|
|
|
|
|
=item $mol->collapse_hydrogens |
901
|
|
|
|
|
|
|
|
902
|
|
|
|
|
|
|
Convert all the explicit hydrogen atoms in the molecule to implicit hydrogens. |
903
|
|
|
|
|
|
|
(Exception: hydrogen atoms that are adjacent to a hydrogen atom are not |
904
|
|
|
|
|
|
|
collapsed.) |
905
|
|
|
|
|
|
|
|
906
|
|
|
|
|
|
|
=cut |
907
|
|
|
|
|
|
|
|
908
|
|
|
|
|
|
|
sub collapse_hydrogens { |
909
|
1
|
|
|
1
|
1
|
3
|
my ($self) = @_; |
910
|
1
|
|
|
|
|
5
|
for my $atom (grep { $_->symbol ne 'H' } $self->atoms) { |
|
3
|
|
|
|
|
12
|
|
911
|
1
|
|
|
|
|
7
|
$atom->collapse_hydrogens; |
912
|
|
|
|
|
|
|
} |
913
|
|
|
|
|
|
|
} |
914
|
|
|
|
|
|
|
|
915
|
|
|
|
|
|
|
=item $mol->add_implicit_hydrogens |
916
|
|
|
|
|
|
|
|
917
|
|
|
|
|
|
|
Use heuristics to figure out how many implicit hydrogens should each atom in |
918
|
|
|
|
|
|
|
the molecule have to satisfy its normal "organic" valence. |
919
|
|
|
|
|
|
|
|
920
|
|
|
|
|
|
|
=cut |
921
|
|
|
|
|
|
|
|
922
|
|
|
|
|
|
|
sub add_implicit_hydrogens { |
923
|
1
|
|
|
1
|
1
|
7
|
my ($self) = @_; |
924
|
1
|
|
|
|
|
5
|
$_->add_implicit_hydrogens for $self->atoms; |
925
|
|
|
|
|
|
|
} |
926
|
|
|
|
|
|
|
|
927
|
|
|
|
|
|
|
|
928
|
|
|
|
|
|
|
my %DESCRIPTORS = (); |
929
|
|
|
|
|
|
|
|
930
|
|
|
|
|
|
|
=item Chemistry::Mol->register_descriptor($name => $sub_ref) |
931
|
|
|
|
|
|
|
|
932
|
|
|
|
|
|
|
Adds a callback that can be used to add functionality to the molecule class |
933
|
|
|
|
|
|
|
(originally meant to add custom molecule descriptors.) A descriptor is a |
934
|
|
|
|
|
|
|
function that takes a molecule object as its only argument and returns a value |
935
|
|
|
|
|
|
|
or values. For example, to add a descriptor function that computes the number |
936
|
|
|
|
|
|
|
of atoms: |
937
|
|
|
|
|
|
|
|
938
|
|
|
|
|
|
|
Chemistry::Mol->register_descriptor( |
939
|
|
|
|
|
|
|
number_of_atoms => sub { |
940
|
|
|
|
|
|
|
my $mol = shift; |
941
|
|
|
|
|
|
|
return scalar $mol->atoms; |
942
|
|
|
|
|
|
|
} |
943
|
|
|
|
|
|
|
); |
944
|
|
|
|
|
|
|
|
945
|
|
|
|
|
|
|
The descriptor is accessed by name via the C instance method: |
946
|
|
|
|
|
|
|
|
947
|
|
|
|
|
|
|
my $n = $mol->descriptor('number_of_atoms'); |
948
|
|
|
|
|
|
|
|
949
|
|
|
|
|
|
|
=cut |
950
|
|
|
|
|
|
|
|
951
|
|
|
|
|
|
|
sub register_descriptor { |
952
|
1
|
|
|
1
|
1
|
74
|
my ($self, %opts) = @_; |
953
|
1
|
|
|
|
|
8
|
$DESCRIPTORS{$_} = $opts{$_} for keys %opts; |
954
|
|
|
|
|
|
|
} |
955
|
|
|
|
|
|
|
|
956
|
|
|
|
|
|
|
=item my $value = $mol->descriptor($descriptor_name) |
957
|
|
|
|
|
|
|
|
958
|
|
|
|
|
|
|
Calls a previously registered descriptor function giving it $mol as an |
959
|
|
|
|
|
|
|
argument, as shown above for C. |
960
|
|
|
|
|
|
|
|
961
|
|
|
|
|
|
|
=cut |
962
|
|
|
|
|
|
|
|
963
|
|
|
|
|
|
|
sub descriptor { |
964
|
1
|
|
|
1
|
1
|
3
|
my ($self, $descriptor) = @_; |
965
|
1
|
50
|
|
|
|
6
|
my $sub = $DESCRIPTORS{$descriptor} |
966
|
|
|
|
|
|
|
or croak "unknown descriptor '$descriptor'"; |
967
|
1
|
|
|
|
|
5
|
return $sub->($self); |
968
|
|
|
|
|
|
|
} |
969
|
|
|
|
|
|
|
|
970
|
|
|
|
|
|
|
1; |
971
|
|
|
|
|
|
|
|
972
|
|
|
|
|
|
|
=back |
973
|
|
|
|
|
|
|
|
974
|
|
|
|
|
|
|
=head1 VERSION |
975
|
|
|
|
|
|
|
|
976
|
|
|
|
|
|
|
0.37 |
977
|
|
|
|
|
|
|
|
978
|
|
|
|
|
|
|
=head1 SEE ALSO |
979
|
|
|
|
|
|
|
|
980
|
|
|
|
|
|
|
L, L, L, |
981
|
|
|
|
|
|
|
L |
982
|
|
|
|
|
|
|
|
983
|
|
|
|
|
|
|
The PerlMol website L |
984
|
|
|
|
|
|
|
|
985
|
|
|
|
|
|
|
=head1 AUTHOR |
986
|
|
|
|
|
|
|
|
987
|
|
|
|
|
|
|
Ivan Tubert-Brohman Eitub@cpan.orgE |
988
|
|
|
|
|
|
|
|
989
|
|
|
|
|
|
|
=head1 COPYRIGHT |
990
|
|
|
|
|
|
|
|
991
|
|
|
|
|
|
|
Copyright (c) 2005 Ivan Tubert-Brohman. All rights reserved. This program is |
992
|
|
|
|
|
|
|
free software; you can redistribute it and/or modify it under the same terms as |
993
|
|
|
|
|
|
|
Perl itself. |
994
|
|
|
|
|
|
|
|
995
|
|
|
|
|
|
|
=cut |
996
|
|
|
|
|
|
|
|