File Coverage

blib/lib/Chemistry/Mol.pm

Criterion	Covered	Total	%
statement	243	338	71.8
branch	44	80	55.0
condition	2	7	28.5
subroutine	50	60	83.3
pod	37	45	82.2
total	376	530	70.9

line	stmt	bran	cond	sub	pod	time	code
1							package Chemistry::Mol;
2
3							our $VERSION = '0.39'; # VERSION
4							# $Id$
5
6							=head1 NAME
7
8							Chemistry::Mol - Molecule object toolkit
9
10							=head1 SYNOPSIS
11
12							use Chemistry::Mol;
13
14							$mol = Chemistry::Mol->new(id => "mol_id", name => "my molecule");
15							$c = $mol->new_atom(symbol => "C", coords => [0,0,0]);
16							$o = $mol->new_atom(symbol => "O", coords => [0,0,1.23]);
17							$mol->new_bond(atoms => [$c, $o], order => 3);
18
19							print $mol->print;
20
21							=head1 DESCRIPTION
22
23							This package, along with Chemistry::Atom and Chemistry::Bond, includes basic
24							objects and methods to describe molecules.
25
26							The core methods try not to enforce a particular convention. This means that
27							only a minimal set of attributes is provided by default, and some attributes
28							have very loosely defined meaning. This is because each program and file type
29							has different idea of what each concept (such as bond and atom type) means.
30							Bonds are defined as a list of atoms (typically two) with an arbitrary type.
31							Atoms are defined by a symbol and a Z, and may have 3D and internal coordinates
32							(2D coming soon).
33
34							=cut
35
36	16			16		300916	use 5.006;
	16					87
37	16			16		75	use strict;
	16					24
	16					322
38	16			16		69	use warnings;
	16					174
	16					497
39	16			16		6491	use Chemistry::Atom;
	16					51
	16					931
40	16			16		7547	use Chemistry::Bond;
	16					37
	16					384
41	16			16		90	use Carp;
	16					27
	16					846
42	16			16		79	use base qw(Chemistry::Obj Exporter);
	16					33
	16					1624
43	16			16		9169	use Storable 'dclone';
	16					45497
	16					19500
44
45							our @EXPORT_OK = qw(read_mol);
46							our @EXPORT = ();
47							our %EXPORT_TAGS = (
48							all => [@EXPORT, @EXPORT_OK],
49							);
50
51							our $clone_backend = 'Storable';
52
53							my %FILE_FORMATS = ();
54
55							=head1 METHODS
56
57							See also L for generic attributes.
58
59							=over 4
60
61							=item Chemistry::Mol->new(name => value, ...)
62
63							Create a new Mol object with the specified attributes.
64
65							$mol = Chemistry::Mol->new(id => 'm123', name => 'my mol')
66
67							is the same as
68
69							Chemistry::Mol->new()
70							$mol->id('m123')
71							$mol->name('my mol')
72
73							=cut
74
75							sub new {
76	26			26	1	1689	my $class = shift;
77	26					48	my %args = @_;
78	26		66			61	my $self = bless {
79							id => $class->nextID,
80							byId => {},
81							atoms => [],
82							bonds => [],
83							name => "",
84							}, ref $class \|\| $class;
85	26					78	$self->$_($args{$_}) for (keys %args);
86	26					78	return $self;
87							}
88
89							my $N = 0; # molecule ID counter
90	27			27	0	216	sub nextID { "mol".++$N; }
91	0			0	0	0	sub reset_id { $N = 0; }
92	0			0	0	0	sub next_id { $N = $_[1] }
93
94							=item $mol->add_atom($atom, ...)
95
96							Add one or more Atom objects to the molecule. Returns the last atom added.
97
98							=cut
99
100							sub add_atom {
101	552			552	1	1419	my $self = shift;
102	552					681	for my $atom (@_){
103							#if ($self->by_id($atom->id)) {
104							#croak "Duplicate ID when adding atom '$atom' to mol '$self'";
105							#}
106	553					557	push @{$self->{atoms}}, $atom;
	553					1028
107	553					941	$self->{byId}{$atom->id} = $atom;
108	553					1042	$atom->parent($self);
109							}
110	552					1132	$_[-1];
111							}
112
113							sub add_atom_np {
114	0			0	0	0	my $self = shift;
115	0					0	for my $atom (@_){
116	0					0	push @{$self->{atoms}}, $atom;
	0					0
117	0					0	$self->{byId}{$atom->id} = $atom;
118							}
119	0					0	$_[-1];
120							}
121
122							=item $mol->atom_class
123
124							Returns the atom class that a molecule or molecule class expects to use by
125							default. L objects return "Chemistry::Atom", but subclasses
126							will likely override this method.
127
128							=cut
129
130							sub atom_class {
131	14			14	1	46	"Chemistry::Atom";
132							}
133
134							=item $mol->new_atom(name => value, ...)
135
136							Shorthand for C<< $mol->add_atom($mol->atom_class->new(name => value, ...)) >>.
137
138							=cut
139
140							sub new_atom {
141	14			14	1	61	my $self = shift;
142	14					34	$self->add_atom($self->atom_class->new(@_));
143							}
144
145							=item $mol->delete_atom($atom, ...)
146
147							Deletes an atom from the molecule. It automatically deletes all the bonds in
148							which the atom participates as well. $atom should be a Chemistry::Atom
149							reference. This method also accepts the atom index, but this use is deprecated
150							(and buggy if multiple indices are given, unless they are in descending order).
151
152							=cut
153
154							sub delete_atom {
155	2			2	1	4	my $self = shift;
156	2					5	for my $i (@_) {
157	2					4	my ($atom);
158	2	100				6	if (ref $i) {
159	1					2	$atom = $i;
160							} else {
161	1	50				4	$atom = $self->atoms($i)
162							or croak "$self->delete_atom: no such atom $i\n";
163							}
164	2					8	$atom->delete($i);
165							}
166							}
167
168							# takes an atom ref to delete and optionally the atom index
169							# 1) deletes bonds that belonged to atom
170							# 2) deletes atom
171							sub _delete_atom {
172	7			7		20	my ($self, $atom) = @_;
173	7	50				19	my $index = $self->get_atom_index($atom)
174							or croak "$self->delete_atom: no such atom $atom\n";
175	7					71	my $id = $atom->id;
176	7					40	$self->delete_bond($atom->bonds);
177	7					39	delete $self->{byId}{$id};
178	7					22	splice @{$self->{atoms}}, $index - 1, 1;
	7					28
179							}
180
181							=item $mol->add_bond($bond, ...)
182
183							Add one or more Bond objects to the molecule. Returns the last bond added.
184
185							=cut
186
187							sub add_bond {
188	25			25	1	38	my $self = shift;
189	25					40	for my $bond (@_){
190							#if ($self->by_id($bond->id)) {
191							#croak "Duplicate ID when adding bond '$bond' to mol '$self'";
192							#}
193	25					30	push @{$self->{bonds}}, $bond;
	25					47
194	25					61	$self->{byId}{$bond->id} = $bond;
195	25	100				56	if ($bond->{deleted}) {
196	1					2	$_->add_bond($bond) for $bond->atoms;
197	1					3	$bond->{deleted} = 0;
198							}
199	25					57	$bond->parent($self);
200							}
201	25					64	$_[-1];
202							}
203
204							sub add_bond_np {
205	0			0	0	0	my $self = shift;
206	0					0	for my $bond (@_){
207	0					0	push @{$self->{bonds}}, $bond;
	0					0
208	0					0	$self->{byId}{$bond->id} = $bond;
209							}
210	0					0	$_[-1];
211							}
212
213							=item $mol->bond_class
214
215							Returns the bond class that a molecule or molecule class expects to use by
216							default. L objects return "Chemistry::Bond", but subclasses
217							will likely override this method.
218
219							=cut
220
221							sub bond_class {
222	8			8	1	55	"Chemistry::Bond";
223							}
224
225							=item $mol->new_bond(name => value, ...)
226
227							Shorthand for C<< $mol->add_bond($mol->bond_class->new(name => value, ...)) >>.
228
229							=cut
230
231							sub new_bond {
232	8			8	1	28	my $self = shift;
233	8					28	$self->add_bond($self->bond_class->new(@_));
234							}
235
236							sub get_bond_index {
237	14			14	0	29	my ($self, $bond) = @_;
238	14					18	my $i;
239	14					31	for ($self->bonds) {
240	42					50	++$i;
241	42	100				74	return $i if ($_ eq $bond);
242							}
243	0					0	undef;
244							}
245
246							sub get_atom_index {
247	7			7	0	12	my ($self, $atom) = @_;
248	7					11	my $i;
249	7					16	for ($self->atoms) {
250	12					19	++$i;
251	12	100				32	return $i if ($_ eq $atom);
252							}
253	0					0	undef;
254							}
255
256							=item $mol->delete_bond($bond, ...)
257
258							Deletes a bond from the molecule. $bond should be a L object.
259
260							=cut
261
262							# mol deletes bond
263							# bond tells atoms involved to forget about it
264
265							sub delete_bond {
266	7			7	1	14	my $self = shift;
267	7					15	for my $i (@_){
268	11					13	my ($bond);
269	11	50				24	if (ref $i) {
270	11					18	$bond = $i;
271							} else {
272	0	0				0	$bond = $self->bonds($i)
273							or croak "$self->delete_bond($i): no such bond $i\n";
274							}
275	11					29	$bond->delete;
276							}
277							}
278
279							sub _delete_bond {
280	14			14		29	my ($self, $bond) = @_;
281	14	50				35	my $index = $self->get_bond_index($bond)
282							#or croak "$self->delete_bond: no such bond $bond\n";
283							or return;
284	14					42	my $id = $bond->id;
285	14					30	delete $self->{byId}{$id};
286	14					22	splice @{$self->{bonds}}, $index - 1, 1;
	14					38
287	14					37	$bond->delete_atoms;
288							}
289
290							=item $mol->by_id($id)
291
292							Return the atom or bond object with the corresponding id.
293
294							=cut
295
296							sub by_id {
297	3			3	1	6	my $self = shift;
298	3					7	my ($id) = @_;
299	3					14	$self->{byId}{$id};
300							}
301
302							sub _change_id {
303	4			4		9	my ($self, $old_id, $new_id) = @_;
304	4					7	my $ref = $self->{byId}{$old_id};
305	4					9	$self->{byId}{$new_id} = $ref;
306	4					10	delete $self->{byId}{$old_id};
307							}
308
309							=item $mol->atoms($n1, ...)
310
311							Returns the atoms with the given indices, or all by default.
312							Indices start from one, not from zero.
313
314							=cut
315
316							sub atoms {
317	90			90	1	8230	my $self = shift;
318	90	100				214	if (@_) {
319	18					48	my @ats = map {$_ - 1} @_;
	24					68
320	18					32	@{$self->{atoms}}[@ats];
	18					84
321							} else {
322	72					88	@{$self->{atoms}};
	72					383
323							}
324							}
325
326							=item $mol->atoms_by_name($name)
327
328							Returns the atoms with the given name (treated as an anchored regular
329							expression).
330
331							=cut
332
333							sub atoms_by_name {
334	1			1	1	3	my $self = shift;
335	1					28	my $re = qr/^$_[0]$/;
336	16			16		179	no warnings;
	16					36
	16					7112
337	1					5	my @ret = grep {$_->name =~ $re} $self->atoms;
	2					8
338	1	50				8	wantarray ? @ret : $ret[0];
339							}
340
341							=item $mol->sort_atoms($sub_ref)
342
343							Sort the atoms in the molecule by using the comparison function given in
344							$sub_ref. This function should take two atoms as parameters and return -1, 0,
345							or 1 depending on whether the first atom should go before, same, or after the
346							second atom. For example, to sort by atomic number, you could use the
347							following:
348
349							$mol->sort_atoms( sub { $_[0]->Z <=> $_[1]->Z } );
350
351							Note that the atoms are passed as parameters and not as the package variables
352							$a and $b like the core sort function does. This is because $mol->sort will
353							likely be called from another package and we don't want to play with another
354							package's symbol table.
355
356							=cut
357
358							sub sort_atoms {
359	0			0	1	0	my ($self, $sub) = @_;
360	0					0	my @a = $self->atoms;
361	0					0	@a = sort { $sub->($a,$b) } @a;
	0					0
362	0					0	$self->{atoms} = \@a;
363	0					0	$self;
364							}
365
366							=item $mol->bonds($n1, ...)
367
368							Returns the bonds with the given indices, or all by default.
369							Indices start from one, not from zero.
370
371							=cut
372
373							sub bonds {
374	49			49	1	648	my $self = shift;
375	49	100				113	if (@_) {
376	6					15	my @bonds = map {$_ - 1} @_;
	6					24
377	6					12	@{$self->{bonds}}[@bonds];
	6					33
378							} else {
379	43					54	@{$self->{bonds}};
	43					155
380							}
381							}
382
383							=item $mol->print(option => value...)
384
385							Convert the molecule to a string representation. If no options are given,
386							a default YAML-like format is used (this may change in the future). Otherwise,
387							the format should be specified by using the C option.
388
389							=cut
390
391							sub print {
392	19			19	1	349	my $self = shift;
393	19					42	my (%opts) = @_;
394	19					21	my $ret;
395	19					25	local $" = ""; #"
396
397	19	50				39	if ($opts{format}) {
398	19					41	return $self->formats($opts{format})->write_string($self, %opts);
399							}
400							# else use default printout
401	0					0	$ret = <
402							$self->{id}:
403							name: $self->{name}
404							END
405	0					0	$ret .= " attr:\n";
406	0					0	$ret .= $self->print_attr(2);
407	0					0	$ret .= " atoms:\n";
408	0					0	for my $a (@{$self->{atoms}}) { $ret .= $a->print(2) }
	0					0
	0					0
409	0					0	$ret .= " bonds:\n";
410	0					0	for my $b (@{$self->{bonds}}) { $ret .= $b->print(2) }
	0					0
	0					0
411	0					0	$ret;
412							}
413
414							=item $s = $mol->sprintf($format)
415
416							Format interesting molecular information in a concise way, as specified by
417							a printf-like format.
418
419							%n - name
420							%f - formula
421							%f{formula with format} - (note: right braces within
422							the format should be escaped with a backslash)
423							%s - SMILES representation
424							%S - canonical SMILES representation
425							%m - mass
426							%8.3m - mass, formatted as %8.3f with core sprintf
427							%q - formal charge
428							%a - atom count
429							%b - bond count
430							%t - type
431							%i - id
432							%% - %
433
434							For example, if you want just about everything:
435
436							$mol->sprintf("%s - %n (%f). %a atoms, %b bonds; "
437							. "mass=%m; charge =%q; type=%t; id=%i");
438
439							Note that you have to C before using C<%s> or
440							C<%S> on C<< $mol->sprintf >>.
441
442							=cut
443
444							sub sprintf {
445	0			0	1	0	my ($mol, $format) = @_;
446	16			16		116	no warnings 'uninitialized'; # don't care if some properties are undefined
	16					35
	16					34072
447	0		0			0	$format \|\|= "%f";
448	0					0	$format =~ s/%%/\\%/g; # escape %% with a \
449	0					0	$format =~ s/(?formula($1)/eg; # %f{}
	0					0
450	0					0	$format =~ s/(?formula/eg; # %f
	0					0
451	0					0	$format =~ s/(?print(format=>'smiles')/eg; # %s
	0					0
452	0					0	$format =~ s/(?print(format=>'smiles', unique => 1)/eg; # %s
	0					0
453	0					0	$format =~ s/(?name/eg; # %n
	0					0
454	0					0	$format =~ s/(?
455	0	0				0	$1 ? sprintf "%$1f", $mol->mass : $mol->mass/eg; # %m
456	0					0	$format =~ s/(?charge/eg; # %q
	0					0
457	0					0	$format =~ s/(?atoms/eg; # %a
	0					0
458	0					0	$format =~ s/(?bonds/eg; # %b
	0					0
459	0					0	$format =~ s/(?type/eg; # %t
	0					0
460	0					0	$format =~ s/(?id/eg; # %i
	0					0
461	0					0	$format =~ s/\\(.)/$1/g; # other \ escapes
462	0					0	$format;
463							}
464
465							=item $mol->printf($format)
466
467							Same as C<< $mol->sprintf >>, but prints to standard output automatically.
468							Used for quick and dirty molecular information dumping.
469
470							=cut
471
472							sub printf {
473	0			0	1	0	my ($mol, $format) = @_;
474	0					0	print $mol->sprintf($format);
475							}
476
477							=item Chemistry::Mol->parse($string, option => value...)
478
479							Parse the molecule encoded in C<$string>. The format should be specified
480							with the the C option; otherwise, it will be guessed.
481
482							=cut
483
484							sub parse {
485	14			14	1	8053	my $self = shift;
486	14					21	my $s = shift;
487	14					38	my %opts = (mol_class => $self, @_);
488
489	14	50				33	if ($opts{format}) {
490	14					29	return $self->formats($opts{format})->parse_string($s, %opts);
491							} else {
492	0					0	croak "Parse does not support autodetection yet.",
493							"Please specify a format.";
494							}
495	0					0	return;
496							}
497
498							=item Chemistry::Mol->read($fname, option => value ...)
499
500							Read a file and return a list of Mol objects, or croaks if there was a problem.
501							The type of file will be guessed if not specified via the C option.
502
503							Note that only registered file readers will be used. Readers may be registered
504							using C; modules that include readers (such as
505							L) usually register them automatically when they are
506							loaded.
507
508							Automatic decompression of gzipped files is supported if the L
509							module is installed. Files ending in .gz are assumed to be compressed;
510							otherwise it is possible to force decompression by passing the gzip => 1
511							option (or no decompression with gzip => 0).
512
513							=cut
514
515							sub read_mol { # for backwards compatibility
516	0			0	0	0	my ($fname, $type) = shift;
517	0					0	__PACKAGE__->read($fname, format => $type);
518							}
519
520							sub read {
521	11			11	1	51683	my $self = shift;
522	11					24	my $fname = shift;
523	11					43	my %opts = (mol_class => $self, @_);
524
525	11	100				42	if ($opts{format}) {
526	3					12	return $self->formats($opts{format})->parse_file($fname, %opts);
527							} else { # guess format
528	8					31	for my $type ($self->formats) {
529	8	50				22	if ($self->formats($type)->file_is($fname)) {
530	8					26	return $self->formats($type)->parse_file($fname, %opts);
531							}
532							}
533							}
534	0					0	croak "Couldn't guess format of file '$fname'";
535							}
536
537							=item $mol->write($fname, option => value ...)
538
539							Write a molecule file, or croak if there was a problem. The type of file will
540							be guessed if not specified via the C option.
541
542							Note that only registered file formats will be used.
543
544							Automatic gzip compression is supported if the IO::Zlib module is installed.
545							Files ending in .gz are assumed to be compressed; otherwise it is possible to
546							force compression by passing the gzip => 1 option (or no compression with gzip
547							=> 0). Specific compression levels between 2 (fastest) and 9 (most compressed)
548							may also be used (e.g., gzip => 9).
549
550							=cut
551
552							sub write {
553	3			3	1	572	my ($self, $fname, %opts) = (@_);
554
555	3	100				11	if ($opts{format}) {
556	2					7	return $self->formats($opts{format})->write_file(@_);
557							} else { # guess format
558	1					4	for my $type ($self->formats) {
559	1	50				3	if ($self->formats($type)->name_is($fname)) {
560	1					5	return $self->formats($type)->write_file(@_);
561							}
562							}
563							}
564	0					0	croak "Couldn't guess format for writing file '$fname'";
565							}
566
567							=item Chemistry::Mol->file($file, option => value ...)
568
569							Create a L-derived object for reading or writing to a file.
570							The object can then be used to read the molecules or other information in the
571							file.
572
573							This has more flexibility than calling C<< Chemistry::Mol->read >> when
574							dealing with multi-molecule files or files that have higher structure or that
575							have information that does not belong to the molecules themselves. For
576							example, a reaction file may have a list of molecules, but also general
577							information like the reaction name, yield, etc. as well as the classification
578							of the molecules as reactants or products. The exact information that is
579							available will depend on the file reader class that is being used. The
580							following is a hypothetical example for reading MDL rxnfiles.
581
582							# assuming this module existed...
583							use Chemistry::File::Rxn;
584
585							my $rxn = Chemistry::Mol->file('test.rxn');
586							$rxn->read;
587							$name = $rxn->name;
588							@reactants = $rxn->reactants; # mol objects
589							@products = $rxn->products;
590							$yield = $rxn->yield; # a number
591
592							Note that only registered file readers will be used. Readers may be registered
593							using register_format(); modules that include readers (such as
594							Chemistry::File::PDB) usually register them automatically.
595
596							=cut
597
598							sub file {
599	1			1	1	4	my ($self, $file, %opts) = @_;
600	1					6	%opts = (mol_class => $self, %opts);
601
602	1	50				5	if ($opts{format}) {
603	0					0	return $self->formats($opts{format})->new(file => $file,
604							opts => \%opts);
605							} else { # guess format
606	1					5	for my $type ($self->formats) {
607	1	50				5	if ($self->formats($type)->file_is($file)) {
608	1					15	return $self->formats($type)->new(file => $file,
609							opts => \%opts);
610							}
611							}
612							}
613	0					0	croak "Couldn't guess format of file '$file'";
614							}
615
616							=item Chemistry::Mol->register_format($name, $ref)
617
618							Register a file type. The identifier $name must be unique. $ref is either a
619							class name (a package) or an object that complies with the L
620							interface (e.g., a subclass of Chemistry::File). If $ref is omitted, the
621							calling package is used automatically. More than one format can be registered
622							at a time, but then $ref must be included for each format (e.g.,
623							Chemistry::Mol->register_format(format1 => "package1", format2 => package2).
624
625							The typical user doesn't have to care about this function. It is used
626							automatically by molecule file I/O modules.
627
628							=cut
629
630							sub register_format {
631	14			14	1	36	my $class = shift;
632	14	100				83	if (@_ == 1) {
633	3					9	$FILE_FORMATS{$_[0]} = caller;
634	3					8	return;
635							}
636	11					67	my %opts = @_;
637	11					90	$FILE_FORMATS{$_} = $opts{$_} for keys %opts;
638							}
639
640							=item Chemistry::Mol->formats
641
642							Returns a list of the file formats that have been installed by
643							register_format()
644
645							=cut
646
647							sub formats {
648	68			68	1	95	my $self = shift;
649	68	100				131	if (@_) {
650	58					95	my ($type) = @_;
651	58					91	my $file_class = $FILE_FORMATS{$type};
652	58	50				117	unless ($file_class) {
653	0					0	croak "No class installed for type '$type'";
654							}
655	58					368	return $file_class;
656							} else {
657	10					61	return sort keys %FILE_FORMATS;
658							}
659							}
660
661							=item $mol->mass
662
663							Return the molar mass. This is just the sum of the masses of the atoms. See
664							L::mass for details such as the handling of isotopes.
665
666							=cut
667
668							sub mass {
669	2			2	1	9	my ($self) = @_;
670	2					5	my $mass = 0;
671	2					5	for my $atom ($self->atoms) {
672	6					12	$mass += $atom->mass;
673							}
674	2					13	$mass;
675							}
676
677							=item $mol->charge
678
679							Return the charge of the molecule. By default it returns the sum of the formal
680							charges of the atoms. However, it is possible to set an arbitrary charge by
681							calling C<< $mol->charge($new_charge) >>
682
683							=cut
684
685							sub charge {
686	0			0	1	0	my ($self) = shift;
687	0	0				0	if (@_) {
688	0					0	$self->{charge} = shift;
689	0					0	$self;
690							} else {
691	0	0				0	return $self->{charge} if defined $self->{charge};
692	0					0	my $charge = 0;
693	0		0			0	$charge += $_->formal_charge \|\| 0 for $self->atoms;
694	0					0	$charge;
695							}
696							}
697
698							=item $mol->formula_hash
699
700							Returns a hash reference describing the molecular formula. For methane it would
701							return { C => 1, H => 4 }.
702
703							=cut
704
705							sub formula_hash {
706	17			17	1	25	my ($self) = @_;
707	17					26	my $formula = {};
708	17					26	for my $atom ($self->atoms) {
709	538					782	$formula->{$atom->symbol}++;
710	538	50				756	$formula->{H} += $atom->hydrogens if $atom->hydrogens;
711							}
712	17					37	$formula;
713							}
714
715							=item $mol->formula($format)
716
717							Returns a string with the formula. The format can be specified as a printf-like
718							string with the control sequences specified in the L
719							documentation.
720
721							=cut
722
723							sub formula {
724	5			5	1	1562	my ($self, $format) = @_;
725	5					406	require Chemistry::File::Formula;
726	5					15	$self->print(format => "formula", formula_format => $format);
727							}
728
729							=item my $mol2 = $mol->clone;
730
731							Makes a copy of a molecule. Note that this is a B copy; if your molecule
732							has a pointer to the rest of the universe, the entire universe will be cloned!
733
734							By default, clone() uses L to copy the Perl data structure. L
735							can be used instead by setting variable C<$Chemistry::Mol::clone_backend> to
736							C (default is C). The documentation of Storable claims L
737							is less memory-intensive.
738
739							=cut
740
741							sub clone {
742	9			9	1	754	my ($self) = @_;
743	9					17	my $clone;
744	9	100				26	if ($clone_backend eq "Storable") {
		50
745	8					1309	$clone = dclone $self;
746	8	50				116	$clone->_weaken if Storable->VERSION < 2.14;
747							} elsif ($clone_backend eq "Clone") {
748	1					383	require Clone;
749	1					2150	$clone = Clone::clone $self;
750							} else {
751	0					0	croak "Unknown clone backend '$clone_backend'";
752							}
753	9					38	$clone;
754							}
755
756							=item my $mol2 = $mol->safe_clone;
757
758							Like clone, it makes a deep copy of a molecule. The difference is that the copy
759							is not "exact" in that new molecule and its atoms and bonds get assigned new
760							IDs. This makes it safe to combine cloned molecules. For example, this is an
761							error:
762
763							# XXX don't try this at home!
764							my $mol2 = Chemistry::Mol->combine($mol1, $mol1);
765							# the atoms in $mol1 will clash
766
767							But this is ok:
768
769							# the "safe clone" of $mol1 will have new IDs
770							my $mol2 = Chemistry::Mol->combine($mol1, $mol1->safe_clone);
771
772							=cut
773
774							sub safe_clone {
775	1			1	1	2	my ($mol) = @_;
776	1					5	my $clone = $mol->clone;
777	1					3	for ($clone, $clone->atoms, $clone->bonds) {
778	4					13	$_->id($_->nextID);
779							}
780	1					21	$clone;
781							}
782
783							sub _weaken {
784	14			14		45	my ($self) = @_;
785	14					59	for ($self->atoms, $self->bonds) {
786	196					358	$_->_weaken;
787							}
788	14					33	$self;
789							}
790
791							=item ($distance, $atom_here, $atom_there) = $mol->distance($obj)
792
793							Returns the minimum distance to $obj, which can be an atom, a molecule, or a
794							vector. In scalar context it returns only the distance; in list context it
795							also returns the atoms involved. The current implementation for calculating
796							the minimum distance between two molecules compares every possible pair of
797							atoms, so it's not efficient for large molecules.
798
799							=cut
800
801							sub distance {
802	0			0	1	0	my ($self, $other) = @_;
803	0	0				0	if ($other->isa("Chemistry::Mol")) {
		0
		0
804	0					0	my @atoms = $self->atoms;
805	0	0				0	my $atom = shift @atoms or return; # need at least one atom
806	0					0	my $closest_here = $atom;
807	0					0	my ($min_length, $closest_there) = $atom->distance($other);
808	0					0	for $atom (@atoms) {
809	0					0	my ($d, $o) = $atom->distance($other);
810	0	0				0	if ($d < $min_length) {
811	0					0	($min_length, $closest_there, $closest_here) = ($d, $o, $atom);
812							}
813							}
814							return wantarray ?
815	0	0				0	($min_length, $closest_here, $closest_there) : $min_length;
816							} elsif ($other->isa("Chemistry::Atom")) {
817	0					0	return $other->distance($self);
818							} elsif ($other->isa("Math::VectorReal")) {
819	0					0	return Chemistry::Atom->new(coords => $other)->distance($self);
820							}
821							}
822
823							=item my $bigmol = Chemistry::Mol->combine($mol1, $mol2, ...)
824
825							=item $mol1->combine($mol2, $mol3, ...)
826
827							Combines several molecules in one bigger molecule. If called as a class method,
828							as in the first example, it returns a new combined molecule without altering
829							any of the parameters. If called as an instance method, as in the second
830							example, all molecules are combined into $mol1 (but $mol2, $mol3, ...) are not
831							altered. B: Make sure you don't combine molecules which contain atoms
832							with duplicate IDs (for example, if they were cloned).
833
834							=cut
835
836							# joins several molecules into one
837							sub combine {
838	2			2	1	519	my ($self, @others) = @_;
839	2					4	my $mol;
840	2	100				5	if (ref $self) {
841	1					3	$mol = $self;
842							} else {
843	1					3	$mol = $self->new;
844							}
845	2					8	for my $other (@others) {
846	3					7	my $mol2 = $other->clone;
847	3					7	for my $atom ($mol2->atoms) {
848	12					18	$mol->add_atom($atom);
849							}
850	3					6	for my $bond ($mol2->bonds) {
851	9					15	$mol->add_bond($bond);
852							}
853							}
854	2					7	$mol;
855							}
856
857							=item my @mols = $mol->separate
858
859							Separates a molecule into "connected fragments". The original object is not
860							modified; the fragments are clones of the original ones. Example: if you have
861							ethane (H3CCH3) and you delete the C-C bond, you have two CH3 radicals within
862							one molecule object ($mol). When you call $mol->separate you get two molecules,
863							each one with a CH3.
864
865							=cut
866
867							# splits a molecule into connected fragments
868							# returns a list of molecules. Does not touch the original copy.
869							sub separate {
870	1			1	1	546	my ($self) = @_;
871	1					12	$self = $self->clone;
872	1					2	$self->{_paint_tab} = {};
873	1					2	my $color = 0;
874	1					3	for my $atom ($self->atoms) {
875	8	100				17	next if defined $self->{_paint_tab}{$atom->id};
876	2					6	$self->_paint($atom, $color++);
877							}
878	1					4	my @mols;
879	1					6	push @mols, $self->new for (1 .. $color);
880	1					6	for my $atom ($self->atoms) {
881	8					16	$mols[$self->{_paint_tab}{$atom->id}]->add_atom($atom);
882							}
883	1					2	for my $bond ($self->bonds) {
884	6					9	$mols[$self->{_paint_tab}{$bond->id}]->add_bond($bond);
885							}
886	1					8	@mols;
887							}
888
889							# this method fills the _paint_tab attribute for every atom connected
890							# to the given start atom $atom with $color. Used for separating
891							# connected fragments. Uses a depth-first search
892							sub _paint {
893	14			14		20	my ($self, $atom, $color) = @_;
894	14	100				47	return if defined $self->{_paint_tab}{$atom->id};
895	8					19	$self->{_paint_tab}{$atom->id} = $color;
896	8					16	$self->{_paint_tab}{$_->id} = $color for ($atom->bonds);
897	8					16	for my $neighbor ($atom->neighbors) {
898	12					20	$self->_paint($neighbor, $color);
899							}
900							}
901
902							=item $mol->sprout_hydrogens
903
904							Convert all the implicit hydrogen atoms in the molecule to explicit atoms.
905							It does B generate coordinates for the atoms.
906
907							=cut
908
909							sub sprout_hydrogens {
910	1			1	1	3	my ($self) = @_;
911	1					4	$_->sprout_hydrogens for $self->atoms;
912							}
913
914							=item $mol->collapse_hydrogens
915
916							Convert all the explicit hydrogen atoms in the molecule to implicit hydrogens.
917							(Exception: hydrogen atoms that are adjacent to a hydrogen atom are not
918							collapsed.)
919
920							=cut
921
922							sub collapse_hydrogens {
923	1			1	1	4	my ($self) = @_;
924	1					4	for my $atom (grep { $_->symbol ne 'H' } $self->atoms) {
	3					11
925	1					14	$atom->collapse_hydrogens;
926							}
927							}
928
929							=item $mol->add_implicit_hydrogens
930
931							Use heuristics to figure out how many implicit hydrogens should each atom in
932							the molecule have to satisfy its normal "organic" valence.
933
934							=cut
935
936							sub add_implicit_hydrogens {
937	1			1	1	5	my ($self) = @_;
938	1					3	$_->add_implicit_hydrogens for $self->atoms;
939							}
940
941
942							my %DESCRIPTORS = ();
943
944							=item Chemistry::Mol->register_descriptor($name => $sub_ref)
945
946							Adds a callback that can be used to add functionality to the molecule class
947							(originally meant to add custom molecule descriptors.) A descriptor is a
948							function that takes a molecule object as its only argument and returns a value
949							or values. For example, to add a descriptor function that computes the number
950							of atoms:
951
952							Chemistry::Mol->register_descriptor(
953							number_of_atoms => sub {
954							my $mol = shift;
955							return scalar $mol->atoms;
956							}
957							);
958
959							The descriptor is accessed by name via the C instance method:
960
961							my $n = $mol->descriptor('number_of_atoms');
962
963							=cut
964
965							sub register_descriptor {
966	1			1	1	450	my ($self, %opts) = @_;
967	1					7	$DESCRIPTORS{$_} = $opts{$_} for keys %opts;
968							}
969
970							=item my $value = $mol->descriptor($descriptor_name)
971
972							Calls a previously registered descriptor function giving it $mol as an
973							argument, as shown above for C.
974
975							=cut
976
977							sub descriptor {
978	1			1	1	7	my ($self, $descriptor) = @_;
979	1	50				4	my $sub = $DESCRIPTORS{$descriptor}
980							or croak "unknown descriptor '$descriptor'";
981	1					5	return $sub->($self);
982							}
983
984							1;
985
986							=back
987
988							=head1 SOURCE CODE REPOSITORY
989
990							L
991
992							=head1 SEE ALSO
993
994							L, L, L,
995							L
996
997							=head1 AUTHOR
998
999							Ivan Tubert-Brohman Eitub@cpan.orgE
1000
1001							=head1 COPYRIGHT
1002
1003							Copyright (c) 2005 Ivan Tubert-Brohman. All rights reserved. This program is
1004							free software; you can redistribute it and/or modify it under the same terms as
1005							Perl itself.
1006
1007							=cut
1008