File Coverage

blib/lib/Chemistry/Mol.pm

Criterion	Covered	Total	%
statement	263	355	74.0
branch	46	78	58.9
condition	2	7	28.5
subroutine	50	60	83.3
pod	37	45	82.2
total	398	545	73.0

line	stmt	bran	cond	sub	pod	time	code
1							package Chemistry::Mol;
2
3							our $VERSION = '0.40'; # VERSION
4							# $Id$
5
6							=head1 NAME
7
8							Chemistry::Mol - Molecule object toolkit
9
10							=head1 SYNOPSIS
11
12							use Chemistry::Mol;
13
14							$mol = Chemistry::Mol->new(id => "mol_id", name => "my molecule");
15							$c = $mol->new_atom(symbol => "C", coords => [0,0,0]);
16							$o = $mol->new_atom(symbol => "O", coords => [0,0,1.23]);
17							$mol->new_bond(atoms => [$c, $o], order => 3);
18
19							print $mol->print;
20
21							=head1 DESCRIPTION
22
23							This package, along with Chemistry::Atom and Chemistry::Bond, includes basic
24							objects and methods to describe molecules.
25
26							The core methods try not to enforce a particular convention. This means that
27							only a minimal set of attributes is provided by default, and some attributes
28							have very loosely defined meaning. This is because each program and file type
29							has different idea of what each concept (such as bond and atom type) means.
30							Bonds are defined as a list of atoms (typically two) with an arbitrary type.
31							Atoms are defined by a symbol and a Z, and may have 3D and internal coordinates
32							(2D coming soon).
33
34							=cut
35
36	16			16		616958	use 5.006;
	16					109
37	16			16		121	use strict;
	16					60
	16					457
38	16			16		74	use warnings;
	16					46
	16					986
39	16			16		10557	use Chemistry::Atom;
	16					58
	16					1158
40	16			16		10496	use Chemistry::Bond;
	16					57
	16					567
41	16			16		114	use Carp;
	16					29
	16					1197
42	16			16		98	use base qw(Chemistry::Obj Exporter);
	16					31
	16					2476
43	16			16		10936	use Set::Object qw(set);
	16					171256
	16					1153
44	16			16		10831	use Storable 'dclone';
	16					77545
	16					48573
45
46							our @EXPORT_OK = qw(read_mol);
47							our @EXPORT = ();
48							our %EXPORT_TAGS = (
49							all => [@EXPORT, @EXPORT_OK],
50							);
51
52							our $clone_backend = 'Storable';
53
54							my %FILE_FORMATS = ();
55
56							=head1 METHODS
57
58							See also L for generic attributes.
59
60							=over 4
61
62							=item Chemistry::Mol->new(name => value, ...)
63
64							Create a new Mol object with the specified attributes.
65
66							$mol = Chemistry::Mol->new(id => 'm123', name => 'my mol')
67
68							is the same as
69
70							Chemistry::Mol->new()
71							$mol->id('m123')
72							$mol->name('my mol')
73
74							=cut
75
76							sub new {
77	26			26	1	441565	my $class = shift;
78	26					79	my %args = @_;
79	26		66			83	my $self = bless {
80							id => $class->nextID,
81							byId => {},
82							atoms => [],
83							bonds => [],
84							name => "",
85							}, ref $class \|\| $class;
86	26					91	$self->$_($args{$_}) for (keys %args);
87	26					92	return $self;
88							}
89
90							my $N = 0; # molecule ID counter
91	27			27	0	280	sub nextID { "mol".++$N; }
92	0			0	0	0	sub reset_id { $N = 0; }
93	0			0	0	0	sub next_id { $N = $_[1] }
94
95							=item $mol->add_atom($atom, ...)
96
97							Add one or more Atom objects to the molecule. Returns the last atom added.
98
99							=cut
100
101							sub add_atom {
102	552			552	1	1440	my $self = shift;
103	552					1005	for my $atom (@_){
104							#if ($self->by_id($atom->id)) {
105							#croak "Duplicate ID when adding atom '$atom' to mol '$self'";
106							#}
107	553					829	push @{$self->{atoms}}, $atom;
	553					1697
108	553					1470	$self->{byId}{$atom->id} = $atom;
109	553					1400	$atom->parent($self);
110							}
111	552					1730	$_[-1];
112							}
113
114							sub add_atom_np {
115	0			0	0	0	my $self = shift;
116	0					0	for my $atom (@_){
117	0					0	push @{$self->{atoms}}, $atom;
	0					0
118	0					0	$self->{byId}{$atom->id} = $atom;
119							}
120	0					0	$_[-1];
121							}
122
123							=item $mol->atom_class
124
125							Returns the atom class that a molecule or molecule class expects to use by
126							default. L objects return "Chemistry::Atom", but subclasses
127							will likely override this method.
128
129							=cut
130
131							sub atom_class {
132	14			14	1	65	"Chemistry::Atom";
133							}
134
135							=item $mol->new_atom(name => value, ...)
136
137							Shorthand for C<< $mol->add_atom($mol->atom_class->new(name => value, ...)) >>.
138
139							=cut
140
141							sub new_atom {
142	14			14	1	67	my $self = shift;
143	14					36	$self->add_atom($self->atom_class->new(@_));
144							}
145
146							=item $mol->delete_atom($atom, ...)
147
148							Deletes an atom from the molecule. It automatically deletes all the bonds in
149							which the atom participates as well. $atom should be a Chemistry::Atom
150							reference. This method also accepts the atom index, but this use is deprecated
151							(and buggy if multiple indices are given, unless they are in descending order).
152
153							=cut
154
155							sub delete_atom {
156	2			2	1	2	my $self = shift;
157	2					5	for my $i (@_) {
158	2					2	my ($atom);
159	2	100				6	if (ref $i) {
160	1					2	$atom = $i;
161							} else {
162	1	50				3	$atom = $self->atoms($i)
163							or croak "$self->delete_atom: no such atom $i\n";
164							}
165	2					7	$atom->delete($i);
166							}
167							}
168
169							# takes an atom ref to delete and optionally the atom index
170							# 1) deletes bonds that belonged to atom
171							# 2) deletes atom
172							sub _delete_atom {
173	7			7		16	my ($self, $atom) = @_;
174	7	50				23	my $index = $self->get_atom_index($atom)
175							or croak "$self->delete_atom: no such atom $atom\n";
176	7					31	my $id = $atom->id;
177	7					25	$self->delete_bond($atom->bonds);
178	7					44	delete $self->{byId}{$id};
179	7					27	splice @{$self->{atoms}}, $index - 1, 1;
	7					29
180							}
181
182							=item $mol->add_bond($bond, ...)
183
184							Add one or more Bond objects to the molecule. Returns the last bond added.
185
186							=cut
187
188							sub add_bond {
189	25			25	1	40	my $self = shift;
190	25					68	for my $bond (@_){
191							#if ($self->by_id($bond->id)) {
192							#croak "Duplicate ID when adding bond '$bond' to mol '$self'";
193							#}
194	25					51	push @{$self->{bonds}}, $bond;
	25					75
195	25					143	$self->{byId}{$bond->id} = $bond;
196	25	100				91	if ($bond->{deleted}) {
197	1					4	$_->add_bond($bond) for $bond->atoms;
198	1					4	$bond->{deleted} = 0;
199							}
200	25					82	$bond->parent($self);
201							}
202	25					74	$_[-1];
203							}
204
205							sub add_bond_np {
206	0			0	0	0	my $self = shift;
207	0					0	for my $bond (@_){
208	0					0	push @{$self->{bonds}}, $bond;
	0					0
209	0					0	$self->{byId}{$bond->id} = $bond;
210							}
211	0					0	$_[-1];
212							}
213
214							=item $mol->bond_class
215
216							Returns the bond class that a molecule or molecule class expects to use by
217							default. L objects return "Chemistry::Bond", but subclasses
218							will likely override this method.
219
220							=cut
221
222							sub bond_class {
223	8			8	1	59	"Chemistry::Bond";
224							}
225
226							=item $mol->new_bond(name => value, ...)
227
228							Shorthand for C<< $mol->add_bond($mol->bond_class->new(name => value, ...)) >>.
229
230							=cut
231
232							sub new_bond {
233	8			8	1	26	my $self = shift;
234	8					31	$self->add_bond($self->bond_class->new(@_));
235							}
236
237							sub get_bond_index {
238	14			14	0	69	my ($self, $bond) = @_;
239	14					26	my $i;
240	14					30	for ($self->bonds) {
241	42					58	++$i;
242	42	100				94	return $i if ($_ eq $bond);
243							}
244	0					0	undef;
245							}
246
247							sub get_atom_index {
248	7			7	0	14	my ($self, $atom) = @_;
249	7					11	my $i;
250	7					19	for ($self->atoms) {
251	12					18	++$i;
252	12	100				36	return $i if ($_ eq $atom);
253							}
254	0					0	undef;
255							}
256
257							=item $mol->delete_bond($bond, ...)
258
259							Deletes a bond from the molecule. $bond should be a L object.
260
261							=cut
262
263							# mol deletes bond
264							# bond tells atoms involved to forget about it
265
266							sub delete_bond {
267	7			7	1	13	my $self = shift;
268	7					14	for my $i (@_){
269	11					14	my ($bond);
270	11	50				30	if (ref $i) {
271	11					20	$bond = $i;
272							} else {
273	0	0				0	$bond = $self->bonds($i)
274							or croak "$self->delete_bond($i): no such bond $i\n";
275							}
276	11					30	$bond->delete;
277							}
278							}
279
280							sub _delete_bond {
281	14			14		49	my ($self, $bond) = @_;
282	14	50				38	my $index = $self->get_bond_index($bond)
283							#or croak "$self->delete_bond: no such bond $bond\n";
284							or return;
285	14					67	my $id = $bond->id;
286	14					80	delete $self->{byId}{$id};
287	14					72	splice @{$self->{bonds}}, $index - 1, 1;
	14					46
288	14					47	$bond->delete_atoms;
289							}
290
291							=item $mol->by_id($id)
292
293							Return the atom or bond object with the corresponding id.
294
295							=cut
296
297							sub by_id {
298	3			3	1	6	my $self = shift;
299	3					7	my ($id) = @_;
300	3					17	$self->{byId}{$id};
301							}
302
303							sub _change_id {
304	4			4		11	my ($self, $old_id, $new_id) = @_;
305	4					16	my $ref = $self->{byId}{$old_id};
306	4					12	$self->{byId}{$new_id} = $ref;
307	4					15	delete $self->{byId}{$old_id};
308							}
309
310							=item $mol->atoms($n1, ...)
311
312							Returns the atoms with the given indices, or all by default.
313							Indices start from one, not from zero.
314
315							=cut
316
317							sub atoms {
318	93			93	1	5818	my $self = shift;
319	93	100				266	if (@_) {
320	18					41	my @ats = map {$_ - 1} @_;
	24					66
321	18					30	@{$self->{atoms}}[@ats];
	18					104
322							} else {
323	75					111	@{$self->{atoms}};
	75					473
324							}
325							}
326
327							=item $mol->atoms_by_name($name)
328
329							Returns the atoms with the given name (treated as an anchored regular
330							expression).
331
332							=cut
333
334							sub atoms_by_name {
335	1			1	1	2	my $self = shift;
336	1					15	my $re = qr/^$_[0]$/;
337	16			16		193	no warnings;
	16					36
	16					10047
338	1					3	my @ret = grep {$_->name =~ $re} $self->atoms;
	2					5
339	1	50				5	wantarray ? @ret : $ret[0];
340							}
341
342							=item $mol->sort_atoms($sub_ref)
343
344							Sort the atoms in the molecule by using the comparison function given in
345							$sub_ref. This function should take two atoms as parameters and return -1, 0,
346							or 1 depending on whether the first atom should go before, same, or after the
347							second atom. For example, to sort by atomic number, you could use the
348							following:
349
350							$mol->sort_atoms( sub { $_[0]->Z <=> $_[1]->Z } );
351
352							Note that the atoms are passed as parameters and not as the package variables
353							$a and $b like the core sort function does. This is because $mol->sort will
354							likely be called from another package and we don't want to play with another
355							package's symbol table.
356
357							=cut
358
359							sub sort_atoms {
360	0			0	1	0	my ($self, $sub) = @_;
361	0					0	my @a = $self->atoms;
362	0					0	@a = sort { $sub->($a,$b) } @a;
	0					0
363	0					0	$self->{atoms} = \@a;
364	0					0	$self;
365							}
366
367							=item $mol->bonds($n1, ...)
368
369							Returns the bonds with the given indices, or all by default.
370							Indices start from one, not from zero.
371
372							=cut
373
374							sub bonds {
375	50			50	1	1796	my $self = shift;
376	50	100				114	if (@_) {
377	6					15	my @bonds = map {$_ - 1} @_;
	6					19
378	6					9	@{$self->{bonds}}[@bonds];
	6					39
379							} else {
380	44					127	@{$self->{bonds}};
	44					165
381							}
382							}
383
384							=item $mol->print(option => value...)
385
386							Convert the molecule to a string representation. If no options are given,
387							a default YAML-like format is used (this may change in the future). Otherwise,
388							the format should be specified by using the C option.
389
390							=cut
391
392							sub print {
393	19			19	1	969	my $self = shift;
394	19					79	my (%opts) = @_;
395	19					33	my $ret;
396	19					77	local $" = ""; #"
397
398	19	50				73	if ($opts{format}) {
399	19					71	return $self->formats($opts{format})->write_string($self, %opts);
400							}
401							# else use default printout
402	0					0	$ret = <
403							$self->{id}:
404							name: $self->{name}
405							END
406	0					0	$ret .= " attr:\n";
407	0					0	$ret .= $self->print_attr(2);
408	0					0	$ret .= " atoms:\n";
409	0					0	for my $a (@{$self->{atoms}}) { $ret .= $a->print(2) }
	0					0
	0					0
410	0					0	$ret .= " bonds:\n";
411	0					0	for my $b (@{$self->{bonds}}) { $ret .= $b->print(2) }
	0					0
	0					0
412	0					0	$ret;
413							}
414
415							=item $s = $mol->sprintf($format)
416
417							Format interesting molecular information in a concise way, as specified by
418							a printf-like format.
419
420							%n - name
421							%f - formula
422							%f{formula with format} - (note: right braces within
423							the format should be escaped with a backslash)
424							%s - SMILES representation
425							%S - canonical SMILES representation
426							%m - mass
427							%8.3m - mass, formatted as %8.3f with core sprintf
428							%q - formal charge
429							%a - atom count
430							%b - bond count
431							%t - type
432							%i - id
433							%% - %
434
435							For example, if you want just about everything:
436
437							$mol->sprintf("%s - %n (%f). %a atoms, %b bonds; "
438							. "mass=%m; charge =%q; type=%t; id=%i");
439
440							Note that you have to C before using C<%s> or
441							C<%S> on C<< $mol->sprintf >>.
442
443							=cut
444
445							sub sprintf {
446	0			0	1	0	my ($mol, $format) = @_;
447	16			16		136	no warnings 'uninitialized'; # don't care if some properties are undefined
	16					42
	16					53364
448	0		0			0	$format \|\|= "%f";
449	0					0	$format =~ s/%%/\\%/g; # escape %% with a \
450	0					0	$format =~ s/(?formula($1)/eg; # %f{}
	0					0
451	0					0	$format =~ s/(?formula/eg; # %f
	0					0
452	0					0	$format =~ s/(?print(format=>'smiles')/eg; # %s
	0					0
453	0					0	$format =~ s/(?print(format=>'smiles', unique => 1)/eg; # %s
	0					0
454	0					0	$format =~ s/(?name/eg; # %n
	0					0
455	0					0	$format =~ s/(?
456	0	0				0	$1 ? sprintf "%$1f", $mol->mass : $mol->mass/eg; # %m
457	0					0	$format =~ s/(?charge/eg; # %q
	0					0
458	0					0	$format =~ s/(?atoms/eg; # %a
	0					0
459	0					0	$format =~ s/(?bonds/eg; # %b
	0					0
460	0					0	$format =~ s/(?type/eg; # %t
	0					0
461	0					0	$format =~ s/(?id/eg; # %i
	0					0
462	0					0	$format =~ s/\\(.)/$1/g; # other \ escapes
463	0					0	$format;
464							}
465
466							=item $mol->printf($format)
467
468							Same as C<< $mol->sprintf >>, but prints to standard output automatically.
469							Used for quick and dirty molecular information dumping.
470
471							=cut
472
473							sub printf {
474	0			0	1	0	my ($mol, $format) = @_;
475	0					0	print $mol->sprintf($format);
476							}
477
478							=item Chemistry::Mol->parse($string, option => value...)
479
480							Parse the molecule encoded in C<$string>. The format should be specified
481							with the the C option; otherwise, it will be guessed.
482
483							=cut
484
485							sub parse {
486	14			14	1	427999	my $self = shift;
487	14					33	my $s = shift;
488	14					58	my %opts = (mol_class => $self, @_);
489
490	14	50				46	if ($opts{format}) {
491	14					48	return $self->formats($opts{format})->parse_string($s, %opts);
492							} else {
493	0					0	croak "Parse does not support autodetection yet.",
494							"Please specify a format.";
495							}
496	0					0	return;
497							}
498
499							=item Chemistry::Mol->read($fname, option => value ...)
500
501							Read a file and return a list of Mol objects, or croaks if there was a problem.
502							The type of file will be guessed if not specified via the C option.
503
504							Note that only registered file readers will be used. Readers may be registered
505							using C; modules that include readers (such as
506							L) usually register them automatically when they are
507							loaded.
508
509							Automatic decompression of gzipped files is supported if the L
510							module is installed. Files ending in .gz are assumed to be compressed;
511							otherwise it is possible to force decompression by passing the gzip => 1
512							option (or no decompression with gzip => 0).
513
514							=cut
515
516							sub read_mol { # for backwards compatibility
517	0			0	0	0	my ($fname, $type) = shift;
518	0					0	__PACKAGE__->read($fname, format => $type);
519							}
520
521							sub read {
522	15			15	1	2167180	my $self = shift;
523	15					39	my $fname = shift;
524	15					72	my %opts = (mol_class => $self, @_);
525
526	15	100				75	if ($opts{format}) {
527	5					56	return $self->formats($opts{format})->parse_file($fname, %opts);
528							} else { # guess format
529	10					49	for my $type ($self->formats) {
530	10	100				34	if ($self->formats($type)->file_is($fname)) {
531	9					33	return $self->formats($type)->parse_file($fname, %opts);
532							}
533							}
534							}
535	1					27	croak "Couldn't guess format of file '$fname'";
536							}
537
538							=item $mol->write($fname, option => value ...)
539
540							Write a molecule file, or croak if there was a problem. The type of file will
541							be guessed if not specified via the C option.
542
543							Note that only registered file formats will be used.
544
545							Automatic gzip compression is supported if the IO::Zlib module is installed.
546							Files ending in .gz are assumed to be compressed; otherwise it is possible to
547							force compression by passing the gzip => 1 option (or no compression with gzip
548							=> 0). Specific compression levels between 2 (fastest) and 9 (most compressed)
549							may also be used (e.g., gzip => 9).
550
551							=cut
552
553							sub write {
554	4			4	1	1745	my ($self, $fname, %opts) = (@_);
555
556	4	100				19	if ($opts{format}) {
557	2					9	return $self->formats($opts{format})->write_file(@_);
558							} else { # guess format
559	2					9	for my $type ($self->formats) {
560	2	100				7	if ($self->formats($type)->name_is($fname)) {
561	1					3	return $self->formats($type)->write_file(@_);
562							}
563							}
564							}
565	1					16	croak "Couldn't guess format for writing file '$fname'";
566							}
567
568							=item Chemistry::Mol->file($file, option => value ...)
569
570							Create a L-derived object for reading or writing to a file.
571							The object can then be used to read the molecules or other information in the
572							file.
573
574							This has more flexibility than calling C<< Chemistry::Mol->read >> when
575							dealing with multi-molecule files or files that have higher structure or that
576							have information that does not belong to the molecules themselves. For
577							example, a reaction file may have a list of molecules, but also general
578							information like the reaction name, yield, etc. as well as the classification
579							of the molecules as reactants or products. The exact information that is
580							available will depend on the file reader class that is being used. The
581							following is a hypothetical example for reading MDL rxnfiles.
582
583							# assuming this module existed...
584							use Chemistry::File::Rxn;
585
586							my $rxn = Chemistry::Mol->file('test.rxn');
587							$rxn->read;
588							$name = $rxn->name;
589							@reactants = $rxn->reactants; # mol objects
590							@products = $rxn->products;
591							$yield = $rxn->yield; # a number
592
593							Note that only registered file readers will be used. Readers may be registered
594							using register_format(); modules that include readers (such as
595							Chemistry::File::PDB) usually register them automatically.
596
597							=cut
598
599							sub file {
600	1			1	1	2	my ($self, $file, %opts) = @_;
601	1					3	%opts = (mol_class => $self, %opts);
602
603	1	50				4	if ($opts{format}) {
604	0					0	return $self->formats($opts{format})->new(file => $file,
605							opts => \%opts);
606							} else { # guess format
607	1					3	for my $type ($self->formats) {
608	1	50				1	if ($self->formats($type)->file_is($file)) {
609	1					3	return $self->formats($type)->new(file => $file,
610							opts => \%opts);
611							}
612							}
613							}
614	0					0	croak "Couldn't guess format of file '$file'";
615							}
616
617							=item Chemistry::Mol->register_format($name, $ref)
618
619							Register a file type. The identifier $name must be unique. $ref is either a
620							class name (a package) or an object that complies with the L
621							interface (e.g., a subclass of Chemistry::File). If $ref is omitted, the
622							calling package is used automatically. More than one format can be registered
623							at a time, but then $ref must be included for each format (e.g.,
624							Chemistry::Mol->register_format(format1 => "package1", format2 => package2).
625
626							The typical user doesn't have to care about this function. It is used
627							automatically by molecule file I/O modules.
628
629							=cut
630
631							sub register_format {
632	14			14	1	45	my $class = shift;
633	14	100				104	if (@_ == 1) {
634	3					20	$FILE_FORMATS{$_[0]} = caller;
635	3					18	return;
636							}
637	11					65	my %opts = @_;
638	11					93	$FILE_FORMATS{$_} = $opts{$_} for keys %opts;
639							}
640
641							=item Chemistry::Mol->formats
642
643							Returns a list of the file formats that have been installed by
644							register_format()
645
646							=cut
647
648							sub formats {
649	77			77	1	138	my $self = shift;
650	77	100				186	if (@_) {
651	64					140	my ($type) = @_;
652	64					142	my $file_class = $FILE_FORMATS{$type};
653	64	100				182	unless ($file_class) {
654	1					13	croak "No class installed for type '$type'";
655							}
656	63					582	return $file_class;
657							} else {
658	13					77	return sort keys %FILE_FORMATS;
659							}
660							}
661
662							=item $mol->mass
663
664							Return the molar mass. This is just the sum of the masses of the atoms. See
665							L::mass for details such as the handling of isotopes.
666
667							=cut
668
669							sub mass {
670	2			2	1	6	my ($self) = @_;
671	2					3	my $mass = 0;
672	2					3	for my $atom ($self->atoms) {
673	6					9	$mass += $atom->mass;
674							}
675	2					9	$mass;
676							}
677
678							=item $mol->charge
679
680							Return the charge of the molecule. By default it returns the sum of the formal
681							charges of the atoms. However, it is possible to set an arbitrary charge by
682							calling C<< $mol->charge($new_charge) >>
683
684							=cut
685
686							sub charge {
687	0			0	1	0	my ($self) = shift;
688	0	0				0	if (@_) {
689	0					0	$self->{charge} = shift;
690	0					0	$self;
691							} else {
692	0	0				0	return $self->{charge} if defined $self->{charge};
693	0					0	my $charge = 0;
694	0		0			0	$charge += $_->formal_charge \|\| 0 for $self->atoms;
695	0					0	$charge;
696							}
697							}
698
699							=item $mol->formula_hash
700
701							Returns a hash reference describing the molecular formula. For methane it would
702							return { C => 1, H => 4 }.
703
704							=cut
705
706							sub formula_hash {
707	17			17	1	32	my ($self) = @_;
708	17					33	my $formula = {};
709	17					66	for my $atom ($self->atoms) {
710	538					1147	$formula->{$atom->symbol}++;
711	538	50				1114	$formula->{H} += $atom->hydrogens if $atom->hydrogens;
712							}
713	17					68	$formula;
714							}
715
716							=item $mol->formula($format)
717
718							Returns a string with the formula. The format can be specified as a printf-like
719							string with the control sequences specified in the L
720							documentation.
721
722							=cut
723
724							sub formula {
725	5			5	1	2824	my ($self, $format) = @_;
726	5					725	require Chemistry::File::Formula;
727	5					25	$self->print(format => "formula", formula_format => $format);
728							}
729
730							=item my $mol2 = $mol->clone;
731
732							Makes a copy of a molecule. Note that this is a B copy; if your molecule
733							has a pointer to the rest of the universe, the entire universe will be cloned!
734
735							By default, clone() uses L to copy the Perl data structure. L
736							can be used instead by setting variable C<$Chemistry::Mol::clone_backend> to
737							C (default is C). The documentation of Storable claims L
738							is less memory-intensive.
739
740							=cut
741
742							sub clone {
743	9			9	1	1202	my ($self) = @_;
744	9					16	my $clone;
745	9	100				33	if ($clone_backend eq "Storable") {
		50
746	8					2895	$clone = dclone $self;
747	8	50				170	$clone->_weaken if Storable->VERSION < 2.14;
748							} elsif ($clone_backend eq "Clone") {
749	1					762	require Clone;
750	1					1085	$clone = Clone::clone $self;
751							} else {
752	0					0	croak "Unknown clone backend '$clone_backend'";
753							}
754	9					52	$clone;
755							}
756
757							=item my $mol2 = $mol->safe_clone;
758
759							Like clone, it makes a deep copy of a molecule. The difference is that the copy
760							is not "exact" in that new molecule and its atoms and bonds get assigned new
761							IDs. This makes it safe to combine cloned molecules. For example, this is an
762							error:
763
764							# XXX don't try this at home!
765							my $mol2 = Chemistry::Mol->combine($mol1, $mol1);
766							# the atoms in $mol1 will clash
767
768							But this is ok:
769
770							# the "safe clone" of $mol1 will have new IDs
771							my $mol2 = Chemistry::Mol->combine($mol1, $mol1->safe_clone);
772
773							=cut
774
775							sub safe_clone {
776	1			1	1	3	my ($mol) = @_;
777	1					2	my $clone = $mol->clone;
778	1					19	for ($clone, $clone->atoms, $clone->bonds) {
779	4					17	$_->id($_->nextID);
780							}
781	1					22	$clone;
782							}
783
784							sub _weaken {
785	15			15		47	my ($self) = @_;
786	15					65	for ($self->atoms, $self->bonds) {
787	211					495	$_->_weaken;
788							}
789	15					62	$self;
790							}
791
792							=item ($distance, $atom_here, $atom_there) = $mol->distance($obj)
793
794							Returns the minimum distance to $obj, which can be an atom, a molecule, or a
795							vector. In scalar context it returns only the distance; in list context it
796							also returns the atoms involved. The current implementation for calculating
797							the minimum distance between two molecules compares every possible pair of
798							atoms, so it's not efficient for large molecules.
799
800							=cut
801
802							sub distance {
803	0			0	1	0	my ($self, $other) = @_;
804	0	0				0	if ($other->isa("Chemistry::Mol")) {
		0
		0
805	0					0	my @atoms = $self->atoms;
806	0	0				0	my $atom = shift @atoms or return; # need at least one atom
807	0					0	my $closest_here = $atom;
808	0					0	my ($min_length, $closest_there) = $atom->distance($other);
809	0					0	for $atom (@atoms) {
810	0					0	my ($d, $o) = $atom->distance($other);
811	0	0				0	if ($d < $min_length) {
812	0					0	($min_length, $closest_there, $closest_here) = ($d, $o, $atom);
813							}
814							}
815							return wantarray ?
816	0	0				0	($min_length, $closest_here, $closest_there) : $min_length;
817							} elsif ($other->isa("Chemistry::Atom")) {
818	0					0	return $other->distance($self);
819							} elsif ($other->isa("Math::VectorReal")) {
820	0					0	return Chemistry::Atom->new(coords => $other)->distance($self);
821							}
822							}
823
824							=item my $bigmol = Chemistry::Mol->combine($mol1, $mol2, ...)
825
826							=item $mol1->combine($mol2, $mol3, ...)
827
828							Combines several molecules in one bigger molecule. If called as a class method,
829							as in the first example, it returns a new combined molecule without altering
830							any of the parameters. If called as an instance method, as in the second
831							example, all molecules are combined into $mol1 (but $mol2, $mol3, ...) are not
832							altered. B: Make sure you don't combine molecules which contain atoms
833							with duplicate IDs (for example, if they were cloned).
834
835							=cut
836
837							# joins several molecules into one
838							sub combine {
839	2			2	1	867	my ($self, @others) = @_;
840	2					4	my $mol;
841	2	100				9	if (ref $self) {
842	1					3	$mol = $self;
843							} else {
844	1					6	$mol = $self->new;
845							}
846	2					5	for my $other (@others) {
847	3					12	my $mol2 = $other->clone;
848	3					12	for my $atom ($mol2->atoms) {
849	12					29	$mol->add_atom($atom);
850							}
851	3					9	for my $bond ($mol2->bonds) {
852	9					21	$mol->add_bond($bond);
853							}
854							}
855	2					9	$mol;
856							}
857
858							=item my @mols = $mol->separate
859
860							Separates a molecule into "connected fragments". The original object is not
861							modified; the fragments are clones of the original ones. Example: if you have
862							ethane (H3CCH3) and you delete the C-C bond, you have two CH3 radicals within
863							one molecule object ($mol). When you call $mol->separate you get two molecules,
864							each one with a CH3.
865
866							=cut
867
868							# splits a molecule into connected fragments
869							# returns a list of molecules. Does not touch the original copy.
870							sub separate {
871	1			1	1	986	my ($self) = @_;
872	1					4	$self = $self->clone;
873	1					5	my $unseen = set($self->atoms);
874	1					30	my $open = set;
875	1					65	my %colors;
876	1					2	my $color = 0;
877	1					7	while (!$unseen->is_null) {
878	2					9	my ($first) = $unseen->members;
879	2					18	$unseen->remove($first);
880	2					10	$open->insert($first);
881	2					8	while (!$open->is_null) {
882	8					246	my ($atom) = $open->members;
883	8					34	$colors{$atom->id} = $color;
884	8					70	$open->remove($atom);
885
886	8					28	my $neighbors = set($atom->neighbors);
887	8					187	my $newly_opened_atoms = $unseen * $neighbors;
888	8					399	$unseen -= $newly_opened_atoms;
889	8					290	$open += $newly_opened_atoms;
890							}
891	2					77	$color++;
892							}
893
894	1					3	$color = 0;
895	1					3	my %map;
896	1					4	for my $atom ($self->atoms) {
897	8	100				19	next if exists $map{$colors{$atom->id}};
898	2					8	$map{$colors{$atom->id}} = $color;
899	2					4	$color++;
900							}
901	1					11	$colors{$_} = $map{$colors{$_}} for (keys %colors);
902
903	1					4	my @mols;
904	1					8	push @mols, $self->new for (0 .. $color-1);
905	1					3	for my $atom ($self->atoms) {
906	8					22	$mols[$colors{$atom->id}]->add_atom($atom);
907							}
908	1					5	for my $bond ($self->bonds) {
909	6					17	my ($atom) = $bond->atoms;
910	6					14	$mols[$colors{$atom->id}]->add_bond($bond);
911							}
912	1					20	@mols;
913							}
914
915							=item $mol->sprout_hydrogens
916
917							Convert all the implicit hydrogen atoms in the molecule to explicit atoms.
918							It does B generate coordinates for the atoms.
919
920							=cut
921
922							sub sprout_hydrogens {
923	1			1	1	4	my ($self) = @_;
924	1					5	$_->sprout_hydrogens for $self->atoms;
925							}
926
927							=item $mol->collapse_hydrogens
928
929							Convert all the explicit hydrogen atoms in the molecule to implicit hydrogens.
930							(Exception: hydrogen atoms that are adjacent to a hydrogen atom are not
931							collapsed.)
932
933							=cut
934
935							sub collapse_hydrogens {
936	1			1	1	3	my ($self) = @_;
937	1					3	for my $atom (grep { $_->symbol ne 'H' } $self->atoms) {
	3					5
938	1					4	$atom->collapse_hydrogens;
939							}
940							}
941
942							=item $mol->add_implicit_hydrogens
943
944							Use heuristics to figure out how many implicit hydrogens should each atom in
945							the molecule have to satisfy its normal "organic" valence.
946
947							=cut
948
949							sub add_implicit_hydrogens {
950	1			1	1	6	my ($self) = @_;
951	1					6	$_->add_implicit_hydrogens for $self->atoms;
952							}
953
954
955							my %DESCRIPTORS = ();
956
957							=item Chemistry::Mol->register_descriptor($name => $sub_ref)
958
959							Adds a callback that can be used to add functionality to the molecule class
960							(originally meant to add custom molecule descriptors.) A descriptor is a
961							function that takes a molecule object as its only argument and returns a value
962							or values. For example, to add a descriptor function that computes the number
963							of atoms:
964
965							Chemistry::Mol->register_descriptor(
966							number_of_atoms => sub {
967							my $mol = shift;
968							return scalar $mol->atoms;
969							}
970							);
971
972							The descriptor is accessed by name via the C instance method:
973
974							my $n = $mol->descriptor('number_of_atoms');
975
976							=cut
977
978							sub register_descriptor {
979	1			1	1	238803	my ($self, %opts) = @_;
980	1					10	$DESCRIPTORS{$_} = $opts{$_} for keys %opts;
981							}
982
983							=item my $value = $mol->descriptor($descriptor_name)
984
985							Calls a previously registered descriptor function giving it $mol as an
986							argument, as shown above for C.
987
988							=cut
989
990							sub descriptor {
991	2			2	1	1128	my ($self, $descriptor) = @_;
992	2	100				24	my $sub = $DESCRIPTORS{$descriptor}
993							or croak "unknown descriptor '$descriptor'";
994	1					4	return $sub->($self);
995							}
996
997							1;
998
999							=back
1000
1001							=head1 SOURCE CODE REPOSITORY
1002
1003							L
1004
1005							=head1 SEE ALSO
1006
1007							L, L, L,
1008							L
1009
1010							=head1 AUTHOR
1011
1012							Ivan Tubert-Brohman Eitub@cpan.orgE
1013
1014							=head1 COPYRIGHT
1015
1016							Copyright (c) 2005 Ivan Tubert-Brohman. All rights reserved. This program is
1017							free software; you can redistribute it and/or modify it under the same terms as
1018							Perl itself.
1019
1020							=cut
1021