File Coverage

blib/lib/Chemistry/Mol.pm

Criterion	Covered	Total	%
statement	239	333	71.7
branch	41	76	53.9
condition	2	7	28.5
subroutine	50	60	83.3
pod	37	45	82.2
total	369	521	70.8

line	stmt	bran	cond	sub	pod	time	code
1							package Chemistry::Mol;
2
3							our $VERSION = '0.38'; # VERSION
4							# $Id$
5
6							=head1 NAME
7
8							Chemistry::Mol - Molecule object toolkit
9
10							=head1 SYNOPSIS
11
12							use Chemistry::Mol;
13
14							$mol = Chemistry::Mol->new(id => "mol_id", name => "my molecule");
15							$c = $mol->new_atom(symbol => "C", coords => [0,0,0]);
16							$o = $mol->new_atom(symbol => "O", coords => [0,0,1.23]);
17							$mol->new_bond(atoms => [$c, $o], order => 3);
18
19							print $mol->print;
20
21							=head1 DESCRIPTION
22
23							This package, along with Chemistry::Atom and Chemistry::Bond, includes basic
24							objects and methods to describe molecules.
25
26							The core methods try not to enforce a particular convention. This means that
27							only a minimal set of attributes is provided by default, and some attributes
28							have very loosely defined meaning. This is because each program and file type
29							has a different idea of what each concept (such as bond and atom type) means.
30							Bonds are defined as a list of atoms (typically two) with an arbitrary type.
31							Atoms are defined by a symbol and a Z, and may have 3D and internal coordinates
32							(2D coming soon).
33
34							=cut
35
36	16			16		319262	use 5.006;
	16					95
37	16			16		82	use strict;
	16					29
	16					483
38	16			16		78	use warnings;
	16					174
	16					562
39	16			16		7286	use Chemistry::Atom;
	16					48
	16					893
40	16			16		8037	use Chemistry::Bond;
	16					39
	16					401
41	16			16		101	use Carp;
	16					27
	16					840
42	16			16		83	use base qw(Chemistry::Obj Exporter);
	16					31
	16					1856
43	16			16		10689	use Storable 'dclone';
	16					48904
	16					20719
44
45							our @EXPORT_OK = qw(read_mol);
46							our @EXPORT = ();
47							our %EXPORT_TAGS = (
48							all => [@EXPORT, @EXPORT_OK],
49							);
50
51
52
53							my %FILE_FORMATS = ();
54
55							=head1 METHODS
56
57							See also L<Chemistry::Obj> for generic attributes.
58
59							=over 4
60
61							=item Chemistry::Mol->new(name => value, ...)
62
63							Create a new Mol object with the specified attributes.
64
65							$mol = Chemistry::Mol->new(id => 'm123', name => 'my mol')
66
67							is the same as
68
69							Chemistry::Mol->new()
70							$mol->id('m123')
71							$mol->name('my mol')
72
73							=cut
74
75							sub new {
76	26			26	1	1463	my $class = shift;
77	26					52	my %args = @_;
78	26		66			61	my $self = bless {
79							id => $class->nextID,
80							byId => {},
81							atoms => [],
82							bonds => [],
83							name => "",
84							}, ref $class \|\| $class;
85	26					89	$self->$_($args{$_}) for (keys %args);
86	26					76	return $self;
87							}
88
89							my $N = 0; # molecule ID counter
90	27			27	0	223	sub nextID { "mol".++$N; }
91	0			0	0	0	sub reset_id { $N = 0; }
92	0			0	0	0	sub next_id { $N = $_[1] }
93
94							=item $mol->add_atom($atom, ...)
95
96							Add one or more Atom objects to the molecule. Returns the last atom added.
97
98							=cut
99
100							sub add_atom {
101	552			552	1	1387	my $self = shift;
102	552					720	for my $atom (@_){
103							#if ($self->by_id($atom->id)) {
104							#croak "Duplicate ID when adding atom '$atom' to mol '$self'";
105							#}
106	553					554	push @{$self->{atoms}}, $atom;
	553					1034
107	553					1023	$self->{byId}{$atom->id} = $atom;
108	553					996	$atom->parent($self);
109							}
110	552					1080	$_[-1];
111							}
112
113							sub add_atom_np {
114	0			0	0	0	my $self = shift;
115	0					0	for my $atom (@_){
116	0					0	push @{$self->{atoms}}, $atom;
	0					0
117	0					0	$self->{byId}{$atom->id} = $atom;
118							}
119	0					0	$_[-1];
120							}
121
122							=item $mol->atom_class
123
124							Returns the atom class that a molecule or molecule class expects to use by
125							default. L<Chemistry::Mol> objects return "Chemistry::Atom", but subclasses
126							will likely override this method.
127
128							=cut
129
130							sub atom_class {
131	14			14	1	45	"Chemistry::Atom";
132							}
133
134							=item $mol->new_atom(name => value, ...)
135
136							Shorthand for C<< $mol->add_atom($mol->atom_class->new(name => value, ...)) >>.
137
138							=cut
139
140							sub new_atom {
141	14			14	1	47	my $self = shift;
142	14					32	$self->add_atom($self->atom_class->new(@_));
143							}
144
145							=item $mol->delete_atom($atom, ...)
146
147							Deletes an atom from the molecule. It automatically deletes all the bonds in
148							which the atom participates as well. $atom should be a Chemistry::Atom
149							reference. This method also accepts the atom index, but this use is deprecated
150							(and buggy if multiple indices are given, unless they are in descending order).
151
152							=cut
153
154							sub delete_atom {
155	2			2	1	4	my $self = shift;
156	2					6	for my $i (@_) {
157	2					4	my ($atom);
158	2	100				6	if (ref $i) {
159	1					2	$atom = $i;
160							} else {
161	1	50				4	$atom = $self->atoms($i)
162							or croak "$self->delete_atom: no such atom $i\n";
163							}
164	2					8	$atom->delete($i);
165							}
166							}
167
168							# takes an atom ref to delete and optionally the atom index
169							# 1) deletes bonds that belonged to atom
170							# 2) deletes atom
171							sub _delete_atom {
172	7			7		16	my ($self, $atom) = @_;
173	7	50				21	my $index = $self->get_atom_index($atom)
174							or croak "$self->delete_atom: no such atom $atom\n";
175	7					28	my $id = $atom->id;
176	7					73	$self->delete_bond($atom->bonds);
177	7					38	delete $self->{byId}{$id};
178	7					19	splice @{$self->{atoms}}, $index - 1, 1;
	7					27
179							}
180
181							=item $mol->add_bond($bond, ...)
182
183							Add one or more Bond objects to the molecule. Returns the last bond added.
184
185							=cut
186
187							sub add_bond {
188	25			25	1	38	my $self = shift;
189	25					46	for my $bond (@_){
190							#if ($self->by_id($bond->id)) {
191							#croak "Duplicate ID when adding bond '$bond' to mol '$self'";
192							#}
193	25					28	push @{$self->{bonds}}, $bond;
	25					51
194	25					73	$self->{byId}{$bond->id} = $bond;
195	25	100				59	if ($bond->{deleted}) {
196	1					4	$_->add_bond($bond) for $bond->atoms;
197	1					4	$bond->{deleted} = 0;
198							}
199	25					84	$bond->parent($self);
200							}
201	25					50	$_[-1];
202							}
203
204							sub add_bond_np {
205	0			0	0	0	my $self = shift;
206	0					0	for my $bond (@_){
207	0					0	push @{$self->{bonds}}, $bond;
	0					0
208	0					0	$self->{byId}{$bond->id} = $bond;
209							}
210	0					0	$_[-1];
211							}
212
213							=item $mol->bond_class
214
215							Returns the bond class that a molecule or molecule class expects to use by
216							default. L<Chemistry::Mol> objects return "Chemistry::Bond", but subclasses
217							will likely override this method.
218
219							=cut
220
221							sub bond_class {
222	8			8	1	59	"Chemistry::Bond";
223							}
224
225							=item $mol->new_bond(name => value, ...)
226
227							Shorthand for C<< $mol->add_bond($mol->bond_class->new(name => value, ...)) >>.
228
229							=cut
230
231							sub new_bond {
232	8			8	1	35	my $self = shift;
233	8					26	$self->add_bond($self->bond_class->new(@_));
234							}
235
236							sub get_bond_index {
237	14			14	0	24	my ($self, $bond) = @_;
238	14					22	my $i;
239	14					32	for ($self->bonds) {
240	42					54	++$i;
241	42	100				84	return $i if ($_ eq $bond);
242							}
243	0					0	undef;
244							}
245
246							sub get_atom_index {
247	7			7	0	14	my ($self, $atom) = @_;
248	7					9	my $i;
249	7					17	for ($self->atoms) {
250	12					20	++$i;
251	12	100				29	return $i if ($_ eq $atom);
252							}
253	0					0	undef;
254							}
255
256							=item $mol->delete_bond($bond, ...)
257
258							Deletes a bond from the molecule. $bond should be a L<Chemistry::Bond> object.
259
260							=cut
261
262							# mol deletes bond
263							# bond tells atoms involved to forget about it
264
265							sub delete_bond {
266	7			7	1	13	my $self = shift;
267	7					17	for my $i (@_){
268	11					13	my ($bond);
269	11	50				25	if (ref $i) {
270	11					14	$bond = $i;
271							} else {
272	0	0				0	$bond = $self->bonds($i)
273							or croak "$self->delete_bond($i): no such bond $i\n";
274							}
275	11					28	$bond->delete;
276							}
277							}
278
279							sub _delete_bond {
280	14			14		29	my ($self, $bond) = @_;
281	14	50				38	my $index = $self->get_bond_index($bond)
282							#or croak "$self->delete_bond: no such bond $bond\n";
283							or return;
284	14					49	my $id = $bond->id;
285	14					50	delete $self->{byId}{$id};
286	14					24	splice @{$self->{bonds}}, $index - 1, 1;
	14					45
287	14					42	$bond->delete_atoms;
288							}
289
290							=item $mol->by_id($id)
291
292							Return the atom or bond object with the corresponding id.
293
294							=cut
295
296							sub by_id {
297	3			3	1	6	my $self = shift;
298	3					7	my ($id) = @_;
299	3					15	$self->{byId}{$id};
300							}
301
302							sub _change_id {
303	4			4		10	my ($self, $old_id, $new_id) = @_;
304	4					8	my $ref = $self->{byId}{$old_id};
305	4					9	$self->{byId}{$new_id} = $ref;
306	4					12	delete $self->{byId}{$old_id};
307							}
308
309							=item $mol->atoms($n1, ...)
310
311							Returns the atoms with the given indices, or all by default.
312							Indices start from one, not from zero.
313
314							=cut
315
316							sub atoms {
317	90			90	1	5838	my $self = shift;
318	90	100				214	if (@_) {
319	18					45	my @ats = map {$_ - 1} @_;
	24					68
320	18					30	@{$self->{atoms}}[@ats];
	18					92
321							} else {
322	72					96	@{$self->{atoms}};
	72					325
323							}
324							}
325
326							=item $mol->atoms_by_name($name)
327
328							Returns the atoms with the given name (treated as an anchored regular
329							expression).
330
331							=cut
332
333							sub atoms_by_name {
334	1			1	1	3	my $self = shift;
335	1					15	my $re = qr/^$_[0]$/;
336	16			16		167	no warnings;
	16					31
	16					7274
337	1					4	my @ret = grep {$_->name =~ $re} $self->atoms;
	2					5
338	1	50				6	wantarray ? @ret : $ret[0];
339							}
340
341							=item $mol->sort_atoms($sub_ref)
342
343							Sort the atoms in the molecule by using the comparison function given in
344							$sub_ref. This function should take two atoms as parameters and return -1, 0,
345							or 1 depending on whether the first atom should go before, same, or after the
346							second atom. For example, to sort by atomic number, you could use the
347							following:
348
349							$mol->sort_atoms( sub { $_[0]->Z <=> $_[1]->Z } );
350
351							Note that the atoms are passed as parameters and not as the package variables
352							$a and $b like the core sort function does. This is because $mol->sort_atoms will
353							likely be called from another package and we don't want to play with another
354							package's symbol table.
355
356							=cut
357
358							sub sort_atoms {
359	0			0	1	0	my ($self, $sub) = @_;
360	0					0	my @a = $self->atoms;
361	0					0	@a = sort { $sub->($a,$b) } @a;
	0					0
362	0					0	$self->{atoms} = \@a;
363	0					0	$self;
364							}
365
366							=item $mol->bonds($n1, ...)
367
368							Returns the bonds with the given indices, or all by default.
369							Indices start from one, not from zero.
370
371							=cut
372
373							sub bonds {
374	49			49	1	673	my $self = shift;
375	49	100				131	if (@_) {
376	6					15	my @bonds = map {$_ - 1} @_;
	6					23
377	6					11	@{$self->{bonds}}[@bonds];
	6					37
378							} else {
379	43					58	@{$self->{bonds}};
	43					142
380							}
381							}
382
383							=item $mol->print(option => value...)
384
385							Convert the molecule to a string representation. If no options are given,
386							a default YAML-like format is used (this may change in the future). Otherwise,
387							the format should be specified by using the C<format> option.
388
389							=cut
390
391							sub print {
392	19			19	1	672	my $self = shift;
393	19					50	my (%opts) = @_;
394	19					22	my $ret;
395	19					40	local $" = ""; #"
396
397	19	50				48	if ($opts{format}) {
398	19					52	return $self->formats($opts{format})->write_string($self, %opts);
399							}
400							# else use default printout
401	0					0	$ret = <
402							$self->{id}:
403							name: $self->{name}
404							END
405	0					0	$ret .= " attr:\n";
406	0					0	$ret .= $self->print_attr(2);
407	0					0	$ret .= " atoms:\n";
408	0					0	for my $a (@{$self->{atoms}}) { $ret .= $a->print(2) }
	0					0
	0					0
409	0					0	$ret .= " bonds:\n";
410	0					0	for my $b (@{$self->{bonds}}) { $ret .= $b->print(2) }
	0					0
	0					0
411	0					0	$ret;
412							}
413
414							=item $s = $mol->sprintf($format)
415
416							Format interesting molecular information in a concise way, as specified by
417							a printf-like format.
418
419							%n - name
420							%f - formula
421							%f{formula with format} - (note: right braces within
422							the format should be escaped with a backslash)
423							%s - SMILES representation
424							%S - canonical SMILES representation
425							%m - mass
426							%8.3m - mass, formatted as %8.3f with core sprintf
427							%q - formal charge
428							%a - atom count
429							%b - bond count
430							%t - type
431							%i - id
432							%% - %
433
434							For example, if you want just about everything:
435
436							$mol->sprintf("%s - %n (%f). %a atoms, %b bonds; "
437							. "mass=%m; charge =%q; type=%t; id=%i");
438
439							Note that you have to C<use Chemistry::File::SMILES> before using C<%s> or
440							C<%S> on C<< $mol->sprintf >>.
441
442							=cut
443
444							sub sprintf {
445	0			0	1	0	my ($mol, $format) = @_;
446	16			16		150	no warnings 'uninitialized'; # don't care if some properties are undefined
	16					35
	16					36067
447	0		0			0	$format \|\|= "%f";
448	0					0	$format =~ s/%%/\\%/g; # escape %% with a \
449	0					0	$format =~ s/(?formula($1)/eg; # %f{}
	0					0
450	0					0	$format =~ s/(?formula/eg; # %f
	0					0
451	0					0	$format =~ s/(?print(format=>'smiles')/eg; # %s
	0					0
452	0					0	$format =~ s/(?print(format=>'smiles', unique => 1)/eg; # %s
	0					0
453	0					0	$format =~ s/(?name/eg; # %n
	0					0
454	0					0	$format =~ s/(?
455	0	0				0	$1 ? sprintf "%$1f", $mol->mass : $mol->mass/eg; # %m
456	0					0	$format =~ s/(?charge/eg; # %q
	0					0
457	0					0	$format =~ s/(?atoms/eg; # %a
	0					0
458	0					0	$format =~ s/(?bonds/eg; # %b
	0					0
459	0					0	$format =~ s/(?type/eg; # %t
	0					0
460	0					0	$format =~ s/(?id/eg; # %i
	0					0
461	0					0	$format =~ s/\\(.)/$1/g; # other \ escapes
462	0					0	$format;
463							}
464
465							=item $mol->printf($format)
466
467							Same as C<< $mol->sprintf >>, but prints to standard output automatically.
468							Used for quick and dirty molecular information dumping.
469
470							=cut
471
472							sub printf {
473	0			0	1	0	my ($mol, $format) = @_;
474	0					0	print $mol->sprintf($format);
475							}
476
477							=item Chemistry::Mol->parse($string, option => value...)
478
479							Parse the molecule encoded in C<$string>. The format should be specified
480							with the C<format> option; otherwise, it will be guessed.
481
482							=cut
483
484							sub parse {
485	14			14	1	6749	my $self = shift;
486	14					25	my $s = shift;
487	14					35	my %opts = (mol_class => $self, @_);
488
489	14	50				36	if ($opts{format}) {
490	14					32	return $self->formats($opts{format})->parse_string($s, %opts);
491							} else {
492	0					0	croak "Parse does not support autodetection yet.",
493							"Please specify a format.";
494							}
495	0					0	return;
496							}
497
498							=item Chemistry::Mol->read($fname, option => value ...)
499
500							Read a file and return a list of Mol objects, or croaks if there was a problem.
501							The type of file will be guessed if not specified via the C<format> option.
502
503							Note that only registered file readers will be used. Readers may be registered
504							using C<register_format()>; modules that include readers (such as
505							L<Chemistry::File::PDB>) usually register them automatically when they are
506							loaded.
507
508							Automatic decompression of gzipped files is supported if the L<Compress::Zlib>
509							module is installed. Files ending in .gz are assumed to be compressed;
510							otherwise it is possible to force decompression by passing the gzip => 1
511							option (or no decompression with gzip => 0).
512
513							=cut
514
515							sub read_mol { # for backwards compatibility
516	0			0	0	0	my ($fname, $type) = shift;
517	0					0	__PACKAGE__->read($fname, format => $type);
518							}
519
520							sub read {
521	11			11	1	52273	my $self = shift;
522	11					30	my $fname = shift;
523	11					58	my %opts = (mol_class => $self, @_);
524
525	11	100				47	if ($opts{format}) {
526	3					13	return $self->formats($opts{format})->parse_file($fname, %opts);
527							} else { # guess format
528	8					37	for my $type ($self->formats) {
529	8	50				29	if ($self->formats($type)->file_is($fname)) {
530	8					31	return $self->formats($type)->parse_file($fname, %opts);
531							}
532							}
533							}
534	0					0	croak "Couldn't guess format of file '$fname'";
535							}
536
537							=item $mol->write($fname, option => value ...)
538
539							Write a molecule file, or croak if there was a problem. The type of file will
540							be guessed if not specified via the C<format> option.
541
542							Note that only registered file formats will be used.
543
544							Automatic gzip compression is supported if the IO::Zlib module is installed.
545							Files ending in .gz are assumed to be compressed; otherwise it is possible to
546							force compression by passing the gzip => 1 option (or no compression with gzip
547							=> 0). Specific compression levels between 2 (fastest) and 9 (most compressed)
548							may also be used (e.g., gzip => 9).
549
550							=cut
551
552							sub write {
553	3			3	1	897	my ($self, $fname, %opts) = (@_);
554
555	3	100				14	if ($opts{format}) {
556	2					8	return $self->formats($opts{format})->write_file(@_);
557							} else { # guess format
558	1					5	for my $type ($self->formats) {
559	1	50				3	if ($self->formats($type)->name_is($fname)) {
560	1					5	return $self->formats($type)->write_file(@_);
561							}
562							}
563							}
564	0					0	croak "Couldn't guess format for writing file '$fname'";
565							}
566
567							=item Chemistry::Mol->file($file, option => value ...)
568
569							Create a L<Chemistry::File>-derived object for reading or writing to a file.
570							The object can then be used to read the molecules or other information in the
571							file.
572
573							This has more flexibility than calling C<< Chemistry::Mol->read >> when
574							dealing with multi-molecule files or files that have higher structure or that
575							have information that does not belong to the molecules themselves. For
576							example, a reaction file may have a list of molecules, but also general
577							information like the reaction name, yield, etc. as well as the classification
578							of the molecules as reactants or products. The exact information that is
579							available will depend on the file reader class that is being used. The
580							following is a hypothetical example for reading MDL rxnfiles.
581
582							# assuming this module existed...
583							use Chemistry::File::Rxn;
584
585							my $rxn = Chemistry::Mol->file('test.rxn');
586							$rxn->read;
587							$name = $rxn->name;
588							@reactants = $rxn->reactants; # mol objects
589							@products = $rxn->products;
590							$yield = $rxn->yield; # a number
591
592							Note that only registered file readers will be used. Readers may be registered
593							using register_format(); modules that include readers (such as
594							Chemistry::File::PDB) usually register them automatically.
595
596							=cut
597
598							sub file {
599	1			1	1	3	my ($self, $file, %opts) = @_;
600	1					3	%opts = (mol_class => $self, %opts);
601
602	1	50				4	if ($opts{format}) {
603	0					0	return $self->formats($opts{format})->new(file => $file,
604							opts => \%opts);
605							} else { # guess format
606	1					4	for my $type ($self->formats) {
607	1	50				2	if ($self->formats($type)->file_is($file)) {
608	1					11	return $self->formats($type)->new(file => $file,
609							opts => \%opts);
610							}
611							}
612							}
613	0					0	croak "Couldn't guess format of file '$file'";
614							}
615
616							=item Chemistry::Mol->register_format($name, $ref)
617
618							Register a file type. The identifier $name must be unique. $ref is either a
619							class name (a package) or an object that complies with the L<Chemistry::File>
620							interface (e.g., a subclass of Chemistry::File). If $ref is omitted, the
621							calling package is used automatically. More than one format can be registered
622							at a time, but then $ref must be included for each format (e.g.,
623							Chemistry::Mol->register_format(format1 => "package1", format2 => "package2")).
624
625							The typical user doesn't have to care about this function. It is used
626							automatically by molecule file I/O modules.
627
628							=cut
629
630							sub register_format {
631	14			14	1	44	my $class = shift;
632	14	100				82	if (@_ == 1) {
633	3					14	$FILE_FORMATS{$_[0]} = caller;
634	3					12	return;
635							}
636	11					48	my %opts = @_;
637	11					90	$FILE_FORMATS{$_} = $opts{$_} for keys %opts;
638							}
639
640							=item Chemistry::Mol->formats
641
642							Returns a list of the file formats that have been installed by
643							register_format()
644
645							=cut
646
647							sub formats {
648	68			68	1	114	my $self = shift;
649	68	100				149	if (@_) {
650	58					105	my ($type) = @_;
651	58					115	my $file_class = $FILE_FORMATS{$type};
652	58	50				133	unless ($file_class) {
653	0					0	croak "No class installed for type '$type'";
654							}
655	58					424	return $file_class;
656							} else {
657	10					68	return sort keys %FILE_FORMATS;
658							}
659							}
660
661							=item $mol->mass
662
663							Return the molar mass. This is just the sum of the masses of the atoms. See
664							L<Chemistry::Atom>::mass for details such as the handling of isotopes.
665
666							=cut
667
668							sub mass {
669	2			2	1	5	my ($self) = @_;
670	2					3	my $mass = 0;
671	2					5	for my $atom ($self->atoms) {
672	6					12	$mass += $atom->mass;
673							}
674	2					9	$mass;
675							}
676
677							=item $mol->charge
678
679							Return the charge of the molecule. By default it returns the sum of the formal
680							charges of the atoms. However, it is possible to set an arbitrary charge by
681							calling C<< $mol->charge($new_charge) >>
682
683							=cut
684
685							sub charge {
686	0			0	1	0	my ($self) = shift;
687	0	0				0	if (@_) {
688	0					0	$self->{charge} = shift;
689	0					0	$self;
690							} else {
691	0	0				0	return $self->{charge} if defined $self->{charge};
692	0					0	my $charge = 0;
693	0		0			0	$charge += $_->formal_charge \|\| 0 for $self->atoms;
694	0					0	$charge;
695							}
696							}
697
698							=item $mol->formula_hash
699
700							Returns a hash reference describing the molecular formula. For methane it would
701							return { C => 1, H => 4 }.
702
703							=cut
704
705							sub formula_hash {
706	17			17	1	30	my ($self) = @_;
707	17					24	my $formula = {};
708	17					33	for my $atom ($self->atoms) {
709	538					786	$formula->{$atom->symbol}++;
710	538	50				766	$formula->{H} += $atom->hydrogens if $atom->hydrogens;
711							}
712	17					38	$formula;
713							}
714
715							=item $mol->formula($format)
716
717							Returns a string with the formula. The format can be specified as a printf-like
718							string with the control sequences specified in the L<Chemistry::File::Formula>
719							documentation.
720
721							=cut
722
723							sub formula {
724	5			5	1	2015	my ($self, $format) = @_;
725	5					608	require Chemistry::File::Formula;
726	5					20	$self->print(format => "formula", formula_format => $format);
727							}
728
729							=item my $mol2 = $mol->clone;
730
731							Makes a copy of a molecule. Note that this is a B<deep> copy; if your molecule
732							has a pointer to the rest of the universe, the entire universe will be cloned!
733
734							=cut
735
736							sub clone {
737	8			8	1	19	my ($self) = @_;
738	8					1429	my $clone = dclone $self;
739	8	50				136	$clone->_weaken if Storable->VERSION < 2.14;
740	8					31	$clone;
741							}
742
743							=item my $mol2 = $mol->safe_clone;
744
745							Like clone, it makes a deep copy of a molecule. The difference is that the copy
746							is not "exact" in that the new molecule and its atoms and bonds get assigned new
747							IDs. This makes it safe to combine cloned molecules. For example, this is an
748							error:
749
750							# XXX don't try this at home!
751							my $mol2 = Chemistry::Mol->combine($mol1, $mol1);
752							# the atoms in $mol1 will clash
753
754							But this is ok:
755
756							# the "safe clone" of $mol1 will have new IDs
757							my $mol2 = Chemistry::Mol->combine($mol1, $mol1->safe_clone);
758
759							=cut
760
761							sub safe_clone {
762	1			1	1	3	my ($mol) = @_;
763	1					5	my $clone = $mol->clone;
764	1					5	for ($clone, $clone->atoms, $clone->bonds) {
765	4					16	$_->id($_->nextID);
766							}
767	1					27	$clone;
768							}
769
770							sub _weaken {
771	14			14		40	my ($self) = @_;
772	14					70	for ($self->atoms, $self->bonds) {
773	196					403	$_->_weaken;
774							}
775	14					48	$self;
776							}
777
778							=item ($distance, $atom_here, $atom_there) = $mol->distance($obj)
779
780							Returns the minimum distance to $obj, which can be an atom, a molecule, or a
781							vector. In scalar context it returns only the distance; in list context it
782							also returns the atoms involved. The current implementation for calculating
783							the minimum distance between two molecules compares every possible pair of
784							atoms, so it's not efficient for large molecules.
785
786							=cut
787
788							sub distance {
789	0			0	1	0	my ($self, $other) = @_;
790	0	0				0	if ($other->isa("Chemistry::Mol")) {
		0
		0
791	0					0	my @atoms = $self->atoms;
792	0	0				0	my $atom = shift @atoms or return; # need at least one atom
793	0					0	my $closest_here = $atom;
794	0					0	my ($min_length, $closest_there) = $atom->distance($other);
795	0					0	for $atom (@atoms) {
796	0					0	my ($d, $o) = $atom->distance($other);
797	0	0				0	if ($d < $min_length) {
798	0					0	($min_length, $closest_there, $closest_here) = ($d, $o, $atom);
799							}
800							}
801							return wantarray ?
802	0	0				0	($min_length, $closest_here, $closest_there) : $min_length;
803							} elsif ($other->isa("Chemistry::Atom")) {
804	0					0	return $other->distance($self);
805							} elsif ($other->isa("Math::VectorReal")) {
806	0					0	return Chemistry::Atom->new(coords => $other)->distance($self);
807							}
808							}
809
810							=item my $bigmol = Chemistry::Mol->combine($mol1, $mol2, ...)
811
812							=item $mol1->combine($mol2, $mol3, ...)
813
814							Combines several molecules in one bigger molecule. If called as a class method,
815							as in the first example, it returns a new combined molecule without altering
816							any of the parameters. If called as an instance method, as in the second
817							example, all molecules are combined into $mol1 (but $mol2, $mol3, ... are not
818							altered). B<Note>: Make sure you don't combine molecules which contain atoms
819							with duplicate IDs (for example, if they were cloned).
820
821							=cut
822
823							# joins several molecules into one
824							sub combine {
825	2			2	1	600	my ($self, @others) = @_;
826	2					19	my $mol;
827	2	100				8	if (ref $self) {
828	1					4	$mol = $self;
829							} else {
830	1					6	$mol = $self->new;
831							}
832	2					5	for my $other (@others) {
833	3					10	my $mol2 = $other->clone;
834	3					8	for my $atom ($mol2->atoms) {
835	12					22	$mol->add_atom($atom);
836							}
837	3					9	for my $bond ($mol2->bonds) {
838	9					16	$mol->add_bond($bond);
839							}
840							}
841	2					5	$mol;
842							}
843
844							=item my @mols = $mol->separate
845
846							Separates a molecule into "connected fragments". The original object is not
847							modified; the fragments are clones of the original ones. Example: if you have
848							ethane (H3CCH3) and you delete the C-C bond, you have two CH3 radicals within
849							one molecule object ($mol). When you call $mol->separate you get two molecules,
850							each one with a CH3.
851
852							=cut
853
854							# splits a molecule into connected fragments
855							# returns a list of molecules. Does not touch the original copy.
856							sub separate {
857	1			1	1	1033	my ($self) = @_;
858	1					3	$self = $self->clone;
859	1					3	$self->{_paint_tab} = {};
860	1					3	my $color = 0;
861	1					11	for my $atom ($self->atoms) {
862	8	100				17	next if defined $self->{_paint_tab}{$atom->id};
863	2					8	$self->_paint($atom, $color++);
864							}
865	1					2	my @mols;
866	1					6	push @mols, $self->new for (1 .. $color);
867	1					3	for my $atom ($self->atoms) {
868	8					14	$mols[$self->{_paint_tab}{$atom->id}]->add_atom($atom);
869							}
870	1					4	for my $bond ($self->bonds) {
871	6					12	$mols[$self->{_paint_tab}{$bond->id}]->add_bond($bond);
872							}
873	1					12	@mols;
874							}
875
876							# this method fills the _paint_tab attribute for every atom connected
877							# to the given start atom $atom with $color. Used for separating
878							# connected fragments. Uses a depth-first search
879							sub _paint {
880	14			14		17	my ($self, $atom, $color) = @_;
881	14	100				23	return if defined $self->{_paint_tab}{$atom->id};
882	8					15	$self->{_paint_tab}{$atom->id} = $color;
883	8					32	$self->{_paint_tab}{$_->id} = $color for ($atom->bonds);
884	8					28	for my $neighbor ($atom->neighbors) {
885	12					23	$self->_paint($neighbor, $color);
886							}
887							}
888
889							=item $mol->sprout_hydrogens
890
891							Convert all the implicit hydrogen atoms in the molecule to explicit atoms.
892							It does B<not> generate coordinates for the atoms.
893
894							=cut
895
896							sub sprout_hydrogens {
897	1			1	1	3	my ($self) = @_;
898	1					2	$_->sprout_hydrogens for $self->atoms;
899							}
900
901							=item $mol->collapse_hydrogens
902
903							Convert all the explicit hydrogen atoms in the molecule to implicit hydrogens.
904							(Exception: hydrogen atoms that are adjacent to a hydrogen atom are not
905							collapsed.)
906
907							=cut
908
909							sub collapse_hydrogens {
910	1			1	1	3	my ($self) = @_;
911	1					3	for my $atom (grep { $_->symbol ne 'H' } $self->atoms) {
	3					6
912	1					9	$atom->collapse_hydrogens;
913							}
914							}
915
916							=item $mol->add_implicit_hydrogens
917
918							Use heuristics to figure out how many implicit hydrogens should each atom in
919							the molecule have to satisfy its normal "organic" valence.
920
921							=cut
922
923							sub add_implicit_hydrogens {
924	1			1	1	6	my ($self) = @_;
925	1					5	$_->add_implicit_hydrogens for $self->atoms;
926							}
927
928
929							my %DESCRIPTORS = ();
930
931							=item Chemistry::Mol->register_descriptor($name => $sub_ref)
932
933							Adds a callback that can be used to add functionality to the molecule class
934							(originally meant to add custom molecule descriptors.) A descriptor is a
935							function that takes a molecule object as its only argument and returns a value
936							or values. For example, to add a descriptor function that computes the number
937							of atoms:
938
939							Chemistry::Mol->register_descriptor(
940							number_of_atoms => sub {
941							my $mol = shift;
942							return scalar $mol->atoms;
943							}
944							);
945
946							The descriptor is accessed by name via the C<descriptor> instance method:
947
948							my $n = $mol->descriptor('number_of_atoms');
949
950							=cut
951
952							sub register_descriptor {
953	1			1	1	488	my ($self, %opts) = @_;
954	1					8	$DESCRIPTORS{$_} = $opts{$_} for keys %opts;
955							}
956
957							=item my $value = $mol->descriptor($descriptor_name)
958
959							Calls a previously registered descriptor function giving it $mol as an
960							argument, as shown above for C<register_descriptor>.
961
962							=cut
963
964							sub descriptor {
965	1			1	1	8	my ($self, $descriptor) = @_;
966	1	50				4	my $sub = $DESCRIPTORS{$descriptor}
967							or croak "unknown descriptor '$descriptor'";
968	1					4	return $sub->($self);
969							}
970
971							1;
972
973							=back
974
975							=head1 SOURCE CODE REPOSITORY
976
977							L
978
979							=head1 SEE ALSO
980
981							L, L, L,
982							L
983
984							=head1 AUTHOR
985
986							Ivan Tubert-Brohman E<lt>itub@cpan.orgE<gt>
987
988							=head1 COPYRIGHT
989
990							Copyright (c) 2005 Ivan Tubert-Brohman. All rights reserved. This program is
991							free software; you can redistribute it and/or modify it under the same terms as
992							Perl itself.
993
994							=cut
995