File Coverage

blib/lib/Chemistry/Mol.pm
Criterion Covered Total %
statement 263 355 74.0
branch 46 78 58.9
condition 2 7 28.5
subroutine 50 60 83.3
pod 37 45 82.2
total 398 545 73.0


line stmt bran cond sub pod time code
1             package Chemistry::Mol;
2              
3             our $VERSION = '0.40'; # VERSION
4             # $Id$
5              
6             =head1 NAME
7              
8             Chemistry::Mol - Molecule object toolkit
9              
10             =head1 SYNOPSIS
11              
12             use Chemistry::Mol;
13              
14             $mol = Chemistry::Mol->new(id => "mol_id", name => "my molecule");
15             $c = $mol->new_atom(symbol => "C", coords => [0,0,0]);
16             $o = $mol->new_atom(symbol => "O", coords => [0,0,1.23]);
17             $mol->new_bond(atoms => [$c, $o], order => 3);
18              
19             print $mol->print;
20              
21             =head1 DESCRIPTION
22              
23             This package, along with Chemistry::Atom and Chemistry::Bond, includes basic
24             objects and methods to describe molecules.
25              
26             The core methods try not to enforce a particular convention. This means that
27             only a minimal set of attributes is provided by default, and some attributes
28             have very loosely defined meaning. This is because each program and file type
29             has different idea of what each concept (such as bond and atom type) means.
30             Bonds are defined as a list of atoms (typically two) with an arbitrary type.
31             Atoms are defined by a symbol and a Z, and may have 3D and internal coordinates
32             (2D coming soon).
33              
34             =cut
35              
36 16     16   616958 use 5.006;
  16         109  
37 16     16   121 use strict;
  16         60  
  16         457  
38 16     16   74 use warnings;
  16         46  
  16         986  
39 16     16   10557 use Chemistry::Atom;
  16         58  
  16         1158  
40 16     16   10496 use Chemistry::Bond;
  16         57  
  16         567  
41 16     16   114 use Carp;
  16         29  
  16         1197  
42 16     16   98 use base qw(Chemistry::Obj Exporter);
  16         31  
  16         2476  
43 16     16   10936 use Set::Object qw(set);
  16         171256  
  16         1153  
44 16     16   10831 use Storable 'dclone';
  16         77545  
  16         48573  
45              
46             our @EXPORT_OK = qw(read_mol);
47             our @EXPORT = ();
48             our %EXPORT_TAGS = (
49             all => [@EXPORT, @EXPORT_OK],
50             );
51              
52             our $clone_backend = 'Storable';
53              
54             my %FILE_FORMATS = ();
55              
56             =head1 METHODS
57              
58             See also L for generic attributes.
59              
60             =over 4
61              
62             =item Chemistry::Mol->new(name => value, ...)
63              
64             Create a new Mol object with the specified attributes.
65              
66             $mol = Chemistry::Mol->new(id => 'm123', name => 'my mol')
67              
68             is the same as
69              
70             Chemistry::Mol->new()
71             $mol->id('m123')
72             $mol->name('my mol')
73              
74             =cut
75              
76             sub new {
77 26     26 1 441565 my $class = shift;
78 26         79 my %args = @_;
79 26   66     83 my $self = bless {
80             id => $class->nextID,
81             byId => {},
82             atoms => [],
83             bonds => [],
84             name => "",
85             }, ref $class || $class;
86 26         91 $self->$_($args{$_}) for (keys %args);
87 26         92 return $self;
88             }
89              
90             my $N = 0; # molecule ID counter
91 27     27 0 280 sub nextID { "mol".++$N; }
92 0     0 0 0 sub reset_id { $N = 0; }
93 0     0 0 0 sub next_id { $N = $_[1] }
94              
95             =item $mol->add_atom($atom, ...)
96              
97             Add one or more Atom objects to the molecule. Returns the last atom added.
98              
99             =cut
100              
101             sub add_atom {
102 552     552 1 1440 my $self = shift;
103 552         1005 for my $atom (@_){
104             #if ($self->by_id($atom->id)) {
105             #croak "Duplicate ID when adding atom '$atom' to mol '$self'";
106             #}
107 553         829 push @{$self->{atoms}}, $atom;
  553         1697  
108 553         1470 $self->{byId}{$atom->id} = $atom;
109 553         1400 $atom->parent($self);
110             }
111 552         1730 $_[-1];
112             }
113              
114             sub add_atom_np {
115 0     0 0 0 my $self = shift;
116 0         0 for my $atom (@_){
117 0         0 push @{$self->{atoms}}, $atom;
  0         0  
118 0         0 $self->{byId}{$atom->id} = $atom;
119             }
120 0         0 $_[-1];
121             }
122              
123             =item $mol->atom_class
124              
125             Returns the atom class that a molecule or molecule class expects to use by
126             default. L objects return "Chemistry::Atom", but subclasses
127             will likely override this method.
128              
129             =cut
130              
131             sub atom_class {
132 14     14 1 65 "Chemistry::Atom";
133             }
134              
135             =item $mol->new_atom(name => value, ...)
136              
137             Shorthand for C<< $mol->add_atom($mol->atom_class->new(name => value, ...)) >>.
138              
139             =cut
140              
141             sub new_atom {
142 14     14 1 67 my $self = shift;
143 14         36 $self->add_atom($self->atom_class->new(@_));
144             }
145              
146             =item $mol->delete_atom($atom, ...)
147              
148             Deletes an atom from the molecule. It automatically deletes all the bonds in
149             which the atom participates as well. $atom should be a Chemistry::Atom
150             reference. This method also accepts the atom index, but this use is deprecated
151             (and buggy if multiple indices are given, unless they are in descending order).
152              
153             =cut
154              
155             sub delete_atom {
156 2     2 1 2 my $self = shift;
157 2         5 for my $i (@_) {
158 2         2 my ($atom);
159 2 100       6 if (ref $i) {
160 1         2 $atom = $i;
161             } else {
162 1 50       3 $atom = $self->atoms($i)
163             or croak "$self->delete_atom: no such atom $i\n";
164             }
165 2         7 $atom->delete($i);
166             }
167             }
168              
169             # takes an atom ref to delete and optionally the atom index
170             # 1) deletes bonds that belonged to atom
171             # 2) deletes atom
172             sub _delete_atom {
173 7     7   16 my ($self, $atom) = @_;
174 7 50       23 my $index = $self->get_atom_index($atom)
175             or croak "$self->delete_atom: no such atom $atom\n";
176 7         31 my $id = $atom->id;
177 7         25 $self->delete_bond($atom->bonds);
178 7         44 delete $self->{byId}{$id};
179 7         27 splice @{$self->{atoms}}, $index - 1, 1;
  7         29  
180             }
181              
182             =item $mol->add_bond($bond, ...)
183              
184             Add one or more Bond objects to the molecule. Returns the last bond added.
185              
186             =cut
187              
188             sub add_bond {
189 25     25 1 40 my $self = shift;
190 25         68 for my $bond (@_){
191             #if ($self->by_id($bond->id)) {
192             #croak "Duplicate ID when adding bond '$bond' to mol '$self'";
193             #}
194 25         51 push @{$self->{bonds}}, $bond;
  25         75  
195 25         143 $self->{byId}{$bond->id} = $bond;
196 25 100       91 if ($bond->{deleted}) {
197 1         4 $_->add_bond($bond) for $bond->atoms;
198 1         4 $bond->{deleted} = 0;
199             }
200 25         82 $bond->parent($self);
201             }
202 25         74 $_[-1];
203             }
204              
205             sub add_bond_np {
206 0     0 0 0 my $self = shift;
207 0         0 for my $bond (@_){
208 0         0 push @{$self->{bonds}}, $bond;
  0         0  
209 0         0 $self->{byId}{$bond->id} = $bond;
210             }
211 0         0 $_[-1];
212             }
213              
214             =item $mol->bond_class
215              
216             Returns the bond class that a molecule or molecule class expects to use by
217             default. L objects return "Chemistry::Bond", but subclasses
218             will likely override this method.
219              
220             =cut
221              
222             sub bond_class {
223 8     8 1 59 "Chemistry::Bond";
224             }
225              
226             =item $mol->new_bond(name => value, ...)
227              
228             Shorthand for C<< $mol->add_bond($mol->bond_class->new(name => value, ...)) >>.
229              
230             =cut
231              
232             sub new_bond {
233 8     8 1 26 my $self = shift;
234 8         31 $self->add_bond($self->bond_class->new(@_));
235             }
236              
237             sub get_bond_index {
238 14     14 0 69 my ($self, $bond) = @_;
239 14         26 my $i;
240 14         30 for ($self->bonds) {
241 42         58 ++$i;
242 42 100       94 return $i if ($_ eq $bond);
243             }
244 0         0 undef;
245             }
246              
247             sub get_atom_index {
248 7     7 0 14 my ($self, $atom) = @_;
249 7         11 my $i;
250 7         19 for ($self->atoms) {
251 12         18 ++$i;
252 12 100       36 return $i if ($_ eq $atom);
253             }
254 0         0 undef;
255             }
256              
257             =item $mol->delete_bond($bond, ...)
258              
259             Deletes a bond from the molecule. $bond should be a L object.
260              
261             =cut
262              
263             # mol deletes bond
264             # bond tells atoms involved to forget about it
265              
266             sub delete_bond {
267 7     7 1 13 my $self = shift;
268 7         14 for my $i (@_){
269 11         14 my ($bond);
270 11 50       30 if (ref $i) {
271 11         20 $bond = $i;
272             } else {
273 0 0       0 $bond = $self->bonds($i)
274             or croak "$self->delete_bond($i): no such bond $i\n";
275             }
276 11         30 $bond->delete;
277             }
278             }
279              
280             sub _delete_bond {
281 14     14   49 my ($self, $bond) = @_;
282 14 50       38 my $index = $self->get_bond_index($bond)
283             #or croak "$self->delete_bond: no such bond $bond\n";
284             or return;
285 14         67 my $id = $bond->id;
286 14         80 delete $self->{byId}{$id};
287 14         72 splice @{$self->{bonds}}, $index - 1, 1;
  14         46  
288 14         47 $bond->delete_atoms;
289             }
290              
291             =item $mol->by_id($id)
292              
293             Return the atom or bond object with the corresponding id.
294              
295             =cut
296              
297             sub by_id {
298 3     3 1 6 my $self = shift;
299 3         7 my ($id) = @_;
300 3         17 $self->{byId}{$id};
301             }
302              
303             sub _change_id {
304 4     4   11 my ($self, $old_id, $new_id) = @_;
305 4         16 my $ref = $self->{byId}{$old_id};
306 4         12 $self->{byId}{$new_id} = $ref;
307 4         15 delete $self->{byId}{$old_id};
308             }
309              
310             =item $mol->atoms($n1, ...)
311              
312             Returns the atoms with the given indices, or all by default.
313             Indices start from one, not from zero.
314              
315             =cut
316              
317             sub atoms {
318 93     93 1 5818 my $self = shift;
319 93 100       266 if (@_) {
320 18         41 my @ats = map {$_ - 1} @_;
  24         66  
321 18         30 @{$self->{atoms}}[@ats];
  18         104  
322             } else {
323 75         111 @{$self->{atoms}};
  75         473  
324             }
325             }
326              
327             =item $mol->atoms_by_name($name)
328              
329             Returns the atoms with the given name (treated as an anchored regular
330             expression).
331              
332             =cut
333              
334             sub atoms_by_name {
335 1     1 1 2 my $self = shift;
336 1         15 my $re = qr/^$_[0]$/;
337 16     16   193 no warnings;
  16         36  
  16         10047  
338 1         3 my @ret = grep {$_->name =~ $re} $self->atoms;
  2         5  
339 1 50       5 wantarray ? @ret : $ret[0];
340             }
341              
342             =item $mol->sort_atoms($sub_ref)
343              
344             Sort the atoms in the molecule by using the comparison function given in
345             $sub_ref. This function should take two atoms as parameters and return -1, 0,
346             or 1 depending on whether the first atom should go before, same, or after the
347             second atom. For example, to sort by atomic number, you could use the
348             following:
349              
350             $mol->sort_atoms( sub { $_[0]->Z <=> $_[1]->Z } );
351              
352             Note that the atoms are passed as parameters and not as the package variables
353             $a and $b like the core sort function does. This is because $mol->sort will
354             likely be called from another package and we don't want to play with another
355             package's symbol table.
356              
357             =cut
358              
359             sub sort_atoms {
360 0     0 1 0 my ($self, $sub) = @_;
361 0         0 my @a = $self->atoms;
362 0         0 @a = sort { $sub->($a,$b) } @a;
  0         0  
363 0         0 $self->{atoms} = \@a;
364 0         0 $self;
365             }
366              
367             =item $mol->bonds($n1, ...)
368              
369             Returns the bonds with the given indices, or all by default.
370             Indices start from one, not from zero.
371              
372             =cut
373              
374             sub bonds {
375 50     50 1 1796 my $self = shift;
376 50 100       114 if (@_) {
377 6         15 my @bonds = map {$_ - 1} @_;
  6         19  
378 6         9 @{$self->{bonds}}[@bonds];
  6         39  
379             } else {
380 44         127 @{$self->{bonds}};
  44         165  
381             }
382             }
383              
384             =item $mol->print(option => value...)
385              
386             Convert the molecule to a string representation. If no options are given,
387             a default YAML-like format is used (this may change in the future). Otherwise,
388             the format should be specified by using the C option.
389              
390             =cut
391              
392             sub print {
393 19     19 1 969 my $self = shift;
394 19         79 my (%opts) = @_;
395 19         33 my $ret;
396 19         77 local $" = ""; #"
397              
398 19 50       73 if ($opts{format}) {
399 19         71 return $self->formats($opts{format})->write_string($self, %opts);
400             }
401             # else use default printout
402 0         0 $ret = <
403             $self->{id}:
404             name: $self->{name}
405             END
406 0         0 $ret .= " attr:\n";
407 0         0 $ret .= $self->print_attr(2);
408 0         0 $ret .= " atoms:\n";
409 0         0 for my $a (@{$self->{atoms}}) { $ret .= $a->print(2) }
  0         0  
  0         0  
410 0         0 $ret .= " bonds:\n";
411 0         0 for my $b (@{$self->{bonds}}) { $ret .= $b->print(2) }
  0         0  
  0         0  
412 0         0 $ret;
413             }
414              
415             =item $s = $mol->sprintf($format)
416              
417             Format interesting molecular information in a concise way, as specified by
418             a printf-like format.
419              
420             %n - name
421             %f - formula
422             %f{formula with format} - (note: right braces within
423             the format should be escaped with a backslash)
424             %s - SMILES representation
425             %S - canonical SMILES representation
426             %m - mass
427             %8.3m - mass, formatted as %8.3f with core sprintf
428             %q - formal charge
429             %a - atom count
430             %b - bond count
431             %t - type
432             %i - id
433             %% - %
434              
435             For example, if you want just about everything:
436              
437             $mol->sprintf("%s - %n (%f). %a atoms, %b bonds; "
438             . "mass=%m; charge =%q; type=%t; id=%i");
439              
440             Note that you have to C before using C<%s> or
441             C<%S> on C<< $mol->sprintf >>.
442              
443             =cut
444              
445             sub sprintf {
446 0     0 1 0 my ($mol, $format) = @_;
447 16     16   136 no warnings 'uninitialized'; # don't care if some properties are undefined
  16         42  
  16         53364  
448 0   0     0 $format ||= "%f";
449 0         0 $format =~ s/%%/\\%/g; # escape %% with a \
450 0         0 $format =~ s/(?formula($1)/eg; # %f{}
  0         0  
451 0         0 $format =~ s/(?formula/eg; # %f
  0         0  
452 0         0 $format =~ s/(?print(format=>'smiles')/eg; # %s
  0         0  
453 0         0 $format =~ s/(?print(format=>'smiles', unique => 1)/eg; # %s
  0         0  
454 0         0 $format =~ s/(?name/eg; # %n
  0         0  
455 0         0 $format =~ s/(?
456 0 0       0 $1 ? sprintf "%$1f", $mol->mass : $mol->mass/eg; # %m
457 0         0 $format =~ s/(?charge/eg; # %q
  0         0  
458 0         0 $format =~ s/(?atoms/eg; # %a
  0         0  
459 0         0 $format =~ s/(?bonds/eg; # %b
  0         0  
460 0         0 $format =~ s/(?type/eg; # %t
  0         0  
461 0         0 $format =~ s/(?id/eg; # %i
  0         0  
462 0         0 $format =~ s/\\(.)/$1/g; # other \ escapes
463 0         0 $format;
464             }
465              
466             =item $mol->printf($format)
467              
468             Same as C<< $mol->sprintf >>, but prints to standard output automatically.
469             Used for quick and dirty molecular information dumping.
470              
471             =cut
472              
473             sub printf {
474 0     0 1 0 my ($mol, $format) = @_;
475 0         0 print $mol->sprintf($format);
476             }
477              
478             =item Chemistry::Mol->parse($string, option => value...)
479              
480             Parse the molecule encoded in C<$string>. The format should be specified
481             with the the C option; otherwise, it will be guessed.
482              
483             =cut
484              
485             sub parse {
486 14     14 1 427999 my $self = shift;
487 14         33 my $s = shift;
488 14         58 my %opts = (mol_class => $self, @_);
489              
490 14 50       46 if ($opts{format}) {
491 14         48 return $self->formats($opts{format})->parse_string($s, %opts);
492             } else {
493 0         0 croak "Parse does not support autodetection yet.",
494             "Please specify a format.";
495             }
496 0         0 return;
497             }
498              
499             =item Chemistry::Mol->read($fname, option => value ...)
500              
501             Read a file and return a list of Mol objects, or croaks if there was a problem.
502             The type of file will be guessed if not specified via the C option.
503              
504             Note that only registered file readers will be used. Readers may be registered
505             using C; modules that include readers (such as
506             L) usually register them automatically when they are
507             loaded.
508              
509             Automatic decompression of gzipped files is supported if the L
510             module is installed. Files ending in .gz are assumed to be compressed;
511             otherwise it is possible to force decompression by passing the gzip => 1
512             option (or no decompression with gzip => 0).
513              
514             =cut
515              
516             sub read_mol { # for backwards compatibility
517 0     0 0 0 my ($fname, $type) = shift;
518 0         0 __PACKAGE__->read($fname, format => $type);
519             }
520              
521             sub read {
522 15     15 1 2167180 my $self = shift;
523 15         39 my $fname = shift;
524 15         72 my %opts = (mol_class => $self, @_);
525              
526 15 100       75 if ($opts{format}) {
527 5         56 return $self->formats($opts{format})->parse_file($fname, %opts);
528             } else { # guess format
529 10         49 for my $type ($self->formats) {
530 10 100       34 if ($self->formats($type)->file_is($fname)) {
531 9         33 return $self->formats($type)->parse_file($fname, %opts);
532             }
533             }
534             }
535 1         27 croak "Couldn't guess format of file '$fname'";
536             }
537              
538             =item $mol->write($fname, option => value ...)
539              
540             Write a molecule file, or croak if there was a problem. The type of file will
541             be guessed if not specified via the C option.
542              
543             Note that only registered file formats will be used.
544              
545             Automatic gzip compression is supported if the IO::Zlib module is installed.
546             Files ending in .gz are assumed to be compressed; otherwise it is possible to
547             force compression by passing the gzip => 1 option (or no compression with gzip
548             => 0). Specific compression levels between 2 (fastest) and 9 (most compressed)
549             may also be used (e.g., gzip => 9).
550              
551             =cut
552              
553             sub write {
554 4     4 1 1745 my ($self, $fname, %opts) = (@_);
555              
556 4 100       19 if ($opts{format}) {
557 2         9 return $self->formats($opts{format})->write_file(@_);
558             } else { # guess format
559 2         9 for my $type ($self->formats) {
560 2 100       7 if ($self->formats($type)->name_is($fname)) {
561 1         3 return $self->formats($type)->write_file(@_);
562             }
563             }
564             }
565 1         16 croak "Couldn't guess format for writing file '$fname'";
566             }
567              
568             =item Chemistry::Mol->file($file, option => value ...)
569              
570             Create a L-derived object for reading or writing to a file.
571             The object can then be used to read the molecules or other information in the
572             file.
573              
574             This has more flexibility than calling C<< Chemistry::Mol->read >> when
575             dealing with multi-molecule files or files that have higher structure or that
576             have information that does not belong to the molecules themselves. For
577             example, a reaction file may have a list of molecules, but also general
578             information like the reaction name, yield, etc. as well as the classification
579             of the molecules as reactants or products. The exact information that is
580             available will depend on the file reader class that is being used. The
581             following is a hypothetical example for reading MDL rxnfiles.
582              
583             # assuming this module existed...
584             use Chemistry::File::Rxn;
585              
586             my $rxn = Chemistry::Mol->file('test.rxn');
587             $rxn->read;
588             $name = $rxn->name;
589             @reactants = $rxn->reactants; # mol objects
590             @products = $rxn->products;
591             $yield = $rxn->yield; # a number
592              
593             Note that only registered file readers will be used. Readers may be registered
594             using register_format(); modules that include readers (such as
595             Chemistry::File::PDB) usually register them automatically.
596              
597             =cut
598              
599             sub file {
600 1     1 1 2 my ($self, $file, %opts) = @_;
601 1         3 %opts = (mol_class => $self, %opts);
602              
603 1 50       4 if ($opts{format}) {
604 0         0 return $self->formats($opts{format})->new(file => $file,
605             opts => \%opts);
606             } else { # guess format
607 1         3 for my $type ($self->formats) {
608 1 50       1 if ($self->formats($type)->file_is($file)) {
609 1         3 return $self->formats($type)->new(file => $file,
610             opts => \%opts);
611             }
612             }
613             }
614 0         0 croak "Couldn't guess format of file '$file'";
615             }
616              
617             =item Chemistry::Mol->register_format($name, $ref)
618              
619             Register a file type. The identifier $name must be unique. $ref is either a
620             class name (a package) or an object that complies with the L
621             interface (e.g., a subclass of Chemistry::File). If $ref is omitted, the
622             calling package is used automatically. More than one format can be registered
623             at a time, but then $ref must be included for each format (e.g.,
624             Chemistry::Mol->register_format(format1 => "package1", format2 => package2).
625              
626             The typical user doesn't have to care about this function. It is used
627             automatically by molecule file I/O modules.
628              
629             =cut
630              
631             sub register_format {
632 14     14 1 45 my $class = shift;
633 14 100       104 if (@_ == 1) {
634 3         20 $FILE_FORMATS{$_[0]} = caller;
635 3         18 return;
636             }
637 11         65 my %opts = @_;
638 11         93 $FILE_FORMATS{$_} = $opts{$_} for keys %opts;
639             }
640              
641             =item Chemistry::Mol->formats
642              
643             Returns a list of the file formats that have been installed by
644             register_format()
645              
646             =cut
647              
648             sub formats {
649 77     77 1 138 my $self = shift;
650 77 100       186 if (@_) {
651 64         140 my ($type) = @_;
652 64         142 my $file_class = $FILE_FORMATS{$type};
653 64 100       182 unless ($file_class) {
654 1         13 croak "No class installed for type '$type'";
655             }
656 63         582 return $file_class;
657             } else {
658 13         77 return sort keys %FILE_FORMATS;
659             }
660             }
661              
662             =item $mol->mass
663              
664             Return the molar mass. This is just the sum of the masses of the atoms. See
665             L::mass for details such as the handling of isotopes.
666              
667             =cut
668              
669             sub mass {
670 2     2 1 6 my ($self) = @_;
671 2         3 my $mass = 0;
672 2         3 for my $atom ($self->atoms) {
673 6         9 $mass += $atom->mass;
674             }
675 2         9 $mass;
676             }
677              
678             =item $mol->charge
679              
680             Return the charge of the molecule. By default it returns the sum of the formal
681             charges of the atoms. However, it is possible to set an arbitrary charge by
682             calling C<< $mol->charge($new_charge) >>
683              
684             =cut
685              
686             sub charge {
687 0     0 1 0 my ($self) = shift;
688 0 0       0 if (@_) {
689 0         0 $self->{charge} = shift;
690 0         0 $self;
691             } else {
692 0 0       0 return $self->{charge} if defined $self->{charge};
693 0         0 my $charge = 0;
694 0   0     0 $charge += $_->formal_charge || 0 for $self->atoms;
695 0         0 $charge;
696             }
697             }
698              
699             =item $mol->formula_hash
700              
701             Returns a hash reference describing the molecular formula. For methane it would
702             return { C => 1, H => 4 }.
703              
704             =cut
705              
706             sub formula_hash {
707 17     17 1 32 my ($self) = @_;
708 17         33 my $formula = {};
709 17         66 for my $atom ($self->atoms) {
710 538         1147 $formula->{$atom->symbol}++;
711 538 50       1114 $formula->{H} += $atom->hydrogens if $atom->hydrogens;
712             }
713 17         68 $formula;
714             }
715              
716             =item $mol->formula($format)
717              
718             Returns a string with the formula. The format can be specified as a printf-like
719             string with the control sequences specified in the L
720             documentation.
721              
722             =cut
723              
724             sub formula {
725 5     5 1 2824 my ($self, $format) = @_;
726 5         725 require Chemistry::File::Formula;
727 5         25 $self->print(format => "formula", formula_format => $format);
728             }
729              
730             =item my $mol2 = $mol->clone;
731              
732             Makes a copy of a molecule. Note that this is a B copy; if your molecule
733             has a pointer to the rest of the universe, the entire universe will be cloned!
734              
735             By default, clone() uses L to copy the Perl data structure. L
736             can be used instead by setting variable C<$Chemistry::Mol::clone_backend> to
737             C (default is C). The documentation of Storable claims L
738             is less memory-intensive.
739              
740             =cut
741              
742             sub clone {
743 9     9 1 1202 my ($self) = @_;
744 9         16 my $clone;
745 9 100       33 if ($clone_backend eq "Storable") {
    50          
746 8         2895 $clone = dclone $self;
747 8 50       170 $clone->_weaken if Storable->VERSION < 2.14;
748             } elsif ($clone_backend eq "Clone") {
749 1         762 require Clone;
750 1         1085 $clone = Clone::clone $self;
751             } else {
752 0         0 croak "Unknown clone backend '$clone_backend'";
753             }
754 9         52 $clone;
755             }
756              
757             =item my $mol2 = $mol->safe_clone;
758              
759             Like clone, it makes a deep copy of a molecule. The difference is that the copy
760             is not "exact" in that new molecule and its atoms and bonds get assigned new
761             IDs. This makes it safe to combine cloned molecules. For example, this is an
762             error:
763              
764             # XXX don't try this at home!
765             my $mol2 = Chemistry::Mol->combine($mol1, $mol1);
766             # the atoms in $mol1 will clash
767              
768             But this is ok:
769              
770             # the "safe clone" of $mol1 will have new IDs
771             my $mol2 = Chemistry::Mol->combine($mol1, $mol1->safe_clone);
772              
773             =cut
774              
775             sub safe_clone {
776 1     1 1 3 my ($mol) = @_;
777 1         2 my $clone = $mol->clone;
778 1         19 for ($clone, $clone->atoms, $clone->bonds) {
779 4         17 $_->id($_->nextID);
780             }
781 1         22 $clone;
782             }
783              
784             sub _weaken {
785 15     15   47 my ($self) = @_;
786 15         65 for ($self->atoms, $self->bonds) {
787 211         495 $_->_weaken;
788             }
789 15         62 $self;
790             }
791              
792             =item ($distance, $atom_here, $atom_there) = $mol->distance($obj)
793              
794             Returns the minimum distance to $obj, which can be an atom, a molecule, or a
795             vector. In scalar context it returns only the distance; in list context it
796             also returns the atoms involved. The current implementation for calculating
797             the minimum distance between two molecules compares every possible pair of
798             atoms, so it's not efficient for large molecules.
799              
800             =cut
801              
802             sub distance {
803 0     0 1 0 my ($self, $other) = @_;
804 0 0       0 if ($other->isa("Chemistry::Mol")) {
    0          
    0          
805 0         0 my @atoms = $self->atoms;
806 0 0       0 my $atom = shift @atoms or return; # need at least one atom
807 0         0 my $closest_here = $atom;
808 0         0 my ($min_length, $closest_there) = $atom->distance($other);
809 0         0 for $atom (@atoms) {
810 0         0 my ($d, $o) = $atom->distance($other);
811 0 0       0 if ($d < $min_length) {
812 0         0 ($min_length, $closest_there, $closest_here) = ($d, $o, $atom);
813             }
814             }
815             return wantarray ?
816 0 0       0 ($min_length, $closest_here, $closest_there) : $min_length;
817             } elsif ($other->isa("Chemistry::Atom")) {
818 0         0 return $other->distance($self);
819             } elsif ($other->isa("Math::VectorReal")) {
820 0         0 return Chemistry::Atom->new(coords => $other)->distance($self);
821             }
822             }
823              
824             =item my $bigmol = Chemistry::Mol->combine($mol1, $mol2, ...)
825              
826             =item $mol1->combine($mol2, $mol3, ...)
827              
828             Combines several molecules in one bigger molecule. If called as a class method,
829             as in the first example, it returns a new combined molecule without altering
830             any of the parameters. If called as an instance method, as in the second
831             example, all molecules are combined into $mol1 (but $mol2, $mol3, ...) are not
832             altered. B: Make sure you don't combine molecules which contain atoms
833             with duplicate IDs (for example, if they were cloned).
834              
835             =cut
836              
837             # joins several molecules into one
838             sub combine {
839 2     2 1 867 my ($self, @others) = @_;
840 2         4 my $mol;
841 2 100       9 if (ref $self) {
842 1         3 $mol = $self;
843             } else {
844 1         6 $mol = $self->new;
845             }
846 2         5 for my $other (@others) {
847 3         12 my $mol2 = $other->clone;
848 3         12 for my $atom ($mol2->atoms) {
849 12         29 $mol->add_atom($atom);
850             }
851 3         9 for my $bond ($mol2->bonds) {
852 9         21 $mol->add_bond($bond);
853             }
854             }
855 2         9 $mol;
856             }
857              
858             =item my @mols = $mol->separate
859              
860             Separates a molecule into "connected fragments". The original object is not
861             modified; the fragments are clones of the original ones. Example: if you have
862             ethane (H3CCH3) and you delete the C-C bond, you have two CH3 radicals within
863             one molecule object ($mol). When you call $mol->separate you get two molecules,
864             each one with a CH3.
865              
866             =cut
867              
868             # splits a molecule into connected fragments
869             # returns a list of molecules. Does not touch the original copy.
870             sub separate {
871 1     1 1 986 my ($self) = @_;
872 1         4 $self = $self->clone;
873 1         5 my $unseen = set($self->atoms);
874 1         30 my $open = set;
875 1         65 my %colors;
876 1         2 my $color = 0;
877 1         7 while (!$unseen->is_null) {
878 2         9 my ($first) = $unseen->members;
879 2         18 $unseen->remove($first);
880 2         10 $open->insert($first);
881 2         8 while (!$open->is_null) {
882 8         246 my ($atom) = $open->members;
883 8         34 $colors{$atom->id} = $color;
884 8         70 $open->remove($atom);
885              
886 8         28 my $neighbors = set($atom->neighbors);
887 8         187 my $newly_opened_atoms = $unseen * $neighbors;
888 8         399 $unseen -= $newly_opened_atoms;
889 8         290 $open += $newly_opened_atoms;
890             }
891 2         77 $color++;
892             }
893              
894 1         3 $color = 0;
895 1         3 my %map;
896 1         4 for my $atom ($self->atoms) {
897 8 100       19 next if exists $map{$colors{$atom->id}};
898 2         8 $map{$colors{$atom->id}} = $color;
899 2         4 $color++;
900             }
901 1         11 $colors{$_} = $map{$colors{$_}} for (keys %colors);
902              
903 1         4 my @mols;
904 1         8 push @mols, $self->new for (0 .. $color-1);
905 1         3 for my $atom ($self->atoms) {
906 8         22 $mols[$colors{$atom->id}]->add_atom($atom);
907             }
908 1         5 for my $bond ($self->bonds) {
909 6         17 my ($atom) = $bond->atoms;
910 6         14 $mols[$colors{$atom->id}]->add_bond($bond);
911             }
912 1         20 @mols;
913             }
914              
915             =item $mol->sprout_hydrogens
916              
917             Convert all the implicit hydrogen atoms in the molecule to explicit atoms.
918             It does B generate coordinates for the atoms.
919              
920             =cut
921              
922             sub sprout_hydrogens {
923 1     1 1 4 my ($self) = @_;
924 1         5 $_->sprout_hydrogens for $self->atoms;
925             }
926              
927             =item $mol->collapse_hydrogens
928              
929             Convert all the explicit hydrogen atoms in the molecule to implicit hydrogens.
930             (Exception: hydrogen atoms that are adjacent to a hydrogen atom are not
931             collapsed.)
932              
933             =cut
934              
935             sub collapse_hydrogens {
936 1     1 1 3 my ($self) = @_;
937 1         3 for my $atom (grep { $_->symbol ne 'H' } $self->atoms) {
  3         5  
938 1         4 $atom->collapse_hydrogens;
939             }
940             }
941              
942             =item $mol->add_implicit_hydrogens
943              
944             Use heuristics to figure out how many implicit hydrogens should each atom in
945             the molecule have to satisfy its normal "organic" valence.
946              
947             =cut
948              
949             sub add_implicit_hydrogens {
950 1     1 1 6 my ($self) = @_;
951 1         6 $_->add_implicit_hydrogens for $self->atoms;
952             }
953              
954              
955             my %DESCRIPTORS = ();
956              
957             =item Chemistry::Mol->register_descriptor($name => $sub_ref)
958              
959             Adds a callback that can be used to add functionality to the molecule class
960             (originally meant to add custom molecule descriptors.) A descriptor is a
961             function that takes a molecule object as its only argument and returns a value
962             or values. For example, to add a descriptor function that computes the number
963             of atoms:
964              
965             Chemistry::Mol->register_descriptor(
966             number_of_atoms => sub {
967             my $mol = shift;
968             return scalar $mol->atoms;
969             }
970             );
971              
972             The descriptor is accessed by name via the C instance method:
973              
974             my $n = $mol->descriptor('number_of_atoms');
975              
976             =cut
977              
978             sub register_descriptor {
979 1     1 1 238803 my ($self, %opts) = @_;
980 1         10 $DESCRIPTORS{$_} = $opts{$_} for keys %opts;
981             }
982              
983             =item my $value = $mol->descriptor($descriptor_name)
984              
985             Calls a previously registered descriptor function giving it $mol as an
986             argument, as shown above for C.
987              
988             =cut
989              
990             sub descriptor {
991 2     2 1 1128 my ($self, $descriptor) = @_;
992 2 100       24 my $sub = $DESCRIPTORS{$descriptor}
993             or croak "unknown descriptor '$descriptor'";
994 1         4 return $sub->($self);
995             }
996              
997             1;
998              
999             =back
1000              
1001             =head1 SOURCE CODE REPOSITORY
1002              
1003             L
1004              
1005             =head1 SEE ALSO
1006              
1007             L, L, L,
1008             L
1009              
1010             =head1 AUTHOR
1011              
1012             Ivan Tubert-Brohman Eitub@cpan.orgE
1013              
1014             =head1 COPYRIGHT
1015              
1016             Copyright (c) 2005 Ivan Tubert-Brohman. All rights reserved. This program is
1017             free software; you can redistribute it and/or modify it under the same terms as
1018             Perl itself.
1019              
1020             =cut
1021