| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Chemistry::File::CML; |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
our $VERSION = '0.11'; # VERSION |
|
4
|
|
|
|
|
|
|
# $Id$ |
|
5
|
|
|
|
|
|
|
|
|
6
|
1
|
|
|
1
|
|
450
|
use base 'Chemistry::File'; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
532
|
|
|
7
|
1
|
|
|
1
|
|
16082
|
use Chemistry::Mol; |
|
|
1
|
|
|
|
|
31329
|
|
|
|
1
|
|
|
|
|
45
|
|
|
8
|
1
|
|
|
1
|
|
604
|
use XML::LibXML; |
|
|
1
|
|
|
|
|
38675
|
|
|
|
1
|
|
|
|
|
7
|
|
|
9
|
1
|
|
|
1
|
|
135
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
18
|
|
|
10
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
691
|
|
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
our $DEBUG = 0; |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Chemistry::File::CML - CML reader |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
use Chemistry::File::CML; |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
# read a molecule |
|
23
|
|
|
|
|
|
|
my $mol = Chemistry::Mol->read('myfile.cml'); |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
=cut |
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
Chemistry::Mol->register_format(cml => __PACKAGE__); |
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
Chemical Markup Language reader. |
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
This module automatically registers the 'cml' format with Chemistry::Mol. |
|
34
|
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
This version only reads some of the information available in CML files. |
|
36
|
|
|
|
|
|
|
It does not read stereochemistry yet, but this is envisaged in future. |
|
37
|
|
|
|
|
|
|
Writing CML files is not implemented yet either. |
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
This module is part of the PerlMol project. |
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
=cut |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
# Parse a CML (Chemical Markup Language) document held in a string.
#
# Arguments:
#   $self - invocant; not consulted by the parser itself
#   $s    - string containing the CML (XML) document
#   %opts - optional overrides:
#             mol_class   class used to create molecules
#                         (default 'Chemistry::Mol')
#             atom_class  default: $mol_class->atom_class
#             bond_class  default: $mol_class->bond_class
#
# Returns the list of molecule objects, one per <molecule> element found.
# A molecule without an <atomArray> child is still returned, but carries
# no atoms or bonds.
#
# Read per atom: id (used as the atom name), elementType, formalCharge,
# hydrogenCount, isotopeNumber and the x3/y3/z3 coordinates.
# Read per bond: atomRefs2 and order.
sub parse_string {
    my ($self, $s, %opts) = @_;

    my $mol_class  = $opts{mol_class}  || 'Chemistry::Mol';
    # The atom and bond classes are resolved here but not consulted further
    # down; the lookups are kept so a mol_class lacking these methods fails
    # exactly as it did before.
    my $atom_class = $opts{atom_class} || $mol_class->atom_class;
    my $bond_class = $opts{bond_class} || $mol_class->bond_class;
    local $_;    # keep the caller's $_ intact

    # CML writes bond orders either numerically or as letters
    # (S = single, D = double, T = triple); anything else counts as 1.
    my %order_for = ( 1 => 1, 2 => 2, 3 => 3, S => 1, D => 2, T => 3 );

    my $cml = XML::LibXML->load_xml( string => $s );
    my $xp  = XML::LibXML::XPathContext->new( $cml );
    $xp->registerNs( 'cml', 'http://www.xml-cml.org/schema' );

    my @cml_molecules = $xp->findnodes( '/cml:cml/cml:molecule' );
    if( !@cml_molecules ) {
        # Some documents use <molecule> itself as the root element
        # instead of wrapping it in <cml>.
        @cml_molecules = $xp->findnodes( '/cml:molecule' );
    }

    my @molecules;
    for my $molecule (@cml_molecules) {
        my $mol = $mol_class->new;
        push @molecules, $mol;

        $mol->name( $molecule->getAttribute( 'id' ) )
            if $molecule->hasAttribute( 'id' );

        my ($atomArray) = $molecule->getChildrenByTagName( 'atomArray' );
        next unless $atomArray; # Skip empty molecules

        my %atom_by_name;       # CML atom id => atom object
        my %hydrogens_by_id;    # atom object id => declared hydrogenCount

        # atomArray
        for my $element ($atomArray->getChildrenByTagName( 'atom' )) {
            # Atoms without an id cannot be referenced by bonds; skip them.
            next unless $element->hasAttribute( 'id' );
            my $id   = $element->getAttribute( 'id' );
            my $atom = $atom_by_name{$id} = $mol->new_atom( name => $id );

            if( $element->hasAttribute( 'elementType' ) ) {
                $atom->symbol( $element->getAttribute( 'elementType' ) );
            }
            if( $element->hasAttribute( 'formalCharge' ) ) {
                $atom->formal_charge( int $element->getAttribute( 'formalCharge' ) );
            }
            if( $element->hasAttribute( 'hydrogenCount' ) ) {
                # Remembered for later: the implicit hydrogen count can only
                # be derived once the bonds (and thus neighbours) are known.
                $hydrogens_by_id{$atom->id} = int $element->getAttribute( 'hydrogenCount' );
            }
            if( $element->hasAttribute( 'isotopeNumber' ) ) {
                $atom->mass_number( int $element->getAttribute( 'isotopeNumber' ) );
            }
            # 3D coordinates are taken only when all three are present.
            if( $element->hasAttribute( 'x3' ) &&
                $element->hasAttribute( 'y3' ) &&
                $element->hasAttribute( 'z3' ) ) {
                # "* 1" turns the attribute strings into numbers.
                $atom->coords( map { $_ * 1 } $element->getAttribute( 'x3' ),
                                              $element->getAttribute( 'y3' ),
                                              $element->getAttribute( 'z3' ) );
            }
        }

        my @bonds;
        my( $bondArray ) = $molecule->getChildrenByTagName( 'bondArray' );
        if( $bondArray ) {
            @bonds = $bondArray->getChildrenByTagName( 'bond' );
        }

        # bondArray
        for my $bond (@bonds) {
            # The raw attribute is kept as the bond type; it is undef when
            # the bond carries no order attribute.
            my $type  = $bond->getAttribute( 'order' );
            my $order = ( defined $type && exists $order_for{$type} )
                      ? $order_for{$type}
                      : 1;
            $mol->new_bond(
                type  => $type,
                atoms => [map { $atom_by_name{$_} } split ' ', $bond->getAttribute( 'atomRefs2' )],
                order => $order,
                (defined $type && $type eq 'A' ? (aromatic => 1) : ()),
            );
        }

        # calculate implicit hydrogens
        for my $id (sort keys %hydrogens_by_id) {
            my $atom = $mol->by_id( $id );
            my $explicit_hydrogens = scalar grep { $_->symbol eq 'H' }
                                                 $atom->neighbors;
            if( $explicit_hydrogens > $hydrogens_by_id{$id} ) {
                # The declared total cannot be smaller than what is
                # explicitly attached; leave this atom untouched.
                warn 'total number of attached hydrogen atoms is ' .
                     "less than the number of explicit hydrogen atoms\n";
                next;
            }
            next if $explicit_hydrogens == $hydrogens_by_id{$id};
            $atom->implicit_hydrogens( $hydrogens_by_id{$id} - $explicit_hydrogens );
        }
    }

    return @molecules;
}
|
139
|
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
# Report whether a file name carries the CML extension.
#
# Arguments:
#   $self  - invocant; not used
#   $fname - file name to check; may be undef
#
# Returns a true value when $fname ends in ".cml" (case-insensitive) and a
# false value otherwise, including when $fname is undef.
sub name_is {
    my ($self, $fname) = @_;
    return unless defined $fname;    # no name, nothing to recognise
    # \z instead of $: "$" would also accept a name ending in ".cml\n".
    return $fname =~ /\.cml\z/i;
}
|
144
|
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
# Report whether a file is to be treated as CML.
#
# Only the file NAME is inspected; the file itself is never opened, so the
# answer is the same extension test that name_is performs.
#
# Arguments:
#   $self  - invocant; not used
#   $fname - file name to check; may be undef
#
# Returns a true value when $fname ends in ".cml" (case-insensitive) and a
# false value otherwise, including when $fname is undef.
sub file_is {
    my ($self, $fname) = @_;
    return unless defined $fname;    # no name, nothing to recognise
    # \z instead of $: "$" would also accept a name ending in ".cml\n".
    return $fname =~ /\.cml\z/i;
}
|
149
|
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
1; |
|
151
|
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
=head1 SOURCE CODE REPOSITORY |
|
153
|
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
L |
|
155
|
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
157
|
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
L |
|
159
|
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
=head1 AUTHOR |
|
161
|
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
Andrius Merkys |
|
163
|
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
=head1 COPYRIGHT |
|
165
|
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
Copyright (c) 2022 Andrius Merkys. All rights reserved. This program is |
|
167
|
|
|
|
|
|
|
free software; you can redistribute it and/or modify it under the same terms as |
|
168
|
|
|
|
|
|
|
Perl itself. |
|
169
|
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
=cut |