line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Chemistry::File::CML; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
our $VERSION = '0.12'; # VERSION |
4
|
|
|
|
|
|
|
# $Id$ |
5
|
|
|
|
|
|
|
|
6
|
1
|
|
|
1
|
|
544
|
use base 'Chemistry::File'; |
|
1
|
|
|
|
|
1
|
|
|
1
|
|
|
|
|
560
|
|
7
|
1
|
|
|
1
|
|
16036
|
use Chemistry::Mol; |
|
1
|
|
|
|
|
34865
|
|
|
1
|
|
|
|
|
64
|
|
8
|
1
|
|
|
1
|
|
723
|
use XML::LibXML; |
|
1
|
|
|
|
|
39505
|
|
|
1
|
|
|
|
|
7
|
|
9
|
1
|
|
|
1
|
|
163
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
21
|
|
10
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
1009
|
|
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
our $DEBUG = 0; |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Chemistry::File::CML - CML reader |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
use Chemistry::File::CML; |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
# read a molecule |
23
|
|
|
|
|
|
|
my $mol = Chemistry::Mol->read('myfile.cml'); |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
=cut |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
Chemistry::Mol->register_format(cml => __PACKAGE__); |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
=head1 DESCRIPTION |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
Chemical Markup Language reader. |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
This module automatically registers the 'cml' format with Chemistry::Mol. |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
This version only reads some of the information available in CML files. |
36
|
|
|
|
|
|
|
It does not read stereochemistry yet, but this is envisaged in future. |
37
|
|
|
|
|
|
|
Writing CML files is not implemented yet too. |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
This module is part of the PerlMol project, L. |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
=cut |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
sub parse_string { |
45
|
2
|
|
|
2
|
1
|
2552
|
my ($self, $s, %opts) = @_; |
46
|
|
|
|
|
|
|
|
47
|
2
|
|
50
|
|
|
14
|
my $mol_class = $opts{mol_class} || 'Chemistry::Mol'; |
48
|
2
|
|
33
|
|
|
15
|
my $atom_class = $opts{atom_class} || $mol_class->atom_class; |
49
|
2
|
|
33
|
|
|
17
|
my $bond_class = $opts{bond_class} || $mol_class->bond_class; |
50
|
2
|
|
|
|
|
9
|
local $_; |
51
|
|
|
|
|
|
|
|
52
|
2
|
|
|
|
|
15
|
my $cml = XML::LibXML->load_xml( string => $s ); |
53
|
2
|
|
|
|
|
761
|
my $xp = XML::LibXML::XPathContext->new( $cml ); |
54
|
2
|
|
|
|
|
22
|
$xp->registerNs( 'cml', 'http://www.xml-cml.org/schema' ); |
55
|
|
|
|
|
|
|
|
56
|
2
|
|
|
|
|
5
|
my @molecules; |
57
|
2
|
|
|
|
|
9
|
for my $molecule ($xp->findnodes( '//cml:molecule' )) { |
58
|
3
|
|
|
|
|
119
|
my $mol = $mol_class->new; |
59
|
3
|
50
|
|
|
|
75
|
$mol->name( $molecule->getAttribute( 'id' ) ) if $molecule->hasAttribute( 'id' ); |
60
|
|
|
|
|
|
|
|
61
|
3
|
|
|
|
|
113
|
my ($atomArray) = $molecule->getChildrenByTagName( 'atomArray' ); |
62
|
3
|
100
|
|
|
|
173
|
next unless $atomArray; # Skip empty molecules |
63
|
|
|
|
|
|
|
|
64
|
2
|
|
|
|
|
14
|
push @molecules, $mol; |
65
|
|
|
|
|
|
|
|
66
|
2
|
|
|
|
|
6
|
my %atom_by_name; |
67
|
|
|
|
|
|
|
my %hydrogens_by_id; |
68
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
# atomArray |
70
|
2
|
|
|
|
|
5
|
for my $element ($atomArray->getChildrenByTagName( 'atom' )) { # for each atom... |
71
|
2
|
|
|
|
|
45
|
my ($symbol, $charge, $hydrogen_count, $mass_number); |
72
|
2
|
|
|
|
|
0
|
my @coord3; |
73
|
|
|
|
|
|
|
|
74
|
2
|
50
|
|
|
|
8
|
next unless $element->hasAttribute( 'id' ); |
75
|
2
|
|
|
|
|
5
|
my $id = $element->getAttribute( 'id' ); |
76
|
2
|
|
|
|
|
20
|
my $atom = $atom_by_name{$id} = $mol->new_atom( name => $id ); |
77
|
|
|
|
|
|
|
|
78
|
2
|
50
|
|
|
|
230
|
if( $element->hasAttribute( 'elementType' ) ) { |
79
|
2
|
|
|
|
|
7
|
$atom->symbol( $element->getAttribute( 'elementType' ) ); |
80
|
|
|
|
|
|
|
} |
81
|
2
|
50
|
|
|
|
51
|
if( $element->hasAttribute( 'formalCharge' ) ) { |
82
|
0
|
|
|
|
|
0
|
$atom->formal_charge( int $element->getAttribute( 'formalCharge' ) ); |
83
|
|
|
|
|
|
|
} |
84
|
2
|
50
|
|
|
|
9
|
if( $element->hasAttribute( 'hydrogenCount' ) ) { |
85
|
0
|
|
|
|
|
0
|
$hydrogens_by_id{$atom->id} = int $element->getAttribute( 'hydrogenCount' ); |
86
|
|
|
|
|
|
|
} |
87
|
2
|
50
|
|
|
|
16
|
if( $element->hasAttribute( 'isotopeNumber' ) ) { |
88
|
0
|
|
|
|
|
0
|
$atom->mass_number( int $element->getAttribute( 'isotopeNumber' ) ); |
89
|
|
|
|
|
|
|
} |
90
|
2
|
50
|
33
|
|
|
34
|
if( $element->hasAttribute( 'x3' ) && |
|
|
|
33
|
|
|
|
|
91
|
|
|
|
|
|
|
$element->hasAttribute( 'y3' ) && |
92
|
|
|
|
|
|
|
$element->hasAttribute( 'z3' ) ) { |
93
|
2
|
|
|
|
|
11
|
$atom->coords( map { $_ * 1 } $element->getAttribute( 'x3' ), |
|
6
|
|
|
|
|
55
|
|
94
|
|
|
|
|
|
|
$element->getAttribute( 'y3' ), |
95
|
|
|
|
|
|
|
$element->getAttribute( 'z3' ) ); |
96
|
|
|
|
|
|
|
} |
97
|
|
|
|
|
|
|
} |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
# Second pass through atoms to set chirality (if supported) |
100
|
2
|
|
|
|
|
100
|
for my $element ($atomArray->getChildrenByTagName( 'atom' )) { # for each atom... |
101
|
2
|
|
|
|
|
80
|
my( $atomParity ) = $element->getChildrenByTagName( 'atomParity' ); |
102
|
2
|
0
|
33
|
|
|
27
|
next unless $atomParity && |
|
|
|
33
|
|
|
|
|
103
|
|
|
|
|
|
|
$atomParity->hasAttribute( 'atomRefs4' ) && |
104
|
|
|
|
|
|
|
$atomParity->textContent =~ /^-?1$/; |
105
|
|
|
|
|
|
|
|
106
|
0
|
0
|
|
|
|
0
|
next unless $element->hasAttribute( 'id' ); |
107
|
0
|
|
|
|
|
0
|
my $id = $element->getAttribute( 'id' ); |
108
|
0
|
|
|
|
|
0
|
my $atom = $atom_by_name{$id}; |
109
|
0
|
0
|
|
|
|
0
|
next unless $atom->can( 'chirality' ); |
110
|
|
|
|
|
|
|
|
111
|
0
|
|
|
|
|
0
|
my @atoms = map { $atom_by_name{$_} } |
|
0
|
|
|
|
|
0
|
|
112
|
|
|
|
|
|
|
split ' ', $atomParity->getAttribute( 'atomRefs4' ); |
113
|
0
|
|
|
|
|
0
|
$atom->chirality( @atoms, int $atomParity->textContent ); |
114
|
|
|
|
|
|
|
} |
115
|
|
|
|
|
|
|
|
116
|
2
|
|
|
|
|
5
|
my @bonds; |
117
|
2
|
|
|
|
|
19
|
my( $bondArray ) = $molecule->getChildrenByTagName( 'bondArray' ); |
118
|
2
|
50
|
|
|
|
52
|
if( $bondArray ) { |
119
|
0
|
|
|
|
|
0
|
@bonds = $bondArray->getChildrenByTagName( 'bond' ); |
120
|
|
|
|
|
|
|
} |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
# bondArray |
123
|
2
|
|
|
|
|
4
|
for my $bond (@bonds) { # for each bond... |
124
|
0
|
|
|
|
|
0
|
my $order = my $type = $bond->getAttribute( 'order' ); |
125
|
0
|
0
|
|
|
|
0
|
$order = 1 unless $order =~ /^[123]$/; |
126
|
|
|
|
|
|
|
|
127
|
0
|
|
|
|
|
0
|
my @atoms = map { $atom_by_name{$_} } |
|
0
|
|
|
|
|
0
|
|
128
|
|
|
|
|
|
|
split ' ', $bond->getAttribute( 'atomRefs2' ); |
129
|
0
|
0
|
|
|
|
0
|
my $mol_bond = $mol->new_bond( |
130
|
|
|
|
|
|
|
type => $type, |
131
|
|
|
|
|
|
|
atoms => \@atoms, |
132
|
|
|
|
|
|
|
order => $order, |
133
|
|
|
|
|
|
|
($type eq 'A' ? (aromatic => 1) : ()), |
134
|
|
|
|
|
|
|
); |
135
|
|
|
|
|
|
|
|
136
|
0
|
|
|
|
|
0
|
my( $bondStereo ) = $bond->getChildrenByTagName( 'bondStereo' ); |
137
|
0
|
0
|
0
|
|
|
0
|
if( $mol_bond->can( 'cistrans' ) && |
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
138
|
|
|
|
|
|
|
$bondStereo && |
139
|
|
|
|
|
|
|
$bondStereo->hasAttribute( 'atomRefs4' ) && |
140
|
|
|
|
|
|
|
$bondStereo->textContent =~ /^[CT]$/ ) { |
141
|
0
|
|
|
|
|
0
|
my @cistrans_atoms = map { $atom_by_name{$_} } |
|
0
|
|
|
|
|
0
|
|
142
|
|
|
|
|
|
|
split ' ', $bondStereo->getAttribute( 'atomRefs4' ); |
143
|
0
|
0
|
|
|
|
0
|
if( $cistrans_atoms[1] ne $atoms[0] ) { |
144
|
0
|
|
|
|
|
0
|
( $cistrans_atoms[0], $cistrans_atoms[3] ) = |
145
|
|
|
|
|
|
|
( $cistrans_atoms[3], $cistrans_atoms[0] ); |
146
|
0
|
0
|
|
|
|
0
|
$mol_bond->cistrans( $cistrans_atoms[0], |
147
|
|
|
|
|
|
|
$cistrans_atoms[3], |
148
|
|
|
|
|
|
|
$bondStereo->textContent eq 'C' ? 'cis' : 'trans' ); |
149
|
|
|
|
|
|
|
} |
150
|
|
|
|
|
|
|
} |
151
|
|
|
|
|
|
|
} |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
# calculate implicit hydrogens |
154
|
2
|
|
|
|
|
20
|
for my $id (sort keys %hydrogens_by_id) { |
155
|
0
|
|
|
|
|
0
|
my $atom = $mol->by_id( $id ); |
156
|
0
|
|
|
|
|
0
|
my $explicit_hydrogens = scalar grep { $_->symbol eq 'H' } |
|
0
|
|
|
|
|
0
|
|
157
|
|
|
|
|
|
|
$atom->neighbors; |
158
|
0
|
0
|
|
|
|
0
|
if( $explicit_hydrogens > $hydrogens_by_id{$id} ) { |
159
|
0
|
|
|
|
|
0
|
warn 'total number of attached hydrogen atoms is ' . |
160
|
|
|
|
|
|
|
"less than the number of explicit hydrogen atoms\n"; |
161
|
0
|
|
|
|
|
0
|
next; |
162
|
|
|
|
|
|
|
} |
163
|
0
|
0
|
|
|
|
0
|
next if $explicit_hydrogens == $hydrogens_by_id{$id}; |
164
|
0
|
|
|
|
|
0
|
$atom->implicit_hydrogens( $hydrogens_by_id{$id} - $explicit_hydrogens ); |
165
|
|
|
|
|
|
|
} |
166
|
|
|
|
|
|
|
} |
167
|
|
|
|
|
|
|
|
168
|
2
|
|
|
|
|
19
|
return @molecules; |
169
|
|
|
|
|
|
|
} |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
sub name_is { |
172
|
0
|
|
|
0
|
1
|
|
my ($self, $fname) = @_; |
173
|
0
|
|
|
|
|
|
$fname =~ /\.cml$/i; |
174
|
|
|
|
|
|
|
} |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
sub file_is { |
177
|
0
|
|
|
0
|
1
|
|
my ($self, $fname) = @_; |
178
|
0
|
|
|
|
|
|
$fname =~ /\.cml$/i; |
179
|
|
|
|
|
|
|
} |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
1; |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
=head1 SOURCE CODE REPOSITORY |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
L |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
=head1 SEE ALSO |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
L |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
=head1 AUTHOR |
192
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
Andrius Merkys |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
=head1 COPYRIGHT |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
Copyright (c) 2022 Andrius Merkys. All rights reserved. This program is |
198
|
|
|
|
|
|
|
free software; you can redistribute it and/or modify it under the same terms as |
199
|
|
|
|
|
|
|
Perl itself. |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
=cut |