line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Chemistry::File::CML; |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
our $VERSION = '0.11'; # VERSION |
4
|
|
|
|
|
|
|
# $Id$ |
5
|
|
|
|
|
|
|
|
6
|
1
|
|
|
1
|
|
450
|
use base 'Chemistry::File'; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
532
|
|
7
|
1
|
|
|
1
|
|
16082
|
use Chemistry::Mol; |
|
1
|
|
|
|
|
31329
|
|
|
1
|
|
|
|
|
45
|
|
8
|
1
|
|
|
1
|
|
604
|
use XML::LibXML; |
|
1
|
|
|
|
|
38675
|
|
|
1
|
|
|
|
|
7
|
|
9
|
1
|
|
|
1
|
|
135
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
18
|
|
10
|
1
|
|
|
1
|
|
5
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
691
|
|
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
our $DEBUG = 0; |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=head1 NAME |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
Chemistry::File::CML - CML reader |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
use Chemistry::File::CML; |
21
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
# read a molecule |
23
|
|
|
|
|
|
|
my $mol = Chemistry::Mol->read('myfile.cml'); |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
=cut |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
Chemistry::Mol->register_format(cml => __PACKAGE__); |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
=head1 DESCRIPTION |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
Chemical Markup Language reader. |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
This module automatically registers the 'cml' format with Chemistry::Mol. |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
This version only reads some of the information available in CML files. |
36
|
|
|
|
|
|
|
It does not read stereochemistry yet, but this is envisaged in future. |
37
|
|
|
|
|
|
|
Writing CML files is not implemented yet too. |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
This module is part of the PerlMol project, L. |
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
=cut |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
sub parse_string { |
45
|
1
|
|
|
1
|
1
|
124
|
my ($self, $s, %opts) = @_; |
46
|
|
|
|
|
|
|
|
47
|
1
|
|
50
|
|
|
8
|
my $mol_class = $opts{mol_class} || 'Chemistry::Mol'; |
48
|
1
|
|
33
|
|
|
10
|
my $atom_class = $opts{atom_class} || $mol_class->atom_class; |
49
|
1
|
|
33
|
|
|
11
|
my $bond_class = $opts{bond_class} || $mol_class->bond_class; |
50
|
1
|
|
|
|
|
5
|
local $_; |
51
|
|
|
|
|
|
|
|
52
|
1
|
|
|
|
|
7
|
my $cml = XML::LibXML->load_xml( string => $s ); |
53
|
1
|
|
|
|
|
408
|
my $xp = XML::LibXML::XPathContext->new( $cml ); |
54
|
1
|
|
|
|
|
12
|
$xp->registerNs( 'cml', 'http://www.xml-cml.org/schema' ); |
55
|
|
|
|
|
|
|
|
56
|
1
|
|
|
|
|
6
|
my @cml_molecules = $xp->findnodes( '/cml:cml/cml:molecule' ); |
57
|
1
|
50
|
|
|
|
63
|
if( !@cml_molecules ) { |
58
|
0
|
|
|
|
|
0
|
@cml_molecules = $xp->findnodes( '/cml:molecule' ); # Somewhy some CMLs need this |
59
|
|
|
|
|
|
|
} |
60
|
|
|
|
|
|
|
|
61
|
1
|
|
|
|
|
2
|
my @molecules; |
62
|
1
|
|
|
|
|
3
|
for my $molecule (@cml_molecules) { |
63
|
1
|
|
|
|
|
6
|
my $mol = $mol_class->new; |
64
|
1
|
|
|
|
|
24
|
push @molecules, $mol; |
65
|
|
|
|
|
|
|
|
66
|
1
|
50
|
|
|
|
11
|
$mol->name( $molecule->getAttribute( 'id' ) ) if $molecule->hasAttribute( 'id' ); |
67
|
|
|
|
|
|
|
|
68
|
1
|
|
|
|
|
90
|
my ($atomArray) = $molecule->getChildrenByTagName( 'atomArray' ); |
69
|
1
|
50
|
|
|
|
88
|
next unless $atomArray; # Skip empty molecules |
70
|
|
|
|
|
|
|
|
71
|
1
|
|
|
|
|
9
|
my %atom_by_name; |
72
|
|
|
|
|
|
|
my %hydrogens_by_id; |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
# atomArray |
75
|
1
|
|
|
|
|
4
|
for my $element ($atomArray->getChildrenByTagName( 'atom' )) { # for each atom... |
76
|
1
|
|
|
|
|
24
|
my ($symbol, $charge, $hydrogen_count, $mass_number); |
77
|
1
|
|
|
|
|
0
|
my @coord3; |
78
|
|
|
|
|
|
|
|
79
|
1
|
50
|
|
|
|
5
|
next unless $element->hasAttribute( 'id' ); |
80
|
1
|
|
|
|
|
3
|
my $id = $element->getAttribute( 'id' ); |
81
|
1
|
|
|
|
|
12
|
my $atom = $atom_by_name{$id} = $mol->new_atom( name => $id ); |
82
|
|
|
|
|
|
|
|
83
|
1
|
50
|
|
|
|
159
|
if( $element->hasAttribute( 'elementType' ) ) { |
84
|
1
|
|
|
|
|
3
|
$atom->symbol( $element->getAttribute( 'elementType' ) ); |
85
|
|
|
|
|
|
|
} |
86
|
1
|
50
|
|
|
|
33
|
if( $element->hasAttribute( 'formalCharge' ) ) { |
87
|
0
|
|
|
|
|
0
|
$atom->formal_charge( int $element->getAttribute( 'formalCharge' ) ); |
88
|
|
|
|
|
|
|
} |
89
|
1
|
50
|
|
|
|
21
|
if( $element->hasAttribute( 'hydrogenCount' ) ) { |
90
|
0
|
|
|
|
|
0
|
$hydrogens_by_id{$atom->id} = int $element->getAttribute( 'hydrogenCount' ); |
91
|
|
|
|
|
|
|
} |
92
|
1
|
50
|
|
|
|
8
|
if( $element->hasAttribute( 'isotopeNumber' ) ) { |
93
|
0
|
|
|
|
|
0
|
$atom->mass_number( int $element->getAttribute( 'isotopeNumber' ) ); |
94
|
|
|
|
|
|
|
} |
95
|
1
|
50
|
33
|
|
|
12
|
if( $element->hasAttribute( 'x3' ) && |
|
|
|
33
|
|
|
|
|
96
|
|
|
|
|
|
|
$element->hasAttribute( 'y3' ) && |
97
|
|
|
|
|
|
|
$element->hasAttribute( 'z3' ) ) { |
98
|
1
|
|
|
|
|
13
|
$atom->coords( map { $_ * 1 } $element->getAttribute( 'x3' ), |
|
3
|
|
|
|
|
31
|
|
99
|
|
|
|
|
|
|
$element->getAttribute( 'y3' ), |
100
|
|
|
|
|
|
|
$element->getAttribute( 'z3' ) ); |
101
|
|
|
|
|
|
|
} |
102
|
|
|
|
|
|
|
} |
103
|
|
|
|
|
|
|
|
104
|
1
|
|
|
|
|
35
|
my @bonds; |
105
|
1
|
|
|
|
|
23
|
my( $bondArray ) = $molecule->getChildrenByTagName( 'bondArray' ); |
106
|
1
|
50
|
|
|
|
42
|
if( $bondArray ) { |
107
|
0
|
|
|
|
|
0
|
@bonds = $bondArray->getChildrenByTagName( 'bond' ); |
108
|
|
|
|
|
|
|
} |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
# bondArray |
111
|
1
|
|
|
|
|
3
|
for my $bond (@bonds) { # for each bond... |
112
|
0
|
|
|
|
|
0
|
my $order = my $type = $bond->getAttribute( 'order' ); |
113
|
0
|
0
|
|
|
|
0
|
$order = 1 unless $order =~ /^[123]$/; |
114
|
|
|
|
|
|
|
$mol->new_bond( |
115
|
|
|
|
|
|
|
type => $type, |
116
|
0
|
0
|
|
|
|
0
|
atoms => [map { $atom_by_name{$_} } split ' ', $bond->getAttribute( 'atomRefs2' )], |
|
0
|
|
|
|
|
0
|
|
117
|
|
|
|
|
|
|
order => $order, |
118
|
|
|
|
|
|
|
($type eq 'A' ? (aromatic => 1) : ()), |
119
|
|
|
|
|
|
|
); |
120
|
|
|
|
|
|
|
} |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
# calculate implicit hydrogens |
123
|
1
|
|
|
|
|
16
|
for my $id (sort keys %hydrogens_by_id) { |
124
|
0
|
|
|
|
|
0
|
my $atom = $mol->by_id( $id ); |
125
|
0
|
|
|
|
|
0
|
my $explicit_hydrogens = scalar grep { $_->symbol eq 'H' } |
|
0
|
|
|
|
|
0
|
|
126
|
|
|
|
|
|
|
$atom->neighbors; |
127
|
0
|
0
|
|
|
|
0
|
if( $explicit_hydrogens > $hydrogens_by_id{$id} ) { |
128
|
0
|
|
|
|
|
0
|
warn 'total number of attached hydrogen atoms is ' . |
129
|
|
|
|
|
|
|
"less than the number of explicit hydrogen atoms\n"; |
130
|
0
|
|
|
|
|
0
|
next; |
131
|
|
|
|
|
|
|
} |
132
|
0
|
0
|
|
|
|
0
|
next if $explicit_hydrogens == $hydrogens_by_id{$id}; |
133
|
0
|
|
|
|
|
0
|
$atom->implicit_hydrogens( $hydrogens_by_id{$id} - $explicit_hydrogens ); |
134
|
|
|
|
|
|
|
} |
135
|
|
|
|
|
|
|
} |
136
|
|
|
|
|
|
|
|
137
|
1
|
|
|
|
|
13
|
return @molecules; |
138
|
|
|
|
|
|
|
} |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
sub name_is { |
141
|
0
|
|
|
0
|
1
|
|
my ($self, $fname) = @_; |
142
|
0
|
|
|
|
|
|
$fname =~ /\.cml$/i; |
143
|
|
|
|
|
|
|
} |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
sub file_is { |
146
|
0
|
|
|
0
|
1
|
|
my ($self, $fname) = @_; |
147
|
0
|
|
|
|
|
|
$fname =~ /\.cml$/i; |
148
|
|
|
|
|
|
|
} |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
1; |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
=head1 SOURCE CODE REPOSITORY |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
L |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
=head1 SEE ALSO |
157
|
|
|
|
|
|
|
|
158
|
|
|
|
|
|
|
L |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
=head1 AUTHOR |
161
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
Andrius Merkys |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
=head1 COPYRIGHT |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
Copyright (c) 2022 Andrius Merkys. All rights reserved. This program is |
167
|
|
|
|
|
|
|
free software; you can redistribute it and/or modify it under the same terms as |
168
|
|
|
|
|
|
|
Perl itself. |
169
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
=cut |