| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Chemistry::Pattern; |
|
2
|
|
|
|
|
|
|
$VERSION = '0.27'; |
|
3
|
|
|
|
|
|
|
# $Id: Pattern.pm,v 1.21 2009/05/10 20:56:06 itubert Exp $ |
|
4
|
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=head1 NAME |
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
Chemistry::Pattern - Chemical substructure pattern matching |
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
use Chemistry::Pattern; |
|
12
|
|
|
|
|
|
|
use Chemistry::Mol; |
|
13
|
|
|
|
|
|
|
use Chemistry::File::SMILES; |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
# Create a pattern and a molecule from SMILES strings |
|
16
|
|
|
|
|
|
|
my $mol_str = "C1CCCC1C(Cl)=O"; |
|
17
|
|
|
|
|
|
|
my $patt_str = "C(=O)Cl"; |
|
18
|
|
|
|
|
|
|
my $mol = Chemistry::Mol->parse($mol_str, format => 'smiles'); |
|
19
|
|
|
|
|
|
|
my $patt = Chemistry::Pattern->parse($patt_str, format => 'smiles'); |
|
20
|
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
# try to match the pattern |
|
22
|
|
|
|
|
|
|
while ($patt->match($mol)) { |
|
23
|
|
|
|
|
|
|
@matched_atoms = $patt->atom_map; |
|
24
|
|
|
|
|
|
|
print "Matched: (@matched_atoms)\n"; |
|
25
|
|
|
|
|
|
|
# should print something like "Matched: (a6 a8 a7)" |
|
26
|
|
|
|
|
|
|
} |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
29
|
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
This module implements basic pattern matching for molecules. |
|
31
|
|
|
|
|
|
|
The Chemistry::Pattern class is a subclass of Chemistry::Mol, so patterns |
|
32
|
|
|
|
|
|
|
have all the properties of molecules and can come from reading the same |
|
33
|
|
|
|
|
|
|
file formats. Of course there are certain formats (such as SMARTS) |
|
34
|
|
|
|
|
|
|
that are exclusively used to describe patterns. |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
To perform a pattern matching operation on a molecule, follow these steps. |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
1) Create a pattern object, either by parsing a file or string, or by adding |
|
39
|
|
|
|
|
|
|
atoms and bonds by hand by using Chemistry::Mol methods. Note that atoms and |
|
40
|
|
|
|
|
|
|
bonds in a pattern should be Chemistry::Pattern::Atom and |
|
41
|
|
|
|
|
|
|
Chemistry::Patern::Bond objects. Let's assume that the pattern object is |
|
42
|
|
|
|
|
|
|
stored in $patt and that the molecule is $mol. |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
2) Execute the pattern on the molecule by calling $patt->match($mol). |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
3) If $patt->match() returns true, extract the "map" that relates the pattern to |
|
47
|
|
|
|
|
|
|
the molecule by calling $patt->atom_map or $patt->bond_map. These methods |
|
48
|
|
|
|
|
|
|
return a list of the atoms or bonds in the molecule that are matched by the |
|
49
|
|
|
|
|
|
|
corresponding atoms in the pattern. Thus $patt->atom_map(1) would be analogous |
|
50
|
|
|
|
|
|
|
to the $1 special variable used for regular expresion matching. The difference |
|
51
|
|
|
|
|
|
|
between Chemistry::Pattern and Perl regular expressions is that atoms and bonds |
|
52
|
|
|
|
|
|
|
are always captured. |
|
53
|
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
4) If more than one match for the molecule is desired, repeat from step (2) |
|
55
|
|
|
|
|
|
|
until match() returns false. |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
=cut |
|
58
|
|
|
|
|
|
|
|
|
59
|
1
|
|
|
1
|
|
217653
|
use 5.006; |
|
|
1
|
|
|
|
|
5
|
|
|
|
1
|
|
|
|
|
45
|
|
|
60
|
1
|
|
|
1
|
|
5
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
34
|
|
|
61
|
1
|
|
|
1
|
|
5
|
use Carp; |
|
|
1
|
|
|
|
|
8
|
|
|
|
1
|
|
|
|
|
75
|
|
|
62
|
1
|
|
|
1
|
|
5
|
use base qw(Chemistry::Mol); |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
96
|
|
|
63
|
1
|
|
|
1
|
|
800
|
use Chemistry::Pattern::Atom; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
43
|
|
|
64
|
1
|
|
|
1
|
|
707
|
use Chemistry::Pattern::Bond; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
4435
|
|
|
65
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=head1 METHODS |
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
=over 4 |
|
69
|
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
=item Chemistry::Pattern->new(name => value, ...) |
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
Create a new empty pattern. This is just like the Chemistry::Mol constructor, |
|
73
|
|
|
|
|
|
|
with one additional option: "options", which expects a hash reference (the |
|
74
|
|
|
|
|
|
|
options themselves are described under the options() method). |
|
75
|
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
=cut |
|
77
|
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
sub new { |
|
79
|
17
|
|
|
17
|
1
|
24347
|
my $class = shift; |
|
80
|
17
|
|
|
|
|
128
|
my $self = $class->SUPER::new( |
|
81
|
|
|
|
|
|
|
options => { overlap => 1, permute => 0}, |
|
82
|
|
|
|
|
|
|
@_ |
|
83
|
|
|
|
|
|
|
); |
|
84
|
17
|
|
|
|
|
127
|
$self->reset; |
|
85
|
17
|
|
|
|
|
66
|
$self; |
|
86
|
|
|
|
|
|
|
} |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
=item $pattern->options(option => value,...) |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
Available options: |
|
91
|
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
=over |
|
93
|
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
=item overlap |
|
95
|
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
If true, matches may overlap. For example, the CC pattern could match twice |
|
97
|
|
|
|
|
|
|
on propane if this option is true, but only once if it is false. This option |
|
98
|
|
|
|
|
|
|
is true by default. |
|
99
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=item permute |
|
101
|
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
Sometimes there is more than one way of matching the same set of pattern atoms |
|
103
|
|
|
|
|
|
|
on the same set of molecule atoms. If true, return these "redundant" matches. |
|
104
|
|
|
|
|
|
|
For example, the CC pattern could match ethane with two different permutations |
|
105
|
|
|
|
|
|
|
(forwards and backwards). This option is false by default. |
|
106
|
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
=back |
|
108
|
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
=cut |
|
110
|
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
sub options { |
|
112
|
34
|
|
|
34
|
1
|
33456
|
my $self = shift; |
|
113
|
34
|
100
|
|
|
|
85
|
if (@_ == 1) { |
|
114
|
17
|
50
|
|
|
|
20
|
$self->{options} = {%{$self->{options}||{}}, %{$_[0]}}; |
|
|
17
|
|
|
|
|
271
|
|
|
|
17
|
|
|
|
|
108
|
|
|
115
|
|
|
|
|
|
|
} else { |
|
116
|
17
|
|
|
|
|
22
|
$self->{options} = {%{$self->{options}}, @_}; |
|
|
17
|
|
|
|
|
172
|
|
|
117
|
|
|
|
|
|
|
} |
|
118
|
|
|
|
|
|
|
} |
|
119
|
|
|
|
|
|
|
|
|
120
|
45
|
|
|
45
|
1
|
5473
|
sub atom_class { "Chemistry::Pattern::Atom" } |
|
121
|
|
|
|
|
|
|
|
|
122
|
30
|
|
|
30
|
1
|
2824
|
sub bond_class { "Chemistry::Pattern::Bond" } |
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
our $DEBUG = 0; |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=item $patt->reset |
|
127
|
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
Reset the state of the pattern matching object, so that it begins the next |
|
129
|
|
|
|
|
|
|
match from scratch instead of where it left off after the last one. |
|
130
|
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
=cut |
|
132
|
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
sub reset { |
|
134
|
34
|
|
|
34
|
1
|
72
|
my ($self, $mol, %opts) = @_; |
|
135
|
34
|
50
|
|
|
|
78
|
print "Resetting to ($mol, $opts{atom})\n" if $DEBUG; |
|
136
|
34
|
|
|
|
|
86
|
$self->{flat} = $self->flatten; |
|
137
|
34
|
|
|
|
|
107
|
$self->map_to($mol); |
|
138
|
34
|
100
|
|
|
|
251
|
$self->{pending_atoms} = [$mol ? $mol->atoms : ()]; |
|
139
|
34
|
|
|
|
|
330
|
$self->{already_matched} = {}; |
|
140
|
34
|
|
|
|
|
71
|
$self->{paint_tab} = {}; |
|
141
|
34
|
|
|
|
|
61
|
$self->{anchor} = ''; |
|
142
|
34
|
|
|
|
|
102
|
my $atom = $self->next_atom($opts{atom}); |
|
143
|
|
|
|
|
|
|
#$self->match_local_init($atom) if $atom; |
|
144
|
|
|
|
|
|
|
} |
|
145
|
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
sub already_matched { |
|
147
|
50
|
|
|
50
|
0
|
100
|
my ($self, @objs) = @_; |
|
148
|
50
|
|
|
|
|
69
|
my @ids = map {$_->id} @objs; |
|
|
172
|
|
|
|
|
813
|
|
|
149
|
50
|
|
|
|
|
373
|
my $unsorted_key = join " ", @ids; |
|
150
|
50
|
|
|
|
|
94
|
my $key; |
|
151
|
50
|
100
|
|
|
|
118
|
if ($self->{options}{permute}) { |
|
152
|
20
|
|
|
|
|
28
|
$key = $unsorted_key; |
|
153
|
|
|
|
|
|
|
} else { |
|
154
|
30
|
|
|
|
|
116
|
$key = join " ", sort @ids; |
|
155
|
|
|
|
|
|
|
} |
|
156
|
|
|
|
|
|
|
|
|
157
|
50
|
100
|
|
|
|
123
|
if ($self->{already_matched}{$key}) { |
|
158
|
12
|
50
|
|
|
|
60
|
print "already matched $unsorted_key\n" if $DEBUG; |
|
159
|
12
|
|
|
|
|
43
|
return 1; |
|
160
|
|
|
|
|
|
|
} else { |
|
161
|
38
|
|
|
|
|
94
|
$self->{already_matched}{$key} = 1; |
|
162
|
38
|
50
|
|
|
|
74
|
print "first match of $key\n" if $DEBUG; |
|
163
|
38
|
|
|
|
|
258
|
return 0; |
|
164
|
|
|
|
|
|
|
} |
|
165
|
|
|
|
|
|
|
} |
|
166
|
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
sub next_atom { |
|
168
|
89
|
|
|
89
|
0
|
141
|
my ($self, $atom) = @_; |
|
169
|
|
|
|
|
|
|
|
|
170
|
89
|
50
|
|
|
|
179
|
print "next_atom\n" if $DEBUG; |
|
171
|
89
|
100
|
|
|
|
186
|
return if $self->{anchor}; |
|
172
|
88
|
100
|
|
|
|
141
|
if ($atom) { |
|
|
87
|
100
|
|
|
|
218
|
|
|
173
|
1
|
|
|
|
|
7
|
$self->{anchor} = $atom; |
|
174
|
|
|
|
|
|
|
} elsif (@{$self->{pending_atoms}}) { |
|
175
|
54
|
|
|
|
|
61
|
$atom = shift @{$self->{pending_atoms}}; |
|
|
54
|
|
|
|
|
95
|
|
|
176
|
54
|
50
|
|
|
|
119
|
print "\tatom $atom\n" if $DEBUG; |
|
177
|
|
|
|
|
|
|
} |
|
178
|
|
|
|
|
|
|
|
|
179
|
88
|
|
|
|
|
133
|
$self->{next_atom} = $atom; |
|
180
|
88
|
|
|
|
|
232
|
$atom; |
|
181
|
|
|
|
|
|
|
} |
|
182
|
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
Chemistry::Obj::accessor "map_to"; |
|
184
|
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
=item $pattern->atom_map |
|
186
|
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
Returns the list of atoms that matched the last time $pattern->match was called. |
|
188
|
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
=cut |
|
190
|
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
sub atom_map { |
|
192
|
88
|
|
|
88
|
1
|
203
|
my $self = shift; |
|
193
|
88
|
|
|
|
|
240
|
my @atoms = map { $_->map_to } $self->atoms(@_); |
|
|
210
|
|
|
|
|
1304
|
|
|
194
|
88
|
100
|
|
|
|
712
|
@atoms == 1 ? $atoms[0] : @atoms; |
|
195
|
|
|
|
|
|
|
} |
|
196
|
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
=item $pattern->bond_map |
|
198
|
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
Returns the list of bonds that matched the last time $pattern->match was called. |
|
200
|
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
=cut |
|
202
|
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
sub bond_map { |
|
204
|
50
|
|
|
50
|
1
|
62
|
my $self = shift; |
|
205
|
50
|
|
|
|
|
143
|
my @bonds = map { $_->map_to } $self->bonds(@_); |
|
|
52
|
|
|
|
|
278
|
|
|
206
|
50
|
100
|
|
|
|
260
|
@bonds == 1 ? $bonds[0] : @bonds; |
|
207
|
|
|
|
|
|
|
} |
|
208
|
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
=item $pattern->match($mol, %options) |
|
210
|
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
Returns true if the pattern matches the molecule. If called again for the same |
|
212
|
|
|
|
|
|
|
molecule, continues matching where it left off (in a way similar to global |
|
213
|
|
|
|
|
|
|
regular expressions under scalar context). When there are no matches left, |
|
214
|
|
|
|
|
|
|
returns false. To force the match to always start from scratch instead of |
|
215
|
|
|
|
|
|
|
continuing where it left off, the C option may be used. |
|
216
|
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
$pattern->match($mol, atom => $atom) |
|
218
|
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
If atom => $atom is given as an option, match will only look for matches that |
|
220
|
|
|
|
|
|
|
start at $atom (which should be an atom in $mol, of course). This is somewhat |
|
221
|
|
|
|
|
|
|
analog to anchored regular expressions. |
|
222
|
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
To find out which atoms and bonds matched, use the atom_map and bond_map |
|
224
|
|
|
|
|
|
|
methods. |
|
225
|
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
=cut |
|
227
|
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
sub match { |
|
229
|
55
|
|
|
55
|
1
|
806
|
my ($self, $mol, %opts) = @_; |
|
230
|
55
|
50
|
|
|
|
131
|
print "match $self $mol\n" if $DEBUG; |
|
231
|
55
|
50
|
66
|
|
|
230
|
if (defined($mol) and $self->map_to ne $mol |
|
|
|
|
66
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
232
|
|
|
|
|
|
|
or $opts{reset} |
|
233
|
|
|
|
|
|
|
or defined($opts{atom}) and $opts{atom} ne $self->{anchor} |
|
234
|
|
|
|
|
|
|
){ |
|
235
|
17
|
|
|
|
|
240
|
$self->reset($mol, %opts); |
|
236
|
|
|
|
|
|
|
} |
|
237
|
55
|
|
|
|
|
662
|
my $match = $self->match_next; |
|
238
|
55
|
100
|
|
|
|
137
|
$self->map_to(undef) unless $match; |
|
239
|
55
|
50
|
|
|
|
170
|
print "returning match: '$match'\n" if $DEBUG; |
|
240
|
55
|
|
|
|
|
150
|
$match; |
|
241
|
|
|
|
|
|
|
} |
|
242
|
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
sub match_next { |
|
244
|
55
|
|
|
55
|
0
|
109
|
my $self = shift; |
|
245
|
55
|
|
|
|
|
62
|
my $match; |
|
246
|
55
|
50
|
|
|
|
108
|
print "match_next\n" if $DEBUG; |
|
247
|
|
|
|
|
|
|
|
|
248
|
55
|
|
|
|
|
55
|
while (1) { |
|
249
|
103
|
100
|
|
|
|
462
|
$self->match_local_init($self->{next_atom}) if $self->{next_atom}; |
|
250
|
103
|
|
|
|
|
231
|
$match = $self->match_local_next; |
|
251
|
103
|
100
|
|
|
|
184
|
if ($match) { |
|
252
|
50
|
100
|
|
|
|
115
|
if ($self->already_matched($self->atom_map, $self->bond_map)) { |
|
253
|
12
|
|
|
|
|
25
|
$match = 0, next; # try again |
|
254
|
|
|
|
|
|
|
} else { |
|
255
|
38
|
100
|
|
|
|
107
|
$self->next_atom unless ($self->{options}{overlap}); |
|
256
|
38
|
|
|
|
|
40
|
last; # matched! |
|
257
|
|
|
|
|
|
|
} |
|
258
|
|
|
|
|
|
|
} else { |
|
259
|
53
|
100
|
|
|
|
103
|
$self->next_atom or last; |
|
260
|
|
|
|
|
|
|
} |
|
261
|
|
|
|
|
|
|
} |
|
262
|
55
|
|
|
|
|
120
|
$match; |
|
263
|
|
|
|
|
|
|
} |
|
264
|
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
sub match_local_init { |
|
267
|
55
|
|
|
55
|
0
|
333
|
my ($patt, $atom) = @_; |
|
268
|
55
|
|
|
|
|
141
|
my $mol = $patt->map_to; |
|
269
|
|
|
|
|
|
|
|
|
270
|
55
|
50
|
|
|
|
323
|
print "match_local_init(",$patt->atoms(1),", $atom)\n" if $DEBUG; |
|
271
|
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
# clean up |
|
273
|
55
|
|
|
|
|
153
|
$_->map_to(undef) for ($patt->atoms, $patt->bonds); |
|
274
|
55
|
100
|
66
|
|
|
364
|
$patt->{paint_tab} = {} if ($mol and $patt->{options}{overlap}); |
|
275
|
55
|
|
|
|
|
579
|
$patt->{stack} = [[0]]; |
|
276
|
55
|
|
|
|
|
224
|
$patt->{backtrack} = 0; |
|
277
|
55
|
|
|
|
|
74
|
$patt->{next_atom} = 0; |
|
278
|
55
|
|
|
|
|
133
|
$patt->{current_atom} = $atom; |
|
279
|
|
|
|
|
|
|
} |
|
280
|
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
sub match_local_next { |
|
283
|
103
|
|
|
103
|
0
|
135
|
my ($patt) = @_; |
|
284
|
103
|
|
|
|
|
145
|
my $stack = $patt->{stack}; |
|
285
|
103
|
|
|
|
|
135
|
my $paint_tab = $patt->{paint_tab}; |
|
286
|
103
|
|
|
|
|
212
|
my $match = 0; |
|
287
|
103
|
|
|
|
|
122
|
my $backtrack = $patt->{backtrack}; |
|
288
|
103
|
|
|
|
|
120
|
my $where = $patt->{current_atom}; |
|
289
|
103
|
|
|
|
|
124
|
my $flat = $patt->{flat}; |
|
290
|
103
|
|
|
|
|
240
|
my $mol = $patt->map_to; |
|
291
|
|
|
|
|
|
|
|
|
292
|
103
|
50
|
|
|
|
543
|
print "match_local_next($where)\n" if $DEBUG; |
|
293
|
|
|
|
|
|
|
|
|
294
|
103
|
|
|
|
|
116
|
while (1) { |
|
295
|
698
|
|
|
|
|
754
|
my ($idx) = @{$stack->[-1]}; |
|
|
698
|
|
|
|
|
1127
|
|
|
296
|
698
|
|
|
|
|
920
|
my $pos = @$stack - 1; |
|
297
|
698
|
|
|
|
|
908
|
my $what = $flat->[$pos]; |
|
298
|
698
|
50
|
|
|
|
1199
|
if($DEBUG) { print "[@$_] " for @$stack; print "\n" } |
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
299
|
|
|
|
|
|
|
|
|
300
|
698
|
100
|
|
|
|
4086
|
if ($backtrack) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
301
|
183
|
50
|
|
|
|
320
|
print "\tbacktracking...\n" if $DEBUG; |
|
302
|
183
|
100
|
|
|
|
479
|
if (@$stack <= 2) { |
|
303
|
48
|
50
|
|
|
|
88
|
print "\tnowhere to backtrack\n" if $DEBUG; |
|
304
|
48
|
|
|
|
|
66
|
last; |
|
305
|
|
|
|
|
|
|
} |
|
306
|
135
|
|
|
|
|
201
|
my $to; |
|
307
|
135
|
|
|
|
|
181
|
pop @$stack; |
|
308
|
135
|
|
|
|
|
205
|
(undef, $what, $to) = @{$stack->[-1]}; |
|
|
135
|
|
|
|
|
301
|
|
|
309
|
135
|
|
|
|
|
184
|
$backtrack = 0; |
|
310
|
135
|
100
|
|
|
|
343
|
if ($what) { |
|
311
|
111
|
50
|
|
|
|
591
|
print "\tcleaning $what\n" if $DEBUG; |
|
312
|
111
|
|
|
|
|
294
|
$paint_tab->{$what->map_to} = 0; |
|
313
|
111
|
|
|
|
|
951
|
$what->map_to(undef); |
|
314
|
|
|
|
|
|
|
} |
|
315
|
135
|
100
|
|
|
|
456
|
if ($to) { |
|
316
|
123
|
50
|
|
|
|
701
|
print "\tcleaning $to\n" if $DEBUG; |
|
317
|
123
|
|
|
|
|
311
|
$paint_tab->{$to->map_to} = 0; |
|
318
|
123
|
|
|
|
|
1309
|
$to->map_to(undef); |
|
319
|
|
|
|
|
|
|
} |
|
320
|
135
|
|
|
|
|
690
|
next; |
|
321
|
|
|
|
|
|
|
} elsif ($pos >= @$flat) { # nothing else to match |
|
322
|
50
|
50
|
|
|
|
98
|
print "\tmatched!\n" if $DEBUG; |
|
323
|
50
|
|
|
|
|
52
|
$backtrack = 1; |
|
324
|
50
|
|
|
|
|
47
|
$match = 1; |
|
325
|
50
|
|
|
|
|
69
|
last; |
|
326
|
|
|
|
|
|
|
} elsif (@$stack == 1) { # match first atom (anchored) |
|
327
|
55
|
100
|
100
|
|
|
132
|
if (!$paint_tab->{$where} and $what->test($where)) { |
|
328
|
50
|
50
|
|
|
|
774
|
print "\tmatched initial atom $what on $where\n" if $DEBUG; |
|
329
|
50
|
|
|
|
|
134
|
$paint_tab->{$where} = 1; |
|
330
|
50
|
|
|
|
|
427
|
$what->map_to($where); |
|
331
|
50
|
|
|
|
|
298
|
push @$stack, [0]; |
|
332
|
50
|
|
|
|
|
116
|
next; |
|
333
|
|
|
|
|
|
|
} else { |
|
334
|
5
|
|
|
|
|
55
|
last; |
|
335
|
|
|
|
|
|
|
} |
|
336
|
|
|
|
|
|
|
} elsif ($what->isa("Chemistry::Pattern::Atom")) { |
|
337
|
|
|
|
|
|
|
# match unanchored atom |
|
338
|
50
|
100
|
|
|
|
143
|
if($idx >= $mol->atoms) { |
|
339
|
10
|
50
|
|
|
|
76
|
print "\tno more atoms at $mol\n" if $DEBUG; |
|
340
|
10
|
|
|
|
|
18
|
$backtrack = 1, next; |
|
341
|
|
|
|
|
|
|
} |
|
342
|
40
|
|
|
|
|
337
|
my $next = $mol->atoms($idx+1); |
|
343
|
40
|
|
|
|
|
394
|
$stack->[-1] = [++$idx]; |
|
344
|
40
|
100
|
|
|
|
121
|
if ($paint_tab->{$next}) { |
|
345
|
16
|
50
|
|
|
|
143
|
print "\t$next already visited\n" if $DEBUG; |
|
346
|
16
|
|
|
|
|
30
|
next; |
|
347
|
|
|
|
|
|
|
} |
|
348
|
24
|
50
|
|
|
|
346
|
if ($what->test($next)) { |
|
349
|
24
|
50
|
|
|
|
268
|
print "\tmatched unanchored atom $what on $next\n" if $DEBUG; |
|
350
|
24
|
|
|
|
|
64
|
$paint_tab->{$next} = 1; |
|
351
|
24
|
|
|
|
|
203
|
$what->map_to($next); |
|
352
|
24
|
|
|
|
|
149
|
$stack->[-1] = [$idx, undef, $what]; |
|
353
|
24
|
|
|
|
|
59
|
push @$stack, [0]; |
|
354
|
|
|
|
|
|
|
} else { |
|
355
|
0
|
0
|
|
|
|
0
|
print "\tunanchored atom $what didn't match $next\n" if $DEBUG; |
|
356
|
|
|
|
|
|
|
} |
|
357
|
24
|
|
|
|
|
52
|
next; |
|
358
|
|
|
|
|
|
|
} elsif ($what->isa("Chemistry::Pattern::Bond")) { # match bond |
|
359
|
360
|
|
|
|
|
984
|
my ($a1, $a2) = $what->atoms; |
|
360
|
360
|
50
|
|
|
|
2793
|
my ($from, $to) = $a1->map_to ? ($a1, $a2) : ($a2, $a1); |
|
361
|
360
|
|
33
|
|
|
3642
|
$where = $a1->map_to || $a2->map_to; |
|
362
|
360
|
50
|
|
|
|
3945
|
print "\tfrom $from to $to\n" if $DEBUG; |
|
363
|
360
|
|
|
|
|
928
|
my @bn = $where->bonds_neighbors; |
|
364
|
360
|
100
|
|
|
|
5902
|
if ($idx >= @bn) { |
|
365
|
125
|
50
|
|
|
|
227
|
print "\tno more bonds at $where\n" if $DEBUG; |
|
366
|
125
|
|
|
|
|
452
|
$backtrack = 1, next; |
|
367
|
|
|
|
|
|
|
} |
|
368
|
235
|
|
|
|
|
433
|
my $nei = $bn[$idx]{to}; |
|
369
|
235
|
|
|
|
|
307
|
my $bond = $bn[$idx]{bond}; |
|
370
|
235
|
|
|
|
|
448
|
$stack->[-1] = [++$idx]; |
|
371
|
|
|
|
|
|
|
|
|
372
|
235
|
100
|
|
|
|
647
|
if ($paint_tab->{$bond}) { |
|
373
|
92
|
50
|
|
|
|
858
|
print "\t$bond already visited\n" if $DEBUG; |
|
374
|
92
|
|
|
|
|
304
|
next; |
|
375
|
|
|
|
|
|
|
} |
|
376
|
143
|
100
|
66
|
|
|
1286
|
if ($a1->map_to and $a2->map_to) { # ring closure bond |
|
377
|
22
|
50
|
|
|
|
420
|
if ($what->test($bond)) { |
|
378
|
22
|
50
|
|
|
|
239
|
print "\tmatched ring bond $what on $bond\n" if $DEBUG; |
|
379
|
22
|
100
|
|
|
|
58
|
if ($to->map_to eq $nei) { |
|
380
|
12
|
50
|
|
|
|
109
|
print "\tring closed $to on $nei\n" if $DEBUG; |
|
381
|
12
|
|
|
|
|
29
|
$paint_tab->{$bond} = 1; |
|
382
|
12
|
|
|
|
|
101
|
$what->map_to($bond); |
|
383
|
12
|
|
|
|
|
27
|
$stack->[-1] = [$idx, $what]; |
|
384
|
12
|
|
|
|
|
29
|
push @$stack, [0]; |
|
385
|
12
|
|
|
|
|
52
|
next; |
|
386
|
|
|
|
|
|
|
} else { |
|
387
|
10
|
50
|
|
|
|
90
|
print "\tring didn't close $to on $nei\n" if $DEBUG; |
|
388
|
|
|
|
|
|
|
} |
|
389
|
|
|
|
|
|
|
} |
|
390
|
|
|
|
|
|
|
} else { |
|
391
|
121
|
100
|
|
|
|
1727
|
if ($paint_tab->{$nei}) { |
|
392
|
7
|
50
|
|
|
|
58
|
print "\t$nei already visited\n" if $DEBUG; |
|
393
|
7
|
|
|
|
|
24
|
next; |
|
394
|
|
|
|
|
|
|
} |
|
395
|
114
|
100
|
|
|
|
1037
|
if ($what->test($bond)) { |
|
396
|
106
|
50
|
|
|
|
1438
|
print "\tmatched bond $what on $bond\n" if $DEBUG; |
|
397
|
106
|
100
|
|
|
|
316
|
if ($to->test($nei)) { |
|
398
|
101
|
50
|
|
|
|
1079
|
print "\tmatched atom $to on $nei\n" if $DEBUG; |
|
399
|
101
|
|
|
|
|
243
|
$paint_tab->{$nei} = 1; |
|
400
|
101
|
|
|
|
|
900
|
$to->map_to($nei); |
|
401
|
101
|
|
|
|
|
662
|
$paint_tab->{$bond} = 1; |
|
402
|
101
|
|
|
|
|
937
|
$what->map_to($bond); |
|
403
|
101
|
|
|
|
|
252
|
$stack->[-1] = [$idx, $what, $to]; |
|
404
|
101
|
|
|
|
|
230
|
push @$stack, [0]; |
|
405
|
101
|
|
|
|
|
441
|
next; |
|
406
|
|
|
|
|
|
|
} else { |
|
407
|
5
|
50
|
|
|
|
55
|
print "\tatom $to didn't match $nei\n" if $DEBUG; |
|
408
|
|
|
|
|
|
|
} |
|
409
|
|
|
|
|
|
|
} else { |
|
410
|
8
|
50
|
|
|
|
158
|
print "\tbond $what didn't match $bond\n" if $DEBUG; |
|
411
|
|
|
|
|
|
|
} |
|
412
|
|
|
|
|
|
|
} |
|
413
|
23
|
|
|
|
|
146
|
next; |
|
414
|
|
|
|
|
|
|
} else { |
|
415
|
0
|
|
|
|
|
0
|
die "shouldn't be here; what=($what)!"; |
|
416
|
|
|
|
|
|
|
} |
|
417
|
|
|
|
|
|
|
} |
|
418
|
103
|
|
|
|
|
157
|
$patt->{backtrack} = $backtrack; |
|
419
|
103
|
|
|
|
|
285
|
$patt->{stack} = $stack; |
|
420
|
103
|
|
|
|
|
212
|
$match; |
|
421
|
|
|
|
|
|
|
} |
|
422
|
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
sub flatten { |
|
424
|
34
|
|
|
34
|
0
|
45
|
my ($patt) = @_; |
|
425
|
34
|
|
|
|
|
50
|
my $visited = {}; |
|
426
|
34
|
|
|
|
|
53
|
my $list = []; |
|
427
|
34
|
|
|
|
|
107
|
for my $atom ($patt->atoms) { |
|
428
|
45
|
100
|
|
|
|
474
|
next if $visited->{$atom}; |
|
429
|
19
|
|
|
|
|
162
|
push @$list, $atom; |
|
430
|
19
|
|
|
|
|
47
|
_flatten($patt, $atom, $visited, $list); |
|
431
|
|
|
|
|
|
|
} |
|
432
|
34
|
50
|
|
|
|
305
|
print "flattened list: (@$list)\n" if $DEBUG; |
|
433
|
34
|
|
|
|
|
103
|
$list; |
|
434
|
|
|
|
|
|
|
} |
|
435
|
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
sub _flatten { |
|
437
|
45
|
|
|
45
|
|
68
|
my ($patt, $atom, $visited, $list) = @_; |
|
438
|
|
|
|
|
|
|
|
|
439
|
45
|
|
|
|
|
108
|
$visited->{$atom} = 1; |
|
440
|
45
|
|
|
|
|
370
|
for my $bn ($atom->bonds_neighbors) { |
|
441
|
60
|
|
|
|
|
770
|
my $nei = $bn->{to}; |
|
442
|
60
|
|
|
|
|
76
|
my $bond = $bn->{bond}; |
|
443
|
60
|
100
|
|
|
|
141
|
next if $visited->{$bond}; |
|
444
|
30
|
|
|
|
|
253
|
$visited->{$bond} = 1; |
|
445
|
30
|
|
|
|
|
202
|
push @$list, $bond; |
|
446
|
30
|
100
|
|
|
|
135
|
unless ($visited->{$nei}) { |
|
447
|
26
|
|
|
|
|
208
|
_flatten($patt, $nei, $visited, $list); |
|
448
|
|
|
|
|
|
|
} |
|
449
|
|
|
|
|
|
|
} |
|
450
|
|
|
|
|
|
|
} |
|
451
|
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
1; |
|
453
|
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
=back |
|
455
|
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
=head1 VERSION |
|
457
|
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
0.27 |
|
459
|
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
461
|
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
L, L, L, |
|
463
|
|
|
|
|
|
|
L, L. |
|
464
|
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
The PerlMol website L |
|
466
|
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
=head1 AUTHOR |
|
468
|
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
Ivan Tubert-Brohman Eitub@cpan.orgE |
|
470
|
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
=head1 COPYRIGHT |
|
472
|
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
Copyright (c) 2009 Ivan Tubert-Brohman. All rights reserved. This program is |
|
474
|
|
|
|
|
|
|
free software; you can redistribute it and/or modify it under the same terms as |
|
475
|
|
|
|
|
|
|
Perl itself. |
|
476
|
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
=cut |
|
478
|
|
|
|
|
|
|
|