line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package Chemistry::Pattern; |
2
|
|
|
|
|
|
|
$VERSION = '0.27'; |
3
|
|
|
|
|
|
|
# $Id: Pattern.pm,v 1.21 2009/05/10 20:56:06 itubert Exp $ |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=head1 NAME |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
Chemistry::Pattern - Chemical substructure pattern matching |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=head1 SYNOPSIS |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
use Chemistry::Pattern; |
12
|
|
|
|
|
|
|
use Chemistry::Mol; |
13
|
|
|
|
|
|
|
use Chemistry::File::SMILES; |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
# Create a pattern and a molecule from SMILES strings |
16
|
|
|
|
|
|
|
my $mol_str = "C1CCCC1C(Cl)=O"; |
17
|
|
|
|
|
|
|
my $patt_str = "C(=O)Cl"; |
18
|
|
|
|
|
|
|
my $mol = Chemistry::Mol->parse($mol_str, format => 'smiles'); |
19
|
|
|
|
|
|
|
my $patt = Chemistry::Pattern->parse($patt_str, format => 'smiles'); |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
# try to match the pattern |
22
|
|
|
|
|
|
|
while ($patt->match($mol)) { |
23
|
|
|
|
|
|
|
@matched_atoms = $patt->atom_map; |
24
|
|
|
|
|
|
|
print "Matched: (@matched_atoms)\n"; |
25
|
|
|
|
|
|
|
# should print something like "Matched: (a6 a8 a7)" |
26
|
|
|
|
|
|
|
} |
27
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
=head1 DESCRIPTION |
29
|
|
|
|
|
|
|
|
30
|
|
|
|
|
|
|
This module implements basic pattern matching for molecules. |
31
|
|
|
|
|
|
|
The Chemistry::Pattern class is a subclass of Chemistry::Mol, so patterns |
32
|
|
|
|
|
|
|
have all the properties of molecules and can come from reading the same |
33
|
|
|
|
|
|
|
file formats. Of course there are certain formats (such as SMARTS) |
34
|
|
|
|
|
|
|
that are exclusively used to describe patterns. |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
To perform a pattern matching operation on a molecule, follow these steps. |
37
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
1) Create a pattern object, either by parsing a file or string, or by adding |
39
|
|
|
|
|
|
|
atoms and bonds by hand by using Chemistry::Mol methods. Note that atoms and |
40
|
|
|
|
|
|
|
bonds in a pattern should be Chemistry::Pattern::Atom and |
41
|
|
|
|
|
|
|
Chemistry::Patern::Bond objects. Let's assume that the pattern object is |
42
|
|
|
|
|
|
|
stored in $patt and that the molecule is $mol. |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
2) Execute the pattern on the molecule by calling $patt->match($mol). |
45
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
3) If $patt->match() returns true, extract the "map" that relates the pattern to |
47
|
|
|
|
|
|
|
the molecule by calling $patt->atom_map or $patt->bond_map. These methods |
48
|
|
|
|
|
|
|
return a list of the atoms or bonds in the molecule that are matched by the |
49
|
|
|
|
|
|
|
corresponding atoms in the pattern. Thus $patt->atom_map(1) would be analogous |
50
|
|
|
|
|
|
|
to the $1 special variable used for regular expresion matching. The difference |
51
|
|
|
|
|
|
|
between Chemistry::Pattern and Perl regular expressions is that atoms and bonds |
52
|
|
|
|
|
|
|
are always captured. |
53
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
4) If more than one match for the molecule is desired, repeat from step (2) |
55
|
|
|
|
|
|
|
until match() returns false. |
56
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
=cut |
58
|
|
|
|
|
|
|
|
59
|
1
|
|
|
1
|
|
217653
|
use 5.006; |
|
1
|
|
|
|
|
5
|
|
|
1
|
|
|
|
|
45
|
|
60
|
1
|
|
|
1
|
|
5
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
34
|
|
61
|
1
|
|
|
1
|
|
5
|
use Carp; |
|
1
|
|
|
|
|
8
|
|
|
1
|
|
|
|
|
75
|
|
62
|
1
|
|
|
1
|
|
5
|
use base qw(Chemistry::Mol); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
96
|
|
63
|
1
|
|
|
1
|
|
800
|
use Chemistry::Pattern::Atom; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
43
|
|
64
|
1
|
|
|
1
|
|
707
|
use Chemistry::Pattern::Bond; |
|
1
|
|
|
|
|
3
|
|
|
1
|
|
|
|
|
4435
|
|
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=head1 METHODS |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
=over 4 |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
=item Chemistry::Pattern->new(name => value, ...) |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
Create a new empty pattern. This is just like the Chemistry::Mol constructor, |
73
|
|
|
|
|
|
|
with one additional option: "options", which expects a hash reference (the |
74
|
|
|
|
|
|
|
options themselves are described under the options() method). |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
=cut |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
sub new { |
79
|
17
|
|
|
17
|
1
|
24347
|
my $class = shift; |
80
|
17
|
|
|
|
|
128
|
my $self = $class->SUPER::new( |
81
|
|
|
|
|
|
|
options => { overlap => 1, permute => 0}, |
82
|
|
|
|
|
|
|
@_ |
83
|
|
|
|
|
|
|
); |
84
|
17
|
|
|
|
|
127
|
$self->reset; |
85
|
17
|
|
|
|
|
66
|
$self; |
86
|
|
|
|
|
|
|
} |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
=item $pattern->options(option => value,...) |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
Available options: |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
=over |
93
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
=item overlap |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
If true, matches may overlap. For example, the CC pattern could match twice |
97
|
|
|
|
|
|
|
on propane if this option is true, but only once if it is false. This option |
98
|
|
|
|
|
|
|
is true by default. |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
=item permute |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
Sometimes there is more than one way of matching the same set of pattern atoms |
103
|
|
|
|
|
|
|
on the same set of molecule atoms. If true, return these "redundant" matches. |
104
|
|
|
|
|
|
|
For example, the CC pattern could match ethane with two different permutations |
105
|
|
|
|
|
|
|
(forwards and backwards). This option is false by default. |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
=back |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
=cut |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
sub options { |
112
|
34
|
|
|
34
|
1
|
33456
|
my $self = shift; |
113
|
34
|
100
|
|
|
|
85
|
if (@_ == 1) { |
114
|
17
|
50
|
|
|
|
20
|
$self->{options} = {%{$self->{options}||{}}, %{$_[0]}}; |
|
17
|
|
|
|
|
271
|
|
|
17
|
|
|
|
|
108
|
|
115
|
|
|
|
|
|
|
} else { |
116
|
17
|
|
|
|
|
22
|
$self->{options} = {%{$self->{options}}, @_}; |
|
17
|
|
|
|
|
172
|
|
117
|
|
|
|
|
|
|
} |
118
|
|
|
|
|
|
|
} |
119
|
|
|
|
|
|
|
|
120
|
45
|
|
|
45
|
1
|
5473
|
sub atom_class { "Chemistry::Pattern::Atom" } |
121
|
|
|
|
|
|
|
|
122
|
30
|
|
|
30
|
1
|
2824
|
sub bond_class { "Chemistry::Pattern::Bond" } |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
our $DEBUG = 0; |
125
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
=item $patt->reset |
127
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
Reset the state of the pattern matching object, so that it begins the next |
129
|
|
|
|
|
|
|
match from scratch instead of where it left off after the last one. |
130
|
|
|
|
|
|
|
|
131
|
|
|
|
|
|
|
=cut |
132
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
sub reset { |
134
|
34
|
|
|
34
|
1
|
72
|
my ($self, $mol, %opts) = @_; |
135
|
34
|
50
|
|
|
|
78
|
print "Resetting to ($mol, $opts{atom})\n" if $DEBUG; |
136
|
34
|
|
|
|
|
86
|
$self->{flat} = $self->flatten; |
137
|
34
|
|
|
|
|
107
|
$self->map_to($mol); |
138
|
34
|
100
|
|
|
|
251
|
$self->{pending_atoms} = [$mol ? $mol->atoms : ()]; |
139
|
34
|
|
|
|
|
330
|
$self->{already_matched} = {}; |
140
|
34
|
|
|
|
|
71
|
$self->{paint_tab} = {}; |
141
|
34
|
|
|
|
|
61
|
$self->{anchor} = ''; |
142
|
34
|
|
|
|
|
102
|
my $atom = $self->next_atom($opts{atom}); |
143
|
|
|
|
|
|
|
#$self->match_local_init($atom) if $atom; |
144
|
|
|
|
|
|
|
} |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
sub already_matched { |
147
|
50
|
|
|
50
|
0
|
100
|
my ($self, @objs) = @_; |
148
|
50
|
|
|
|
|
69
|
my @ids = map {$_->id} @objs; |
|
172
|
|
|
|
|
813
|
|
149
|
50
|
|
|
|
|
373
|
my $unsorted_key = join " ", @ids; |
150
|
50
|
|
|
|
|
94
|
my $key; |
151
|
50
|
100
|
|
|
|
118
|
if ($self->{options}{permute}) { |
152
|
20
|
|
|
|
|
28
|
$key = $unsorted_key; |
153
|
|
|
|
|
|
|
} else { |
154
|
30
|
|
|
|
|
116
|
$key = join " ", sort @ids; |
155
|
|
|
|
|
|
|
} |
156
|
|
|
|
|
|
|
|
157
|
50
|
100
|
|
|
|
123
|
if ($self->{already_matched}{$key}) { |
158
|
12
|
50
|
|
|
|
60
|
print "already matched $unsorted_key\n" if $DEBUG; |
159
|
12
|
|
|
|
|
43
|
return 1; |
160
|
|
|
|
|
|
|
} else { |
161
|
38
|
|
|
|
|
94
|
$self->{already_matched}{$key} = 1; |
162
|
38
|
50
|
|
|
|
74
|
print "first match of $key\n" if $DEBUG; |
163
|
38
|
|
|
|
|
258
|
return 0; |
164
|
|
|
|
|
|
|
} |
165
|
|
|
|
|
|
|
} |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
sub next_atom { |
168
|
89
|
|
|
89
|
0
|
141
|
my ($self, $atom) = @_; |
169
|
|
|
|
|
|
|
|
170
|
89
|
50
|
|
|
|
179
|
print "next_atom\n" if $DEBUG; |
171
|
89
|
100
|
|
|
|
186
|
return if $self->{anchor}; |
172
|
88
|
100
|
|
|
|
141
|
if ($atom) { |
|
87
|
100
|
|
|
|
218
|
|
173
|
1
|
|
|
|
|
7
|
$self->{anchor} = $atom; |
174
|
|
|
|
|
|
|
} elsif (@{$self->{pending_atoms}}) { |
175
|
54
|
|
|
|
|
61
|
$atom = shift @{$self->{pending_atoms}}; |
|
54
|
|
|
|
|
95
|
|
176
|
54
|
50
|
|
|
|
119
|
print "\tatom $atom\n" if $DEBUG; |
177
|
|
|
|
|
|
|
} |
178
|
|
|
|
|
|
|
|
179
|
88
|
|
|
|
|
133
|
$self->{next_atom} = $atom; |
180
|
88
|
|
|
|
|
232
|
$atom; |
181
|
|
|
|
|
|
|
} |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
Chemistry::Obj::accessor "map_to"; |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
=item $pattern->atom_map |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
Returns the list of atoms that matched the last time $pattern->match was called. |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
=cut |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
sub atom_map { |
192
|
88
|
|
|
88
|
1
|
203
|
my $self = shift; |
193
|
88
|
|
|
|
|
240
|
my @atoms = map { $_->map_to } $self->atoms(@_); |
|
210
|
|
|
|
|
1304
|
|
194
|
88
|
100
|
|
|
|
712
|
@atoms == 1 ? $atoms[0] : @atoms; |
195
|
|
|
|
|
|
|
} |
196
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
=item $pattern->bond_map |
198
|
|
|
|
|
|
|
|
199
|
|
|
|
|
|
|
Returns the list of bonds that matched the last time $pattern->match was called. |
200
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
=cut |
202
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
sub bond_map { |
204
|
50
|
|
|
50
|
1
|
62
|
my $self = shift; |
205
|
50
|
|
|
|
|
143
|
my @bonds = map { $_->map_to } $self->bonds(@_); |
|
52
|
|
|
|
|
278
|
|
206
|
50
|
100
|
|
|
|
260
|
@bonds == 1 ? $bonds[0] : @bonds; |
207
|
|
|
|
|
|
|
} |
208
|
|
|
|
|
|
|
|
209
|
|
|
|
|
|
|
=item $pattern->match($mol, %options) |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
Returns true if the pattern matches the molecule. If called again for the same |
212
|
|
|
|
|
|
|
molecule, continues matching where it left off (in a way similar to global |
213
|
|
|
|
|
|
|
regular expressions under scalar context). When there are no matches left, |
214
|
|
|
|
|
|
|
returns false. To force the match to always start from scratch instead of |
215
|
|
|
|
|
|
|
continuing where it left off, the C option may be used. |
216
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
$pattern->match($mol, atom => $atom) |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
If atom => $atom is given as an option, match will only look for matches that |
220
|
|
|
|
|
|
|
start at $atom (which should be an atom in $mol, of course). This is somewhat |
221
|
|
|
|
|
|
|
analog to anchored regular expressions. |
222
|
|
|
|
|
|
|
|
223
|
|
|
|
|
|
|
To find out which atoms and bonds matched, use the atom_map and bond_map |
224
|
|
|
|
|
|
|
methods. |
225
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
=cut |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
sub match { |
229
|
55
|
|
|
55
|
1
|
806
|
my ($self, $mol, %opts) = @_; |
230
|
55
|
50
|
|
|
|
131
|
print "match $self $mol\n" if $DEBUG; |
231
|
55
|
50
|
66
|
|
|
230
|
if (defined($mol) and $self->map_to ne $mol |
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
|
66
|
|
|
|
|
232
|
|
|
|
|
|
|
or $opts{reset} |
233
|
|
|
|
|
|
|
or defined($opts{atom}) and $opts{atom} ne $self->{anchor} |
234
|
|
|
|
|
|
|
){ |
235
|
17
|
|
|
|
|
240
|
$self->reset($mol, %opts); |
236
|
|
|
|
|
|
|
} |
237
|
55
|
|
|
|
|
662
|
my $match = $self->match_next; |
238
|
55
|
100
|
|
|
|
137
|
$self->map_to(undef) unless $match; |
239
|
55
|
50
|
|
|
|
170
|
print "returning match: '$match'\n" if $DEBUG; |
240
|
55
|
|
|
|
|
150
|
$match; |
241
|
|
|
|
|
|
|
} |
242
|
|
|
|
|
|
|
|
243
|
|
|
|
|
|
|
sub match_next { |
244
|
55
|
|
|
55
|
0
|
109
|
my $self = shift; |
245
|
55
|
|
|
|
|
62
|
my $match; |
246
|
55
|
50
|
|
|
|
108
|
print "match_next\n" if $DEBUG; |
247
|
|
|
|
|
|
|
|
248
|
55
|
|
|
|
|
55
|
while (1) { |
249
|
103
|
100
|
|
|
|
462
|
$self->match_local_init($self->{next_atom}) if $self->{next_atom}; |
250
|
103
|
|
|
|
|
231
|
$match = $self->match_local_next; |
251
|
103
|
100
|
|
|
|
184
|
if ($match) { |
252
|
50
|
100
|
|
|
|
115
|
if ($self->already_matched($self->atom_map, $self->bond_map)) { |
253
|
12
|
|
|
|
|
25
|
$match = 0, next; # try again |
254
|
|
|
|
|
|
|
} else { |
255
|
38
|
100
|
|
|
|
107
|
$self->next_atom unless ($self->{options}{overlap}); |
256
|
38
|
|
|
|
|
40
|
last; # matched! |
257
|
|
|
|
|
|
|
} |
258
|
|
|
|
|
|
|
} else { |
259
|
53
|
100
|
|
|
|
103
|
$self->next_atom or last; |
260
|
|
|
|
|
|
|
} |
261
|
|
|
|
|
|
|
} |
262
|
55
|
|
|
|
|
120
|
$match; |
263
|
|
|
|
|
|
|
} |
264
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
sub match_local_init { |
267
|
55
|
|
|
55
|
0
|
333
|
my ($patt, $atom) = @_; |
268
|
55
|
|
|
|
|
141
|
my $mol = $patt->map_to; |
269
|
|
|
|
|
|
|
|
270
|
55
|
50
|
|
|
|
323
|
print "match_local_init(",$patt->atoms(1),", $atom)\n" if $DEBUG; |
271
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
# clean up |
273
|
55
|
|
|
|
|
153
|
$_->map_to(undef) for ($patt->atoms, $patt->bonds); |
274
|
55
|
100
|
66
|
|
|
364
|
$patt->{paint_tab} = {} if ($mol and $patt->{options}{overlap}); |
275
|
55
|
|
|
|
|
579
|
$patt->{stack} = [[0]]; |
276
|
55
|
|
|
|
|
224
|
$patt->{backtrack} = 0; |
277
|
55
|
|
|
|
|
74
|
$patt->{next_atom} = 0; |
278
|
55
|
|
|
|
|
133
|
$patt->{current_atom} = $atom; |
279
|
|
|
|
|
|
|
} |
280
|
|
|
|
|
|
|
|
281
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
sub match_local_next { |
283
|
103
|
|
|
103
|
0
|
135
|
my ($patt) = @_; |
284
|
103
|
|
|
|
|
145
|
my $stack = $patt->{stack}; |
285
|
103
|
|
|
|
|
135
|
my $paint_tab = $patt->{paint_tab}; |
286
|
103
|
|
|
|
|
212
|
my $match = 0; |
287
|
103
|
|
|
|
|
122
|
my $backtrack = $patt->{backtrack}; |
288
|
103
|
|
|
|
|
120
|
my $where = $patt->{current_atom}; |
289
|
103
|
|
|
|
|
124
|
my $flat = $patt->{flat}; |
290
|
103
|
|
|
|
|
240
|
my $mol = $patt->map_to; |
291
|
|
|
|
|
|
|
|
292
|
103
|
50
|
|
|
|
543
|
print "match_local_next($where)\n" if $DEBUG; |
293
|
|
|
|
|
|
|
|
294
|
103
|
|
|
|
|
116
|
while (1) { |
295
|
698
|
|
|
|
|
754
|
my ($idx) = @{$stack->[-1]}; |
|
698
|
|
|
|
|
1127
|
|
296
|
698
|
|
|
|
|
920
|
my $pos = @$stack - 1; |
297
|
698
|
|
|
|
|
908
|
my $what = $flat->[$pos]; |
298
|
698
|
50
|
|
|
|
1199
|
if($DEBUG) { print "[@$_] " for @$stack; print "\n" } |
|
0
|
|
|
|
|
0
|
|
|
0
|
|
|
|
|
0
|
|
299
|
|
|
|
|
|
|
|
300
|
698
|
100
|
|
|
|
4086
|
if ($backtrack) { |
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
301
|
183
|
50
|
|
|
|
320
|
print "\tbacktracking...\n" if $DEBUG; |
302
|
183
|
100
|
|
|
|
479
|
if (@$stack <= 2) { |
303
|
48
|
50
|
|
|
|
88
|
print "\tnowhere to backtrack\n" if $DEBUG; |
304
|
48
|
|
|
|
|
66
|
last; |
305
|
|
|
|
|
|
|
} |
306
|
135
|
|
|
|
|
201
|
my $to; |
307
|
135
|
|
|
|
|
181
|
pop @$stack; |
308
|
135
|
|
|
|
|
205
|
(undef, $what, $to) = @{$stack->[-1]}; |
|
135
|
|
|
|
|
301
|
|
309
|
135
|
|
|
|
|
184
|
$backtrack = 0; |
310
|
135
|
100
|
|
|
|
343
|
if ($what) { |
311
|
111
|
50
|
|
|
|
591
|
print "\tcleaning $what\n" if $DEBUG; |
312
|
111
|
|
|
|
|
294
|
$paint_tab->{$what->map_to} = 0; |
313
|
111
|
|
|
|
|
951
|
$what->map_to(undef); |
314
|
|
|
|
|
|
|
} |
315
|
135
|
100
|
|
|
|
456
|
if ($to) { |
316
|
123
|
50
|
|
|
|
701
|
print "\tcleaning $to\n" if $DEBUG; |
317
|
123
|
|
|
|
|
311
|
$paint_tab->{$to->map_to} = 0; |
318
|
123
|
|
|
|
|
1309
|
$to->map_to(undef); |
319
|
|
|
|
|
|
|
} |
320
|
135
|
|
|
|
|
690
|
next; |
321
|
|
|
|
|
|
|
} elsif ($pos >= @$flat) { # nothing else to match |
322
|
50
|
50
|
|
|
|
98
|
print "\tmatched!\n" if $DEBUG; |
323
|
50
|
|
|
|
|
52
|
$backtrack = 1; |
324
|
50
|
|
|
|
|
47
|
$match = 1; |
325
|
50
|
|
|
|
|
69
|
last; |
326
|
|
|
|
|
|
|
} elsif (@$stack == 1) { # match first atom (anchored) |
327
|
55
|
100
|
100
|
|
|
132
|
if (!$paint_tab->{$where} and $what->test($where)) { |
328
|
50
|
50
|
|
|
|
774
|
print "\tmatched initial atom $what on $where\n" if $DEBUG; |
329
|
50
|
|
|
|
|
134
|
$paint_tab->{$where} = 1; |
330
|
50
|
|
|
|
|
427
|
$what->map_to($where); |
331
|
50
|
|
|
|
|
298
|
push @$stack, [0]; |
332
|
50
|
|
|
|
|
116
|
next; |
333
|
|
|
|
|
|
|
} else { |
334
|
5
|
|
|
|
|
55
|
last; |
335
|
|
|
|
|
|
|
} |
336
|
|
|
|
|
|
|
} elsif ($what->isa("Chemistry::Pattern::Atom")) { |
337
|
|
|
|
|
|
|
# match unanchored atom |
338
|
50
|
100
|
|
|
|
143
|
if($idx >= $mol->atoms) { |
339
|
10
|
50
|
|
|
|
76
|
print "\tno more atoms at $mol\n" if $DEBUG; |
340
|
10
|
|
|
|
|
18
|
$backtrack = 1, next; |
341
|
|
|
|
|
|
|
} |
342
|
40
|
|
|
|
|
337
|
my $next = $mol->atoms($idx+1); |
343
|
40
|
|
|
|
|
394
|
$stack->[-1] = [++$idx]; |
344
|
40
|
100
|
|
|
|
121
|
if ($paint_tab->{$next}) { |
345
|
16
|
50
|
|
|
|
143
|
print "\t$next already visited\n" if $DEBUG; |
346
|
16
|
|
|
|
|
30
|
next; |
347
|
|
|
|
|
|
|
} |
348
|
24
|
50
|
|
|
|
346
|
if ($what->test($next)) { |
349
|
24
|
50
|
|
|
|
268
|
print "\tmatched unanchored atom $what on $next\n" if $DEBUG; |
350
|
24
|
|
|
|
|
64
|
$paint_tab->{$next} = 1; |
351
|
24
|
|
|
|
|
203
|
$what->map_to($next); |
352
|
24
|
|
|
|
|
149
|
$stack->[-1] = [$idx, undef, $what]; |
353
|
24
|
|
|
|
|
59
|
push @$stack, [0]; |
354
|
|
|
|
|
|
|
} else { |
355
|
0
|
0
|
|
|
|
0
|
print "\tunanchored atom $what didn't match $next\n" if $DEBUG; |
356
|
|
|
|
|
|
|
} |
357
|
24
|
|
|
|
|
52
|
next; |
358
|
|
|
|
|
|
|
} elsif ($what->isa("Chemistry::Pattern::Bond")) { # match bond |
359
|
360
|
|
|
|
|
984
|
my ($a1, $a2) = $what->atoms; |
360
|
360
|
50
|
|
|
|
2793
|
my ($from, $to) = $a1->map_to ? ($a1, $a2) : ($a2, $a1); |
361
|
360
|
|
33
|
|
|
3642
|
$where = $a1->map_to || $a2->map_to; |
362
|
360
|
50
|
|
|
|
3945
|
print "\tfrom $from to $to\n" if $DEBUG; |
363
|
360
|
|
|
|
|
928
|
my @bn = $where->bonds_neighbors; |
364
|
360
|
100
|
|
|
|
5902
|
if ($idx >= @bn) { |
365
|
125
|
50
|
|
|
|
227
|
print "\tno more bonds at $where\n" if $DEBUG; |
366
|
125
|
|
|
|
|
452
|
$backtrack = 1, next; |
367
|
|
|
|
|
|
|
} |
368
|
235
|
|
|
|
|
433
|
my $nei = $bn[$idx]{to}; |
369
|
235
|
|
|
|
|
307
|
my $bond = $bn[$idx]{bond}; |
370
|
235
|
|
|
|
|
448
|
$stack->[-1] = [++$idx]; |
371
|
|
|
|
|
|
|
|
372
|
235
|
100
|
|
|
|
647
|
if ($paint_tab->{$bond}) { |
373
|
92
|
50
|
|
|
|
858
|
print "\t$bond already visited\n" if $DEBUG; |
374
|
92
|
|
|
|
|
304
|
next; |
375
|
|
|
|
|
|
|
} |
376
|
143
|
100
|
66
|
|
|
1286
|
if ($a1->map_to and $a2->map_to) { # ring closure bond |
377
|
22
|
50
|
|
|
|
420
|
if ($what->test($bond)) { |
378
|
22
|
50
|
|
|
|
239
|
print "\tmatched ring bond $what on $bond\n" if $DEBUG; |
379
|
22
|
100
|
|
|
|
58
|
if ($to->map_to eq $nei) { |
380
|
12
|
50
|
|
|
|
109
|
print "\tring closed $to on $nei\n" if $DEBUG; |
381
|
12
|
|
|
|
|
29
|
$paint_tab->{$bond} = 1; |
382
|
12
|
|
|
|
|
101
|
$what->map_to($bond); |
383
|
12
|
|
|
|
|
27
|
$stack->[-1] = [$idx, $what]; |
384
|
12
|
|
|
|
|
29
|
push @$stack, [0]; |
385
|
12
|
|
|
|
|
52
|
next; |
386
|
|
|
|
|
|
|
} else { |
387
|
10
|
50
|
|
|
|
90
|
print "\tring didn't close $to on $nei\n" if $DEBUG; |
388
|
|
|
|
|
|
|
} |
389
|
|
|
|
|
|
|
} |
390
|
|
|
|
|
|
|
} else { |
391
|
121
|
100
|
|
|
|
1727
|
if ($paint_tab->{$nei}) { |
392
|
7
|
50
|
|
|
|
58
|
print "\t$nei already visited\n" if $DEBUG; |
393
|
7
|
|
|
|
|
24
|
next; |
394
|
|
|
|
|
|
|
} |
395
|
114
|
100
|
|
|
|
1037
|
if ($what->test($bond)) { |
396
|
106
|
50
|
|
|
|
1438
|
print "\tmatched bond $what on $bond\n" if $DEBUG; |
397
|
106
|
100
|
|
|
|
316
|
if ($to->test($nei)) { |
398
|
101
|
50
|
|
|
|
1079
|
print "\tmatched atom $to on $nei\n" if $DEBUG; |
399
|
101
|
|
|
|
|
243
|
$paint_tab->{$nei} = 1; |
400
|
101
|
|
|
|
|
900
|
$to->map_to($nei); |
401
|
101
|
|
|
|
|
662
|
$paint_tab->{$bond} = 1; |
402
|
101
|
|
|
|
|
937
|
$what->map_to($bond); |
403
|
101
|
|
|
|
|
252
|
$stack->[-1] = [$idx, $what, $to]; |
404
|
101
|
|
|
|
|
230
|
push @$stack, [0]; |
405
|
101
|
|
|
|
|
441
|
next; |
406
|
|
|
|
|
|
|
} else { |
407
|
5
|
50
|
|
|
|
55
|
print "\tatom $to didn't match $nei\n" if $DEBUG; |
408
|
|
|
|
|
|
|
} |
409
|
|
|
|
|
|
|
} else { |
410
|
8
|
50
|
|
|
|
158
|
print "\tbond $what didn't match $bond\n" if $DEBUG; |
411
|
|
|
|
|
|
|
} |
412
|
|
|
|
|
|
|
} |
413
|
23
|
|
|
|
|
146
|
next; |
414
|
|
|
|
|
|
|
} else { |
415
|
0
|
|
|
|
|
0
|
die "shouldn't be here; what=($what)!"; |
416
|
|
|
|
|
|
|
} |
417
|
|
|
|
|
|
|
} |
418
|
103
|
|
|
|
|
157
|
$patt->{backtrack} = $backtrack; |
419
|
103
|
|
|
|
|
285
|
$patt->{stack} = $stack; |
420
|
103
|
|
|
|
|
212
|
$match; |
421
|
|
|
|
|
|
|
} |
422
|
|
|
|
|
|
|
|
423
|
|
|
|
|
|
|
sub flatten { |
424
|
34
|
|
|
34
|
0
|
45
|
my ($patt) = @_; |
425
|
34
|
|
|
|
|
50
|
my $visited = {}; |
426
|
34
|
|
|
|
|
53
|
my $list = []; |
427
|
34
|
|
|
|
|
107
|
for my $atom ($patt->atoms) { |
428
|
45
|
100
|
|
|
|
474
|
next if $visited->{$atom}; |
429
|
19
|
|
|
|
|
162
|
push @$list, $atom; |
430
|
19
|
|
|
|
|
47
|
_flatten($patt, $atom, $visited, $list); |
431
|
|
|
|
|
|
|
} |
432
|
34
|
50
|
|
|
|
305
|
print "flattened list: (@$list)\n" if $DEBUG; |
433
|
34
|
|
|
|
|
103
|
$list; |
434
|
|
|
|
|
|
|
} |
435
|
|
|
|
|
|
|
|
436
|
|
|
|
|
|
|
sub _flatten { |
437
|
45
|
|
|
45
|
|
68
|
my ($patt, $atom, $visited, $list) = @_; |
438
|
|
|
|
|
|
|
|
439
|
45
|
|
|
|
|
108
|
$visited->{$atom} = 1; |
440
|
45
|
|
|
|
|
370
|
for my $bn ($atom->bonds_neighbors) { |
441
|
60
|
|
|
|
|
770
|
my $nei = $bn->{to}; |
442
|
60
|
|
|
|
|
76
|
my $bond = $bn->{bond}; |
443
|
60
|
100
|
|
|
|
141
|
next if $visited->{$bond}; |
444
|
30
|
|
|
|
|
253
|
$visited->{$bond} = 1; |
445
|
30
|
|
|
|
|
202
|
push @$list, $bond; |
446
|
30
|
100
|
|
|
|
135
|
unless ($visited->{$nei}) { |
447
|
26
|
|
|
|
|
208
|
_flatten($patt, $nei, $visited, $list); |
448
|
|
|
|
|
|
|
} |
449
|
|
|
|
|
|
|
} |
450
|
|
|
|
|
|
|
} |
451
|
|
|
|
|
|
|
|
452
|
|
|
|
|
|
|
1; |
453
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
=back |
455
|
|
|
|
|
|
|
|
456
|
|
|
|
|
|
|
=head1 VERSION |
457
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
0.27 |
459
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
=head1 SEE ALSO |
461
|
|
|
|
|
|
|
|
462
|
|
|
|
|
|
|
L, L, L, |
463
|
|
|
|
|
|
|
L, L. |
464
|
|
|
|
|
|
|
|
465
|
|
|
|
|
|
|
The PerlMol website L |
466
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
=head1 AUTHOR |
468
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
Ivan Tubert-Brohman Eitub@cpan.orgE |
470
|
|
|
|
|
|
|
|
471
|
|
|
|
|
|
|
=head1 COPYRIGHT |
472
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
Copyright (c) 2009 Ivan Tubert-Brohman. All rights reserved. This program is |
474
|
|
|
|
|
|
|
free software; you can redistribute it and/or modify it under the same terms as |
475
|
|
|
|
|
|
|
Perl itself. |
476
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
=cut |
478
|
|
|
|
|
|
|
|