File Coverage

blib/lib/Math/Symbolic/Custom/Pattern.pm
Criterion Covered Total %
statement 185 213 86.8
branch 76 98 77.5
condition 9 15 60.0
subroutine 25 27 92.5
pod 3 3 100.0
total 298 356 83.7


line stmt bran cond sub pod time code
1             package Math::Symbolic::Custom::Pattern;
2              
3 1     1   1788 use 5.006001;
  1         4  
  1         36  
4 1     1   5 use strict;
  1         2  
  1         34  
5 1     1   22 use warnings;
  1         3  
  1         31  
6 1     1   5 no warnings 'recursion';
  1         1  
  1         51  
7 1     1   5 use Carp qw/cluck confess/;
  1         1  
  1         68  
8              
9 1     1   822 use Clone qw/clone/;
  1         3343  
  1         85  
10 1     1   12 use Math::Symbolic qw/:all/;
  1         2  
  1         316  
11 1     1   862 use Math::Symbolic::Custom::Pattern::Export;
  1         3  
  1         11  
12              
13             our $VERSION = '2.01';
14              
15 1     1   78 use constant EPSILON => 1e-29;
  1         2  
  1         73  
16              
17 1     1   5 use constant TYPE => 0;
  1         2  
  1         53  
18 1     1   6 use constant VAL => 1;
  1         1  
  1         45  
19 1     1   7 use constant OPS => 2;
  1         2  
  1         54  
20 1     1   6 use constant ATTR => 3;
  1         2  
  1         46  
21              
22 1     1   5 use constant PATTERN => -1;
  1         2  
  1         50  
23              
24 1     1   6 use constant ANY_TREE => 0;
  1         2  
  1         63  
25 1     1   7 use constant ANY_CONST => 1;
  1         2  
  1         53  
26 1     1   29 use constant ANY_VAR => 2;
  1         3  
  1         56  
27 1     1   6 use constant NAMED_TREE => 3;
  1         2  
  1         61  
28 1     1   5 use constant NAMED_CONST => 4;
  1         2  
  1         56  
29 1     1   4 use constant NAMED_VAR => 5;
  1         3  
  1         46  
30              
31 1     1   5 use constant ATTR_COMMUTATIVE => 1;
  1         1  
  1         2566  
32              
33             =encoding utf8
34              
35             =head1 NAME
36              
37             Math::Symbolic::Custom::Pattern - Pattern matching on Math::Symbolic trees
38              
39             =head1 SYNOPSIS
40              
41             use Math::Symbolic qw/parse_from_string/;
42             use Math::Symbolic::Custom::Pattern;
43             my $patternstring = "VAR_foo + sin(CONST * VAR_foo)"
44             my $pattern = Math::Symbolic::Custom::Pattern->new( $patternstring );
45            
46             my $formula = parse_from_string("a + sin(5 * a)");
47            
48             if ($pattern->match($formula)) {
49             print "The pattern matches the formula.\n";
50             }
51             else {
52             print "The pattern does not match the formula.\n";
53             }
54              
55             # will print "The pattern matches the formula" since "a" is
56             # found to be "VAR_foo" and 5 is a constant.
57             # "a + sin(5 * b)" would not match since VAR_foo is already "a"
58             # when the "b" is encountered. "VAR" would match any variable.
59             # "TREE" matches any tree. "TREE_name" and "CONST_name" work as
60             # you would expect.
61            
62             # Alternatively:
63             my $pattern = $some_formula->to_pattern();
64            
65             print "yes\n" if $formula->is_of_form($pattern); # fast-ish
66             # This has syntactic sugar, too:
67             print "yes\n" if $formula->is_of_form("VAR + TREE"); # slow!
68             print "yes\n" if $formula->is_of_form($another_formula); # semi-slow...
69            
70             # Finally, when creating a pattern, one can specify that addition and
71             # product should match commutatively:
72             my $pattern = Math::Symbolic::Custom::Pattern->new(
73             parse_from_string("a + b"), commutation => 1,
74             );
75             my $formula = parse_from_string("b + a");
76             # does match even though "a+b" <=> "b+a" aren't the same
77             # internal tree representations
78             print "yes\n" if $pattern->match($formula);
79              
80             =head1 DESCRIPTION
81              
82             This module is an extension to the Math::Symbolic module. A basic
83             familiarity with that module is required.
84              
85             The Math::Symbolic::Custom::Pattern module implements pattern matching routines
86             on Math::Symbolic trees. The patterns itself are constructed from Math::Symbolic
87             trees with just a few variables which have a special meaning.
88              
89             The module provides two interfaces. You can use the C and C
90             methods this class provides, or you can use the C and
91             C methods on any Math::Symbolic tree. (Exported by the
92             Math::Symbolic::Custom::Pattern::Export module. Refer to that module for
93             details on C.)
94              
95             You can construct a pattern from any Math::Symbolic tree. For sake of
96             simplicity, we will talk about a tree "a+(b*c)" even if that's just its string
97             representation. The tree is really what is returned by
98             Cparse_from_string("a+(b*c)")>.
99              
100             Suppose you call
101              
102             my $pattern = Math::Symbolic::Custom::Pattern->new("a+(b*c)");
103              
104             That creates a pattern that matches this exact tree. Calling
105              
106             my $boolean = $pattern->match($tree);
107              
108             on any Math::Symbolic tree C<$tree> will result in C<$boolean> being false
109             except if it is C<"a+(b*c)">.
110              
111             So far so good. This isn't impressive and the C method of
112             all Math::Symbolic trees does the same. (Except that the pattern matching is
113             about twice as fast.)
114              
115             If you create a pattern from the following string, however, you get different
116             behaviour: C<"VAR + (VAR*VAR)">. Now, any variable may be in place of C,
117             C, and C. (C<"a + (x*x)">, C, ...)
118              
119             You can match with named (but not literal) variables with the following
120             pattern string: C<"VAR_first + (VAR_first*VAR_second)"> This matches
121             the tree C<"a + (a*b)">, but not C<"a + (c*b)"> since the first variable
122             in the parenthesis of the second tree is not the same as the one outside the
123             parenthesis. Note that the variable C<"b"> in both examples could have been
124             any variable, since C occurrs only once in the pattern.
125              
126             Analogous to the general C and named C pattern elements, you may
127             use C to match any subtree whatsoever or C to match a named
128             tree. Example: The pattern C<"TREE_a + 5*TREE_a"> matches the tree
129             C<"sin(b+c) + 5*sin(b+c)">, but not C<"sin(b+c) + 5*cos(b+c)">. Beware of the
130             fact that the trees C<"sin(b+c)"> and C<"sin(c+b)"> would not be the same
131             either. Though mathematically equivalent, they do not have the same internal
132             representation. Canonicalizing the internal representation is simple in this
133             example, but is impossible in the general case, so just take care.
134              
135             Finally, what works with variables and general trees also works with constants.
136             You may specify the pattern C<"CONST_foo * a + atan(CONST_foo)">. This matches
137             C<"0.5*a + atan(0.5)">, but does not match C<"2*a + atan(0.5)"> since the
138             named constants are not equal. The general form C works as a wildcard
139             for any constants.
140              
141             =head2 EXPORT
142              
143             This module does not export anything.
144              
145             =head2 METHODS
146              
147             This is a list of public methods.
148              
149             =over 2
150              
151             =cut
152              
153              
154             =item new
155              
156             C is the constructor for Math::Symbolic::Custom::Pattern objects.
157             It takes a Math::Symbolic tree as first argument which will be transformed
158             into a pattern. See the C method documentation.
159              
160             After the Math::Symbolic tree, a list of key/value pairs can be passed in
161             as options for the pattern construction.
162              
163             The only currently supported option is C indicating whether or
164             not the pattern should match sums and products commutatively. Please note
165             that this does not match recursively and does not recognize associativity:
166             The commutative pattern of C<(a + b) + c> matches the
167             expression C<(b + a) + c> and C, but B C!
168             This means that if the tree to match is built from a string such as
169             C, then it is not defined whether C<(a + b) + c> matches
170             that expression. It does so if the internal tree representation
171             happens to be C<(a + b) + c> and it doesn't if it happens to be
172             C. This may be fixed at a later point.
173              
174             =cut
175              
176             sub new {
177 132     132 1 36207 my $proto = shift;
178 132   33     659 my $class = ref($proto)||$proto;
179              
180             # I want to call that 'proto', too ;)
181 132         203 $proto = shift;
182 132 50       650 confess(
183             __PACKAGE__."new() requires a Math::Symbolic tree as first "
184             ."argument."
185             ) if not ref($proto) =~ /^Math::Symbolic/;
186              
187 132         263 my %opt = @_;
188              
189 132         733 my $info = {
190             vars => {},
191             constants => {},
192             trees => {},
193             commutation => $opt{commutation},
194             };
195            
196 132         401 my $pattern = _descend_build($proto, $info);
197              
198             #_descend_generalize($pattern, $info);
199              
200 132         595 my $self = {
201             pattern => $pattern,
202             info => $info,
203             string => $proto->to_string(),
204             };
205              
206 132         8101 return bless $self => $class;
207             }
208              
209              
210             sub _descend_build {
211 496     496   735 my ($proto, $info) = @_;
212            
213 496         774 my $tree = [];
214 496         1589 my $tt = $proto->term_type();
215              
216 496 50       2262 if ($tt == T_CONSTANT) {
    100          
217 0         0 $tree->[TYPE] = T_CONSTANT;
218 0         0 $tree->[VAL] = $proto->value();
219             }
220             elsif ($tt == T_OPERATOR) {
221 186         381 $tree->[TYPE] = T_OPERATOR;
222 186         563 $tree->[VAL] = $proto->type();
223 364         780 $tree->[OPS] = [
224 186         478 map { _descend_build($_, $info) }
225 186         1156 @{$proto->{operands}}
226             ];
227 186         330 $tree->[ATTR] = 0;
228 186 100 66     702 $tree->[ATTR] |= ATTR_COMMUTATIVE
229             if $info->{commutation} and $Math::Symbolic::Operator::Op_Types[$tree->[VAL]]{commutative};
230             # todo: ATTR_CONSTANT?
231             }
232             else { # variable
233 310         820 my $name = $proto->name();
234              
235 310         2098 $tree->[TYPE] = PATTERN;
236 310 100       1726 if ($name eq 'TREE') {
    100          
    100          
    100          
    100          
    100          
237 38         85 $tree->[VAL] = ANY_TREE;
238             }
239             elsif ($name eq 'CONST') {
240 20         43 $tree->[VAL] = ANY_CONST;
241             }
242             elsif ($name eq 'VAR') {
243 18         32 $tree->[VAL] = ANY_VAR;
244             }
245             elsif ($name =~ /^TREE_(\w+)$/) {
246 70         118 $tree->[VAL] = NAMED_TREE;
247 70         261 my @names = split /_/, $1;
248 70         139 $tree->[OPS] = \@names;
249 70         349 $info->{trees}{$_}++ for @names;
250             }
251             elsif ($name =~ /^CONST_(\w+)$/) {
252 10         15 $tree->[VAL] = NAMED_CONST;
253 10         38 my @names = split /_/, $1;
254 10         16 $tree->[OPS] = \@names;
255 10         48 $info->{constants}{$_}++ for @names;
256             }
257             elsif ($name =~ /^VAR_(\w+)$/) {
258 66         103 $tree->[VAL] = NAMED_VAR;
259 66         298 my @names = split /_/, $1;
260 66         116 $tree->[OPS] = \@names;
261 66         359 $info->{vars}{$_}++ for @names;
262             }
263             else {
264 88         149 $tree->[TYPE] = T_VARIABLE;
265 88         194 $tree->[VAL] = $name;
266             }
267             }
268              
269 496         1381 return $tree;
270             }
271              
272              
273             =item match
274              
275             This method takes a Math::Symbolic tree as first argument. It throws a
276             fatal error if this is not the case.
277              
278             It returns a true value if the pattern matches the tree and a false value
279             if the pattern does not match. Please have a look at the L
280             to find out what I means in this context.
281              
282             As a matter of fact, if you need to know what subtrees were matched by the
283             various C, C, and C identifiers, you can find
284             out by inspecting the return value of a successful match. It will be a
285             reference to a hash containing three key/value pairs with the keys
286             C, C, and C. Each of these will again point to a hash.
287             These hashes contain the names of the matched subtrees. For example, if your
288             pattern is C and it matches C, then
289             the return value will be:
290              
291             {
292             constants => {},
293             trees => {},
294             vars => {
295             'x' => 'foo*bar',
296             }
297             }
298              
299             Except that C will actually be the corresponding Math::Symbolic tree
300             and not a string. Please note that the subtrees are real subtrees. Modifying
301             them will result in a modified original tree as well.
302              
303             =cut
304              
305              
306             sub match {
307 132     132 1 29725 my $self = shift;
308              
309 132         201 my $tree = shift;
310 132 50       589 confess(
311             __PACKAGE__."match() requires a Math::Symbolic tree as first "
312             ."argument."
313             ) if not ref($tree) =~ /^Math::Symbolic/;
314              
315 132         249 my $info = $self->{info};
316 10         32 my $info_copy = {
317 132         532 constants => { map {($_,undef)} keys %{$info->{constants}} },
  60         159  
318 132         362 vars => { map {($_,undef)} keys %{$info->{vars}} },
  54         208  
319 132         212 trees => { map {($_,undef)} keys %{$info->{trees}} },
  132         527  
320             };
321            
322 132         442 my $okay = _descend_match($self->{pattern}, $tree, $info_copy);
323 132 100       776 return $info_copy if $okay;
324 52         218 return undef;
325             }
326              
327             sub _descend_match {
328 438     438   854 my ($pat, $tree, $info) = @_;
329            
330 438         594 my $ptype = $pat->[TYPE];
331 438         1233 my $ttype = $tree->term_type();
332              
333 438 50       2224 if ($ptype == T_CONSTANT) {
    100          
    100          
    50          
334 0 0       0 return undef if $ttype != T_CONSTANT;
335 0 0       0 return 1 if abs($tree->value()-$pat->[VAL]) < EPSILON;
336 0         0 return undef;
337             }
338             elsif ($ptype == T_VARIABLE) {
339 74 100       213 return undef if $ttype != T_VARIABLE;
340 60 100       165 return 1 if $tree->name() eq $pat->[VAL];
341 12         123 return undef;
342             }
343             elsif ($ptype == T_OPERATOR) {
344 170 100       433 return undef if $ttype != T_OPERATOR;
345 168         549 my $optype = $tree->type();
346 168 100       1739 return undef if $optype != $pat->[VAL];
347            
348 164         214 my @operands = @{$pat->[OPS]};
  164         504  
349 164         213 my @tree_ops = @{$tree->{operands}};
  164         459  
350              
351 164 50       393 return undef if @operands != @tree_ops;
352            
353 164 100 66     612 if (($pat->[ATTR] & ATTR_COMMUTATIVE) && @operands > 1) {
354 20 50       43 if (@operands == 2) { # use hard coded permutation
355 20   66     62 my $ok = _descend_match($operands[0], $tree_ops[0], $info) && _descend_match($operands[1], $tree_ops[1], $info);
356 20 100       109 if (!$ok) {
357 12   66     32 $ok = _descend_match($operands[0], $tree_ops[1], $info) && _descend_match($operands[1], $tree_ops[0], $info);
358             }
359 20 100       115 return undef unless $ok;
360             }
361             else {
362 0         0 _permute {
363 0         0 my $ok;
364 0         0 for (@_) {
365 0         0 $ok = _descend_match($_->[0], $_->[1], $info);
366 0 0       0 last if not $ok;
367             }
368             $ok
369 0         0 } map {[$operands[$_], $tree_ops[$_]]} 0..$#operands;
  0         0  
370             }
371             }
372             else { # no commutation
373 144         367 foreach my $oper_no (0..$#operands) {
374 256         646 my $ok = _descend_match($operands[$oper_no], $tree_ops[$oper_no], $info);
375 256 100       920 return undef unless $ok;
376             }
377             }
378 128         370 return 1;
379             }
380             elsif ($ptype == PATTERN) {
381 194         273 my $match = $pat->[VAL];
382 194 100       718 if ($match == ANY_TREE) {
    100          
    100          
    100          
    100          
    50          
383 34         85 return 1;
384             }
385             elsif ($match == ANY_CONST) {
386 16         59 my $ttype = $tree->term_type();
387 16 100       79 return $ttype == T_CONSTANT ? 1 : undef;
388             }
389             elsif ($match == ANY_VAR) {
390 10         28 my $ttype = $tree->term_type();
391 10 100       51 return $ttype == T_VARIABLE ? 1 : undef;
392             }
393             elsif ($match == NAMED_TREE) {
394 66         88 my @names = @{$pat->[OPS]};
  66         182  
395 66         119 my $itrees = $info->{trees};
396 66         108 foreach my $name (@names) {
397 78 50       198 die "tree name '$name' should exist, but does not. "
398             ."Internal error."
399             if not exists $itrees->{$name};
400            
401 78         136 my $itree = $itrees->{$name};
402 78 100       133 if (defined $itree) {
403 34         236 my $ok = $itree->is_identical($tree);
404 34 100       4637 return 1 if $ok;
405             }
406             else {
407 44         71 $itrees->{$name} = $tree;
408 44         133 return 1;
409             }
410             }
411 6         19 return undef;
412             }
413             elsif ($match == NAMED_CONST) {
414 10 100       24 return undef unless $ttype == T_CONSTANT;
415            
416 6         8 my @names = @{$pat->[OPS]};
  6         15  
417 6         13 my $iconsts = $info->{constants};
418 6         10 foreach my $name (@names) {
419 6 50       16 die "constant name '$name' should exist, but does not. "
420             ."Internal error."
421             if not exists $iconsts->{$name};
422            
423 6         10 my $iconst = $iconsts->{$name};
424 6 50       11 if (defined $iconst) {
425 0         0 my $ok = $iconst == $tree->value();
426 0 0       0 return 1 if $ok;
427             }
428             else {
429 6         20 $iconsts->{$name} = $tree->value();
430 6         46 return 1;
431             }
432             }
433 0         0 return undef;
434             }
435             elsif ($match == NAMED_VAR) {
436 58 100       120 return undef unless $ttype == T_VARIABLE;
437            
438 52         52 my @names = @{$pat->[OPS]};
  52         138  
439 52         80 my $ivars = $info->{vars};
440 52         77 foreach my $name (@names) {
441 54 50       126 die "variable name '$name' should exist, but does not. "
442             ."Internal error."
443             if not exists $ivars->{$name};
444            
445 54         78 my $ivar = $ivars->{$name};
446 54 100       92 if (defined $ivar) {
447 16         46 my $ok = $ivar eq $tree->name();
448 16 100       137 return 1 if $ok;
449             }
450             else {
451 38         109 $ivars->{$name} = $tree->name();
452 38         274 return 1;
453             }
454             }
455 4         13 return undef;
456             }
457             else {
458 0           die "Internal error: Invalid pattern type '$match'";
459             }
460            
461             }
462             else {
463 0           die "Invalid pattern type with number $ptype.";
464             }
465             }
466              
467             # Fischer-Krause ordered permutation generator adapted from perlfaq4
468             sub _permute (&@) {
469 0     0     my $code = shift;
470 0           my @idx = 0..$#_;
471 0           while ( not $code->(@_[@idx]) ) {
472 0           my $p = $#idx;
473 0           --$p while $idx[$p-1] > $idx[$p];
474 0 0         my $q = $p or return;
475 0           push @idx, reverse splice @idx, $p;
476 0           ++$q while $idx[$p-1] > $idx[$q];
477 0           @idx[$p-1, $q] = @idx[$q, $p-1];
478             }
479             }
480              
481             =begin comment
482              
483             If completed, this could remove all placeholders that exist only once
484             and replace them with the more general match.
485             But I'll skip this since we might be able to combine patterns later.
486              
487             sub _descend_generalize {
488             my ($pattern, $info) = @_;
489            
490             my $type = $pattern->[TYPE];
491             return if $type != PATTERN;
492              
493             my $ptype = $pattern->[VAL];
494              
495             if ($ptype == NAMED_TREE) {
496             my @names = $pattern->[OPS];
497             my $no_one = grep { $info->{trees}{$_} == 1 } @names;
498             if ($no_one == @names) {
499             # all of them exist only once
500            
501             }
502            
503             }
504             elsif ($ptype == NAMED_CONST) {
505             }
506            
507            
508             }
509              
510             =end comment
511              
512             =cut
513              
514             =item to_string
515              
516             Returns a string representation of the pattern.
517              
518             =cut
519              
520             sub to_string {
521 0     0 1   my $self = shift;
522 0           return $self->{string};
523             }
524              
525             1;
526             __END__