|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
  
 
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Regexp::Assemple.pm  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Copyright (c) 2004-2011 David Landgren  | 
| 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # All rights reserved  | 
| 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package Regexp::Assemble;  | 
| 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
8
 | 
11
 | 
 
 | 
 
 | 
  
11
  
 | 
 
 | 
338167
 | 
 use strict;  | 
| 
 
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
    | 
| 
 
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
359
 | 
    | 
| 
9
 | 
11
 | 
 
 | 
 
 | 
  
11
  
 | 
 
 | 
50
 | 
 use warnings;  | 
| 
 
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
    | 
| 
 
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
386
 | 
    | 
| 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
11
 | 
11
 | 
 
 | 
 
 | 
  
11
  
 | 
 
 | 
48
 | 
 use constant DEBUG_ADD  => 1;  | 
| 
 
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
    | 
| 
 
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
905
 | 
    | 
| 
12
 | 
11
 | 
 
 | 
 
 | 
  
11
  
 | 
 
 | 
53
 | 
 use constant DEBUG_TAIL => 2;  | 
| 
 
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
    | 
| 
 
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
580
 | 
    | 
| 
13
 | 
11
 | 
 
 | 
 
 | 
  
11
  
 | 
 
 | 
54
 | 
 use constant DEBUG_LEX  => 4;  | 
| 
 
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
    | 
| 
 
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
600
 | 
    | 
| 
14
 | 
11
 | 
 
 | 
 
 | 
  
11
  
 | 
 
 | 
62
 | 
 use constant DEBUG_TIME => 8;  | 
| 
 
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
39
 | 
    | 
| 
 
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
663
 | 
    | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
16
 | 
11
 | 
 
 | 
 
 | 
  
11
  
 | 
 
 | 
57
 | 
 use vars qw/$have_Storable $Current_Lexer $Default_Lexer $Single_Char $Always_Fail/;  | 
| 
 
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
    | 
| 
 
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
52151
 | 
    | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # The following patterns were generated with examples/naive.  | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Default_Lexer = qr/(?![[(\\]).(?:[*+?]\??|\{\d+(?:,\d*)?\}\??)?|\\(?:[bABCEGLQUXZ]|[lu].|(?:[^\w]|[aefnrtdDwWsS]|c.|0\d{2}|x(?:[\da-fA-F]{2}|{[\da-fA-F]{4}})|N\{\w+\}|[Pp](?:\{\w+\}|.))(?:[*+?]\??|\{\d+(?:,\d*)?\}\??)?)|\[.*?(?
 | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Single_Char   = qr/^(?:\\(?:[aefnrtdDwWsS]|c.|[^\w\/{|}-]|0\d{2}|x(?:[\da-fA-F]{2}|{[\da-fA-F]{4}}))|[^\$^])$/;  | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # The pattern to return when nothing has been added (and thus not match anything)  | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $Always_Fail = "^\\b\0";  | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $VERSION = '0.36';  | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # ------------------------------------------------  | 
| 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub new {  | 
| 
33
 | 
2197
 | 
 
 | 
 
 | 
  
2197
  
 | 
  
1
  
 | 
1878970
 | 
     my $class = shift;  | 
| 
34
 | 
2197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5102
 | 
     my %args  = @_;  | 
| 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
36
 | 
2197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2196
 | 
     my $anc;  | 
| 
37
 | 
2197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3799
 | 
     for $anc (qw(word line string)) {  | 
| 
38
 | 
6591
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
15952
 | 
         if (exists $args{"anchor_$anc"}) {  | 
| 
39
 | 
135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
267
 | 
             my $val = delete $args{"anchor_$anc"};  | 
| 
40
 | 
135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
397
 | 
             for my $anchor ("anchor_${anc}_begin", "anchor_${anc}_end") {  | 
| 
41
 | 
270
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
928
 | 
                 $args{$anchor} = $val unless exists $args{$anchor};  | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
43
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # anchor_string_absolute sets anchor_string_begin and anchor_string_end_absolute  | 
| 
47
 | 
2197
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
5147
 | 
     if (exists $args{anchor_string_absolute}) {  | 
| 
48
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
         my $val = delete $args{anchor_string_absolute};  | 
| 
49
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
         for my $anchor (qw(anchor_string_begin anchor_string_end_absolute)) {  | 
| 
50
 | 
6
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
16
 | 
             $args{$anchor} = $val unless exists $args{$anchor};  | 
| 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
52
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
54
 | 
2197
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
49118
 | 
     exists $args{$_} or $args{$_} = 0 for qw(  | 
| 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         anchor_word_begin  | 
| 
56
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         anchor_word_end  | 
| 
57
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         anchor_line_begin  | 
| 
58
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         anchor_line_end  | 
| 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         anchor_string_begin  | 
| 
60
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         anchor_string_end  | 
| 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         anchor_string_end_absolute  | 
| 
62
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         debug  | 
| 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         dup_warn  | 
| 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         indent  | 
| 
65
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         lookahead  | 
| 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         mutable  | 
| 
67
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         track  | 
| 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         unroll_plus  | 
| 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     );  | 
| 
70
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
71
 | 
2197
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
14315
 | 
     exists $args{$_} or $args{$_} = 1 for qw(  | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         fold_meta_pairs  | 
| 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         reduce  | 
| 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         chomp  | 
| 
75
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     );  | 
| 
76
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
77
 | 
2197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6180
 | 
     @args{qw(re str path)} = (undef, undef, []);  | 
| 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
79
 | 
2197
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
13933
 | 
     $args{flags} ||= delete $args{modifiers} || '';  | 
| 
 
 | 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
80
 | 
2197
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
4428
 | 
     $args{lex}     = $Current_Lexer if defined $Current_Lexer;  | 
| 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
82
 | 
2197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3852
 | 
     my $self = bless \%args, $class;  | 
| 
83
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
84
 | 
2197
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
5412
 | 
     if ($self->_debug(DEBUG_TIME)) {  | 
| 
85
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
         $self->_init_time_func();  | 
| 
86
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
         $self->{_begin_time} = $self->{_time_func}->();  | 
| 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
88
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $self->{input_record_separator} = delete $self->{rs}  | 
| 
89
 | 
2197
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
4659
 | 
         if exists $self->{rs};  | 
| 
90
 | 
2197
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
3947
 | 
     exists $self->{file} and $self->add_file($self->{file});  | 
| 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
92
 | 
2196
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5466
 | 
     return $self;  | 
| 
93
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
95
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _init_time_func {  | 
| 
96
 | 
9
 | 
 
 | 
 
 | 
  
9
  
 | 
 
 | 
20
 | 
     my $self = shift;  | 
| 
97
 | 
9
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
22
 | 
     return if exists $self->{_time_func};  | 
| 
98
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
99
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # attempt to improve accuracy  | 
| 
100
 | 
6
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
16
 | 
     if (!defined($self->{_use_time_hires})) {  | 
| 
101
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
         eval {require Time::HiRes};  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1223
 | 
    | 
| 
102
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2751
 | 
         $self->{_use_time_hires} = $@;  | 
| 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $self->{_time_func} = length($self->{_use_time_hires}) > 0  | 
| 
105
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
 
 | 
10
 | 
         ? sub { time }  | 
| 
106
 | 
6
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
41
 | 
         : \&Time::HiRes::time  | 
| 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ;  | 
| 
108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub clone {  | 
| 
111
 | 
55
 | 
 
 | 
 
 | 
  
55
  
 | 
  
1
  
 | 
518
 | 
     my $self = shift;  | 
| 
112
 | 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
70
 | 
     my $clone;  | 
| 
113
 | 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
354
 | 
     my @attr = grep {$_ ne 'path'} keys %$self;  | 
| 
 
 | 
1314
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1927
 | 
    | 
| 
114
 | 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
145
 | 
     @{$clone}{@attr} = @{$self}{@attr};  | 
| 
 
 | 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
640
 | 
    | 
| 
 
 | 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
159
 | 
    | 
| 
115
 | 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
177
 | 
     $clone->{path}   = _path_clone($self->_path);  | 
| 
116
 | 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
600
 | 
     bless $clone, ref($self);  | 
| 
117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _fastlex {  | 
| 
120
 | 
884
 | 
 
 | 
 
 | 
  
884
  
 | 
 
 | 
821
 | 
     my $self   = shift;  | 
| 
121
 | 
884
 | 
 
 | 
 
 | 
 
 | 
 
 | 
776
 | 
     my $record = shift;  | 
| 
122
 | 
884
 | 
 
 | 
 
 | 
 
 | 
 
 | 
700
 | 
     my $len    = 0;  | 
| 
123
 | 
884
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1022
 | 
     my @path   = ();  | 
| 
124
 | 
884
 | 
 
 | 
 
 | 
 
 | 
 
 | 
806
 | 
     my $case   = '';  | 
| 
125
 | 
884
 | 
 
 | 
 
 | 
 
 | 
 
 | 
671
 | 
     my $qm     = '';  | 
| 
126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
127
 | 
884
 | 
 
 | 
 
 | 
 
 | 
 
 | 
941
 | 
     my $debug       = $self->{debug} & DEBUG_LEX;  | 
| 
128
 | 
884
 | 
 
 | 
 
 | 
 
 | 
 
 | 
788
 | 
     my $unroll_plus = $self->{unroll_plus};  | 
| 
129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
130
 | 
884
 | 
 
 | 
 
 | 
 
 | 
 
 | 
737
 | 
     my $token;  | 
| 
131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $qualifier;  | 
| 
132
 | 
884
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
3118
 | 
     $debug and print "# _lex <$record>\n";  | 
| 
133
 | 
884
 | 
 
 | 
 
 | 
 
 | 
 
 | 
880
 | 
     my $modifier        = q{(?:[*+?]\\??|\\{(?:\\d+(?:,\d*)?|,\d+)\\}\\??)?};  | 
| 
134
 | 
884
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2410
 | 
     my $class_matcher   = qr/\[(?:\[:[a-z]+:\]|\\?.)*?\]/;  | 
| 
135
 | 
884
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3362
 | 
     my $paren_matcher   = qr/\(.*?(?
 | 
| 
136
 | 
884
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2553
 | 
     my $misc_matcher    = qr/(?:(c)(.)|(0)(\d{2}))($modifier)/;  | 
| 
137
 | 
884
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2373
 | 
     my $regular_matcher = qr/([^\\[(])($modifier)/;  | 
| 
138
 | 
884
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1432
 | 
     my $qm_matcher      = qr/(\\?.)/;  | 
| 
139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
140
 | 
884
 | 
 
 | 
 
 | 
 
 | 
 
 | 
805
 | 
     my $matcher = $regular_matcher;  | 
| 
141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
142
 | 
884
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
675
 | 
         if ($record =~ /\G$matcher/gc) {  | 
| 
 
 | 
5960
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
24296
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # neither a \\ nor [ nor ( followed by a modifer  | 
| 
144
 | 
3344
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
9624
 | 
             if ($1 eq '\\E') {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
145
 | 
12
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
118
 | 
                 $debug and print "#   E\n";  | 
| 
146
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
                 $case = $qm = '';  | 
| 
147
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
                 $matcher = $regular_matcher;  | 
| 
148
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
                 redo;  | 
| 
149
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             elsif ($qm and ($1 eq '\\L' or $1 eq '\\U')) {  | 
| 
151
 | 
5
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
13
 | 
                 $debug and print "#  ignore \\L, \\U\n";  | 
| 
152
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
                 redo;  | 
| 
153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
154
 | 
3327
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3049
 | 
             $token = $1;  | 
| 
155
 | 
3327
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
4401
 | 
             $qualifier = defined $2 ? $2 : '';  | 
| 
156
 | 
3327
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
7522
 | 
             $debug and print "#  token <$token> <$qualifier>\n";  | 
| 
157
 | 
3327
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
3555
 | 
             if ($qm) {  | 
| 
158
 | 
90
 | 
 
 | 
 
 | 
 
 | 
 
 | 
95
 | 
                 $token = quotemeta($token);  | 
| 
159
 | 
90
 | 
 
 | 
 
 | 
 
 | 
 
 | 
134
 | 
                 $token =~ s/^\\([^\w$()*+.?@\[\\\]^|{}\/])$/$1/;  | 
| 
160
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
162
 | 
3237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3770
 | 
                 $token =~ s{\A([][{}*+?@\\/])\Z}{\\$1};  | 
| 
163
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
164
 | 
3327
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
5559
 | 
             if ($unroll_plus and $qualifier =~ s/\A\+(\?)?\Z/*/) {  | 
| 
165
 | 
22
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
56
 | 
                 $1 and $qualifier .= $1;  | 
| 
166
 | 
22
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
885
 | 
                 $debug and print " unroll <$token><$token><$qualifier>\n";  | 
| 
167
 | 
22
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
53
 | 
                 $case and $token = $case eq 'L' ? lc($token) : uc($token);  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
168
 | 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
86
 | 
                 push @path, $token, "$token$qualifier";  | 
| 
169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
171
 | 
3305
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
5888
 | 
                 $debug and print " clean <$token>\n";  | 
| 
172
 | 
3305
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
6703
 | 
                 push @path,  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
173
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                       $case eq 'L' ? lc($token).$qualifier  | 
| 
174
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     : $case eq 'U' ? uc($token).$qualifier  | 
| 
175
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     :                   $token.$qualifier  | 
| 
176
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     ;  | 
| 
177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
178
 | 
3327
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2892
 | 
             redo;  | 
| 
179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif ($record =~ /\G\\/gc) {  | 
| 
182
 | 
1680
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
3447
 | 
             $debug and print "#  backslash\n";  | 
| 
183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # backslash  | 
| 
184
 | 
1680
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
13206
 | 
             if ($record =~ /\G([sdwSDW])($modifier)/gc) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
185
 | 
443
 | 
 
 | 
 
 | 
 
 | 
 
 | 
616
 | 
                 ($token, $qualifier) = ($1, $2);  | 
| 
186
 | 
443
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
871
 | 
                 $debug and print "#   meta <$token> <$qualifier>\n";  | 
| 
187
 | 
443
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
1094
 | 
                 push @path, ($unroll_plus and $qualifier =~ s/\A\+(\?)?\Z/*/)  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
188
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     ? ("\\$token", "\\$token$qualifier" . (defined $1 ? $1 : ''))  | 
| 
189
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     : "\\$token$qualifier";  | 
| 
190
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             elsif ($record =~ /\Gx([\da-fA-F]{2})($modifier)/gc) {  | 
| 
192
 | 
9
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
294
 | 
                 $debug and print "#   x $1\n";  | 
| 
193
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
37
 | 
                 $token = quotemeta(chr(hex($1)));  | 
| 
194
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
                 $qualifier = $2;  | 
| 
195
 | 
9
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
284
 | 
                 $debug and print "#  cooked <$token>\n";  | 
| 
196
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
31
 | 
                 $token =~ s/^\\([^\w$()*+.?\[\\\]^|{\/])$/$1/; # } balance  | 
| 
197
 | 
9
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
307
 | 
                 $debug and print "#   giving <$token>\n";  | 
| 
198
 | 
9
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
67
 | 
                 push @path, ($unroll_plus and $qualifier =~ s/\A\+(\?)?\Z/*/)  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
199
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     ? ($token, "$token$qualifier" . (defined $1 ? $1 : ''))  | 
| 
200
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     : "$token$qualifier";  | 
| 
201
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             elsif ($record =~ /\GQ/gc) {  | 
| 
203
 | 
26
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
232
 | 
                 $debug and print "#   Q\n";  | 
| 
204
 | 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
30
 | 
                 $qm = 1;  | 
| 
205
 | 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
31
 | 
                 $matcher = $qm_matcher;  | 
| 
206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             elsif ($record =~ /\G([LU])/gc) {  | 
| 
208
 | 
15
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
413
 | 
                 $debug and print "#   case $1\n";  | 
| 
209
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
33
 | 
                 $case = $1;  | 
| 
210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             elsif ($record =~ /\GE/gc) {  | 
| 
212
 | 
6
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
17
 | 
                 $debug and print "#   E\n";  | 
| 
213
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
                 $case = $qm = '';  | 
| 
214
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
                 $matcher = $regular_matcher;  | 
| 
215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             elsif ($record =~ /\G([lu])(.)/gc) {  | 
| 
217
 | 
6
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
18
 | 
                 $debug and print "#   case $1 to <$2>\n";  | 
| 
218
 | 
6
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
35
 | 
                 push @path, $1 eq 'l' ? lc($2) : uc($2);  | 
| 
219
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
220
 | 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
40
 | 
             elsif (my @arg = grep {defined} $record =~ /\G$misc_matcher/gc) {  | 
| 
221
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
12
 | 
                 if ($] < 5.007) {  | 
| 
222
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     my $len = 0;  | 
| 
223
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     $len += length($_) for @arg;  | 
| 
224
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     $debug and print "#  pos ", pos($record), " fixup add $len\n";  | 
| 
225
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     pos($record) = pos($record) + $len;  | 
| 
226
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
227
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
                 my $directive = shift @arg;  | 
| 
228
 | 
6
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
30
 | 
                 if ($directive eq 'c') {  | 
| 
229
 | 
3
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
10
 | 
                     $debug and print "#  ctrl <@arg>\n";  | 
| 
230
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
                     push @path, "\\c" . uc(shift @arg);  | 
| 
231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
232
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 else { # elsif ($directive eq '0') {  | 
| 
233
 | 
3
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
10
 | 
                     $debug and print "#  octal <@arg>\n";  | 
| 
234
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
                     my $ascii = oct(shift @arg);  | 
| 
235
 | 
3
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
11
 | 
                     push @path, ($ascii < 32)  | 
| 
236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         ? "\\c" . chr($ascii+64)  | 
| 
237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         : chr($ascii)  | 
| 
238
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     ;  | 
| 
239
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
240
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
                 $path[-1] .= join( '', @arg ); # if @arg;  | 
| 
241
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
                 redo;  | 
| 
242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
243
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             elsif ($record =~ /\G(.)/gc) {  | 
| 
244
 | 
1165
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1712
 | 
                 $token = $1;  | 
| 
245
 | 
1165
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2273
 | 
                 $token =~ s{[AZabefnrtz\[\]{}()\\\$*+.?@|/^]}{\\$token};  | 
| 
246
 | 
1165
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1698
 | 
                 $debug and print "#   meta <$token>\n";  | 
| 
247
 | 
1165
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1741
 | 
                 push @path, $token;  | 
| 
248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
250
 | 
4
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
19
 | 
                 $debug and print "#   ignore char at ", pos($record), " of <$record>\n";  | 
| 
251
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
252
 | 
1674
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2203
 | 
             redo;  | 
| 
253
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
254
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif ($record =~ /\G($class_matcher)($modifier)/gc) {  | 
| 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # [class] followed by a modifer  | 
| 
257
 | 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
65
 | 
             my $class     = $1;  | 
| 
258
 | 
39
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
82
 | 
             my $qualifier = defined $2 ? $2 : '';  | 
| 
259
 | 
39
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
375
 | 
             $debug and print "#  class begin <$class> <$qualifier>\n";  | 
| 
260
 | 
39
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
115
 | 
             if ($class =~ /\A\[\\?(.)]\Z/) {  | 
| 
261
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
25
 | 
                 $class = quotemeta $1;  | 
| 
262
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
23
 | 
                 $class =~ s{\A\\([!@%])\Z}{$1};  | 
| 
263
 | 
12
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
143
 | 
                 $debug and print "#  class unwrap $class\n";  | 
| 
264
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
265
 | 
39
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
353
 | 
             $debug and print "#  class end <$class> <$qualifier>\n";  | 
| 
266
 | 
39
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
178
 | 
             push @path, ($unroll_plus and $qualifier =~ s/\A\+(\?)?\Z/*/)  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
267
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ? ($class, "$class$qualifier" . (defined $1 ? $1 : ''))  | 
| 
268
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 : "$class$qualifier";  | 
| 
269
 | 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
58
 | 
             redo;  | 
| 
270
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
271
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
272
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif ($record =~ /\G($paren_matcher)/gc) {  | 
| 
273
 | 
13
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
35
 | 
             $debug and print "#  paren <$1>\n";  | 
| 
274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # (paren) followed by a modifer  | 
| 
275
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
34
 | 
             push @path, $1;  | 
| 
276
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
             redo;  | 
| 
277
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
278
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
279
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
280
 | 
884
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3622
 | 
     return \@path;  | 
| 
281
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _lex {  | 
| 
284
 | 
211
 | 
 
 | 
 
 | 
  
211
  
 | 
 
 | 
359
 | 
     my $self   = shift;  | 
| 
285
 | 
211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
220
 | 
     my $record = shift;  | 
| 
286
 | 
211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
205
 | 
     my $len    = 0;  | 
| 
287
 | 
211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
282
 | 
     my @path   = ();  | 
| 
288
 | 
211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
214
 | 
     my $case   = '';  | 
| 
289
 | 
211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
201
 | 
     my $qm     = '';  | 
| 
290
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $re     = defined $self->{lex} ? $self->{lex}  | 
| 
291
 | 
211
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
556
 | 
         : defined $Current_Lexer ? $Current_Lexer  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
292
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         : $Default_Lexer;  | 
| 
293
 | 
211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
258
 | 
     my $debug  = $self->{debug} & DEBUG_LEX;  | 
| 
294
 | 
211
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2018
 | 
     $debug and print "# _lex <$record>\n";  | 
| 
295
 | 
211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
196
 | 
     my ($token, $next_token, $diff, $token_len);  | 
| 
296
 | 
211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3693
 | 
     while( $record =~ /($re)/g ) {  | 
| 
297
 | 
480
 | 
 
 | 
 
 | 
 
 | 
 
 | 
939
 | 
         $token = $1;  | 
| 
298
 | 
480
 | 
 
 | 
 
 | 
 
 | 
 
 | 
646
 | 
         $token_len = length($token);  | 
| 
299
 | 
480
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
8723
 | 
         $debug and print "# lexed <$token> len=$token_len\n";  | 
| 
300
 | 
480
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1066
 | 
         if( pos($record) - $len > $token_len ) {  | 
| 
301
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
             $next_token = $token;  | 
| 
302
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
32
 | 
             $token = substr( $record, $len, $diff = pos($record) - $len - $token_len );  | 
| 
303
 | 
15
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
814
 | 
             $debug and print "#  recover <", substr( $record, $len, $diff ), "> as <$token>, save <$next_token>\n";  | 
| 
304
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
23
 | 
             $len += $diff;  | 
| 
305
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
306
 | 
480
 | 
 
 | 
 
 | 
 
 | 
 
 | 
450
 | 
         $len += $token_len;  | 
| 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         TOKEN: {  | 
| 
308
 | 
480
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
406
 | 
             if( substr( $token, 0, 1 ) eq '\\' ) {  | 
| 
 
 | 
495
 | 
 
 | 
 
 | 
 
 | 
 
 | 
980
 | 
    | 
| 
309
 | 
226
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1132
 | 
                 if( $token =~ /^\\([ELQU])$/ ) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
310
 | 
51
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
195
 | 
                     if( $1 eq 'E' ) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
311
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         $qm and $re = defined $self->{lex} ? $self->{lex}  | 
| 
312
 | 
12
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
47
 | 
                             : defined $Current_Lexer ? $Current_Lexer  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
313
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             : $Default_Lexer;  | 
| 
314
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
27
 | 
                         $case = $qm = '';  | 
| 
315
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
316
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     elsif( $1 eq 'Q' ) {  | 
| 
317
 | 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
                         $qm = $1;  | 
| 
318
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         # switch to a more precise lexer to quotemeta individual characters  | 
| 
319
 | 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
71
 | 
                         $re = qr/\\?./;  | 
| 
320
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
321
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     else {  | 
| 
322
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
101
 | 
                         $case = $1;  | 
| 
323
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
324
 | 
51
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1687
 | 
                     $debug and print "#  state change qm=<$qm> case=<$case>\n";  | 
| 
325
 | 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
716
 | 
                     goto NEXT_TOKEN;  | 
| 
326
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
327
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 elsif( $token =~ /^\\([lu])(.)$/ ) {  | 
| 
328
 | 
3
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
100
 | 
                     $debug and print "#  apply case=<$1> to <$2>\n";  | 
| 
329
 | 
3
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
19
 | 
                     push @path, $1 eq 'l' ? lc($2) : uc($2);  | 
| 
330
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
44
 | 
                     goto NEXT_TOKEN;  | 
| 
331
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
332
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 elsif( $token =~ /^\\x([\da-fA-F]{2})$/ ) {  | 
| 
333
 | 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
113
 | 
                     $token = quotemeta(chr(hex($1)));  | 
| 
334
 | 
41
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
229
 | 
                     $debug and print "#  cooked <$token>\n";  | 
| 
335
 | 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
84
 | 
                     $token =~ s/^\\([^\w$()*+.?@\[\\\]^|{\/])$/$1/; # } balance  | 
| 
336
 | 
41
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
165
 | 
                     $debug and print "#   giving <$token>\n";  | 
| 
337
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
338
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 else {  | 
| 
339
 | 
131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
229
 | 
                     $token =~ s/^\\([^\w$()*+.?@\[\\\]^|{\/])$/$1/; # } balance  | 
| 
340
 | 
131
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
662
 | 
                     $debug and print "#  backslashed <$token>\n";  | 
| 
341
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
342
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
343
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
344
 | 
269
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
480
 | 
                 $case and $token = $case eq 'U' ? uc($token) : lc($token);  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
345
 | 
269
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
434
 | 
                 $qm   and $token = quotemeta($token);  | 
| 
346
 | 
269
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
466
 | 
                 $token = '\\/' if $token eq '/';  | 
| 
347
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
348
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # undo quotemeta's brute-force escapades  | 
| 
349
 | 
441
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
736
 | 
             $qm and $token =~ s/^\\([^\w$()*+.?@\[\\\]^|{}\/])$/$1/;  | 
| 
350
 | 
441
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
7576
 | 
             $debug and print "#   <$token> case=<$case> qm=<$qm>\n";  | 
| 
351
 | 
441
 | 
 
 | 
 
 | 
 
 | 
 
 | 
801
 | 
             push @path, $token;  | 
| 
352
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
353
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             NEXT_TOKEN:  | 
| 
354
 | 
495
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
4271
 | 
             if( defined $next_token ) {  | 
| 
355
 | 
15
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
937
 | 
                 $debug and print "#   redo <$next_token>\n";  | 
| 
356
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
27
 | 
                 $token = $next_token;  | 
| 
357
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
                 $next_token = undef;  | 
| 
358
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
                 redo TOKEN;  | 
| 
359
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
360
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
361
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
362
 | 
211
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
378
 | 
     if( $len < length($record) ) {  | 
| 
363
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # NB: the remainder only arises in the case of degenerate lexer,  | 
| 
364
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # and if \Q is operative, the lexer will have been switched to  | 
| 
365
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # /\\?./, which means there can never be a remainder, so we  | 
| 
366
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # don't have to bother about quotemeta. In other words:  | 
| 
367
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # $qm will never be true in this block.  | 
| 
368
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
         my $remain = substr($record,$len);  | 
| 
369
 | 
7
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
22
 | 
         $case and $remain = $case eq 'U' ? uc($remain) : lc($remain);  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
370
 | 
7
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
630
 | 
         $debug and print "#   add remaining <$remain> case=<$case> qm=<$qm>\n";  | 
| 
371
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
26
 | 
         push @path, $remain;  | 
| 
372
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
373
 | 
211
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2056
 | 
     $debug and print "# _lex out <@path>\n";  | 
| 
374
 | 
211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1540
 | 
     return \@path;  | 
| 
375
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
376
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
377
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub add {  | 
| 
378
 | 
1069
 | 
 
 | 
 
 | 
  
1069
  
 | 
  
1
  
 | 
13649
 | 
     my $self = shift;  | 
| 
379
 | 
1069
 | 
 
 | 
 
 | 
 
 | 
 
 | 
928
 | 
     my $record;  | 
| 
380
 | 
1069
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1391
 | 
     my $debug  = $self->{debug} & DEBUG_LEX;  | 
| 
381
 | 
1069
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2449
 | 
     while( defined( $record = shift @_ )) {  | 
| 
382
 | 
2279
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
4411
 | 
         CORE::chomp($record) if $self->{chomp};  | 
| 
383
 | 
2279
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
4812
 | 
         next if $self->{pre_filter} and not $self->{pre_filter}->($record);  | 
| 
384
 | 
2278
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
6925
 | 
         $debug and print "# add <$record>\n";  | 
| 
385
 | 
2278
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2692
 | 
         $self->{stats_raw} += length $record;  | 
| 
386
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my $list = $record =~ /[+*?(\\\[{]/ # }]) restore equilibrium  | 
| 
387
 | 
2278
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
14642
 | 
             ? $self->{lex} ? $self->_lex($record) : $self->_fastlex($record)  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
388
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             : [split //, $record]  | 
| 
389
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ;  | 
| 
390
 | 
2278
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
5794
 | 
         next if $self->{filter} and not $self->{filter}->(@$list);  | 
| 
391
 | 
2277
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4091
 | 
         $self->_insertr( $list );  | 
| 
392
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
393
 | 
1069
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2305
 | 
     return $self;  | 
| 
394
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
395
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
396
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub add_file {  | 
| 
397
 | 
13
 | 
 
 | 
 
 | 
  
13
  
 | 
  
1
  
 | 
19
 | 
     my $self = shift;  | 
| 
398
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
     my $rs;  | 
| 
399
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my @file;  | 
| 
400
 | 
13
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
37
 | 
     if (ref($_[0]) eq 'HASH') {  | 
| 
401
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
         my $arg = shift;  | 
| 
402
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $rs = $arg->{rs}  | 
| 
403
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             || $arg->{input_record_separator}  | 
| 
404
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             || $self->{input_record_separator}  | 
| 
405
 | 
6
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
40
 | 
             || $/;  | 
| 
406
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         @file = ref($arg->{file}) eq 'ARRAY'  | 
| 
407
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
             ? @{$arg->{file}}  | 
| 
408
 | 
6
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
21
 | 
             : $arg->{file};  | 
| 
409
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
410
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else {  | 
| 
411
 | 
7
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
29
 | 
         $rs   = $self->{input_record_separator} || $/;  | 
| 
412
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
         @file = @_;  | 
| 
413
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
414
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
52
 | 
     local $/ = $rs;  | 
| 
415
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
     my $file;  | 
| 
416
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
24
 | 
     for $file (@file) {  | 
| 
417
 | 
15
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1531
 | 
         open my $fh, '<', $file or do {  | 
| 
418
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
             require Carp;  | 
| 
419
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
247
 | 
             Carp::croak("cannot open $file for input: $!");  | 
| 
420
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         };  | 
| 
421
 | 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
23739
 | 
         while (defined (my $rec = <$fh>)) {  | 
| 
422
 | 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
132
 | 
             $self->add($rec);  | 
| 
423
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
424
 | 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
171
 | 
         close $fh;  | 
| 
425
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
426
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
74
 | 
     return $self;  | 
| 
427
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
428
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
429
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub insert {  | 
| 
430
 | 
3398
 | 
 
 | 
 
 | 
  
3398
  
 | 
  
1
  
 | 
12413
 | 
     my $self = shift;  | 
| 
431
 | 
3398
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
7647
 | 
     return if $self->{filter} and not $self->{filter}->(@_);  | 
| 
432
 | 
3397
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21858
 | 
     $self->_insertr( [@_] );  | 
| 
433
 | 
3397
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7273
 | 
     return $self;  | 
| 
434
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
435
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
436
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _insertr {  | 
| 
437
 | 
7084
 | 
 
 | 
 
 | 
  
7084
  
 | 
 
 | 
6984
 | 
     my $self   = shift;  | 
| 
438
 | 
7084
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
20848
 | 
     my $dup    = $self->{stats_dup} || 0;  | 
| 
439
 | 
7084
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11252
 | 
     $self->{path} = $self->_insert_path( $self->_path, $self->_debug(DEBUG_ADD), $_[0] );  | 
| 
440
 | 
7084
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
18935
 | 
     if( not defined $self->{stats_dup} or $dup == $self->{stats_dup} ) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
441
 | 
7073
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8313
 | 
         ++$self->{stats_add};  | 
| 
442
 | 
7073
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
5888
 | 
         $self->{stats_cooked} += defined($_) ? length($_) : 0 for @{$_[0]};  | 
| 
 
 | 
7073
 | 
 
 | 
 
 | 
 
 | 
 
 | 
37982
 | 
    | 
| 
443
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
444
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     elsif( $self->{dup_warn} ) {  | 
| 
445
 | 
4
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
10
 | 
         if( ref $self->{dup_warn} eq 'CODE' ) {  | 
| 
446
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
             $self->{dup_warn}->($self, $_[0]);  | 
| 
447
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
448
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
449
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
             my $pattern = join( '', @{$_[0]} );  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
    | 
| 
450
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
             require Carp;  | 
| 
451
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
36
 | 
             Carp::carp("duplicate pattern added: /$pattern/");  | 
| 
452
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
453
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
454
 | 
7084
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18152
 | 
     $self->{str} = $self->{re} = undef;  | 
| 
455
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
456
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
457
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub lexstr {  | 
| 
458
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
  
1
  
 | 
13
 | 
     return shift->_lex(shift);  | 
| 
459
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
460
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
461
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub pre_filter {  | 
| 
462
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
  
1
  
 | 
623
 | 
     my $self   = shift;  | 
| 
463
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
     my $pre_filter = shift;  | 
| 
464
 | 
3
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
19
 | 
     if( defined $pre_filter and ref($pre_filter) ne 'CODE' ) {  | 
| 
465
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
         require Carp;  | 
| 
466
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
69
 | 
         Carp::croak("pre_filter method not passed a coderef");  | 
| 
467
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
468
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
     $self->{pre_filter} = $pre_filter;  | 
| 
469
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
     return $self;  | 
| 
470
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
471
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
472
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
473
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub filter {  | 
| 
474
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
1
  
 | 
307
 | 
     my $self   = shift;  | 
| 
475
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
     my $filter = shift;  | 
| 
476
 | 
4
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
22
 | 
     if( defined $filter and ref($filter) ne 'CODE' ) {  | 
| 
477
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
         require Carp;  | 
| 
478
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
166
 | 
         Carp::croak("filter method not passed a coderef");  | 
| 
479
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
480
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
     $self->{filter} = $filter;  | 
| 
481
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
     return $self;  | 
| 
482
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
483
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
484
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub as_string {  | 
| 
485
 | 
800
 | 
 
 | 
 
 | 
  
800
  
 | 
  
1
  
 | 
1388
 | 
     my $self = shift;  | 
| 
486
 | 
800
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1513
 | 
     if( not defined $self->{str} ) {  | 
| 
487
 | 
798
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1356
 | 
         if( $self->{track} ) {  | 
| 
488
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
             $self->{m}      = undef;  | 
| 
489
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
23
 | 
             $self->{mcount} = 0;  | 
| 
490
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
23
 | 
             $self->{mlist}  = [];  | 
| 
491
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
27
 | 
             $self->{str}    = _re_path_track($self, $self->_path, '', '');  | 
| 
492
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
493
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
494
 | 
790
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
4327
 | 
             $self->_reduce unless ($self->{mutable} or not $self->{reduce});  | 
| 
495
 | 
790
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1217
 | 
             my $arg  = {@_};  | 
| 
496
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $arg->{indent} = $self->{indent}  | 
| 
497
 | 
790
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
3689
 | 
                 if not exists $arg->{indent} and $self->{indent} > 0;  | 
| 
498
 | 
790
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
2595
 | 
             if( exists $arg->{indent} and $arg->{indent} > 0 ) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
499
 | 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
81
 | 
                 $arg->{depth} = 0;  | 
| 
500
 | 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
101
 | 
                 $self->{str}  = _re_path_pretty($self, $self->_path, $arg);  | 
| 
501
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
502
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             elsif( $self->{lookahead} ) {  | 
| 
503
 | 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
90
 | 
                 $self->{str}  = _re_path_lookahead($self, $self->_path);  | 
| 
504
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
505
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
506
 | 
713
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1356
 | 
                 $self->{str}  = _re_path($self, $self->_path);  | 
| 
507
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
508
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
509
 | 
798
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1835
 | 
         if (not length $self->{str}) {  | 
| 
510
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # explicitly fail to match anything if no pattern was generated  | 
| 
511
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
             $self->{str} = $Always_Fail;  | 
| 
512
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
513
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
514
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             my $begin =  | 
| 
515
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   $self->{anchor_word_begin}   ? '\\b'  | 
| 
516
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 : $self->{anchor_line_begin}   ? '^'  | 
| 
517
 | 
789
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2495
 | 
                 : $self->{anchor_string_begin} ? '\A'  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
518
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 : ''  | 
| 
519
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ;  | 
| 
520
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             my $end =  | 
| 
521
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   $self->{anchor_word_end}            ? '\\b'  | 
| 
522
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 : $self->{anchor_line_end}            ? '$'  | 
| 
523
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 : $self->{anchor_string_end}          ? '\Z'  | 
| 
524
 | 
789
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2439
 | 
                 : $self->{anchor_string_end_absolute} ? '\z'  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
525
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 : ''  | 
| 
526
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ;  | 
| 
527
 | 
789
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1520
 | 
             $self->{str} = "$begin$self->{str}$end";  | 
| 
528
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
529
 | 
798
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2024
 | 
         $self->{path} = [] unless $self->{mutable};  | 
| 
530
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
531
 | 
800
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4493
 | 
     return $self->{str};  | 
| 
532
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
533
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
534
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub re {  | 
| 
535
 | 
122
 | 
 
 | 
 
 | 
  
122
  
 | 
  
1
  
 | 
1345
 | 
     my $self = shift;  | 
| 
536
 | 
122
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
486
 | 
     $self->_build_re($self->as_string(@_)) unless defined $self->{re};  | 
| 
537
 | 
122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
644
 | 
     return $self->{re};  | 
| 
538
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
539
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
540
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 use overload '""' => sub {  | 
| 
541
 | 
2131
 | 
 
 | 
 
 | 
  
2131
  
 | 
 
 | 
692647
 | 
     my $self = shift;  | 
| 
542
 | 
2131
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
17310
 | 
     return $self->{re} if $self->{re};  | 
| 
543
 | 
489
 | 
 
 | 
 
 | 
 
 | 
 
 | 
972
 | 
     $self->_build_re($self->as_string());  | 
| 
544
 | 
489
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6245
 | 
     return $self->{re};  | 
| 
545
 | 
11
 | 
 
 | 
 
 | 
  
11
  
 | 
 
 | 
16230
 | 
 };  | 
| 
 
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12073
 | 
    | 
| 
 
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
128
 | 
    | 
| 
546
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
547
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _build_re {  | 
| 
548
 | 
618
 | 
 
 | 
 
 | 
  
618
  
 | 
 
 | 
658
 | 
     my $self  = shift;  | 
| 
549
 | 
618
 | 
 
 | 
 
 | 
 
 | 
 
 | 
610
 | 
     my $str   = shift;  | 
| 
550
 | 
618
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1161
 | 
     if( $self->{track} ) {  | 
| 
551
 | 
11
 | 
 
 | 
 
 | 
  
11
  
 | 
 
 | 
1059
 | 
         use re 'eval';  | 
| 
 
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
    | 
| 
 
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
128533
 | 
    | 
| 
552
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $self->{re} = length $self->{flags}  | 
| 
553
 | 
8
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1571
 | 
             ? qr/(?$self->{flags}:$str)/  | 
| 
554
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             : qr/$str/  | 
| 
555
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ;  | 
| 
556
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
557
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else {  | 
| 
558
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # how could I not repeat myself?  | 
| 
559
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $self->{re} = length $self->{flags}  | 
| 
560
 | 
610
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
13474
 | 
             ? qr/(?$self->{flags}:$str)/  | 
| 
561
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             : qr/$str/  | 
| 
562
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ;  | 
| 
563
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
564
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
565
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
566
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub match {  | 
| 
567
 | 
29
 | 
 
 | 
 
 | 
  
29
  
 | 
  
1
  
 | 
6342
 | 
     my $self = shift;  | 
| 
568
 | 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
47
 | 
     my $target = shift;  | 
| 
569
 | 
29
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
140
 | 
     $self->_build_re($self->as_string(@_)) unless defined $self->{re};  | 
| 
570
 | 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
64
 | 
     $self->{m}    = undef;  | 
| 
571
 | 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
61
 | 
     $self->{mvar} = [];  | 
| 
572
 | 
29
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1014
 | 
     if( not $target =~ /$self->{re}/ ) {  | 
| 
573
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
23
 | 
         $self->{mbegin} = [];  | 
| 
574
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
26
 | 
         $self->{mend}   = [];  | 
| 
575
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
57
 | 
         return undef;  | 
| 
576
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
577
 | 
21
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
92
 | 
     $self->{m}      = $^R if $] >= 5.009005;  | 
| 
578
 | 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
148
 | 
     $self->{mbegin} = _path_copy([@-]);  | 
| 
579
 | 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
132
 | 
     $self->{mend}   = _path_copy([@+]);  | 
| 
580
 | 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
52
 | 
     my $n = 0;  | 
| 
581
 | 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
81
 | 
     for( my $n = 0; $n < @-; ++$n ) {  | 
| 
582
 | 
43
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
257
 | 
         push @{$self->{mvar}}, substr($target, $-[$n], $+[$n] - $-[$n])  | 
| 
 
 | 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
281
 | 
    | 
| 
583
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             if defined $-[$n] and defined $+[$n];  | 
| 
584
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
585
 | 
21
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
63
 | 
     if( $self->{track} ) {  | 
| 
586
 | 
20
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
190
 | 
         return defined $self->{m} ? $self->{mlist}[$self->{m}] : 1;  | 
| 
587
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
588
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else {  | 
| 
589
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
56
 | 
         return 1;  | 
| 
590
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
591
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
592
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
593
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub source {  | 
| 
594
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
1
  
 | 
387
 | 
     my $self = shift;  | 
| 
595
 | 
4
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
20
 | 
     return unless $self->{track};  | 
| 
596
 | 
3
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
17
 | 
     defined($_[0]) and return $self->{mlist}[$_[0]];  | 
| 
597
 | 
2
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
15
 | 
     return unless defined $self->{m};  | 
| 
598
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
     return $self->{mlist}[$self->{m}];  | 
| 
599
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
600
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
601
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub mbegin {  | 
| 
602
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
  
1
  
 | 
12
 | 
     my $self = shift;  | 
| 
603
 | 
3
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
30
 | 
     return exists $self->{mbegin} ? $self->{mbegin} : [];  | 
| 
604
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
605
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
606
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub mend {  | 
| 
607
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
  
1
  
 | 
7
 | 
     my $self = shift;  | 
| 
608
 | 
3
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
58
 | 
     return exists $self->{mend} ? $self->{mend} : [];  | 
| 
609
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
610
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
611
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub mvar {  | 
| 
612
 | 
19
 | 
 
 | 
 
 | 
  
19
  
 | 
  
1
  
 | 
35
 | 
     my $self = shift;  | 
| 
613
 | 
19
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
68
 | 
     return undef unless exists $self->{mvar};  | 
| 
614
 | 
18
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
134
 | 
     return defined($_[0]) ? $self->{mvar}[$_[0]] : $self->{mvar};  | 
| 
615
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
616
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
617
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub capture {  | 
| 
618
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
  
1
  
 | 
20
 | 
     my $self = shift;  | 
| 
619
 | 
5
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
19
 | 
     if( $self->{mvar} ) {  | 
| 
620
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
         my @capture = @{$self->{mvar}};  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
    | 
| 
621
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
         shift @capture;  | 
| 
622
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
         return @capture;  | 
| 
623
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
624
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
     return ();  | 
| 
625
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
626
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
627
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub matched {  | 
| 
628
 | 
9
 | 
 
 | 
 
 | 
  
9
  
 | 
  
1
  
 | 
476
 | 
     my $self = shift;  | 
| 
629
 | 
9
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
82
 | 
     return defined $self->{m} ? $self->{mlist}[$self->{m}] : undef;  | 
| 
630
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
631
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
632
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub stats_add {  | 
| 
633
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
  
1
  
 | 
6
 | 
     my $self = shift;  | 
| 
634
 | 
2
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
10
 | 
     return $self->{stats_add} || 0;  | 
| 
635
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
636
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
637
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub stats_dup {  | 
| 
638
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
  
1
  
 | 
3
 | 
     my $self = shift;  | 
| 
639
 | 
2
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
11
 | 
     return $self->{stats_dup} || 0;  | 
| 
640
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
641
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
642
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub stats_raw {  | 
| 
643
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
  
1
  
 | 
4
 | 
     my $self = shift;  | 
| 
644
 | 
2
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
12
 | 
     return $self->{stats_raw} || 0;  | 
| 
645
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
646
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
647
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub stats_cooked {  | 
| 
648
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
  
1
  
 | 
5
 | 
     my $self = shift;  | 
| 
649
 | 
2
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
11
 | 
     return $self->{stats_cooked} || 0;  | 
| 
650
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
651
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
652
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub stats_length {  | 
| 
653
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
  
1
  
 | 
2618
 | 
     my $self = shift;  | 
| 
654
 | 
6
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
41
 | 
     return (defined $self->{str} and $self->{str} ne $Always_Fail) ? length $self->{str} : 0;  | 
| 
655
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
656
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
657
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub dup_warn {  | 
| 
658
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
  
1
  
 | 
1250
 | 
     my $self = shift;  | 
| 
659
 | 
5
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
13
 | 
     $self->{dup_warn} = defined($_[0]) ? $_[0] : 1;  | 
| 
660
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
     return $self;  | 
| 
661
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
662
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
663
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub anchor_word_begin {  | 
| 
664
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
  
1
  
 | 
6
 | 
     my $self = shift;  | 
| 
665
 | 
5
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
16
 | 
     $self->{anchor_word_begin} = defined($_[0]) ? $_[0] : 1;  | 
| 
666
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
     return $self;  | 
| 
667
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
668
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
669
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub anchor_word_end {  | 
| 
670
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
1
  
 | 
3
 | 
     my $self = shift;  | 
| 
671
 | 
4
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
6
 | 
     $self->{anchor_word_end} = defined($_[0]) ? $_[0] : 1;  | 
| 
672
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
     return $self;  | 
| 
673
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
674
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
675
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub anchor_word {  | 
| 
676
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
  
1
  
 | 
3
 | 
     my $self  = shift;  | 
| 
677
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
     my $state = shift;  | 
| 
678
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
     $self->anchor_word_begin($state)->anchor_word_end($state);  | 
| 
679
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
     return $self;  | 
| 
680
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
681
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
682
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub anchor_line_begin {  | 
| 
683
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
1
  
 | 
4
 | 
     my $self = shift;  | 
| 
684
 | 
4
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
12
 | 
     $self->{anchor_line_begin} = defined($_[0]) ? $_[0] : 1;  | 
| 
685
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
     return $self;  | 
| 
686
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
687
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
688
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub anchor_line_end {  | 
| 
689
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
  
1
  
 | 
2
 | 
     my $self = shift;  | 
| 
690
 | 
2
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
6
 | 
     $self->{anchor_line_end} = defined($_[0]) ? $_[0] : 1;  | 
| 
691
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
     return $self;  | 
| 
692
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
693
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
694
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub anchor_line {  | 
| 
695
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
  
1
  
 | 
3
 | 
     my $self  = shift;  | 
| 
696
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
     my $state = shift;  | 
| 
697
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
     $self->anchor_line_begin($state)->anchor_line_end($state);  | 
| 
698
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
     return $self;  | 
| 
699
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
700
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
701
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub anchor_string_begin {  | 
| 
702
 | 
277
 | 
 
 | 
 
 | 
  
277
  
 | 
  
1
  
 | 
199
 | 
     my $self = shift;  | 
| 
703
 | 
277
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
453
 | 
     $self->{anchor_string_begin} = defined($_[0]) ? $_[0] : 1;  | 
| 
704
 | 
277
 | 
 
 | 
 
 | 
 
 | 
 
 | 
411
 | 
     return $self;  | 
| 
705
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
706
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
707
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub anchor_string_end {  | 
| 
708
 | 
276
 | 
 
 | 
 
 | 
  
276
  
 | 
  
1
  
 | 
203
 | 
     my $self = shift;  | 
| 
709
 | 
276
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
421
 | 
     $self->{anchor_string_end} = defined($_[0]) ? $_[0] : 1;  | 
| 
710
 | 
276
 | 
 
 | 
 
 | 
 
 | 
 
 | 
215
 | 
     return $self;  | 
| 
711
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
712
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
713
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub anchor_string_end_absolute {  | 
| 
714
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
  
1
  
 | 
4
 | 
     my $self = shift;  | 
| 
715
 | 
3
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
8
 | 
     $self->{anchor_string_end_absolute} = defined($_[0]) ? $_[0] : 1;  | 
| 
716
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
     return $self;  | 
| 
717
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
718
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
719
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub anchor_string {  | 
| 
720
 | 
274
 | 
 
 | 
 
 | 
  
274
  
 | 
  
1
  
 | 
248
 | 
     my $self  = shift;  | 
| 
721
 | 
274
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
406
 | 
     my $state = defined($_[0]) ? $_[0] : 1;  | 
| 
722
 | 
274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
387
 | 
     $self->anchor_string_begin($state)->anchor_string_end($state);  | 
| 
723
 | 
274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
685
 | 
     return $self;  | 
| 
724
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
725
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
726
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub anchor_string_absolute {  | 
| 
727
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
  
1
  
 | 
3
 | 
     my $self  = shift;  | 
| 
728
 | 
2
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
7
 | 
     my $state = defined($_[0]) ? $_[0] : 1;  | 
| 
729
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
     $self->anchor_string_begin($state)->anchor_string_end_absolute($state);  | 
| 
730
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
     return $self;  | 
| 
731
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
732
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
733
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub debug {  | 
| 
734
 | 
605
 | 
 
 | 
 
 | 
  
605
  
 | 
  
1
  
 | 
2217
 | 
     my $self = shift;  | 
| 
735
 | 
605
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1241
 | 
     $self->{debug} = defined($_[0]) ? $_[0] : 0;  | 
| 
736
 | 
605
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
911
 | 
     if ($self->_debug(DEBUG_TIME)) {  | 
| 
737
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # hmm, debugging time was switched on after instantiation  | 
| 
738
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
         $self->_init_time_func;  | 
| 
739
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
         $self->{_begin_time} = $self->{_time_func}->();  | 
| 
740
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
741
 | 
605
 | 
 
 | 
 
 | 
 
 | 
 
 | 
774
 | 
     return $self;  | 
| 
742
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
743
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
744
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub dump {  | 
| 
745
 | 
9
 | 
 
 | 
 
 | 
  
9
  
 | 
  
1
  
 | 
1429
 | 
     return _dump($_[0]->_path);  | 
| 
746
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
747
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
748
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub chomp {  | 
| 
749
 | 
22
 | 
 
 | 
 
 | 
  
22
  
 | 
  
1
  
 | 
584
 | 
     my $self = shift;  | 
| 
750
 | 
22
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
56
 | 
     $self->{chomp} = defined($_[0]) ? $_[0] : 1;  | 
| 
751
 | 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
48
 | 
     return $self;  | 
| 
752
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
753
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
754
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub fold_meta_pairs {  | 
| 
755
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
  
1
  
 | 
7
 | 
     my $self = shift;  | 
| 
756
 | 
5
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
14
 | 
     $self->{fold_meta_pairs} = defined($_[0]) ? $_[0] : 1;  | 
| 
757
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
     return $self;  | 
| 
758
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
759
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
760
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub indent {  | 
| 
761
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
1
  
 | 
593
 | 
     my $self = shift;  | 
| 
762
 | 
4
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
11
 | 
     $self->{indent} = defined($_[0]) ? $_[0] : 0;  | 
| 
763
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
     return $self;  | 
| 
764
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
765
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
766
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub lookahead {  | 
| 
767
 | 
22
 | 
 
 | 
 
 | 
  
22
  
 | 
  
1
  
 | 
32
 | 
     my $self = shift;  | 
| 
768
 | 
22
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
82
 | 
     $self->{lookahead} = defined($_[0]) ? $_[0] : 1;  | 
| 
769
 | 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
65
 | 
     return $self;  | 
| 
770
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
771
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
772
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub flags {  | 
| 
773
 | 
24
 | 
 
 | 
 
 | 
  
24
  
 | 
  
1
  
 | 
1190
 | 
     my $self = shift;  | 
| 
774
 | 
24
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
85
 | 
     $self->{flags} = defined($_[0]) ? $_[0] : '';  | 
| 
775
 | 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
84
 | 
     return $self;  | 
| 
776
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
777
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
778
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub modifiers {  | 
| 
779
 | 
4
 | 
 
 | 
 
 | 
  
4
  
 | 
  
1
  
 | 
1130
 | 
     my $self = shift;  | 
| 
780
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
     return $self->flags(@_);  | 
| 
781
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
782
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
783
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub track {  | 
| 
784
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
  
1
  
 | 
1181
 | 
     my $self = shift;  | 
| 
785
 | 
5
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
20
 | 
     $self->{track} = defined($_[0]) ? $_[0] : 1;  | 
| 
786
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
     return $self;  | 
| 
787
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
788
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
789
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub unroll_plus {  | 
| 
790
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
  
1
  
 | 
840
 | 
     my $self = shift;  | 
| 
791
 | 
2
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
8
 | 
     $self->{unroll_plus} = defined($_[0]) ? $_[0] : 1;  | 
| 
792
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
     return $self;  | 
| 
793
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
794
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
795
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub lex {  | 
| 
796
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
3
 | 
     my $self = shift;  | 
| 
797
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
52
 | 
     $self->{lex} = qr($_[0]);  | 
| 
798
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
     return $self;  | 
| 
799
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
800
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
801
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub reduce {  | 
| 
802
 | 
19
 | 
 
 | 
 
 | 
  
19
  
 | 
  
1
  
 | 
975
 | 
     my $self = shift;  | 
| 
803
 | 
19
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
74
 | 
     $self->{reduce} = defined($_[0]) ? $_[0] : 1;  | 
| 
804
 | 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
70
 | 
     return $self;  | 
| 
805
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
806
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
807
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub mutable {  | 
| 
808
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
  
1
  
 | 
905
 | 
     my $self = shift;  | 
| 
809
 | 
5
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
21
 | 
     $self->{mutable} = defined($_[0]) ? $_[0] : 1;  | 
| 
810
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
     return $self;  | 
| 
811
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
812
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
813
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub reset {  | 
| 
814
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # reinitialise the internal state of the object  | 
| 
815
 | 
19
 | 
 
 | 
 
 | 
  
19
  
 | 
  
1
  
 | 
2451
 | 
     my $self = shift;  | 
| 
816
 | 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
45
 | 
     $self->{path} = [];  | 
| 
817
 | 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
48
 | 
     $self->{re}   = undef;  | 
| 
818
 | 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
30
 | 
     $self->{str}  = undef;  | 
| 
819
 | 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
82
 | 
     return $self;  | 
| 
820
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
821
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
822
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub Default_Lexer {  | 
| 
823
 | 
4
 | 
  
100
  
 | 
 
 | 
  
4
  
 | 
  
1
  
 | 
3135
 | 
     if( $_[0] ) {  | 
| 
824
 | 
3
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
13
 | 
         if( my $refname = ref($_[0]) ) {  | 
| 
825
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
             require Carp;  | 
| 
826
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
162
 | 
             Carp::croak("Cannot pass a $refname to Default_Lexer");  | 
| 
827
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
828
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
         $Current_Lexer = $_[0];  | 
| 
829
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
830
 | 
3
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
15
 | 
     return defined $Current_Lexer ? $Current_Lexer : $Default_Lexer;  | 
| 
831
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
832
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
833
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # --- no user serviceable parts below ---  | 
| 
834
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
835
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # -- debug helpers  | 
| 
836
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
837
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _debug {  | 
| 
838
 | 
12316
 | 
 
 | 
 
 | 
  
12316
  
 | 
 
 | 
10213
 | 
     my $self = shift;  | 
| 
839
 | 
12316
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
37393
 | 
     return $self->{debug} & shift() ? 1 : 0;  | 
| 
840
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
841
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
842
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # -- helpers  | 
| 
843
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
844
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _path {  | 
| 
845
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # access the path  | 
| 
846
 | 
10027
 | 
 
 | 
 
 | 
  
10027
  
 | 
 
 | 
23461
 | 
     return $_[0]->{path};  | 
| 
847
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
848
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
849
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # -- the heart of the matter  | 
| 
850
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
851
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 $have_Storable = do {  | 
| 
852
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     eval {  | 
| 
853
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         require Storable;  | 
| 
854
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         import Storable 'dclone';  | 
| 
855
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     };  | 
| 
856
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $@ ? 0 : 1;  | 
| 
857
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
858
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
859
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _path_clone {  | 
| 
860
 | 
55
 | 
  
100
  
 | 
 
 | 
  
55
  
 | 
 
 | 
3533
 | 
     $have_Storable ? dclone($_[0]) : _path_copy($_[0]);  | 
| 
861
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
862
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
863
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _path_copy {  | 
| 
864
 | 
80
 | 
 
 | 
 
 | 
  
80
  
 | 
 
 | 
661
 | 
     my $path = shift;  | 
| 
865
 | 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
88
 | 
     my $new  = [];  | 
| 
866
 | 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
197
 | 
     for( my $p = 0; $p < @$path; ++$p ) {  | 
| 
867
 | 
201
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
400
 | 
         if( ref($path->[$p]) eq 'HASH' ) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
868
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
             push @$new, _node_copy($path->[$p]);  | 
| 
869
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
870
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif( ref($path->[$p]) eq 'ARRAY' ) {  | 
| 
871
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
             push @$new, _path_copy($path->[$p]);  | 
| 
872
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
873
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
874
 | 
189
 | 
 
 | 
 
 | 
 
 | 
 
 | 
438
 | 
             push @$new, $path->[$p];  | 
| 
875
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
876
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
877
 | 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
189
 | 
     return $new;  | 
| 
878
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
879
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
880
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _node_copy {  | 
| 
881
 | 
11
 | 
 
 | 
 
 | 
  
11
  
 | 
 
 | 
11
 | 
     my $node = shift;  | 
| 
882
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
     my $new  = {};  | 
| 
883
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
33
 | 
     while( my( $k, $v ) = each %$node ) {  | 
| 
884
 | 
27
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
49
 | 
         $new->{$k} = defined($v)  | 
| 
885
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ? _path_copy($v)  | 
| 
886
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             : undef  | 
| 
887
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ;  | 
| 
888
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
889
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
26
 | 
     return $new;  | 
| 
890
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
891
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
892
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _insert_path {  | 
| 
893
 | 
7158
 | 
 
 | 
 
 | 
  
7158
  
 | 
 
 | 
6708
 | 
     my $self  = shift;  | 
| 
894
 | 
7158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5836
 | 
     my $list  = shift;  | 
| 
895
 | 
7158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6522
 | 
     my $debug = shift;  | 
| 
896
 | 
7158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5917
 | 
     my @in    = @{shift()}; # create a new copy  | 
| 
 
 | 
7158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17541
 | 
    | 
| 
897
 | 
7158
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
13612
 | 
     if( @$list == 0 ) { # special case the first time  | 
| 
898
 | 
2033
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
9831
 | 
         if( @in == 0 or (@in == 1 and (not defined $in[0] or $in[0] eq ''))) {  | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
899
 | 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
221
 | 
             return [{'' => undef}];  | 
| 
900
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
901
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
902
 | 
2000
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4686
 | 
             return \@in;  | 
| 
903
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
904
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
905
 | 
5125
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
9268
 | 
     $debug and print "# _insert_path @{[_dump(\@in)]} into @{[_dump($list)]}\n";  | 
| 
 
 | 
151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
235
 | 
    | 
| 
 
 | 
151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
176
 | 
    | 
| 
906
 | 
5125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4905
 | 
     my $path   = $list;  | 
| 
907
 | 
5125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4172
 | 
     my $offset = 0;  | 
| 
908
 | 
5125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4019
 | 
     my $token;  | 
| 
909
 | 
5125
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
8533
 | 
     if( not @in ) {  | 
| 
910
 | 
2
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
6
 | 
         if( ref($list->[0]) ne 'HASH' ) {  | 
| 
911
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
             return [ { '' => undef, $list->[0] => $list } ];  | 
| 
912
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
913
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
914
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
             $list->[0]{''} = undef;  | 
| 
915
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
             return $list;  | 
| 
916
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
917
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
918
 | 
5123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10827
 | 
     while( defined( $token = shift @in )) {  | 
| 
919
 | 
17747
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
34171
 | 
         if( ref($token) eq 'HASH' ) {  | 
| 
920
 | 
282
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
551
 | 
             $debug and print "#  p0=", _dump($path), "\n";  | 
| 
921
 | 
282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
777
 | 
             $path = $self->_insert_node( $path, $offset, $token, $debug, @in );  | 
| 
922
 | 
282
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
540
 | 
             $debug and print "#  p1=", _dump($path), "\n";  | 
| 
923
 | 
282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
354
 | 
             last;  | 
| 
924
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
925
 | 
17465
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
27747
 | 
         if( ref($path->[$offset]) eq 'HASH' ) {  | 
| 
926
 | 
3714
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
5657
 | 
             $debug and print "#   at (off=$offset len=@{[scalar @$path]}) ", _dump($path->[$offset]), "\n";  | 
| 
 
 | 
54
 | 
 
 | 
 
 | 
 
 | 
 
 | 
220
 | 
    | 
| 
927
 | 
3714
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3558
 | 
             my $node = $path->[$offset];  | 
| 
928
 | 
3714
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
5625
 | 
             if( exists( $node->{$token} )) {  | 
| 
929
 | 
2632
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
4425
 | 
                 if ($offset < $#$path) {  | 
| 
930
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     my $new = {  | 
| 
931
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         $token => [$token, @in],  | 
| 
932
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
                         _re_path($self, [$node]) => [@{$path}[$offset..$#$path]],  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
933
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     };  | 
| 
934
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
                     splice @$path, $offset, @$path-$offset, $new;  | 
| 
935
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
                     last;  | 
| 
936
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
937
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 else {  | 
| 
938
 | 
2631
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
4083
 | 
                     $debug and print "#   descend key=$token @{[_dump($node->{$token})]}\n";  | 
| 
 
 | 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
64
 | 
    | 
| 
939
 | 
2631
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2892
 | 
                     $path   = $node->{$token};  | 
| 
940
 | 
2631
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2190
 | 
                     $offset = 0;  | 
| 
941
 | 
2631
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2992
 | 
                     redo;  | 
| 
942
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
943
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
944
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
945
 | 
1082
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1797
 | 
                 $debug and print "#   add path ($token:@{[_dump(\@in)]}) into @{[_dump($path)]} at off=$offset to end=@{[scalar $#$path]}\n";  | 
| 
 
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
52
 | 
    | 
| 
 
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
45
 | 
    | 
| 
 
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1101
 | 
    | 
| 
946
 | 
1082
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1793
 | 
                 if( $offset == $#$path ) {  | 
| 
947
 | 
1075
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3063
 | 
                     $node->{$token} = [ $token, @in ];  | 
| 
948
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
949
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 else {  | 
| 
950
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     my $new = {  | 
| 
951
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         _node_key($token) => [ $token, @in ],  | 
| 
952
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
                         _node_key($node)  => [@{$path}[$offset..$#{$path}]],  | 
| 
 
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
26
 | 
    | 
| 
 
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
    | 
| 
953
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     };  | 
| 
954
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
27
 | 
                     splice( @$path, $offset, @$path - $offset, $new );  | 
| 
955
 | 
7
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
21
 | 
                     $debug and print "#   fused node=@{[_dump($new)]} path=@{[_dump($path)]}\n";  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
    | 
| 
956
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
957
 | 
1082
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1410
 | 
                 last;  | 
| 
958
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
959
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
960
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
961
 | 
13751
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
18804
 | 
         if( $debug ) {  | 
| 
962
 | 
306
 | 
 
 | 
 
 | 
 
 | 
 
 | 
289
 | 
             my $msg = '';  | 
| 
963
 | 
306
 | 
 
 | 
 
 | 
 
 | 
 
 | 
223
 | 
             my $n;  | 
| 
964
 | 
306
 | 
 
 | 
 
 | 
 
 | 
 
 | 
533
 | 
             for( $n = 0; $n < @$path; ++$n ) {  | 
| 
965
 | 
1093
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1385
 | 
                 $msg .= ' ' if $n;  | 
| 
966
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 my $atom = ref($path->[$n]) eq 'HASH'  | 
| 
967
 | 
1093
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1396
 | 
                     ? '{'.join( ' ', keys(%{$path->[$n]})).'}'  | 
| 
 
 | 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
209
 | 
    | 
| 
968
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     : $path->[$n]  | 
| 
969
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ;  | 
| 
970
 | 
1093
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2432
 | 
                 $msg .= $n == $offset ? "<$atom>" : $atom;  | 
| 
971
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
972
 | 
306
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12480
 | 
             print "# at path ($msg)\n";  | 
| 
973
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
974
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
975
 | 
13751
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
33357
 | 
         if( $offset >= @$path ) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
976
 | 
732
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3047
 | 
             push @$path, { $token => [ $token, @in ], '' => undef };  | 
| 
977
 | 
732
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1680
 | 
             $debug and print "#   added remaining @{[_dump($path)]}\n";  | 
| 
 
 | 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
48
 | 
    | 
| 
978
 | 
732
 | 
 
 | 
 
 | 
 
 | 
 
 | 
953
 | 
             last;  | 
| 
979
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
980
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif( $token ne $path->[$offset] ) {  | 
| 
981
 | 
2103
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
6803
 | 
             $debug and print "#   token $token not present\n";  | 
| 
982
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             splice @$path, $offset, @$path-$offset, {  | 
| 
983
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 length $token  | 
| 
984
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     ? ( _node_key($token) => [$token, @in])  | 
| 
985
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     : ( '' => undef )  | 
| 
986
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ,  | 
| 
987
 | 
2103
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
5961
 | 
                 $path->[$offset] => [@{$path}[$offset..$#{$path}]],  | 
| 
 
 | 
2103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8690
 | 
    | 
| 
 
 | 
2103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3045
 | 
    | 
| 
988
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             };  | 
| 
989
 | 
2103
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
4418
 | 
             $debug and print "#   path=@{[_dump($path)]}\n";  | 
| 
 
 | 
79
 | 
 
 | 
 
 | 
 
 | 
 
 | 
119
 | 
    | 
| 
990
 | 
2103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2404
 | 
             last;  | 
| 
991
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
992
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif( not @in ) {  | 
| 
993
 | 
923
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1952
 | 
             $debug and print "#   last token to add\n";  | 
| 
994
 | 
923
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2171
 | 
             if( defined( $path->[$offset+1] )) {  | 
| 
995
 | 
912
 | 
 
 | 
 
 | 
 
 | 
 
 | 
971
 | 
                 ++$offset;  | 
| 
996
 | 
912
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1655
 | 
                 if( ref($path->[$offset]) eq 'HASH' ) {  | 
| 
997
 | 
118
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
322
 | 
                     $debug and print "#   add sentinel to node\n";  | 
| 
998
 | 
118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
216
 | 
                     $path->[$offset]{''} = undef;  | 
| 
999
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
1000
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 else {  | 
| 
1001
 | 
794
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1397
 | 
                     $debug and print "#   convert <$path->[$offset]> to node for sentinel\n";  | 
| 
1002
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     splice @$path, $offset, @$path-$offset, {  | 
| 
1003
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         ''               => undef,  | 
| 
1004
 | 
794
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2166
 | 
                         $path->[$offset] => [ @{$path}[$offset..$#{$path}] ],  | 
| 
 
 | 
794
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3758
 | 
    | 
| 
 
 | 
794
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1262
 | 
    | 
| 
1005
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     };  | 
| 
1006
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
1007
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1008
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
1009
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # already seen this pattern  | 
| 
1010
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
                 ++$self->{stats_dup};  | 
| 
1011
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1012
 | 
923
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1370
 | 
             last;  | 
| 
1013
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1014
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # if we get here then @_ still contains a token  | 
| 
1015
 | 
9993
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16401
 | 
         ++$offset;  | 
| 
1016
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1017
 | 
5123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11451
 | 
     $list;  | 
| 
1018
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1019
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1020
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _insert_node {  | 
| 
1021
 | 
282
 | 
 
 | 
 
 | 
  
282
  
 | 
 
 | 
310
 | 
     my $self   = shift;  | 
| 
1022
 | 
282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
283
 | 
     my $path   = shift;  | 
| 
1023
 | 
282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
263
 | 
     my $offset = shift;  | 
| 
1024
 | 
282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
306
 | 
     my $token  = shift;  | 
| 
1025
 | 
282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
270
 | 
     my $debug  = shift;  | 
| 
1026
 | 
282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
379
 | 
     my $path_end = [@{$path}[$offset..$#{$path}]];  | 
| 
 
 | 
282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
609
 | 
    | 
| 
 
 | 
282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
420
 | 
    | 
| 
1027
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # NB: $path->[$offset] and $[path_end->[0] are equivalent  | 
| 
1028
 | 
282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
751
 | 
     my $token_key = _re_path($self, [$token]);  | 
| 
1029
 | 
282
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
726
 | 
     $debug and print "#  insert node(@{[_dump($token)]}:@{[_dump(\@_)]}) (key=$token_key)",  | 
| 
 
 | 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
47
 | 
    | 
| 
 
 | 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
52
 | 
    | 
| 
1030
 | 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
48
 | 
         " at path=@{[_dump($path_end)]}\n";  | 
| 
1031
 | 
282
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
720
 | 
     if( ref($path_end->[0]) eq 'HASH' ) {  | 
| 
1032
 | 
195
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
684
 | 
         if( exists($path_end->[0]{$token_key}) ) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1033
 | 
25
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
70
 | 
             if( @$path_end > 1 ) {  | 
| 
1034
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
                 my $path_key = _re_path($self, [$path_end->[0]]);  | 
| 
1035
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
                 my $new = {  | 
| 
1036
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     $path_key  => [ @$path_end ],  | 
| 
1037
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     $token_key => [ $token, @_ ],  | 
| 
1038
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 };  | 
| 
1039
 | 
2
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
8
 | 
                 $debug and print "#   +bifurcate new=@{[_dump($new)]}\n";  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
1040
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
39
 | 
                 splice( @$path, $offset, @$path_end, $new );  | 
| 
1041
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1042
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
1043
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
41
 | 
                 my $old_path = $path_end->[0]{$token_key};  | 
| 
1044
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
36
 | 
                 my $new_path = [];  | 
| 
1045
 | 
23
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
115
 | 
                 while( @$old_path and _node_eq( $old_path->[0], $token )) {  | 
| 
1046
 | 
30
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
111
 | 
                     $debug and print "#  identical nodes in sub_path ",  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1047
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         ref($token) ? _dump($token) : $token, "\n";  | 
| 
1048
 | 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
58
 | 
                     push @$new_path, shift(@$old_path);  | 
| 
1049
 | 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
97
 | 
                     $token = shift @_;  | 
| 
1050
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
1051
 | 
23
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
72
 | 
                 if( @$new_path ) {  | 
| 
1052
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
74
 | 
                     my $new;  | 
| 
1053
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
29
 | 
                     my $token_key = $token;  | 
| 
1054
 | 
23
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
46
 | 
                     if( @_ ) {  | 
| 
1055
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
                         $new = {  | 
| 
1056
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             _re_path($self, $old_path) => $old_path,  | 
| 
1057
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             $token_key => [$token, @_],  | 
| 
1058
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         };  | 
| 
1059
 | 
6
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
16
 | 
                         $debug and print "#  insert_node(bifurc) n=@{[_dump([$new])]}\n";  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
    | 
| 
1060
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
1061
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     else {  | 
| 
1062
 | 
17
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
43
 | 
                         $debug and print "#  insert $token into old path @{[_dump($old_path)]}\n";  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
    | 
| 
1063
 | 
17
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
46
 | 
                         if( @$old_path ) {  | 
| 
1064
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
67
 | 
                             $new = ($self->_insert_path( $old_path, $debug, [$token] ))->[0];  | 
| 
1065
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         }  | 
| 
1066
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         else {  | 
| 
1067
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
                             $new = { '' => undef, $token => [$token] };  | 
| 
1068
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         }  | 
| 
1069
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
1070
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
47
 | 
                     push @$new_path, $new;  | 
| 
1071
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
1072
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
36
 | 
                 $path_end->[0]{$token_key} = $new_path;  | 
| 
1073
 | 
23
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
59
 | 
                 $debug and print "#   +_insert_node result=@{[_dump($path_end)]}\n";  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
    | 
| 
1074
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
68
 | 
                 splice( @$path, $offset, @$path_end, @$path_end );  | 
| 
1075
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1076
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1077
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif( not _node_eq( $path_end->[0], $token )) {  | 
| 
1078
 | 
70
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
149
 | 
             if( @$path_end > 1 ) {  | 
| 
1079
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
36
 | 
                 my $path_key = _re_path($self, [$path_end->[0]]);  | 
| 
1080
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
74
 | 
                 my $new = {  | 
| 
1081
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     $path_key  => [ @$path_end ],  | 
| 
1082
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     $token_key => [ $token, @_ ],  | 
| 
1083
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 };  | 
| 
1084
 | 
11
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
69
 | 
                 $debug and print "#   path->node1 at $path_key/$token_key @{[_dump($new)]}\n";  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
1085
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
44
 | 
                 splice( @$path, $offset, @$path_end, $new );  | 
| 
1086
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1087
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
1088
 | 
59
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
121
 | 
                 $debug and print "#   next in path is node, trivial insert at $token_key\n";  | 
| 
1089
 | 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
212
 | 
                 $path_end->[0]{$token_key} = [$token, @_];  | 
| 
1090
 | 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
157
 | 
                 splice( @$path, $offset, @$path_end, @$path_end );  | 
| 
1091
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1092
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1093
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
1094
 | 
100
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
392
 | 
             while( @$path_end and _node_eq( $path_end->[0], $token )) {  | 
| 
1095
 | 
131
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
419
 | 
                 $debug and print "#  identical nodes @{[_dump([$token])]}\n";  | 
| 
 
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
    | 
| 
1096
 | 
131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
187
 | 
                 shift @$path_end;  | 
| 
1097
 | 
131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
254
 | 
                 $token = shift @_;  | 
| 
1098
 | 
131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
350
 | 
                 ++$offset;  | 
| 
1099
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1100
 | 
100
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
202
 | 
             if( @$path_end ) {  | 
| 
1101
 | 
57
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
130
 | 
                 $debug and print "#   insert at $offset $token:@{[_dump(\@_)]} into @{[_dump($path_end)]}\n";  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
    | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
    | 
| 
1102
 | 
57
 | 
 
 | 
 
 | 
 
 | 
 
 | 
318
 | 
                 $path_end = $self->_insert_path( $path_end, $debug, [$token, @_] );  | 
| 
1103
 | 
57
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
149
 | 
                 $debug and print "#   got off=$offset s=@{[scalar @_]} path_add=@{[_dump($path_end)]}\n";  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
    | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
    | 
| 
1104
 | 
57
 | 
 
 | 
 
 | 
 
 | 
 
 | 
175
 | 
                 splice( @$path, $offset, @$path - $offset, @$path_end );  | 
| 
1105
 | 
57
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
150
 | 
                 $debug and print "#   got final=@{[_dump($path)]}\n";  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
    | 
| 
1106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
1108
 | 
43
 | 
 
 | 
 
 | 
 
 | 
 
 | 
113
 | 
                 $token_key = _node_key($token);  | 
| 
1109
 | 
43
 | 
 
 | 
 
 | 
 
 | 
 
 | 
212
 | 
                 my $new = {  | 
| 
1110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     ''         => undef,  | 
| 
1111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     $token_key => [ $token, @_ ],  | 
| 
1112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 };  | 
| 
1113
 | 
43
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
129
 | 
                 $debug and print "#   convert opt @{[_dump($new)]}\n";  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
    | 
| 
1114
 | 
43
 | 
 
 | 
 
 | 
 
 | 
 
 | 
101
 | 
                 push @$path, $new;  | 
| 
1115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else {  | 
| 
1119
 | 
87
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
172
 | 
         if( @$path_end ) {  | 
| 
1120
 | 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
356
 | 
             my $new = {  | 
| 
1121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $path_end->[0] => [ @$path_end ],  | 
| 
1122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $token_key     => [ $token, @_ ],  | 
| 
1123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             };  | 
| 
1124
 | 
74
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
165
 | 
             $debug and print "#   atom->node @{[_dump($new)]}\n";  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
    | 
| 
1125
 | 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
205
 | 
             splice( @$path, $offset, @$path_end, $new );  | 
| 
1126
 | 
74
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
180
 | 
             $debug and print "#   out=@{[_dump($path)]}\n";  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
    | 
| 
1127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
1129
 | 
13
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
34
 | 
             $debug and print "#   add opt @{[_dump([$token,@_])]} via $token_key\n";  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
    | 
| 
1130
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
55
 | 
             push @$path, {  | 
| 
1131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ''         => undef,  | 
| 
1132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $token_key => [ $token, @_ ],  | 
| 
1133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             };  | 
| 
1134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1136
 | 
282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
952
 | 
     $path;  | 
| 
1137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _reduce {  | 
| 
1140
 | 
810
 | 
 
 | 
 
 | 
  
810
  
 | 
 
 | 
994
 | 
     my $self    = shift;  | 
| 
1141
 | 
810
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1384
 | 
     my $context = { debug => $self->_debug(DEBUG_TAIL), depth => 0 };  | 
| 
1142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1143
 | 
810
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1563
 | 
     if ($self->_debug(DEBUG_TIME)) {  | 
| 
1144
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
         $self->_init_time_func;  | 
| 
1145
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
         my $now = $self->{_time_func}->();  | 
| 
1146
 | 
4
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
14
 | 
         if (exists $self->{_begin_time}) {  | 
| 
1147
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
302
 | 
             printf "# load=%0.6f\n", $now - $self->{_begin_time};  | 
| 
1148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1149
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
1150
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
44
 | 
             printf "# load-epoch=%0.6f\n", $now;  | 
| 
1151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1152
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
         $self->{_begin_time} = $self->{_time_func}->();  | 
| 
1153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1155
 | 
810
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1316
 | 
     my ($head, $tail) = _reduce_path( $self->_path, $context );  | 
| 
1156
 | 
810
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1632
 | 
     $context->{debug} and print "# final head=", _dump($head), ' tail=', _dump($tail), "\n";  | 
| 
1157
 | 
810
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1356
 | 
     if( !@$head ) {  | 
| 
1158
 | 
651
 | 
 
 | 
 
 | 
 
 | 
 
 | 
991
 | 
         $self->{path} = $tail;  | 
| 
1159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1160
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else {  | 
| 
1161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $self->{path} = [  | 
| 
1162
 | 
159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
345
 | 
             @{_unrev_path( $tail, $context )},  | 
| 
1163
 | 
159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
174
 | 
             @{_unrev_path( $head, $context )},  | 
| 
 
 | 
159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
249
 | 
    | 
| 
1164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ];  | 
| 
1165
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1166
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1167
 | 
810
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1961
 | 
     if ($self->_debug(DEBUG_TIME)) {  | 
| 
1168
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
         my $now = $self->{_time_func}->();  | 
| 
1169
 | 
4
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
9
 | 
         if (exists $self->{_begin_time}) {  | 
| 
1170
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
329
 | 
             printf "# reduce=%0.6f\n", $now - $self->{_begin_time};  | 
| 
1171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1172
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
1173
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             printf "# reduce-epoch=%0.6f\n", $now;  | 
| 
1174
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1175
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
         $self->{_begin_time} = $self->{_time_func}->();  | 
| 
1176
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1178
 | 
810
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1695
 | 
     $context->{debug} and print "# final path=", _dump($self->{path}), "\n";  | 
| 
1179
 | 
810
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1566
 | 
     return $self;  | 
| 
1180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1182
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _remove_optional {  | 
| 
1183
 | 
1710
 | 
  
100
  
 | 
 
 | 
  
1710
  
 | 
 
 | 
3395
 | 
     if( exists $_[0]->{''} ) {  | 
| 
1184
 | 
350
 | 
 
 | 
 
 | 
 
 | 
 
 | 
633
 | 
         delete $_[0]->{''};  | 
| 
1185
 | 
350
 | 
 
 | 
 
 | 
 
 | 
 
 | 
545
 | 
         return 1;  | 
| 
1186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1187
 | 
1360
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1598
 | 
     return 0;  | 
| 
1188
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1189
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1190
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _reduce_path {  | 
| 
1191
 | 
810
 | 
 
 | 
 
 | 
  
810
  
 | 
 
 | 
925
 | 
     my ($path, $ctx) = @_;  | 
| 
1192
 | 
810
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1504
 | 
     my $indent = ' ' x $ctx->{depth};  | 
| 
1193
 | 
810
 | 
 
 | 
 
 | 
 
 | 
 
 | 
823
 | 
     my $debug  =       $ctx->{debug};  | 
| 
1194
 | 
810
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1420
 | 
     $debug and print "#$indent _reduce_path $ctx->{depth} ", _dump($path), "\n";  | 
| 
1195
 | 
810
 | 
 
 | 
 
 | 
 
 | 
 
 | 
700
 | 
     my $new;  | 
| 
1196
 | 
810
 | 
 
 | 
 
 | 
 
 | 
 
 | 
960
 | 
     my $head = [];  | 
| 
1197
 | 
810
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1010
 | 
     my $tail = [];  | 
| 
1198
 | 
810
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1897
 | 
     while( defined( my $p = pop @$path )) {  | 
| 
1199
 | 
1159
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1946
 | 
         if( ref($p) eq 'HASH' ) {  | 
| 
1200
 | 
514
 | 
 
 | 
 
 | 
 
 | 
 
 | 
937
 | 
             my ($node_head, $node_tail) = _reduce_node($p, _descend($ctx) );  | 
| 
1201
 | 
514
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1496
 | 
             $debug and print "#$indent| head=", _dump($node_head), " tail=", _dump($node_tail), "\n";  | 
| 
1202
 | 
514
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1233
 | 
             push @$head, @$node_head if scalar @$node_head;  | 
| 
1203
 | 
514
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
4140
 | 
             push @$tail, ref($node_tail) eq 'HASH' ? $node_tail : @$node_tail;  | 
| 
1204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
1206
 | 
645
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
943
 | 
             if( @$head ) {  | 
| 
1207
 | 
125
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
257
 | 
                 $debug and print "#$indent| push $p leaves @{[_dump($path)]}\n";  | 
| 
 
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
38
 | 
    | 
| 
1208
 | 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
383
 | 
                 push @$tail, $p;  | 
| 
1209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
1211
 | 
520
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1135
 | 
                 $debug and print "#$indent| unshift $p\n";  | 
| 
1212
 | 
520
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1573
 | 
                 unshift @$tail, $p;  | 
| 
1213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1216
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
278
 | 
     $debug and print "#$indent| tail nr=@{[scalar @$tail]} t0=", ref($tail->[0]),  | 
| 
1217
 | 
810
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1511
 | 
         (ref($tail->[0]) eq 'HASH' ? " n=" . scalar(keys %{$tail->[0]}) : '' ),  | 
| 
 
 | 
18
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
894
 | 
    | 
| 
1218
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         "\n";  | 
| 
1219
 | 
810
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
2566
 | 
     if( @$tail > 1  | 
| 
 
 | 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1220
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         and ref($tail->[0]) eq 'HASH'  | 
| 
1221
 | 
97
 | 
 
 | 
 
 | 
 
 | 
 
 | 
437
 | 
         and keys %{$tail->[0]} == 2  | 
| 
1222
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ) {  | 
| 
1223
 | 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
81
 | 
         my $opt;  | 
| 
1224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my $fixed;  | 
| 
1225
 | 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
107
 | 
         while( my ($key, $path) = each %{$tail->[0]} ) {  | 
| 
 
 | 
216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
586
 | 
    | 
| 
1226
 | 
144
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
262
 | 
             $debug and print "#$indent| scan k=$key p=@{[_dump($path)]}\n";  | 
| 
 
 | 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
29
 | 
    | 
| 
1227
 | 
144
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
287
 | 
             next unless $path;  | 
| 
1228
 | 
123
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
415
 | 
             if (@$path == 1 and ref($path->[0]) eq 'HASH') {  | 
| 
1229
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
                 $opt = $path->[0];  | 
| 
1230
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
1232
 | 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
175
 | 
                 $fixed = $path;  | 
| 
1233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1235
 | 
72
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
197
 | 
         if( exists $tail->[0]{''} ) {  | 
| 
1236
 | 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
45
 | 
             my $path = [@{$tail}[1..$#{$tail}]];  | 
| 
 
 | 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
67
 | 
    | 
| 
 
 | 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
39
 | 
    | 
| 
1237
 | 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
41
 | 
             $tail = $tail->[0];  | 
| 
1238
 | 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
63
 | 
             ($head, $tail, $path) = _slide_tail( $head, $tail, $path, _descend($ctx) );  | 
| 
1239
 | 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
83
 | 
             $tail = [$tail, @$path];  | 
| 
1240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1241
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1242
 | 
810
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1396
 | 
     $debug and print "#$indent _reduce_path $ctx->{depth} out head=", _dump($head), ' tail=', _dump($tail), "\n";  | 
| 
1243
 | 
810
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1473
 | 
     return ($head, $tail);  | 
| 
1244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1245
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1246
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _reduce_node {  | 
| 
1247
 | 
1080
 | 
 
 | 
 
 | 
  
1080
  
 | 
 
 | 
1338
 | 
     my ($node, $ctx) = @_;  | 
| 
1248
 | 
1080
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2204
 | 
     my $indent = ' ' x $ctx->{depth};  | 
| 
1249
 | 
1080
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1100
 | 
     my $debug  =       $ctx->{debug};  | 
| 
1250
 | 
1080
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1513
 | 
     my $optional = _remove_optional($node);  | 
| 
1251
 | 
1080
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1935
 | 
     $debug and print "#$indent _reduce_node $ctx->{depth} in @{[_dump($node)]} opt=$optional\n";  | 
| 
 
 | 
57
 | 
 
 | 
 
 | 
 
 | 
 
 | 
101
 | 
    | 
| 
1252
 | 
1080
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
2500
 | 
     if( $optional and scalar keys %$node == 1 ) {  | 
| 
1253
 | 
70
 | 
 
 | 
 
 | 
 
 | 
 
 | 
128
 | 
         my $path = (values %$node)[0];  | 
| 
1254
 | 
70
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
105
 | 
         if( not grep { ref($_) eq 'HASH' } @$path ) {  | 
| 
 
 | 
117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
311
 | 
    | 
| 
1255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # if we have removed an optional, and there is only one path  | 
| 
1256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # left then there is nothing left to compare. Because of the  | 
| 
1257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # optional it cannot participate in any further reductions.  | 
| 
1258
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # (unless we test for equality among sub-trees).  | 
| 
1259
 | 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
197
 | 
             my $result = {  | 
| 
1260
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ''         => undef,  | 
| 
1261
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $path->[0] => $path  | 
| 
1262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             };  | 
| 
1263
 | 
61
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
121
 | 
             $debug and print "#$indent| fast fail @{[_dump($result)]}\n";  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
    | 
| 
1264
 | 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
155
 | 
             return [], $result;  | 
| 
1265
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1266
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1267
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1268
 | 
1019
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1416
 | 
     my( $fail, $reduce ) = _scan_node( $node, _descend($ctx) );  | 
| 
1269
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1270
 | 
1019
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2711
 | 
     $debug and print "#$indent|_scan_node done opt=$optional reduce=@{[_dump($reduce)]} fail=@{[_dump($fail)]}\n";  | 
| 
 
 | 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
100
 | 
    | 
| 
 
 | 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
91
 | 
    | 
| 
1271
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1272
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # We now perform tail reduction on each of the nodes in the reduce  | 
| 
1273
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # hash. If we have only one key, we know we will have a successful  | 
| 
1274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # reduction (since everything that was inserted into the node based  | 
| 
1275
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # on the value of the last token of each path all mapped to the same  | 
| 
1276
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # value).  | 
| 
1277
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1278
 | 
1019
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
5796
 | 
     if( @$fail == 0 and keys %$reduce == 1 and not $optional) {  | 
| 
 
 | 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1279
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # every path shares a common path  | 
| 
1280
 | 
506
 | 
 
 | 
 
 | 
 
 | 
 
 | 
825
 | 
         my $path = (values %$reduce)[0];  | 
| 
1281
 | 
506
 | 
 
 | 
 
 | 
 
 | 
 
 | 
906
 | 
         my ($common, $tail) = _do_reduce( $path, _descend($ctx) );  | 
| 
1282
 | 
506
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1373
 | 
         $debug and print "#$indent|_reduce_node  $ctx->{depth} common=@{[_dump($common)]} tail=", _dump($tail), "\n";  | 
| 
 
 | 
50
 | 
 
 | 
 
 | 
 
 | 
 
 | 
105
 | 
    | 
| 
1283
 | 
506
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2350
 | 
         return( $common, $tail );  | 
| 
1284
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1285
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1286
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # this node resulted in a list of paths, game over  | 
| 
1287
 | 
513
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1027
 | 
     $ctx->{indent} = $indent;  | 
| 
1288
 | 
513
 | 
 
 | 
 
 | 
 
 | 
 
 | 
970
 | 
     return _reduce_fail( $reduce, $fail, $optional, _descend($ctx) );  | 
| 
1289
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1290
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _reduce_fail {  | 
| 
1292
 | 
513
 | 
 
 | 
 
 | 
  
513
  
 | 
 
 | 
628
 | 
     my( $reduce, $fail, $optional, $ctx ) = @_;  | 
| 
1293
 | 
513
 | 
 
 | 
 
 | 
 
 | 
 
 | 
555
 | 
     my( $debug, $depth, $indent ) = @{$ctx}{qw(debug depth indent)};  | 
| 
 
 | 
513
 | 
 
 | 
 
 | 
 
 | 
 
 | 
935
 | 
    | 
| 
1294
 | 
513
 | 
 
 | 
 
 | 
 
 | 
 
 | 
533
 | 
     my %result;  | 
| 
1295
 | 
513
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
892
 | 
     $result{''} = undef if $optional;  | 
| 
1296
 | 
513
 | 
 
 | 
 
 | 
 
 | 
 
 | 
472
 | 
     my $p;  | 
| 
1297
 | 
513
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1024
 | 
     for $p (keys %$reduce) {  | 
| 
1298
 | 
1181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1157
 | 
         my $path = $reduce->{$p};  | 
| 
1299
 | 
1181
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1649
 | 
         if( scalar @$path == 1 ) {  | 
| 
1300
 | 
1114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1137
 | 
             $path = $path->[0];  | 
| 
1301
 | 
1114
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1633
 | 
             $debug and print "#$indent| -simple opt=$optional unrev @{[_dump($path)]}\n";  | 
| 
 
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
    | 
| 
1302
 | 
1114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1383
 | 
             $path = _unrev_path($path, _descend($ctx) );  | 
| 
1303
 | 
1114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2347
 | 
             $result{_node_key($path->[0])} = $path;  | 
| 
1304
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1305
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
1306
 | 
67
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
283
 | 
             $debug and print "#$indent| _do_reduce(@{[_dump($path)]})\n";  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
1307
 | 
67
 | 
 
 | 
 
 | 
 
 | 
 
 | 
143
 | 
             my ($common, $tail) = _do_reduce( $path, _descend($ctx) );  | 
| 
1308
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $path = [  | 
| 
1309
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 (  | 
| 
1310
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     ref($tail) eq 'HASH'  | 
| 
1311
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         ? _unrev_node($tail, _descend($ctx) )  | 
| 
1312
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         : _unrev_path($tail, _descend($ctx) )  | 
| 
1313
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ),  | 
| 
1314
 | 
67
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
330
 | 
                 @{_unrev_path($common, _descend($ctx) )}  | 
| 
 
 | 
67
 | 
 
 | 
 
 | 
 
 | 
 
 | 
131
 | 
    | 
| 
1315
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ];  | 
| 
1316
 | 
67
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
266
 | 
             $debug and print "#$indent| +reduced @{[_dump($path)]}\n";  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
1317
 | 
67
 | 
 
 | 
 
 | 
 
 | 
 
 | 
286
 | 
             $result{_node_key($path->[0])} = $path;  | 
| 
1318
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1319
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1320
 | 
513
 | 
 
 | 
 
 | 
 
 | 
 
 | 
542
 | 
     my $f;  | 
| 
1321
 | 
513
 | 
 
 | 
 
 | 
 
 | 
 
 | 
786
 | 
     for $f( @$fail ) {  | 
| 
1322
 | 
219
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
369
 | 
         $debug and print "#$indent| +fail @{[_dump($f)]}\n";  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
    | 
| 
1323
 | 
219
 | 
 
 | 
 
 | 
 
 | 
 
 | 
383
 | 
         $result{$f->[0]} = $f;  | 
| 
1324
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1325
 | 
513
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
846
 | 
     $debug and print "#$indent _reduce_fail $depth fail=@{[_dump(\%result)]}\n";  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
    | 
| 
1326
 | 
513
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2252
 | 
     return ( [], \%result );  | 
| 
1327
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1328
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1329
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _scan_node {  | 
| 
1330
 | 
1019
 | 
 
 | 
 
 | 
  
1019
  
 | 
 
 | 
1144
 | 
     my( $node, $ctx ) = @_;  | 
| 
1331
 | 
1019
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1590
 | 
     my $indent = ' ' x $ctx->{depth};  | 
| 
1332
 | 
1019
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1142
 | 
     my $debug  =       $ctx->{debug};  | 
| 
1333
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1334
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # For all the paths in the node, reverse them. If the first token  | 
| 
1335
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # of the path is a scalar, push it onto an array in a hash keyed by  | 
| 
1336
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # the value of the scalar.  | 
| 
1337
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #  | 
| 
1338
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # If it is a node, call _reduce_node on this node beforehand. If we  | 
| 
1339
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # get back a common head, all of the paths in the subnode shared a  | 
| 
1340
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # common tail. We then store the common part and the remaining node  | 
| 
1341
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # of paths (which is where the paths diverged from the end and install  | 
| 
1342
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # this into the same hash. At this point both the common and the tail  | 
| 
1343
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # are in reverse order, just as simple scalar paths are.  | 
| 
1344
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #  | 
| 
1345
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # On the other hand, if there were no common path returned then all  | 
| 
1346
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # the paths of the sub-node diverge at the end character. In this  | 
| 
1347
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # case the tail cannot participate in any further reductions and will  | 
| 
1348
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # appear in forward order.  | 
| 
1349
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #  | 
| 
1350
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # certainly the hurgliest function in the whole file :(  | 
| 
1351
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1352
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # $debug = 1 if $depth >= 8;  | 
| 
1353
 | 
1019
 | 
 
 | 
 
 | 
 
 | 
 
 | 
908
 | 
     my @fail;  | 
| 
1354
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my %reduce;  | 
| 
1355
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1356
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $n;  | 
| 
1357
 | 
1019
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2186
 | 
     for $n(  | 
| 
1358
 | 
2743
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5801
 | 
         map { substr($_, index($_, '#')+1) }  | 
| 
1359
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         sort  | 
| 
1360
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         map {  | 
| 
1361
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             join( '|' =>  | 
| 
1362
 | 
9029
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13768
 | 
                 scalar(grep {ref($_) eq 'HASH'} @{$node->{$_}}),  | 
| 
 
 | 
2743
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4056
 | 
    | 
| 
1363
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 _node_offset($node->{$_}),  | 
| 
1364
 | 
2743
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2315
 | 
                 scalar @{$node->{$_}},  | 
| 
 
 | 
2743
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10173
 | 
    | 
| 
1365
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             )  | 
| 
1366
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             . "#$_"  | 
| 
1367
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1368
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     keys %$node ) {  | 
| 
1369
 | 
2743
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2164
 | 
         my( $end, @path ) = reverse @{$node->{$n}};  | 
| 
 
 | 
2743
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6082
 | 
    | 
| 
1370
 | 
2743
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
4069
 | 
         if( ref($end) ne 'HASH' ) {  | 
| 
1371
 | 
2137
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
3262
 | 
             $debug and print "# $indent|_scan_node push reduce ($end:@{[_dump(\@path)]})\n";  | 
| 
 
 | 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
177
 | 
    | 
| 
1372
 | 
2137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1658
 | 
             push @{$reduce{$end}}, [ $end, @path ];  | 
| 
 
 | 
2137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7966
 | 
    | 
| 
1373
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1374
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
1375
 | 
606
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1208
 | 
             $debug and print "# $indent|_scan_node head=", _dump(\@path), ' tail=', _dump($end), "\n";  | 
| 
1376
 | 
606
 | 
 
 | 
 
 | 
 
 | 
 
 | 
627
 | 
             my $new_path;  | 
| 
1377
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # deal with sing, singing => s(?:ing)?ing  | 
| 
1378
 | 
606
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
2278
 | 
             if( keys %$end == 2 and exists $end->{''} ) {  | 
| 
1379
 | 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
180
 | 
                 my ($key, $opt_path) = each %$end;  | 
| 
1380
 | 
94
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
248
 | 
                 ($key, $opt_path) = each %$end if $key eq '';  | 
| 
1381
 | 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
94
 | 
                 $opt_path = [reverse @{$opt_path}];  | 
| 
 
 | 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
229
 | 
    | 
| 
1382
 | 
94
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
196
 | 
                 $debug and print "# $indent| check=", _dump($opt_path), "\n";  | 
| 
1383
 | 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
328
 | 
                 my $end = { '' => undef, $opt_path->[0] => [@$opt_path] };  | 
| 
1384
 | 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
169
 | 
                 my $head = [];  | 
| 
1385
 | 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
187
 | 
                 my $path = [@path];  | 
| 
1386
 | 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
207
 | 
                 ($head, my $slide, $path) = _slide_tail( $head, $end, $path, $ctx );  | 
| 
1387
 | 
94
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
331
 | 
                 if( @$head ) {  | 
| 
1388
 | 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
184
 | 
                     $new_path = [ @$head, $slide, @$path ];  | 
| 
1389
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
1390
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1391
 | 
606
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
874
 | 
             if( $new_path ) {  | 
| 
1392
 | 
40
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
84
 | 
                 $debug and print "# $indent|_scan_node slid=", _dump($new_path), "\n";  | 
| 
1393
 | 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
42
 | 
                 push @{$reduce{$new_path->[0]}}, $new_path;  | 
| 
 
 | 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
173
 | 
    | 
| 
1394
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1395
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
1396
 | 
566
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1003
 | 
                 my( $common, $tail ) = _reduce_node( $end, _descend($ctx) );  | 
| 
1397
 | 
566
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1634
 | 
                     if( not @$common ) {  | 
| 
1398
 | 
219
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
586
 | 
                     $debug and print "# $indent| +failed $n\n";  | 
| 
1399
 | 
219
 | 
 
 | 
 
 | 
 
 | 
 
 | 
839
 | 
                     push @fail, [reverse(@path), $tail];  | 
| 
1400
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
1401
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 else {  | 
| 
1402
 | 
347
 | 
 
 | 
 
 | 
 
 | 
 
 | 
782
 | 
                     my $path = [@path];  | 
| 
1403
 | 
347
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
705
 | 
                     $debug and print "# $indent|_scan_node ++recovered common=@{[_dump($common)]} tail=",  | 
| 
 
 | 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
67
 | 
    | 
| 
1404
 | 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
46
 | 
                         _dump($tail), " path=@{[_dump($path)]}\n";  | 
| 
1405
 | 
347
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
1864
 | 
                     if( ref($tail) eq 'HASH'  | 
| 
1406
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         and keys %$tail == 2  | 
| 
1407
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     ) {  | 
| 
1408
 | 
287
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
632
 | 
                         if( exists $tail->{''} ) {  | 
| 
1409
 | 
121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
280
 | 
                             ($common, $tail, $path) = _slide_tail( $common, $tail, $path, $ctx );  | 
| 
1410
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         }  | 
| 
1411
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
1412
 | 
347
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
432
 | 
                     push @{$reduce{$common->[0]}}, [  | 
| 
 
 | 
347
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2496
 | 
    | 
| 
1413
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         @$common,  | 
| 
1414
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         (ref($tail) eq 'HASH' ? $tail : @$tail ),  | 
| 
1415
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         @$path  | 
| 
1416
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     ];  | 
| 
1417
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
1418
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1419
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1420
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1421
 | 
1019
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2588
 | 
     $debug and print  | 
| 
1422
 | 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
179
 | 
         "# $indent|_scan_node counts: reduce=@{[scalar keys %reduce]} fail=@{[scalar @fail]}\n";  | 
| 
 
 | 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2347
 | 
    | 
| 
1423
 | 
1019
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2340
 | 
     return( \@fail, \%reduce );  | 
| 
1424
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1425
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1426
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _do_reduce {  | 
| 
1427
 | 
573
 | 
 
 | 
 
 | 
  
573
  
 | 
 
 | 
679
 | 
     my ($path, $ctx) = @_;  | 
| 
1428
 | 
573
 | 
 
 | 
 
 | 
 
 | 
 
 | 
954
 | 
     my $indent = ' ' x $ctx->{depth};  | 
| 
1429
 | 
573
 | 
 
 | 
 
 | 
 
 | 
 
 | 
652
 | 
     my $debug  =       $ctx->{debug};  | 
| 
1430
 | 
573
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1495
 | 
     my $ra = Regexp::Assemble->new(chomp=>0);  | 
| 
1431
 | 
573
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1279
 | 
     $ra->debug($debug);  | 
| 
1432
 | 
573
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1026
 | 
     $debug and print "# $indent| do @{[_dump($path)]}\n";  | 
| 
 
 | 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
80
 | 
    | 
| 
1433
 | 
573
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1778
 | 
     $ra->_insertr( $_ ) for  | 
| 
1434
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # When nodes come into the picture, we have to be careful  | 
| 
1435
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # about how we insert the paths into the assembly.  | 
| 
1436
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Paths with nodes first, then closest node to front  | 
| 
1437
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # then shortest path. Merely because if we can control  | 
| 
1438
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # order in which paths containing nodes get inserted,  | 
| 
1439
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # then we can make a couple of assumptions that simplify  | 
| 
1440
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # the code in _insert_node.  | 
| 
1441
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         sort {  | 
| 
1442
 | 
6000
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7831
 | 
             scalar(grep {ref($_) eq 'HASH'} @$a)  | 
| 
1443
 | 
1113
 | 
  
 50
  
 | 
  
100
  
 | 
 
 | 
 
 | 
1441
 | 
             <=> scalar(grep {ref($_) eq 'HASH'} @$b)  | 
| 
 
 | 
6578
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9782
 | 
    | 
| 
1444
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ||  | 
| 
1445
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             _node_offset($b) <=> _node_offset($a)  | 
| 
1446
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ||  | 
| 
1447
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             scalar @$a <=> scalar @$b  | 
| 
1448
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1449
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         @$path  | 
| 
1450
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ;  | 
| 
1451
 | 
573
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1188
 | 
     $path = $ra->_path;  | 
| 
1452
 | 
573
 | 
 
 | 
 
 | 
 
 | 
 
 | 
712
 | 
     my $common = [];  | 
| 
1453
 | 
573
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3553
 | 
     push @$common, shift @$path while( ref($path->[0]) ne 'HASH' );  | 
| 
1454
 | 
573
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1173
 | 
     my $tail = scalar( @$path ) > 1 ? [@$path] : $path->[0];  | 
| 
1455
 | 
573
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
980
 | 
     $debug and print "# $indent| _do_reduce common=@{[_dump($common)]} tail=@{[_dump($tail)]}\n";  | 
| 
 
 | 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
89
 | 
    | 
| 
 
 | 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
68
 | 
    | 
| 
1456
 | 
573
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3129
 | 
     return ($common, $tail);  | 
| 
1457
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1458
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1459
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _node_offset {  | 
| 
1460
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # return the offset that the first node is found, or -ve  | 
| 
1461
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # optimised for speed  | 
| 
1462
 | 
4422
 | 
 
 | 
 
 | 
  
4422
  
 | 
 
 | 
3172
 | 
     my $nr = @{$_[0]};  | 
| 
 
 | 
4422
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5067
 | 
    | 
| 
1463
 | 
4422
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3803
 | 
     my $atom = -1;  | 
| 
1464
 | 
4422
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
33844
 | 
     ref($_[0]->[$atom]) eq 'HASH' and return $atom while ++$atom < $nr;  | 
| 
1465
 | 
3458
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6136
 | 
     return -1;  | 
| 
1466
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1467
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1468
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _slide_tail {  | 
| 
1469
 | 
240
 | 
 
 | 
 
 | 
  
240
  
 | 
 
 | 
17552
 | 
     my $head   = shift;  | 
| 
1470
 | 
240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
242
 | 
     my $tail   = shift;  | 
| 
1471
 | 
240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
224
 | 
     my $path   = shift;  | 
| 
1472
 | 
240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
245
 | 
     my $ctx    = shift;  | 
| 
1473
 | 
240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
434
 | 
     my $indent = ' ' x $ctx->{depth};  | 
| 
1474
 | 
240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
279
 | 
     my $debug  =       $ctx->{debug};  | 
| 
1475
 | 
240
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
444
 | 
     $debug and print "# $indent| slide in h=", _dump($head),  | 
| 
1476
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ' t=', _dump($tail), ' p=', _dump($path), "\n";  | 
| 
1477
 | 
240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
486
 | 
     my $slide_path = (each %$tail)[-1];  | 
| 
1478
 | 
240
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
585
 | 
     $slide_path = (each %$tail)[-1] unless defined $slide_path;  | 
| 
1479
 | 
240
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
461
 | 
     $debug and print "# $indent| slide potential ", _dump($slide_path), " over ", _dump($path), "\n";  | 
| 
1480
 | 
240
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
1147
 | 
     while( defined $path->[0] and $path->[0] eq $slide_path->[0] ) {  | 
| 
1481
 | 
154
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
478
 | 
         $debug and print "# $indent| slide=tail=$slide_path->[0]\n";  | 
| 
1482
 | 
154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
195
 | 
         my $slide = shift @$path;  | 
| 
1483
 | 
154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
165
 | 
         shift @$slide_path;  | 
| 
1484
 | 
154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
241
 | 
         push @$slide_path, $slide;  | 
| 
1485
 | 
154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
570
 | 
         push @$head, $slide;  | 
| 
1486
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1487
 | 
240
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
520
 | 
     $debug and print "# $indent| slide path ", _dump($slide_path), "\n";  | 
| 
1488
 | 
240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
501
 | 
     my $slide_node = {  | 
| 
1489
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         '' => undef,  | 
| 
1490
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         _node_key($slide_path->[0]) => $slide_path,  | 
| 
1491
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     };  | 
| 
1492
 | 
240
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
571
 | 
     $debug and print "# $indent| slide out h=", _dump($head),  | 
| 
1493
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ' s=', _dump($slide_node), ' p=', _dump($path), "\n";  | 
| 
1494
 | 
240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
702
 | 
     return ($head, $slide_node, $path);  | 
| 
1495
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1496
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1497
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _unrev_path {  | 
| 
1498
 | 
2690
 | 
 
 | 
 
 | 
  
2690
  
 | 
 
 | 
2747
 | 
     my ($path, $ctx) = @_;  | 
| 
1499
 | 
2690
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3576
 | 
     my $indent = ' ' x $ctx->{depth};  | 
| 
1500
 | 
2690
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2415
 | 
     my $debug  =       $ctx->{debug};  | 
| 
1501
 | 
2690
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2003
 | 
     my $new;  | 
| 
1502
 | 
2690
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2869
 | 
     if( not grep { ref($_) } @$path ) {  | 
| 
 
 | 
6383
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10476
 | 
    | 
| 
1503
 | 
2196
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
3404
 | 
         $debug and print "# ${indent}_unrev path fast ", _dump($path);  | 
| 
1504
 | 
2196
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3934
 | 
         $new = [reverse @$path];  | 
| 
1505
 | 
2196
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
3520
 | 
         $debug and print "#  -> ", _dump($new), "\n";  | 
| 
1506
 | 
2196
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3500
 | 
         return $new;  | 
| 
1507
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1508
 | 
494
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
860
 | 
     $debug and print "# ${indent}unrev path in ", _dump($path), "\n";  | 
| 
1509
 | 
494
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1147
 | 
     while( defined( my $p = pop @$path )) {  | 
| 
1510
 | 
1483
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
4520
 | 
         push @$new,  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1511
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               ref($p) eq 'HASH'  ? _unrev_node($p, _descend($ctx) )  | 
| 
1512
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             : ref($p) eq 'ARRAY' ? _unrev_path($p, _descend($ctx) )  | 
| 
1513
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             : $p  | 
| 
1514
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ;  | 
| 
1515
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1516
 | 
494
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
907
 | 
     $debug and print "# ${indent}unrev path out ", _dump($new), "\n";  | 
| 
1517
 | 
494
 | 
 
 | 
 
 | 
 
 | 
 
 | 
755
 | 
     return $new;  | 
| 
1518
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1519
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1520
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _unrev_node {  | 
| 
1521
 | 
630
 | 
 
 | 
 
 | 
  
630
  
 | 
 
 | 
727
 | 
     my ($node, $ctx ) = @_;  | 
| 
1522
 | 
630
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1011
 | 
     my $indent = ' ' x $ctx->{depth};  | 
| 
1523
 | 
630
 | 
 
 | 
 
 | 
 
 | 
 
 | 
591
 | 
     my $debug  =       $ctx->{debug};  | 
| 
1524
 | 
630
 | 
 
 | 
 
 | 
 
 | 
 
 | 
866
 | 
     my $optional = _remove_optional($node);  | 
| 
1525
 | 
630
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1147
 | 
     $debug and print "# ${indent}unrev node in ", _dump($node), " opt=$optional\n";  | 
| 
1526
 | 
630
 | 
 
 | 
 
 | 
 
 | 
 
 | 
643
 | 
     my $new;  | 
| 
1527
 | 
630
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1103
 | 
     $new->{''} = undef if $optional;  | 
| 
1528
 | 
630
 | 
 
 | 
 
 | 
 
 | 
 
 | 
464
 | 
     my $n;  | 
| 
1529
 | 
630
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1455
 | 
     for $n( keys %$node ) {  | 
| 
1530
 | 
1167
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1780
 | 
         my $path = _unrev_path($node->{$n}, _descend($ctx) );  | 
| 
1531
 | 
1167
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2645
 | 
         $new->{_node_key($path->[0])} = $path;  | 
| 
1532
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1533
 | 
630
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1142
 | 
     $debug and print "# ${indent}unrev node out ", _dump($new), "\n";  | 
| 
1534
 | 
630
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2628
 | 
     return $new;  | 
| 
1535
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1536
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1537
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _node_key {  | 
| 
1538
 | 
4657
 | 
 
 | 
 
 | 
  
4657
  
 | 
 
 | 
5056
 | 
     my $node = shift;  | 
| 
1539
 | 
4657
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
7581
 | 
     return _node_key($node->[0]) if ref($node) eq 'ARRAY';  | 
| 
1540
 | 
4646
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
15824
 | 
     return $node unless ref($node) eq 'HASH';  | 
| 
1541
 | 
205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
243
 | 
     my $key = '';  | 
| 
1542
 | 
205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
213
 | 
     my $k;  | 
| 
1543
 | 
205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
490
 | 
     for $k( keys %$node ) {  | 
| 
1544
 | 
452
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
749
 | 
         next if $k eq '';  | 
| 
1545
 | 
374
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
1311
 | 
         $key = $k if $key eq '' or $key gt $k;  | 
| 
1546
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1547
 | 
205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
633
 | 
     return $key;  | 
| 
1548
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1549
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1550
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _descend {  | 
| 
1551
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Take a context object, and increase the depth by one.  | 
| 
1552
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # By creating a fresh hash each time, we don't have to  | 
| 
1553
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # bother adding make-work code to decrease the depth  | 
| 
1554
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # when we return from what we called.  | 
| 
1555
 | 
6190
 | 
 
 | 
 
 | 
  
6190
  
 | 
 
 | 
5261
 | 
     my $ctx = shift;  | 
| 
1556
 | 
6190
 | 
 
 | 
 
 | 
 
 | 
 
 | 
24211
 | 
     return {%$ctx, depth => $ctx->{depth}+1};  | 
| 
1557
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1558
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1559
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #####################################################################  | 
| 
1560
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1561
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _make_class {  | 
| 
1562
 | 
650
 | 
 
 | 
 
 | 
  
650
  
 | 
 
 | 
708
 | 
     my $self = shift;  | 
| 
1563
 | 
650
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1053
 | 
     my %set = map { ($_,1) } @_;  | 
| 
 
 | 
1675
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3230
 | 
    | 
| 
1564
 | 
650
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1481
 | 
     delete $set{'\\d'} if exists $set{'\\w'};  | 
| 
1565
 | 
650
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1185
 | 
     delete $set{'\\D'} if exists $set{'\\W'};  | 
| 
1566
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return '.' if exists $set{'.'}  | 
| 
1567
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         or ($self->{fold_meta_pairs} and (  | 
| 
1568
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                (exists $set{'\\d'} and exists $set{'\\D'})  | 
| 
1569
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             or (exists $set{'\\s'} and exists $set{'\\S'})  | 
| 
1570
 | 
650
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
5172
 | 
             or (exists $set{'\\w'} and exists $set{'\\W'})  | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1571
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ))  | 
| 
1572
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ;  | 
| 
1573
 | 
632
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1001
 | 
     for my $meta( q/\\d/, q/\\D/, q/\\s/, q/\\S/, q/\\w/, q/\\W/ ) {  | 
| 
1574
 | 
3792
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
6044
 | 
         if( exists $set{$meta} ) {  | 
| 
1575
 | 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
180
 | 
             my $re = qr/$meta/;  | 
| 
1576
 | 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
39
 | 
             my @delete;  | 
| 
1577
 | 
28
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
379
 | 
             $_ =~ /^$re$/ and push @delete, $_ for keys %set;  | 
| 
1578
 | 
28
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
90
 | 
             delete @set{@delete} if @delete;  | 
| 
1579
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1580
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1581
 | 
632
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1352
 | 
     return (keys %set)[0] if keys %set == 1;  | 
| 
1582
 | 
625
 | 
 
 | 
 
 | 
 
 | 
 
 | 
880
 | 
     for my $meta( '.', '+', '*', '?', '(', ')', '^', '@', '$', '[', '/', ) {  | 
| 
1583
 | 
6875
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
11239
 | 
         exists $set{"\\$meta"} and $set{$meta} = delete $set{"\\$meta"};  | 
| 
1584
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1585
 | 
625
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1211
 | 
     my $dash  = exists $set{'-'} ? do { delete($set{'-'}), '-' } : '';  | 
| 
 
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
47
 | 
    | 
| 
1586
 | 
625
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
975
 | 
     my $caret = exists $set{'^'} ? do { delete($set{'^'}), '^' } : '';  | 
| 
 
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
    | 
| 
1587
 | 
625
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2184
 | 
     my $class = join( '' => sort keys %set );  | 
| 
1588
 | 
625
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
1938
 | 
     $class =~ s/0123456789/\\d/ and $class eq '\\d' and return $class;  | 
| 
1589
 | 
622
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4706
 | 
     return "[$dash$class$caret]";  | 
| 
1590
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1591
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1592
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _re_sort {  | 
| 
1593
 | 
999
 | 
 
 | 
  
100
  
 | 
  
999
  
 | 
 
 | 
5178
 | 
     return length $b <=> length $a || $a cmp $b  | 
| 
1594
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1595
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1596
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _combine {  | 
| 
1597
 | 
140
 | 
 
 | 
 
 | 
  
140
  
 | 
 
 | 
155
 | 
     my $self = shift;  | 
| 
1598
 | 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
134
 | 
     my $type = shift;  | 
| 
1599
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # print "c in = @{[_dump(\@_)]}\n";  | 
| 
1600
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # my $combine =  | 
| 
1601
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return '('  | 
| 
1602
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     . $type  | 
| 
1603
 | 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
177
 | 
     . do {  | 
| 
1604
 | 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
113
 | 
         my( @short, @long );  | 
| 
1605
 | 
140
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
249
 | 
         push @{ /^$Single_Char$/ ? \@short : \@long}, $_ for @_;  | 
| 
 
 | 
377
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2240
 | 
    | 
| 
1606
 | 
140
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
325
 | 
         if( @short == 1 ) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1607
 | 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
126
 | 
             @long = sort _re_sort @long, @short;  | 
| 
1608
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1609
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif( @short > 1 ) {  | 
| 
1610
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # yucky but true  | 
| 
1611
 | 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
151
 | 
             my @combine = (_make_class($self, @short), sort _re_sort @long);  | 
| 
1612
 | 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
211
 | 
             @long = @combine;  | 
| 
1613
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1614
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
1615
 | 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
103
 | 
             @long = sort _re_sort @long;  | 
| 
1616
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1617
 | 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
473
 | 
         join( '|', @long );  | 
| 
1618
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1619
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     . ')';  | 
| 
1620
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # print "combine <$combine>\n";  | 
| 
1621
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # $combine;  | 
| 
1622
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1623
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1624
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _combine_new {  | 
| 
1625
 | 
1738
 | 
 
 | 
 
 | 
  
1738
  
 | 
 
 | 
1746
 | 
     my $self = shift;  | 
| 
1626
 | 
1738
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1560
 | 
     my( @short, @long );  | 
| 
1627
 | 
1738
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2705
 | 
     push @{ /^$Single_Char$/ ? \@short : \@long}, $_ for @_;  | 
| 
 
 | 
3244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20283
 | 
    | 
| 
1628
 | 
1738
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
8016
 | 
     if( @short == 1 and @long == 0 ) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1629
 | 
365
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2017
 | 
         return $short[0];  | 
| 
1630
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1631
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     elsif( @short > 1 and @short == @_ ) {  | 
| 
1632
 | 
494
 | 
 
 | 
 
 | 
 
 | 
 
 | 
904
 | 
         return _make_class($self, @short);  | 
| 
1633
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1634
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else {  | 
| 
1635
 | 
879
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
13737
 | 
         return '(?:'  | 
| 
1636
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             . join( '|' =>  | 
| 
1637
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 @short > 1  | 
| 
1638
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     ? ( _make_class($self, @short), sort _re_sort @long)  | 
| 
1639
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     : ( (sort _re_sort( @long )), @short )  | 
| 
1640
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             )  | 
| 
1641
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         . ')';  | 
| 
1642
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1643
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1644
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1645
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _re_path {  | 
| 
1646
 | 
4772
 | 
 
 | 
 
 | 
  
4772
  
 | 
 
 | 
4334
 | 
     my $self = shift;  | 
| 
1647
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # in shorter assemblies, _re_path() is the second hottest  | 
| 
1648
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # routine. after insert(), so make it fast.  | 
| 
1649
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1650
 | 
4772
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
7927
 | 
     if ($self->{unroll_plus}) {  | 
| 
1651
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # but we can't easily make this blockless  | 
| 
1652
 | 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
81
 | 
         my @arr = @{$_[0]};  | 
| 
 
 | 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
126
 | 
    | 
| 
1653
 | 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
67
 | 
         my $str = '';  | 
| 
1654
 | 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
58
 | 
         my $skip = 0;  | 
| 
1655
 | 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
125
 | 
         for my $i (0..$#arr) {  | 
| 
1656
 | 
127
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
1188
 | 
             if (ref($arr[$i]) eq 'ARRAY') {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1657
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
                 $str .= _re_path($self, $arr[$i]);  | 
| 
1658
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1659
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             elsif (ref($arr[$i]) eq 'HASH') {  | 
| 
1660
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $str .= exists $arr[$i]->{''}  | 
| 
1661
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     ? _combine_new( $self,  | 
| 
1662
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
                         map { _re_path( $self, $arr[$i]->{$_} ) } grep { $_ ne '' } keys %{$arr[$i]}  | 
| 
 
 | 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
23
 | 
    | 
| 
 
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
    | 
| 
1663
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     ) . '?'  | 
| 
1664
 | 
28
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
48
 | 
                     : _combine_new($self, map { _re_path( $self, $arr[$i]->{$_} ) } keys %{$arr[$i]})  | 
| 
 
 | 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
73
 | 
    | 
| 
 
 | 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
50
 | 
    | 
| 
1665
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ;  | 
| 
1666
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1667
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             elsif ($i < $#arr and $arr[$i+1] =~ /\A$arr[$i]\*(\??)\Z/) {  | 
| 
1668
 | 
7
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
29
 | 
                 $str .= "$arr[$i]+" . (defined $1 ? $1 : '');  | 
| 
1669
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
                 ++$skip;  | 
| 
1670
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1671
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             elsif ($skip) {  | 
| 
1672
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
                 $skip = 0;  | 
| 
1673
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1674
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
1675
 | 
84
 | 
 
 | 
 
 | 
 
 | 
 
 | 
137
 | 
                 $str .= $arr[$i];  | 
| 
1676
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1677
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1678
 | 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
200
 | 
         return $str;  | 
| 
1679
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1680
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1681
 | 
4700
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
5156
 | 
     return join( '', @_ ) unless grep { length ref $_ } @_;  | 
| 
 
 | 
4700
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10016
 | 
    | 
| 
1682
 | 
4700
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3412
 | 
     my $p;  | 
| 
1683
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return join '', map {  | 
| 
1684
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ref($_) eq '' ? $_  | 
| 
1685
 | 
9506
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
24009
 | 
         : ref($_) eq 'HASH' ? do {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1686
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # In the case of a node, see whether there's a '' which  | 
| 
1687
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # indicates that the whole thing is optional and thus  | 
| 
1688
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # requires a trailing ?  | 
| 
1689
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # Unroll the two different paths to avoid the needless  | 
| 
1690
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # grep when it isn't necessary.  | 
| 
1691
 | 
1710
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1479
 | 
             $p = $_;  | 
| 
1692
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             exists $_->{''}  | 
| 
1693
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ?  _combine_new( $self,  | 
| 
1694
 | 
801
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1613
 | 
                 map { _re_path( $self, $p->{$_} ) } grep { $_ ne '' } keys %$_  | 
| 
 
 | 
1508
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2527
 | 
    | 
| 
1695
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ) . '?'  | 
| 
1696
 | 
1710
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
5146
 | 
             : _combine_new($self, map { _re_path( $self, $p->{$_} ) } keys %$_ )  | 
| 
 
 | 
2394
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3791
 | 
    | 
| 
1697
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1698
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         : _re_path($self, $_) # ref($_) eq 'ARRAY'  | 
| 
1699
 | 
4700
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3629
 | 
     } @{$_[0]}  | 
| 
 
 | 
4700
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6443
 | 
    | 
| 
1700
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1701
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1702
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _lookahead {  | 
| 
1703
 | 
132
 | 
 
 | 
 
 | 
  
132
  
 | 
 
 | 
141
 | 
     my $in = shift;  | 
| 
1704
 | 
132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
129
 | 
     my %head;  | 
| 
1705
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $path;  | 
| 
1706
 | 
132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
244
 | 
     for $path( keys %$in ) {  | 
| 
1707
 | 
328
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
577
 | 
         next unless defined $in->{$path};  | 
| 
1708
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # print "look $path: ", ref($in->{$path}[0]), ".\n";  | 
| 
1709
 | 
267
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
595
 | 
         if( ref($in->{$path}[0]) eq 'HASH' ) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1710
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
             my $next = 0;  | 
| 
1711
 | 
15
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
54
 | 
             while( ref($in->{$path}[$next]) eq 'HASH' and @{$in->{$path}} > $next + 1 ) {  | 
| 
 
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
70
 | 
    | 
| 
1712
 | 
11
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
30
 | 
                 if( exists $in->{$path}[$next]{''} ) {  | 
| 
1713
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
                     ++$head{$in->{$path}[$next+1]};  | 
| 
1714
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
1715
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
31
 | 
                 ++$next;  | 
| 
1716
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1717
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
48
 | 
             my $inner = _lookahead( $in->{$path}[0] );  | 
| 
1718
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
70
 | 
             @head{ keys %$inner } = (values %$inner);  | 
| 
1719
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1720
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif( ref($in->{$path}[0]) eq 'ARRAY' ) {  | 
| 
1721
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
             my $subpath = $in->{$path}[0];  | 
| 
1722
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
             for( my $sp = 0; $sp < @$subpath; ++$sp ) {  | 
| 
1723
 | 
3
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
11
 | 
                 if( ref($subpath->[$sp]) eq 'HASH' ) {  | 
| 
1724
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
                     my $follow = _lookahead( $subpath->[$sp] );  | 
| 
1725
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
                     @head{ keys %$follow } = (values %$follow);  | 
| 
1726
 | 
2
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
13
 | 
                     last unless exists $subpath->[$sp]{''};  | 
| 
1727
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
1728
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 else {  | 
| 
1729
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
                     ++$head{$subpath->[$sp]};  | 
| 
1730
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
                     last;  | 
| 
1731
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
1732
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1733
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1734
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
1735
 | 
250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
488
 | 
             ++$head{ $in->{$path}[0] };  | 
| 
1736
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1737
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1738
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # print "_lookahead ", _dump($in), '==>', _dump([keys %head]), "\n";  | 
| 
1739
 | 
132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
277
 | 
     return \%head;  | 
| 
1740
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1741
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1742
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _re_path_lookahead {  | 
| 
1743
 | 
265
 | 
 
 | 
 
 | 
  
265
  
 | 
 
 | 
250
 | 
     my $self = shift;  | 
| 
1744
 | 
265
 | 
 
 | 
 
 | 
 
 | 
 
 | 
226
 | 
     my $in  = shift;  | 
| 
1745
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # print "_re_path_la in ", _dump($in), "\n";  | 
| 
1746
 | 
265
 | 
 
 | 
 
 | 
 
 | 
 
 | 
241
 | 
     my $out = '';  | 
| 
1747
 | 
265
 | 
 
 | 
 
 | 
 
 | 
 
 | 
526
 | 
     for( my $p = 0; $p < @$in; ++$p ) {  | 
| 
1748
 | 
573
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
979
 | 
         if( ref($in->[$p]) eq '' ) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1749
 | 
462
 | 
 
 | 
 
 | 
 
 | 
 
 | 
460
 | 
             $out .= $in->[$p];  | 
| 
1750
 | 
462
 | 
 
 | 
 
 | 
 
 | 
 
 | 
794
 | 
             next;  | 
| 
1751
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1752
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif( ref($in->[$p]) eq 'ARRAY' ) {  | 
| 
1753
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
             $out .= _re_path_lookahead($self, $in->[$p]);  | 
| 
1754
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
             next;  | 
| 
1755
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1756
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # print "$p ", _dump($in->[$p]), "\n";  | 
| 
1757
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my $path = [  | 
| 
1758
 | 
228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
455
 | 
             map { _re_path_lookahead($self, $in->[$p]{$_} ) }  | 
| 
1759
 | 
275
 | 
 
 | 
 
 | 
 
 | 
 
 | 
445
 | 
             grep { $_ ne '' }  | 
| 
1760
 | 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
102
 | 
             keys %{$in->[$p]}  | 
| 
 
 | 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
294
 | 
    | 
| 
1761
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ];  | 
| 
1762
 | 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
287
 | 
         my $ahead = _lookahead($in->[$p]);  | 
| 
1763
 | 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
115
 | 
         my $more = 0;  | 
| 
1764
 | 
109
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
393
 | 
         if( exists $in->[$p]{''} and $p + 1 < @$in ) {  | 
| 
1765
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
             my $next = 1;  | 
| 
1766
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
38
 | 
             while( $p + $next < @$in ) {  | 
| 
1767
 | 
14
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
36
 | 
                 if( ref( $in->[$p+$next] ) eq 'HASH' ) {  | 
| 
1768
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
                     my $follow = _lookahead( $in->[$p+$next] );  | 
| 
1769
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
                     @{$ahead}{ keys %$follow } = (values %$follow);  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
    | 
| 
1770
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
1771
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 else {  | 
| 
1772
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
                     ++$ahead->{$in->[$p+$next]};  | 
| 
1773
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
                     last;  | 
| 
1774
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
1775
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
                 ++$next;  | 
| 
1776
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1777
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
             $more = 1;  | 
| 
1778
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1779
 | 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
146
 | 
         my $nr_one = grep { /^$Single_Char$/ } @$path;  | 
| 
 
 | 
228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1500
 | 
    | 
| 
1780
 | 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
129
 | 
         my $nr     = @$path;  | 
| 
1781
 | 
109
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
340
 | 
         if( $nr_one > 1 and $nr_one == $nr ) {  | 
| 
1782
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
53
 | 
             $out .= _make_class($self, @$path);  | 
| 
1783
 | 
18
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
110
 | 
             $out .= '?' if exists $in->[$p]{''};  | 
| 
1784
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1785
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
1786
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             my $zwla = keys(%$ahead) > 1  | 
| 
1787
 | 
91
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
281
 | 
                 ?  _combine($self, '?=', grep { s/\+$//; $_ } keys %$ahead )  | 
| 
 
 | 
191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
190
 | 
    | 
| 
 
 | 
191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
296
 | 
    | 
| 
1788
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 : '';  | 
| 
1789
 | 
91
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
255
 | 
             my $patt = $nr > 1 ? _combine($self, '?:', @$path ) : $path->[0];  | 
| 
1790
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # print "have nr=$nr n1=$nr_one n=", _dump($in->[$p]), ' a=', _dump([keys %$ahead]), " zwla=$zwla patt=$patt @{[_dump($path)]}\n";  | 
| 
1791
 | 
91
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
216
 | 
             if( exists $in->[$p]{''} ) {  | 
| 
1792
 | 
44
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
282
 | 
                 $out .=  $more ? "$zwla(?:$patt)?" : "(?:$zwla$patt)?";  | 
| 
1793
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1794
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
1795
 | 
47
 | 
 
 | 
 
 | 
 
 | 
 
 | 
228
 | 
                 $out .= "$zwla$patt";  | 
| 
1796
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1797
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1798
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1799
 | 
265
 | 
 
 | 
 
 | 
 
 | 
 
 | 
714
 | 
     return $out;  | 
| 
1800
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1801
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1802
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _re_path_track {  | 
| 
1803
 | 
33
 | 
 
 | 
 
 | 
  
33
  
 | 
 
 | 
48
 | 
     my $self      = shift;  | 
| 
1804
 | 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
35
 | 
     my $in        = shift;  | 
| 
1805
 | 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
36
 | 
     my $normal    = shift;  | 
| 
1806
 | 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
49
 | 
     my $augmented = shift;  | 
| 
1807
 | 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
36
 | 
     my $o;  | 
| 
1808
 | 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
41
 | 
     my $simple  = '';  | 
| 
1809
 | 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
41
 | 
     my $augment = '';  | 
| 
1810
 | 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
90
 | 
     for( my $n = 0; $n < @$in; ++$n ) {  | 
| 
1811
 | 
114
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
188
 | 
         if( ref($in->[$n]) eq '' ) {  | 
| 
1812
 | 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
118
 | 
             $o = $in->[$n];  | 
| 
1813
 | 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
99
 | 
             $simple  .= $o;  | 
| 
1814
 | 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
108
 | 
             $augment .= $o;  | 
| 
1815
 | 
104
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
896
 | 
             if( (  | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1816
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     $n < @$in - 1  | 
| 
1817
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     and ref($in->[$n+1]) eq 'HASH' and exists $in->[$n+1]{''}  | 
| 
1818
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 )  | 
| 
1819
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 or $n == @$in - 1  | 
| 
1820
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ) {  | 
| 
1821
 | 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
24
 | 
                 push @{$self->{mlist}}, $normal . $simple ;  | 
| 
 
 | 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
97
 | 
    | 
| 
1822
 | 
24
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
93
 | 
                 $augment .= $] < 5.009005  | 
| 
1823
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     ? "(?{\$self->{m}=$self->{mcount}})"  | 
| 
1824
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     : "(?{$self->{mcount}})"  | 
| 
1825
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ;  | 
| 
1826
 | 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
83
 | 
                 ++$self->{mcount};  | 
| 
1827
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1828
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1829
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
1830
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             my $path = [  | 
| 
1831
 | 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
111
 | 
                 map { $self->_re_path_track( $in->[$n]{$_}, $normal.$simple , $augmented.$augment ) }  | 
| 
1832
 | 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
58
 | 
                 grep { $_ ne '' }  | 
| 
1833
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
                 keys %{$in->[$n]}  | 
| 
 
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
43
 | 
    | 
| 
1834
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ];  | 
| 
1835
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
80
 | 
             $o = '(?:' . join( '|' => sort _re_sort @$path ) . ')';  | 
| 
1836
 | 
10
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
31
 | 
             $o .= '?' if exists $in->[$n]{''};  | 
| 
1837
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
             $simple  .= $o;  | 
| 
1838
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
42
 | 
             $augment .= $o;  | 
| 
1839
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1840
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1841
 | 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
116
 | 
     return $augment;  | 
| 
1842
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1843
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1844
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _re_path_pretty {  | 
| 
1845
 | 
411
 | 
 
 | 
 
 | 
  
411
  
 | 
 
 | 
377
 | 
     my $self = shift;  | 
| 
1846
 | 
411
 | 
 
 | 
 
 | 
 
 | 
 
 | 
362
 | 
     my $in  = shift;  | 
| 
1847
 | 
411
 | 
 
 | 
 
 | 
 
 | 
 
 | 
331
 | 
     my $arg = shift;  | 
| 
1848
 | 
411
 | 
 
 | 
 
 | 
 
 | 
 
 | 
759
 | 
     my $pre    = ' ' x (($arg->{depth}+0) * $arg->{indent});  | 
| 
1849
 | 
411
 | 
 
 | 
 
 | 
 
 | 
 
 | 
536
 | 
     my $indent = ' ' x (($arg->{depth}+1) * $arg->{indent});  | 
| 
1850
 | 
411
 | 
 
 | 
 
 | 
 
 | 
 
 | 
389
 | 
     my $out = '';  | 
| 
1851
 | 
411
 | 
 
 | 
 
 | 
 
 | 
 
 | 
355
 | 
     $arg->{depth}++;  | 
| 
1852
 | 
411
 | 
 
 | 
 
 | 
 
 | 
 
 | 
342
 | 
     my $prev_was_paren = 0;  | 
| 
1853
 | 
411
 | 
 
 | 
 
 | 
 
 | 
 
 | 
774
 | 
     for( my $p = 0; $p < @$in; ++$p ) {  | 
| 
1854
 | 
1084
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1705
 | 
         if( ref($in->[$p]) eq '' ) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1855
 | 
910
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1303
 | 
             $out .= "\n$pre" if $prev_was_paren;  | 
| 
1856
 | 
910
 | 
 
 | 
 
 | 
 
 | 
 
 | 
824
 | 
             $out .= $in->[$p];  | 
| 
1857
 | 
910
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1554
 | 
             $prev_was_paren = 0;  | 
| 
1858
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1859
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif( ref($in->[$p]) eq 'ARRAY' ) {  | 
| 
1860
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
             $out .= _re_path($self, $in->[$p]);  | 
| 
1861
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1862
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
1863
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             my $path = [  | 
| 
1864
 | 
369
 | 
 
 | 
 
 | 
 
 | 
 
 | 
683
 | 
                 map { _re_path_pretty($self, $in->[$p]{$_}, $arg ) }  | 
| 
1865
 | 
419
 | 
 
 | 
 
 | 
 
 | 
 
 | 
629
 | 
                 grep { $_ ne '' }  | 
| 
1866
 | 
171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
145
 | 
                 keys %{$in->[$p]}  | 
| 
 
 | 
171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
451
 | 
    | 
| 
1867
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ];  | 
| 
1868
 | 
171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
334
 | 
             my $nr = @$path;  | 
| 
1869
 | 
171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
148
 | 
             my( @short, @long );  | 
| 
1870
 | 
171
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
271
 | 
             push @{/^$Single_Char$/ ? \@short : \@long}, $_ for @$path;  | 
| 
 
 | 
369
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2421
 | 
    | 
| 
1871
 | 
171
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
278
 | 
             if( @short == $nr ) {  | 
| 
1872
 | 
37
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
123
 | 
                 $out .=  $nr == 1 ? $path->[0] : _make_class($self, @short);  | 
| 
1873
 | 
37
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
205
 | 
                 $out .= '?' if exists $in->[$p]{''};  | 
| 
1874
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1875
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
1876
 | 
134
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
291
 | 
                 $out .= "\n" if length $out;  | 
| 
1877
 | 
134
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
221
 | 
                 $out .= $pre if $p;  | 
| 
1878
 | 
134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
206
 | 
                 $out .= "(?:\n$indent";  | 
| 
1879
 | 
134
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
209
 | 
                 if( @short < 2 ) {  | 
| 
1880
 | 
133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
138
 | 
                     my $r = 0;  | 
| 
1881
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     $out .= join( "\n$indent|" => map {  | 
| 
1882
 | 
133
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
546
 | 
                             $r++ and $_ =~ s/^\(\?:/\n$indent(?:/;  | 
| 
 
 | 
298
 | 
 
 | 
 
 | 
 
 | 
 
 | 
547
 | 
    | 
| 
1883
 | 
298
 | 
 
 | 
 
 | 
 
 | 
 
 | 
574
 | 
                             $_  | 
| 
1884
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         }  | 
| 
1885
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         sort _re_sort @$path  | 
| 
1886
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     );  | 
| 
1887
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
1888
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 else {  | 
| 
1889
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
                     $out .= join( "\n$indent|" => ( (sort _re_sort @long), _make_class($self, @short) ));  | 
| 
1890
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
1891
 | 
134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
231
 | 
                 $out .= "\n$pre)";  | 
| 
1892
 | 
134
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
241
 | 
                 if( exists $in->[$p]{''} ) {  | 
| 
1893
 | 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
68
 | 
                     $out .= "\n$pre?";  | 
| 
1894
 | 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
133
 | 
                     $prev_was_paren = 0;  | 
| 
1895
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
1896
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 else {  | 
| 
1897
 | 
97
 | 
 
 | 
 
 | 
 
 | 
 
 | 
314
 | 
                     $prev_was_paren = 1;  | 
| 
1898
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
1899
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1900
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1901
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1902
 | 
411
 | 
 
 | 
 
 | 
 
 | 
 
 | 
371
 | 
     $arg->{depth}--;  | 
| 
1903
 | 
411
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1009
 | 
     return $out;  | 
| 
1904
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1905
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1906
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _node_eq {  | 
| 
1907
 | 
425
 | 
  
100
  
 | 
  
 66
  
 | 
  
425
  
 | 
 
 | 
1534
 | 
     return 0 if not defined $_[0] or not defined $_[1];  | 
| 
1908
 | 
422
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
974
 | 
     return 0 if ref $_[0] ne ref $_[1];  | 
| 
1909
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Now that we have determined that the reference of each  | 
| 
1910
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # argument are the same, we only have to test the first  | 
| 
1911
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # one, which gives us a nice micro-optimisation.  | 
| 
1912
 | 
381
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
741
 | 
     if( ref($_[0]) eq 'HASH' ) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1913
 | 
305
 | 
 
 | 
 
 | 
 
 | 
 
 | 
634
 | 
         keys %{$_[0]} == keys %{$_[1]}  | 
| 
 
 | 
305
 | 
 
 | 
 
 | 
 
 | 
 
 | 
878
 | 
    | 
| 
1914
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             and  | 
| 
1915
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # does this short-circuit to avoid _re_path() cost more than it saves?  | 
| 
1916
 | 
305
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
271
 | 
         join( '|' => sort keys %{$_[0]}) eq join( '|' => sort keys %{$_[1]})  | 
| 
 
 | 
272
 | 
 
 | 
 
 | 
 
 | 
 
 | 
957
 | 
    | 
| 
 
 | 
272
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1669
 | 
    | 
| 
1917
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             and  | 
| 
1918
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         _re_path(undef, [$_[0]] ) eq _re_path(undef, [$_[1]] );  | 
| 
1919
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1920
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     elsif( ref($_[0]) eq 'ARRAY' ) {  | 
| 
1921
 | 
9
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
8
 | 
         scalar @{$_[0]} == scalar @{$_[1]}  | 
| 
 
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
    | 
| 
 
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
31
 | 
    | 
| 
1922
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             and  | 
| 
1923
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         _re_path(undef, $_[0]) eq _re_path(undef, $_[1]);  | 
| 
1924
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1925
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else {  | 
| 
1926
 | 
67
 | 
 
 | 
 
 | 
 
 | 
 
 | 
232
 | 
         $_[0] eq $_[1];  | 
| 
1927
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1928
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1929
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1930
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _pretty_dump {  | 
| 
1931
 | 
7
 | 
 
 | 
 
 | 
  
7
  
 | 
 
 | 
32
 | 
     return sprintf "\\x%02x", ord(shift);  | 
| 
1932
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1933
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1934
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _dump {  | 
| 
1935
 | 
5579
 | 
 
 | 
 
 | 
  
5579
  
 | 
 
 | 
5210
 | 
     my $path = shift;  | 
| 
1936
 | 
5579
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
9288
 | 
     return _dump_node($path) if ref($path) eq 'HASH';  | 
| 
1937
 | 
5057
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3824
 | 
     my $dump = '[';  | 
| 
1938
 | 
5057
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3311
 | 
     my $d;  | 
| 
1939
 | 
5057
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3539
 | 
     my $nr = 0;  | 
| 
1940
 | 
5057
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5040
 | 
     for $d( @$path ) {  | 
| 
1941
 | 
11036
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
15461
 | 
         $dump .= ' ' if $nr++;  | 
| 
1942
 | 
11036
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
18870
 | 
         if( ref($d) eq 'HASH' ) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1943
 | 
1340
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1584
 | 
             $dump .= _dump_node($d);  | 
| 
1944
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1945
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif( ref($d) eq 'ARRAY' ) {  | 
| 
1946
 | 
242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
297
 | 
             $dump .= _dump($d);  | 
| 
1947
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1948
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif( defined $d ) {  | 
| 
1949
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # D::C indicates the second test is redundant  | 
| 
1950
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # $dump .= ( $d =~ /\s/ or not length $d )  | 
| 
1951
 | 
9453
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
22412
 | 
             $dump .= (  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1952
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $d =~ /\s/            ? qq{'$d'}         :  | 
| 
1953
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $d =~ /^[\x00-\x1f]$/ ? _pretty_dump($d) :  | 
| 
1954
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $d  | 
| 
1955
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             );  | 
| 
1956
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1957
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
1958
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
             $dump .= '*';  | 
| 
1959
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1960
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1961
 | 
5057
 | 
 
 | 
 
 | 
 
 | 
 
 | 
55197
 | 
     return $dump . ']';  | 
| 
1962
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1963
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1964
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _dump_node {  | 
| 
1965
 | 
1862
 | 
 
 | 
 
 | 
  
1862
  
 | 
 
 | 
1470
 | 
     my $node = shift;  | 
| 
1966
 | 
1862
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1417
 | 
     my $dump = '{';  | 
| 
1967
 | 
1862
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1316
 | 
     my $nr   = 0;  | 
| 
1968
 | 
1862
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1349
 | 
     my $n;  | 
| 
1969
 | 
1862
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4209
 | 
     for $n (sort keys %$node) {  | 
| 
1970
 | 
3899
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
5573
 | 
         $dump .= ' ' if $nr++;  | 
| 
1971
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # Devel::Cover shows this to test to be redundant  | 
| 
1972
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # $dump .= ( $n eq '' and not defined $node->{$n} )  | 
| 
1973
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $dump .= $n eq ''  | 
| 
1974
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ? '*'  | 
| 
1975
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             : ($n =~ /^[\x00-\x1f]$/ ? _pretty_dump($n) : $n)  | 
| 
1976
 | 
3899
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
9647
 | 
                 . "=>" . _dump($node->{$n})  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1977
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ;  | 
| 
1978
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1979
 | 
1862
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19697
 | 
     return $dump . '}';  | 
| 
1980
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1981
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1982
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =pod  | 
| 
1983
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1984
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 NAME  | 
| 
1985
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1986
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Regexp::Assemble - Assemble multiple Regular Expressions into a single RE  | 
| 
1987
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1988
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SYNOPSIS  | 
| 
1989
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1990
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   use Regexp::Assemble;  | 
| 
1991
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1992
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $ra = Regexp::Assemble->new;  | 
| 
1993
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $ra->add( 'ab+c' );  | 
| 
1994
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $ra->add( 'ab+-' );  | 
| 
1995
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $ra->add( 'a\w\d+' );  | 
| 
1996
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $ra->add( 'a\d+' );  | 
| 
1997
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   print $ra->re; # prints a(?:\w?\d+|b+[-c])  | 
| 
1998
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1999
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 DESCRIPTION  | 
| 
2000
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2001
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Regexp::Assemble takes an arbitrary number of regular expressions  | 
| 
2002
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 and assembles them into a single regular expression (or RE) that  | 
| 
2003
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 matches all that the individual REs match.  | 
| 
2004
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2005
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 As a result, instead of having a large list of expressions to loop  | 
| 
2006
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 over, a target string only needs to be tested against one expression.  | 
| 
2007
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This is interesting when you have several thousand patterns to deal  | 
| 
2008
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 with. Serious effort is made to produce the smallest pattern possible.  | 
| 
2009
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2010
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 It is also possible to track the original patterns, so that you can  | 
| 
2011
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 determine which, among the source patterns that form the assembled  | 
| 
2012
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 pattern, was the one that caused the match to occur.  | 
| 
2013
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2014
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 You should realise that large numbers of alternations are processed  | 
| 
2015
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 in perl's regular expression engine in O(n) time, not O(1). If you  | 
| 
2016
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 are still having performance problems, you should look at using a  | 
| 
2017
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 trie. Note that Perl's own regular expression engine will implement  | 
| 
2018
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 trie optimisations in perl 5.10 (they are already available in  | 
| 
2019
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 perl 5.9.3 if you want to try them out). C will  | 
| 
2020
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 do the right thing when it knows it's running on a trie'd perl.  | 
| 
2021
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (At least in some version after this one).  | 
| 
2022
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2023
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Some more examples of usage appear in the accompanying README. If  | 
| 
2024
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 that file is not easy to access locally, you can find it on a web  | 
| 
2025
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 repository such as  | 
| 
2026
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L or  | 
| 
2027
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L.  | 
| 
2028
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2029
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 See also L.  | 
| 
2030
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2031
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 Methods  | 
| 
2032
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2033
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 add(LIST)  | 
| 
2034
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2035
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Takes a string, breaks it apart into a set of tokens (respecting  | 
| 
2036
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 meta characters) and inserts the resulting list into the C  | 
| 
2037
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 object. It uses a naive regular expression to lex the string  | 
| 
2038
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 that may be fooled complex expressions (specifically, it will  | 
| 
2039
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 fail to lex nested parenthetical expressions such as  | 
| 
2040
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C correctly). If this is the case, the end of   | 
| 
2041
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the string will not be tokenised correctly and returned as one  | 
| 
2042
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 long string.  | 
| 
2043
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2044
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 On the one hand, this may indicate that the patterns you are  | 
| 
2045
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 trying to feed the C object are too complex. Simpler  | 
| 
2046
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 patterns might allow the algorithm to work more effectively and  | 
| 
2047
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 perform more reductions in the resulting pattern.  | 
| 
2048
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2049
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 On the other hand, you can supply your own pattern to perform the  | 
| 
2050
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 lexing if you need. The test suite contains an example of a lexer  | 
| 
2051
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 pattern that will match one level of nested parentheses.  | 
| 
2052
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2053
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Note that there is an internal optimisation that will bypass a  | 
| 
2054
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 much of the lexing process. If a string contains no C<\>  | 
| 
2055
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (backslash), C<[> (open square bracket), C<(> (open paren),  | 
| 
2056
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C> (question mark), C<+> (plus), C<*> (star) or C<{> (open  | 
| 
2057
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 curly), a character split will be performed directly.  | 
| 
2058
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2059
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 A list of strings may be supplied, thus you can pass it a file  | 
| 
2060
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 handle of a file opened for reading:  | 
| 
2061
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2062
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $re->add( '\d+-\d+-\d+-\d+\.example\.com' );  | 
| 
2063
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $re->add(  );  | 
| 
2064
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2065
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If the file is very large, it may be more efficient to use a  | 
| 
2066
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C loop, to read the file line-by-line:  | 
| 
2067
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2068
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $re->add($_) while ;  | 
| 
2069
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2070
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The C method will chomp the lines automatically. If you  | 
| 
2071
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 do not want this to occur (you want to keep the record  | 
| 
2072
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 separator), then disable Cing.  | 
| 
2073
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2074
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $re->chomp(0);  | 
| 
2075
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $re->add($_) while ;  | 
| 
2076
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2077
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This method is chainable.  | 
| 
2078
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2079
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 add_file(FILENAME [...])  | 
| 
2080
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2081
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Takes a list of file names. Each file is opened and read  | 
| 
2082
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 line by line. Each line is added to the assembly.  | 
| 
2083
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2084
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $r->add_file( 'file.1', 'file.2' );  | 
| 
2085
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2086
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If a file cannot be opened, the method will croak. If you cannot  | 
| 
2087
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 afford to let this happen then you should wrap the call in a C  | 
| 
2088
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 block.  | 
| 
2089
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2090
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Chomping happens automatically unless you the C method  | 
| 
2091
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 to disable it. By default, input lines are read according to the  | 
| 
2092
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 value of the C attribute (if defined), and  | 
| 
2093
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 will otherwise fall back to the current setting of the system C<$/>  | 
| 
2094
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 variable. The record separator may also be specified on each  | 
| 
2095
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 call to C. Internally, the routine Cises the  | 
| 
2096
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 value of C<$/> to whatever is required, for the duration of the  | 
| 
2097
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 call.  | 
| 
2098
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2099
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 An alternate calling mechanism using a hash reference is  | 
| 
2100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 available.  The recognised keys are:  | 
| 
2101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
2103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item file  | 
| 
2105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Reference to a list of file names, or the name of a single  | 
| 
2107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 file.  | 
| 
2108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $r->add_file({file => ['file.1', 'file.2', 'file.3']});  | 
| 
2110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $r->add_file({file => 'file.n'});  | 
| 
2111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item input_record_separator  | 
| 
2113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If present, indicates what constitutes a line  | 
| 
2115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $r->add_file({file => 'data.txt', input_record_separator => ':' });  | 
| 
2117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item rs  | 
| 
2119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 An alias for input_record_separator (mnemonic: same as the  | 
| 
2121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 English variable names).  | 
| 
2122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
2124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $r->add_file( {  | 
| 
2126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     file => [ 'pattern.txt', 'more.txt' ],  | 
| 
2127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     input_record_separator  => "\r\n",  | 
| 
2128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   });  | 
| 
2129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 clone()  | 
| 
2131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Clones the contents of a Regexp::Assemble object and creates a new  | 
| 
2133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 object (in other words it performs a deep copy).  | 
| 
2134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If the Storable module is installed, its dclone method will be used,  | 
| 
2136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 otherwise the cloning will be performed using a pure perl approach.  | 
| 
2137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 You can use this method to take a snapshot of the patterns that have  | 
| 
2139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 been added so far to an object, and generate an assembly from the  | 
| 
2140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 clone. Additional patterns may to be added to the original object  | 
| 
2141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 afterwards.  | 
| 
2142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $re = $main->clone->re();  | 
| 
2144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $main->add( 'another-pattern-\\d+' );  | 
| 
2145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 insert(LIST)  | 
| 
2147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Takes a list of tokens representing a regular expression and  | 
| 
2149
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 stores them in the object. Note: you should not pass it a bare  | 
| 
2150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 regular expression, such as C. You must pass it as   | 
| 
2151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 a list of tokens, I C<('a', 'b+', 'c?', 'd*', 'e')>.  | 
| 
2152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This method is chainable, I:  | 
| 
2154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $ra = Regexp::Assemble->new  | 
| 
2156
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ->insert( qw[ a b+ c? d* e ] )  | 
| 
2157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ->insert( qw[ a c+ d+ e* f ] );  | 
| 
2158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Lexing complex patterns with metacharacters and so on can consume  | 
| 
2160
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 a significant proportion of the overall time to build an assembly.  | 
| 
2161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If you have the information available in a tokenised form, calling  | 
| 
2162
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C directly can be a big win.  | 
| 
2163
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 lexstr  | 
| 
2165
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2166
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Use the C method if you are curious to see how a pattern  | 
| 
2167
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 gets tokenised. It takes a scalar on input, representing a pattern,  | 
| 
2168
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 and returns a reference to an array, containing the tokenised  | 
| 
2169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 pattern. You can recover the original pattern by performing a  | 
| 
2170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C:  | 
| 
2171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2172
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my @token = $re->lexstr($pattern);  | 
| 
2173
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $new_pattern = join( '', @token );  | 
| 
2174
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2175
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If the original pattern contains unnecessary backslashes, or C<\x4b>  | 
| 
2176
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 escapes, or quotemeta escapes (C<\Q>...C<\E>) the resulting pattern  | 
| 
2177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 may not be identical.  | 
| 
2178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Call C does not add the pattern to the object, it is merely  | 
| 
2180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 for exploratory purposes. It will, however, update various statistical  | 
| 
2181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 counters.  | 
| 
2182
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 pre_filter(CODE)  | 
| 
2184
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Allows you to install a callback to check that the pattern being  | 
| 
2186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 loaded contains valid input. It receives the pattern as a whole to  | 
| 
2187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 be added, before it been tokenised by the lexer. It may to return  | 
| 
2188
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 0 or C to indicate that the pattern should not be added, any  | 
| 
2189
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 true value indicates that the contents are fine.  | 
| 
2190
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 A filter to strip out trailing comments (marked by #):  | 
| 
2192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2193
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $re->pre_filter( sub { $_[0] =~ s/\s*#.*$//; 1 } );  | 
| 
2194
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2195
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 A filter to ignore blank lines:  | 
| 
2196
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $re->pre_filter( sub { length(shift) } );  | 
| 
2198
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2199
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If you want to remove the filter, pass C as a parameter.  | 
| 
2200
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2201
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $ra->pre_filter(undef);  | 
| 
2202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2203
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This method is chainable.  | 
| 
2204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 filter(CODE)  | 
| 
2206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Allows you to install a callback to check that the pattern being  | 
| 
2208
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 loaded contains valid input. It receives a list on input, after it  | 
| 
2209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 has been tokenised by the lexer. It may to return 0 or undef to  | 
| 
2210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 indicate that the pattern should not be added, any true value  | 
| 
2211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 indicates that the contents are fine.  | 
| 
2212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If you know that all patterns you expect to assemble contain  | 
| 
2214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 a restricted set of of tokens (e.g. no spaces), you could do  | 
| 
2215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the following:  | 
| 
2216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $ra->filter(sub { not grep { / / } @_ });  | 
| 
2218
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2219
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 or  | 
| 
2220
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2221
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   sub only_spaces_and_digits {  | 
| 
2222
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     not grep { ![\d ] } @_  | 
| 
2223
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
2224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $ra->filter( \&only_spaces_and_digits );  | 
| 
2225
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2226
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 These two examples will silently ignore faulty patterns, If you  | 
| 
2227
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 want the user to be made aware of the problem you should raise an  | 
| 
2228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 error (via C or C), log an error message, whatever is  | 
| 
2229
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 best. If you want to remove a filter, pass C as a parameter.  | 
| 
2230
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $ra->filter(undef);  | 
| 
2232
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This method is chainable.  | 
| 
2234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2235
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 as_string  | 
| 
2236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Assemble the expression and return it as a string. You may want to do  | 
| 
2238
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 this if you are writing the pattern to a file. The following arguments  | 
| 
2239
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 can be passed to control the aspect of the resulting pattern:  | 
| 
2240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2241
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 B, the number of spaces used to indent nested grouping of  | 
| 
2242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 a pattern. Use this to produce a pretty-printed pattern (for some  | 
| 
2243
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 definition of "pretty"). The resulting output is rather verbose. The  | 
| 
2244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 reason is to ensure that the metacharacters C<(?:> and C<)> always  | 
| 
2245
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 occur on otherwise empty lines. This allows you grep the result for an  | 
| 
2246
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 even more synthetic view of the pattern:  | 
| 
2247
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   egrep -v '^ *[()]'   | 
| 
2249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The result of the above is quite readable. Remember to backslash the  | 
| 
2251
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 spaces appearing in your own patterns if you wish to use an indented  | 
| 
2252
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 pattern in an C construct. Indenting is ignored if tracking  | 
| 
2253
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is enabled.  | 
| 
2254
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The B argument takes precedence over the C  | 
| 
2256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 method/attribute of the object.  | 
| 
2257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2258
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Calling this  | 
| 
2259
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 method will drain the internal data structure. Large numbers of patterns  | 
| 
2260
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 can eat a significant amount of memory, and this lets perl recover the  | 
| 
2261
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 memory used for other purposes.  | 
| 
2262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2263
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If you want to reduce the pattern I continue to add new patterns,  | 
| 
2264
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 clone the object and reduce the clone, leaving the original object intact.  | 
| 
2265
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2266
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 re  | 
| 
2267
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2268
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Assembles the pattern and return it as a compiled RE, using the  | 
| 
2269
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C operator.  | 
| 
2270
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2271
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 As with C, calling this method will reset the internal data  | 
| 
2272
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 structures to free the memory used in assembling the RE.  | 
| 
2273
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The B attribute, documented in the C method, can be  | 
| 
2275
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 used here (it will be ignored if tracking is enabled).  | 
| 
2276
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2277
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 With method chaining, it is possible to produce a RE without having  | 
| 
2278
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 a temporary C object lying around, I:  | 
| 
2279
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2280
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $re = Regexp::Assemble->new  | 
| 
2281
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ->add( q[ab+cd+e] )  | 
| 
2282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ->add( q[ac\\d+e] )  | 
| 
2283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ->add( q[c\\d+e] )  | 
| 
2284
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ->re;  | 
| 
2285
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2286
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The C<$re> variable now contains a Regexp object that can be used  | 
| 
2287
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 directly:  | 
| 
2288
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2289
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   while( <> ) {  | 
| 
2290
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     /$re/ and print "Something in [$_] matched\n";  | 
| 
2291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   )  | 
| 
2292
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2293
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The C method is called when the object is used in string context  | 
| 
2294
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (hence, within an C operator), so by and large you do not even  | 
| 
2295
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 need to save the RE in a separate variable. The following will work  | 
| 
2296
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 as expected:  | 
| 
2297
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2298
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $re = Regexp::Assemble->new->add( qw[ fee fie foe fum ] );  | 
| 
2299
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   while(  ) {  | 
| 
2300
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if( /($re)/ ) {  | 
| 
2301
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       print "Here be giants: $1\n";  | 
| 
2302
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2303
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
2304
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2305
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This approach does not work with tracked patterns. The  | 
| 
2306
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C and C methods must be used instead, see below.  | 
| 
2307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2308
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 match(SCALAR)  | 
| 
2309
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2310
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The following information applies to Perl 5.8 and below. See  | 
| 
2311
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the section that follows for information on Perl 5.10.  | 
| 
2312
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2313
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If pattern tracking is in use, you must C | 
| 
2314
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 to make things work correctly. At a minimum, this will make your  | 
| 
2315
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 code look like this:  | 
| 
2316
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2317
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $did_match = do { use re 'eval'; $target =~ /$ra/ }  | 
| 
2318
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if( $did_match ) {  | 
| 
2319
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         print "matched ", $ra->matched, "\n";  | 
| 
2320
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2321
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2322
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (The main reason is that the C<$^R> variable is currently broken  | 
| 
2323
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 and an ugly workaround that runs some Perl code during the match  | 
| 
2324
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is required, in order to simulate what C<$^R> should be doing. See  | 
| 
2325
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Perl bug #32840 for more information if you are curious. The README  | 
| 
2326
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 also contains more information). This bug has been fixed in 5.10.  | 
| 
2327
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2328
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The important thing to note is that with C | 
| 
2329
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ARE SECURITY IMPLICATIONS WHICH YOU IGNORE AT YOUR PERIL. The problem  | 
| 
2330
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is this: if you do not have strict control over the patterns being  | 
| 
2331
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 fed to C when tracking is enabled, and someone  | 
| 
2332
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 slips you a pattern such as C^(?{system 'rm -rf /'})/> and you  | 
| 
2333
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 attempt to match a string against the resulting pattern, you will  | 
| 
2334
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 know Fear and Loathing.  | 
| 
2335
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2336
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 What is more, the C<$^R> workaround means that that tracking does  | 
| 
2337
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 not work if you perform a bare C$re/> pattern match as shown  | 
| 
2338
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 above. You have to instead call the C method, in order to  | 
| 
2339
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 supply the necessary context to take care of the tracking housekeeping  | 
| 
2340
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 details.  | 
| 
2341
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2342
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    if( defined( my $match = $ra->match($_)) ) {  | 
| 
2343
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        print "  $_ matched by $match\n";  | 
| 
2344
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    }  | 
| 
2345
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2346
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 In the case of a successful match, the original matched pattern  | 
| 
2347
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is returned directly. The matched pattern will also be available  | 
| 
2348
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 through the C method.  | 
| 
2349
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2350
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (Except that the above is not true for 5.6.0: the C method  | 
| 
2351
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 returns true or undef, and the C method always returns  | 
| 
2352
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 undef).  | 
| 
2353
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2354
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If you are capturing parts of the pattern I C  | 
| 
2355
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 you will want to get at the captures. See the C, C,  | 
| 
2356
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C and C methods. If you are not using captures  | 
| 
2357
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 then you may safely ignore this section.  | 
| 
2358
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2359
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 In 5.10, since the bug concerning C<$^R> has been resolved, there  | 
| 
2360
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is no need to use C and the assembled pattern does  | 
| 
2361
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 not require any Perl code to be executed during the match.  | 
| 
2362
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2363
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 new()  | 
| 
2364
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2365
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Creates a new C object. The following optional  | 
| 
2366
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 key/value parameters may be employed. All keys have a corresponding  | 
| 
2367
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 method that can be used to change the behaviour later on. As a  | 
| 
2368
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 general rule, especially if you're just starting out, you don't  | 
| 
2369
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 have to bother with any of these.  | 
| 
2370
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2371
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 B, a family of optional attributes that allow anchors  | 
| 
2372
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (C<^>, C<\b>, C<\Z>...) to be added to the resulting pattern.  | 
| 
2373
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2374
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 B, sets the C flags to add to the assembled regular  | 
| 
2375
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 expression.  Warning: no error checking is done, you should ensure  | 
| 
2376
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 that the flags you pass are understood by the version of Perl you  | 
| 
2377
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 are using. B exists as an alias, for users familiar  | 
| 
2378
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 with L.  | 
| 
2379
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2380
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 B, controls whether the pattern should be chomped before being  | 
| 
2381
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 lexed. Handy if you are reading patterns from a file. By default,  | 
| 
2382
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Cing is performed (this behaviour changed as of version 0.24,  | 
| 
2383
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 prior versions did not chomp automatically).  | 
| 
2384
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 See also the C attribute and the C method.  | 
| 
2385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2386
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 B, slurp the contents of the specified file and add them  | 
| 
2387
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 to the assembly. Multiple files may be processed by using a list.  | 
| 
2388
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2389
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $r = Regexp::Assemble->new(file => 're.list');  | 
| 
2390
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2391
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $r = Regexp::Assemble->new(file => ['re.1', 're.2']);  | 
| 
2392
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2393
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If you really don't want chomping to occur, you will have to set  | 
| 
2394
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the C attribute to 0 (zero). You may also want to look at  | 
| 
2395
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the C attribute, as well.  | 
| 
2396
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2397
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 B, controls what constitutes a record  | 
| 
2398
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 separator when using the C attribute or the C  | 
| 
2399
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 method. May be abbreviated to B. See the C<$/> variable in  | 
| 
2400
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L.  | 
| 
2401
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2402
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 B, controls whether the pattern should contain zero-width  | 
| 
2403
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 lookahead assertions (For instance: (?=[abc])(?:bob|alice|charles).  | 
| 
2404
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This is not activated by default, because in many circumstances the  | 
| 
2405
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 cost of processing the assertion itself outweighs the benefit of  | 
| 
2406
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 its faculty for short-circuiting a match that will fail. This is  | 
| 
2407
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sensitive to the probability of a match succeeding, so if you're  | 
| 
2408
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 worried about performance you'll have to benchmark a sample population  | 
| 
2409
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 of targets to see which way the benefits lie.  | 
| 
2410
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2411
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 B | 
| 
2412
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 patterns was the one that matched. See the C method for  | 
| 
2413
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 more details. Note for version 5.8 of Perl and below, in this mode  | 
| 
2414
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 of operation YOU SHOULD BE AWARE OF THE SECURITY IMPLICATIONS that  | 
| 
2415
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 this entails. Perl 5.10 does not suffer from any such restriction.  | 
| 
2416
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2417
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 B, the number of spaces used to indent nested grouping of  | 
| 
2418
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 a pattern. Use this to produce a pretty-printed pattern. See the  | 
| 
2419
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C method for a more detailed explanation.  | 
| 
2420
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2421
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 B, allows you to add a callback to enable sanity checks  | 
| 
2422
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 on the pattern being loaded. This callback is triggered before the  | 
| 
2423
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 pattern is split apart by the lexer. In other words, it operates  | 
| 
2424
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 on the entire pattern. If you are loading patterns from a file,  | 
| 
2425
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 this would be an appropriate place to remove comments.  | 
| 
2426
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2427
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 B, allows you to add a callback to enable sanity checks on  | 
| 
2428
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the pattern being loaded. This callback is triggered after the  | 
| 
2429
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 pattern has been split apart by the lexer.  | 
| 
2430
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2431
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 B, controls whether to unroll, for example, C into  | 
| 
2432
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C, C, which may allow additional reductions in the  | 
| 
2433
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 resulting assembled pattern.  | 
| 
2434
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2435
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 B, controls whether tail reduction occurs or not. If set,  | 
| 
2436
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 patterns like C will be reduced to C.  | 
| 
2437
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 That is, the end of the pattern in each part of the b... and d...  | 
| 
2438
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 alternations is identical, and hence is hoisted out of the alternation  | 
| 
2439
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 and placed after it. On by default. Turn it off if you're really  | 
| 
2440
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 pressed for short assembly times.  | 
| 
2441
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2442
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 B, specifies the pattern used to lex the input lines into  | 
| 
2443
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 tokens. You could replace the default pattern by a more sophisticated  | 
| 
2444
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 version that matches arbitrarily nested parentheses, for example.  | 
| 
2445
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2446
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 B, controls whether copious amounts of output is produced  | 
| 
2447
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 during the loading stage or the reducing stage of assembly.  | 
| 
2448
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2449
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $ra = Regexp::Assemble->new;  | 
| 
2450
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $rb = Regexp::Assemble->new( chomp => 1, debug => 3 );  | 
| 
2451
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2452
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 B, controls whether new patterns can be added to the object  | 
| 
2453
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 after the assembled pattern is generated. DEPRECATED.  | 
| 
2454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2455
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This method/attribute will be removed in a future release. It doesn't  | 
| 
2456
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 really serve any purpose, and may be more effectively replaced by  | 
| 
2457
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 cloning an existing C object and spinning out a  | 
| 
2458
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 pattern from that instead.  | 
| 
2459
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2460
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 source()  | 
| 
2461
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2462
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 When using tracked mode, after a successful match is made, returns  | 
| 
2463
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the original source pattern that caused the match. In Perl 5.10,  | 
| 
2464
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the C<$^R> variable can be used to as an index to fetch the correct  | 
| 
2465
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 pattern from the object.  | 
| 
2466
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2467
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If no successful match has been performed, or the object is not in  | 
| 
2468
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 tracked mode, this method returns C.  | 
| 
2469
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2470
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $r = Regexp::Assemble->new->track(1)->add(qw(foo? bar{2} [Rr]at));  | 
| 
2471
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2472
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   for my $w (qw(this food is rather barren)) {  | 
| 
2473
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if ($w =~ /$r/) {  | 
| 
2474
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       print "$w matched by ", $r->source($^R), $/;  | 
| 
2475
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2476
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else {  | 
| 
2477
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       print "$w no match\n";  | 
| 
2478
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2479
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
2480
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2481
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 mbegin()  | 
| 
2482
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2483
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This method returns a copy of C<@-> at the moment of the  | 
| 
2484
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 last match. You should ordinarily not need to bother with  | 
| 
2485
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 this, C should be able to supply all your needs.  | 
| 
2486
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2487
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 mend()  | 
| 
2488
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2489
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This method returns a copy of C<@+> at the moment of the  | 
| 
2490
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 last match.  | 
| 
2491
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2492
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 mvar(NUMBER)  | 
| 
2493
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2494
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The C method returns the captures of the last match.  | 
| 
2495
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C corresponds to $1, C to $2, and so on.  | 
| 
2496
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C happens to return the target string matched,  | 
| 
2497
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 as a byproduct of walking down the C<@-> and C<@+> arrays  | 
| 
2498
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 after the match.  | 
| 
2499
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2500
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If called without a parameter, C will return a  | 
| 
2501
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 reference to an array containing all captures.  | 
| 
2502
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2503
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 capture  | 
| 
2504
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2505
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The C method returns the the captures of the last  | 
| 
2506
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 match as an array. Unlink C, this method does not  | 
| 
2507
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 include the matched string. It is equivalent to getting an  | 
| 
2508
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 array back that contains C<$1, $2, $3, ...>.  | 
| 
2509
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2510
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If no captures were found in the match, an empty array is  | 
| 
2511
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 returned, rather than C. You are therefore guaranteed  | 
| 
2512
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 to be able to use C<< for my $c ($re->capture) { ... >>  | 
| 
2513
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 without have to check whether anything was captured.  | 
| 
2514
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2515
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 matched()  | 
| 
2516
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2517
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If pattern tracking has been set, via the C | 
| 
2518
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 or through the C | 
| 
2519
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 original pattern of the last successful match. Returns undef  | 
| 
2520
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 match has yet been performed, or tracking has not been enabled.  | 
| 
2521
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2522
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 See below in the NOTES section for additional subtleties of  | 
| 
2523
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 which you should be aware of when tracking patterns.  | 
| 
2524
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2525
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Note that this method is not available in 5.6.0, due to  | 
| 
2526
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 limitations in the implementation of C<(?{...})> at the time.  | 
| 
2527
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2528
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 Statistics/Reporting routines  | 
| 
2529
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2530
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 stats_add  | 
| 
2531
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2532
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns the number of patterns added to the assembly (whether  | 
| 
2533
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 by C or C). Duplicate patterns are not included  | 
| 
2534
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 in this total.  | 
| 
2535
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2536
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 stats_dup  | 
| 
2537
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2538
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns the number of duplicate patterns added to the assembly.  | 
| 
2539
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If non-zero, this may be a sign that something is wrong with  | 
| 
2540
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 your data (or at the least, some needless redundancy). This may  | 
| 
2541
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 occur when you have two patterns (for instance, C and  | 
| 
2542
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C) which map to the same result.  | 
| 
2543
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2544
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 stats_raw()  | 
| 
2545
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2546
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns the raw number of bytes in the patterns added to the  | 
| 
2547
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 assembly. This includes both original and duplicate patterns.  | 
| 
2548
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 For instance, adding the two patterns C and C will    | 
| 
2549
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 count as 4 bytes.  | 
| 
2550
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2551
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 stats_cooked()  | 
| 
2552
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2553
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Return the true number of bytes added to the assembly. This  | 
| 
2554
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 will not include duplicate patterns. Furthermore, it may differ  | 
| 
2555
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 from the raw bytes due to quotemeta treatment. For instance,  | 
| 
2556
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C will count as 7 (not 8) bytes, because C<\,> will  | 
| 
2557
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 be stored as C<,>. Also, C<\Qa.b\E> is 7 bytes long, however,  | 
| 
2558
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 after the quotemeta directives are processed, C will be  | 
| 
2559
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 stored, for a total of 4 bytes.  | 
| 
2560
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2561
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 stats_length()  | 
| 
2562
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2563
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns the length of the resulting assembled expression.  | 
| 
2564
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Until C or C have been called, the length  | 
| 
2565
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 will be 0 (since the assembly will have not yet been  | 
| 
2566
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 performed). The length includes only the pattern, not the  | 
| 
2567
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 additional (C<(?-xism...>) fluff added by the compilation.  | 
| 
2568
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2569
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 dup_warn(NUMBER|CODEREF)  | 
| 
2570
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2571
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Turns warnings about duplicate patterns on or off. By  | 
| 
2572
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 default, no warnings are emitted. If the method is  | 
| 
2573
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 called with no parameters, or a true parameter,  | 
| 
2574
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the object will carp about patterns it has  | 
| 
2575
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 already seen. To turn off the warnings, use 0 as a  | 
| 
2576
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 parameter.  | 
| 
2577
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2578
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $r->dup_warn();  | 
| 
2579
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2580
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The method may also be passed a code block. In this case  | 
| 
2581
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the code will be executed and it will receive a reference  | 
| 
2582
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 to the object in question, and the lexed pattern.  | 
| 
2583
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2584
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $r->dup_warn(  | 
| 
2585
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     sub {  | 
| 
2586
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       my $self = shift;  | 
| 
2587
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       print $self->stats_add, " patterns added at line $.\n",  | 
| 
2588
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           join( '', @_ ), " added previously\n";  | 
| 
2589
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2590
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   )  | 
| 
2591
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2592
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 Anchor routines  | 
| 
2593
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2594
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Suppose you wish to assemble a series of patterns that all begin  | 
| 
2595
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 with C<^>  and end with C<$> (anchor pattern to the beginning and  | 
| 
2596
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 end of line). Rather than add the anchors to each and every pattern  | 
| 
2597
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (and possibly forget to do so when a new entry is added), you may  | 
| 
2598
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 specify the anchors in the object, and they will appear in the  | 
| 
2599
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 resulting pattern, and you no longer need to (or should) put them  | 
| 
2600
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 in your source patterns. For example, the two following snippets  | 
| 
2601
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 will produce identical patterns:  | 
| 
2602
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2603
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $r->add(qw(^this ^that ^them))->as_string;  | 
| 
2604
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2605
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $r->add(qw(this that them))->anchor_line_begin->as_string;  | 
| 
2606
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2607
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # both techniques will produce ^th(?:at|em|is)  | 
| 
2608
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2609
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 All anchors are possible word (C<\b>) boundaries, line  | 
| 
2610
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 boundaries (C<^> and C<$>) and string boundaries (C<\A>  | 
| 
2611
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 and C<\Z> (or C<\z> if you absolutely need it)).  | 
| 
2612
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2613
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The shortcut C> implies both  | 
| 
2614
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C_begin> C_end>  | 
| 
2615
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is also available. If different anchors are specified  | 
| 
2616
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the most specific anchor wins. For instance, if both  | 
| 
2617
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C and C are  | 
| 
2618
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 specified, C takes precedence.  | 
| 
2619
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2620
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 All the anchor methods are chainable.  | 
| 
2621
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2622
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 anchor_word_begin  | 
| 
2623
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2624
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The resulting pattern will be prefixed with a C<\b>  | 
| 
2625
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 word boundary assertion when the value is true. Set  | 
| 
2626
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 to 0 to disable.  | 
| 
2627
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2628
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $r->add('pre')->anchor_word_begin->as_string;  | 
| 
2629
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # produces '\bpre'  | 
| 
2630
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2631
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 anchor_word_end  | 
| 
2632
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2633
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The resulting pattern will be suffixed with a C<\b>  | 
| 
2634
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 word boundary assertion when the value is true. Set  | 
| 
2635
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 to 0 to disable.  | 
| 
2636
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2637
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $r->add(qw(ing tion))  | 
| 
2638
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ->anchor_word_end  | 
| 
2639
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ->as_string; # produces '(?:tion|ing)\b'  | 
| 
2640
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2641
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 anchor_word  | 
| 
2642
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2643
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The resulting pattern will be have C<\b>  | 
| 
2644
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 word boundary assertions at the beginning and end  | 
| 
2645
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 of the pattern when the value is true. Set  | 
| 
2646
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 to 0 to disable.  | 
| 
2647
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2648
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $r->add(qw(cat carrot)  | 
| 
2649
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ->anchor_word(1)  | 
| 
2650
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ->as_string; # produces '\bca(?:rro)t\b'  | 
| 
2651
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2652
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 anchor_line_begin  | 
| 
2653
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2654
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The resulting pattern will be prefixed with a C<^>  | 
| 
2655
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 line boundary assertion when the value is true. Set  | 
| 
2656
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 to 0 to disable.  | 
| 
2657
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2658
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $r->anchor_line_begin;  | 
| 
2659
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # or  | 
| 
2660
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $r->anchor_line_begin(1);  | 
| 
2661
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2662
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 anchor_line_end  | 
| 
2663
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2664
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The resulting pattern will be suffixed with a C<$>  | 
| 
2665
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 line boundary assertion when the value is true. Set  | 
| 
2666
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 to 0 to disable.  | 
| 
2667
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2668
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # turn it off  | 
| 
2669
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $r->anchor_line_end(0);  | 
| 
2670
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2671
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 anchor_line  | 
| 
2672
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2673
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The resulting pattern will be have the C<^> and C<$>  | 
| 
2674
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 line boundary assertions at the beginning and end  | 
| 
2675
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 of the pattern, respectively, when the value is true. Set  | 
| 
2676
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 to 0 to disable.  | 
| 
2677
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2678
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $r->add(qw(cat carrot)  | 
| 
2679
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ->anchor_line  | 
| 
2680
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ->as_string; # produces '^ca(?:rro)t$'  | 
| 
2681
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2682
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 anchor_string_begin  | 
| 
2683
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2684
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The resulting pattern will be prefixed with a C<\A>  | 
| 
2685
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 string boundary assertion when the value is true. Set  | 
| 
2686
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 to 0 to disable.  | 
| 
2687
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2688
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $r->anchor_string_begin(1);  | 
| 
2689
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2690
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 anchor_string_end  | 
| 
2691
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2692
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The resulting pattern will be suffixed with a C<\Z>  | 
| 
2693
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 string boundary assertion when the value is true. Set  | 
| 
2694
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 to 0 to disable.  | 
| 
2695
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2696
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # disable the string boundary end anchor  | 
| 
2697
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $r->anchor_string_end(0);  | 
| 
2698
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2699
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 anchor_string_end_absolute  | 
| 
2700
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2701
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The resulting pattern will be suffixed with a C<\z>  | 
| 
2702
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 string boundary assertion when the value is true. Set  | 
| 
2703
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 to 0 to disable.  | 
| 
2704
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2705
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # disable the string boundary absolute end anchor  | 
| 
2706
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $r->anchor_string_end_absolute(0);  | 
| 
2707
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2708
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If you don't understand the difference between  | 
| 
2709
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C<\Z> and C<\z>, the former will probably do what  | 
| 
2710
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 you want.  | 
| 
2711
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2712
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 anchor_string  | 
| 
2713
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2714
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The resulting pattern will be have the C<\A> and C<\Z>  | 
| 
2715
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 string boundary assertions at the beginning and end  | 
| 
2716
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 of the pattern, respectively, when the value is true. Set  | 
| 
2717
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 to 0 to disable.  | 
| 
2718
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2719
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $r->add(qw(cat carrot)  | 
| 
2720
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ->anchor_string  | 
| 
2721
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ->as_string; # produces '\Aca(?:rro)t\Z'  | 
| 
2722
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2723
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 anchor_string_absolute  | 
| 
2724
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2725
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The resulting pattern will be have the C<\A> and C<\z>  | 
| 
2726
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 string boundary assertions at the beginning and end  | 
| 
2727
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 of the pattern, respectively, when the value is true. Set  | 
| 
2728
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 to 0 to disable.  | 
| 
2729
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2730
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $r->add(qw(cat carrot)  | 
| 
2731
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ->anchor_string_absolute  | 
| 
2732
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ->as_string; # produces '\Aca(?:rro)t\z'  | 
| 
2733
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2734
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 debug(NUMBER)  | 
| 
2735
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2736
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Turns debugging on or off. Statements are printed  | 
| 
2737
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 to the currently selected file handle (STDOUT by default).  | 
| 
2738
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If you are already using this handle, you will have to  | 
| 
2739
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 arrange to select an output handle to a file of your own  | 
| 
2740
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 choosing, before call the C, C or C)  | 
| 
2741
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 functions, otherwise it will scribble all over your  | 
| 
2742
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 carefully formatted output.  | 
| 
2743
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2744
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
2745
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2746
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * 0  | 
| 
2747
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2748
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Off. Turns off all debugging output.  | 
| 
2749
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2750
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * 1  | 
| 
2751
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2752
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Add. Trace the addition of patterns.  | 
| 
2753
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2754
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * 2  | 
| 
2755
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2756
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Reduce. Trace the process of reduction and assembly.  | 
| 
2757
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2758
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * 4  | 
| 
2759
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2760
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Lex. Trace the lexing of the input patterns into its constituent  | 
| 
2761
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 tokens.  | 
| 
2762
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2763
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * 8  | 
| 
2764
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2765
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Time. Print to STDOUT the time taken to load all the patterns. This is  | 
| 
2766
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 nothing more than the difference between the time the object was  | 
| 
2767
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 instantiated and the time reduction was initiated.  | 
| 
2768
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2769
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # load=  | 
| 
2770
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2771
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Any lengthy computation performed in the client code will be reflected  | 
| 
2772
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 in this value. Another line will be printed after reduction is  | 
| 
2773
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 complete.  | 
| 
2774
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2775
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # reduce=  | 
| 
2776
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2777
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The above output lines will be changed to C and  | 
| 
2778
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C if the internal state of the object is corrupted  | 
| 
2779
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 and the initial timestamp is lost.  | 
| 
2780
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2781
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The code attempts to load L in order to report fractional  | 
| 
2782
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 seconds. If this is not successful, the elapsed time is displayed  | 
| 
2783
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 in whole seconds.  | 
| 
2784
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2785
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
2786
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2787
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Values can be added (or or'ed together) to trace everything  | 
| 
2788
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2789
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $r->debug(7)->add( '\\d+abc' );  | 
| 
2790
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2791
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Calling C with no arguments turns debugging off.  | 
| 
2792
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2793
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 dump()  | 
| 
2794
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2795
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Produces a synthetic view of the internal data structure. How  | 
| 
2796
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 to interpret the results is left as an exercise to the reader.  | 
| 
2797
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2798
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   print $r->dump;  | 
| 
2799
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2800
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 chomp(0|1)  | 
| 
2801
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2802
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Turns chomping on or off.  | 
| 
2803
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2804
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 IMPORTANT: As of version 0.24, chomping is now on by default as it  | 
| 
2805
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 makes C Just Work. The only time you may run into trouble  | 
| 
2806
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is with C. So don't do that, or else explicitly turn  | 
| 
2807
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 off chomping.  | 
| 
2808
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2809
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 To avoid incorporating (spurious)  | 
| 
2810
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 record separators (such as "\n" on Unix) when reading from a file,  | 
| 
2811
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C Cs its input. If you don't want this to happen,  | 
| 
2812
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 call C with a false value.  | 
| 
2813
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2814
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $re->chomp(0); # really want the record separators  | 
| 
2815
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $re->add();  | 
| 
2816
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2817
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 fold_meta_pairs(NUMBER)  | 
| 
2818
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2819
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Determines whether C<\s>, C<\S> and C<\w>, C<\W> and C<\d>, C<\D>  | 
| 
2820
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 are folded into a C<.> (dot). Folding happens by default (for  | 
| 
2821
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 reasons of backwards compatibility, even though it is wrong when  | 
| 
2822
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the C expression modifier is active).  | 
| 
2823
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2824
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Call this method with a false value to prevent this behaviour (which  | 
| 
2825
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is only a problem when dealing with C<\n> if the C expression  | 
| 
2826
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 modifier is also set).  | 
| 
2827
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2828
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $re->add( '\\w', '\\W' );  | 
| 
2829
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $clone = $re->clone;  | 
| 
2830
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2831
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $clone->fold_meta_pairs(0);  | 
| 
2832
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   print $clone->as_string; # prints '.'  | 
| 
2833
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   print $re->as_string;    # print '[\W\w]'  | 
| 
2834
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2835
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 indent(NUMBER)  | 
| 
2836
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2837
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Sets the level of indent for pretty-printing nested groups  | 
| 
2838
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 within a pattern. See the C method for more details.  | 
| 
2839
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 When called without a parameter, no indenting is performed.  | 
| 
2840
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2841
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $re->indent( 4 );  | 
| 
2842
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   print $re->as_string;  | 
| 
2843
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2844
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 lookahead(0|1)  | 
| 
2845
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2846
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Turns on zero-width lookahead assertions. This is usually  | 
| 
2847
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 beneficial when you expect that the pattern will usually fail.  | 
| 
2848
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If you expect that the pattern will usually match you will  | 
| 
2849
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 probably be worse off.  | 
| 
2850
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2851
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 flags(STRING)  | 
| 
2852
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2853
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Sets the flags that govern how the pattern behaves (for  | 
| 
2854
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 versions of Perl up to 5.9 or so, these are C). By  | 
| 
2855
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 default no flags are enabled.  | 
| 
2856
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2857
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 modifiers(STRING)  | 
| 
2858
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2859
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 An alias of the C method, for users familiar with  | 
| 
2860
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C.  | 
| 
2861
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2862
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 track(0|1)  | 
| 
2863
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2864
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Turns tracking on or off. When this attribute is enabled,  | 
| 
2865
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 additional housekeeping information is inserted into the  | 
| 
2866
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 assembled expression using C<({...}> embedded code  | 
| 
2867
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 constructs. This provides the necessary information to  | 
| 
2868
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 determine which, of the original patterns added, was the  | 
| 
2869
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 one that caused the match.  | 
| 
2870
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2871
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $re->track( 1 );  | 
| 
2872
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if( $target =~ /$re/ ) {  | 
| 
2873
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print "$target matched by ", $re->matched, "\n";  | 
| 
2874
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
2875
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2876
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Note that when this functionality is enabled, no  | 
| 
2877
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 reduction is performed and no character classes are  | 
| 
2878
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 generated. In other words, C is not  | 
| 
2879
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 reduced down to C<(?:br|t)ag> and C is not  | 
| 
2880
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 reduced to C.  | 
| 
2881
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2882
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 unroll_plus(0|1)  | 
| 
2883
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2884
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Turns the unrolling of plus metacharacters on or off. When  | 
| 
2885
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 a pattern is broken up, C becomes C, C (and  | 
| 
2886
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C becomes C, C. This may allow the freed C  | 
| 
2887
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 to assemble with other patterns. Not enabled by default.  | 
| 
2888
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2889
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 lex(SCALAR)  | 
| 
2890
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2891
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Change the pattern used to break a string apart into tokens.  | 
| 
2892
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 You can examine the C script as a starting point.  | 
| 
2893
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2894
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 reduce(0|1)  | 
| 
2895
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2896
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Turns pattern reduction on or off. A reduced pattern may  | 
| 
2897
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 be considerably shorter than an unreduced pattern. Consider  | 
| 
2898
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C I C. An unreduced  | 
| 
2899
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 pattern will be very similar to those produced by  | 
| 
2900
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C. Reduction is on by default. Turning  | 
| 
2901
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 it off speeds assembly (but assembly is pretty fast -- it's  | 
| 
2902
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the breaking up of the initial patterns in the lexing stage  | 
| 
2903
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 that can consume a non-negligible amount of time).  | 
| 
2904
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2905
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 mutable(0|1)  | 
| 
2906
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2907
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This method has been marked as DEPRECATED. It will be removed  | 
| 
2908
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 in a future release. See the C method for a technique  | 
| 
2909
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 to replace its functionality.  | 
| 
2910
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2911
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 reset()  | 
| 
2912
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2913
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Empties out the patterns that have been Ced or C-ed  | 
| 
2914
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 into the object. Does not modify the state of controller attributes  | 
| 
2915
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 such as C, C, C and the like.  | 
| 
2916
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2917
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 Default_Lexer  | 
| 
2918
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2919
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 B the C function is a class method, not  | 
| 
2920
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 an object method. It is a fatal error to call it as an object  | 
| 
2921
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 method.  | 
| 
2922
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2923
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The C method lets you replace the default pattern  | 
| 
2924
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 used for all subsequently created C objects. It  | 
| 
2925
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 will not have any effect on existing objects. (It is also possible  | 
| 
2926
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 to override the lexer pattern used on a per-object basis).  | 
| 
2927
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2928
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The parameter should be an ordinary scalar, not a compiled  | 
| 
2929
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 pattern. If the pattern fails to match all parts of the string,  | 
| 
2930
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the missing parts will be returned as single chunks. Therefore  | 
| 
2931
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the following pattern is legal (albeit rather cork-brained):  | 
| 
2932
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2933
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     Regexp::Assemble::Default_Lexer( '\\d' );  | 
| 
2934
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2935
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The above pattern will split up input strings digit by digit, and  | 
| 
2936
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 all non-digit characters as single chunks.  | 
| 
2937
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2938
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 DIAGNOSTICS  | 
| 
2939
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2940
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   "Cannot pass a C to Default_Lexer"  | 
| 
2941
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2942
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 You tried to replace the default lexer pattern with an object  | 
| 
2943
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 instead of a scalar. Solution: You probably tried to call  | 
| 
2944
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C<< $obj->Default_Lexer >>. Call the qualified class method instead  | 
| 
2945
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C.  | 
| 
2946
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2947
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   "filter method not passed a coderef"  | 
| 
2948
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2949
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   "pre_filter method not passed a coderef"  | 
| 
2950
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2951
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 A reference to a subroutine (anonymous or otherwise) was expected.  | 
| 
2952
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Solution: read the documentation for the C method.  | 
| 
2953
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2954
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   "duplicate pattern added: /.../"  | 
| 
2955
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2956
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The C attribute is active, and a duplicate pattern was  | 
| 
2957
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 added (well duh!). Solution: clean your data.  | 
| 
2958
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2959
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   "cannot open [file] for input: [reason]"  | 
| 
2960
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2961
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The C method was unable to open the specified file for  | 
| 
2962
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 whatever reason. Solution: make sure the file exists and the script  | 
| 
2963
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 has the required privileges to read it.  | 
| 
2964
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2965
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 NOTES  | 
| 
2966
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2967
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This module has been tested successfully with a range of versions  | 
| 
2968
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 of perl, from 5.005_03 to 5.9.3. Use of 5.6.0 is not recommended.  | 
| 
2969
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2970
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The expressions produced by this module can be used with the PCRE  | 
| 
2971
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 library.  | 
| 
2972
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2973
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Remember to "double up" your backslashes if the patterns are  | 
| 
2974
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 hard-coded as constants in your program. That is, you should  | 
| 
2975
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 literally C rather than C. It  | 
| 
2976
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 usually will work either way, but it's good practice to do so.  | 
| 
2977
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2978
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Where possible, supply the simplest tokens possible. Don't add  | 
| 
2979
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C when C will do. The reason is that  | 
| 
2980
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 if you also add C the resulting assembly changes  | 
| 
2981
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 dramatically: C I  | 
| 
2982
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C. Since R::A doesn't perform enough analysis,  | 
| 
2983
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 it won't "unroll" the C<{2}> quantifier, and will fail to notice  | 
| 
2984
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the divergence after the first C<-d\d+>.  | 
| 
2985
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2986
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Furthermore, when the string 'X-123000P' is matched against the  | 
| 
2987
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 first assembly, the regexp engine will have to backtrack over each  | 
| 
2988
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 alternation (the one that ends in Y B the one that ends in Z)  | 
| 
2989
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 before determining that there is no match. No such backtracking  | 
| 
2990
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 occurs in the second pattern: as soon as the engine encounters the  | 
| 
2991
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'P' in the target string, neither of the alternations at that point  | 
| 
2992
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (C<-\d+Y> or C) could succeed and so the match fails.  | 
| 
2993
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2994
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C does, however, know how to build character  | 
| 
2995
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 classes. Given C, C and C, it will assemble these  | 
| 
2996
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 into C. When C<-> (dash) appears as a candidate for a  | 
| 
2997
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 character class it will be the first character in the class. When  | 
| 
2998
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C<^> (circumflex) appears as a candidate for a character class it  | 
| 
2999
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 will be the last character in the class.  | 
| 
3000
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3001
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 It also knows about meta-characters than can "absorb" regular  | 
| 
3002
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 characters. For instance, given C and C, it knows that  | 
| 
3003
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C<5> can be represented by C<\d> and so the assembly is just C.  | 
| 
3004
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The "absorbent" meta-characters it deals with are C<.>, C<\d>, C<\s>  | 
| 
3005
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 and C<\W> and their complements. It will replace C<\d>/C<\D>,  | 
| 
3006
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C<\s>/C<\S> and C<\w>/C<\W> by C<.> (dot), and it will drop C<\d>  | 
| 
3007
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 if C<\w> is also present (as will C<\D> in the presence of C<\W>).  | 
| 
3008
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3009
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C deals correctly with C's propensity  | 
| 
3010
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 to backslash many characters that have no need to be. Backslashes on  | 
| 
3011
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 non-metacharacters will be removed. Similarly, in character classes,  | 
| 
3012
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 a number of characters lose their magic and so no longer need to be  | 
| 
3013
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 backslashed within a character class. Two common examples are C<.>  | 
| 
3014
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (dot) and C<$>. Such characters will lose their backslash.  | 
| 
3015
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3016
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 At the same time, it will also process C<\Q...\E> sequences. When  | 
| 
3017
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 such a sequence is encountered, the inner section is extracted and  | 
| 
3018
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C is applied to the section. The resulting quoted text  | 
| 
3019
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is then used in place of the original unquoted text, and the C<\Q>  | 
| 
3020
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 and C<\E> metacharacters are thrown away. Similar processing occurs  | 
| 
3021
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 with the C<\U...\E> and C<\L...\E> sequences. This may have surprising  | 
| 
3022
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 effects when using a dispatch table. In this case, you will need  | 
| 
3023
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 to know exactly what the module makes of your input. Use the C  | 
| 
3024
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 method to find out what's going on:  | 
| 
3025
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3026
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $pattern = join( '', @{$re->lexstr($pattern)} );  | 
| 
3027
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3028
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If all the digits 0..9 appear in a character class, C  | 
| 
3029
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 will replace them by C<\d>. I'd do it for letters as well, but  | 
| 
3030
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 thinking about accented characters and other glyphs hurts my head.  | 
| 
3031
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3032
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 In an alternation, the longest paths are chosen first (for example,  | 
| 
3033
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C). When two paths have the same length, the path  | 
| 
3034
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 with the most subpaths will appear first. This aims to put the  | 
| 
3035
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 "busiest" paths to the front of the alternation. For example, the  | 
| 
3036
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 list C, C, C, C and C will produce the  | 
| 
3037
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 pattern C<(?:f(?:ew|ig|un)|b(?:ad|it))>. See F for a  | 
| 
3038
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 real-world example of how alternations are sorted. Once you have  | 
| 
3039
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 looked at that, everything should be crystal clear.  | 
| 
3040
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3041
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 When tracking is in use, no reduction is performed. nor are  | 
| 
3042
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 character classes formed. The reason is that it is  | 
| 
3043
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 too difficult to determine the original pattern afterwards. Consider the  | 
| 
3044
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 two patterns C and C. These should be reduced to  | 
| 
3045
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C. The final character matches one of two possibilities.  | 
| 
3046
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 To resolve whether it matched an C<'e'> or C<'m'> would require  | 
| 
3047
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 keeping track of the fact that the pattern finished up in a character  | 
| 
3048
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 class, which would the require a whole lot more work to figure out  | 
| 
3049
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 which character of the class matched. Without character classes  | 
| 
3050
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 it becomes much easier. Instead, C is produced, which  | 
| 
3051
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 lets us find out more simply where we ended up.  | 
| 
3052
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3053
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Similarly, C and C should form C<(?:dog|sea)food>.  | 
| 
3054
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 When the pattern is being assembled, the tracking decision needs  | 
| 
3055
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 to be made at the end of the grouping, but the tail of the pattern  | 
| 
3056
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 has not yet been visited. Deferring things to make this work correctly  | 
| 
3057
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is a vast hassle. In this case, the pattern becomes merely  | 
| 
3058
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C<(?:dogfood|seafood>. Tracked patterns will therefore be bulkier than  | 
| 
3059
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 simple patterns.  | 
| 
3060
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3061
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 There is an open bug on this issue:  | 
| 
3062
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3063
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L  | 
| 
3064
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3065
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If this bug is ever resolved, tracking would become much easier to  | 
| 
3066
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 deal with (none of the C hassle would be required - you could  | 
| 
3067
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 just match like a regular RE and it would Just Work).  | 
| 
3068
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3069
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SEE ALSO  | 
| 
3070
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3071
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
3072
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3073
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item L  | 
| 
3074
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3075
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 General information about Perl's regular expressions.  | 
| 
3076
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3077
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item L  | 
| 
3078
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3079
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Specific information about C | 
| 
3080
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3081
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item Regex::PreSuf  | 
| 
3082
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3083
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C takes a string and chops it itself into tokens of  | 
| 
3084
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 length 1. Since it can't deal with tokens of more than one character,  | 
| 
3085
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 it can't deal with meta-characters and thus no regular expressions.  | 
| 
3086
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Which is the main reason why I wrote this module.  | 
| 
3087
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3088
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item Regexp::Optimizer  | 
| 
3089
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3090
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C produces regular expressions that are similar to  | 
| 
3091
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 those produced by R::A with reductions switched off. It's biggest  | 
| 
3092
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 drawback is that it is exponentially slower than Regexp::Assemble on  | 
| 
3093
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 very large sets of patterns.  | 
| 
3094
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3095
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item Regexp::Parser  | 
| 
3096
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3097
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Fine grained analysis of regular expressions.  | 
| 
3098
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3099
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item Regexp::Trie  | 
| 
3100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Funnily enough, this was my working name for C  | 
| 
3102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 during its development. I changed the name because I thought it  | 
| 
3103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 was too obscure. Anyway, C does much the same as  | 
| 
3104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C and C except that it runs  | 
| 
3105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 much faster (according to the author). It does not recognise  | 
| 
3106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 meta characters (that is, 'a+b' is interpreted as 'a\+b').  | 
| 
3107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item Text::Trie  | 
| 
3109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C is well worth investigating. Tries can outperform very  | 
| 
3111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 bushy (read: many alternations) patterns.  | 
| 
3112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item Tree::Trie  | 
| 
3114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C is another module that builds tries. The algorithm that  | 
| 
3116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C uses appears to be quite similar to the  | 
| 
3117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 algorithm described therein, except that C solves its  | 
| 
3118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 end-marker problem without having to rewrite the leaves.  | 
| 
3119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
3121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 LIMITATIONS  | 
| 
3123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Some mildly complex cases are not handled well. See examples/failure.01.pl  | 
| 
3125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 and L.  | 
| 
3126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  does not attempt to find common substrings. For  | 
| 
3128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 instance, it will not collapse C down to C.  | 
| 
3129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If there's a module out there that performs this sort of string  | 
| 
3130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 analysis I'd like to know about it. But keep in mind that the  | 
| 
3131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 algorithms that do this are very expensive: quadratic or worse.  | 
| 
3132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C does not interpret meta-character modifiers.  | 
| 
3134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 For instance, if the following two patterns are  | 
| 
3135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 given: C and C, it will not determine that C<\d> can be  | 
| 
3136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 matched by C<\d+>. Instead, it will produce C. Along  | 
| 
3137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 a similar line of reasoning, it will not determine that C and  | 
| 
3138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C is equivalent to C (It will produce C  | 
| 
3139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 instead).  | 
| 
3140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 You cannot remove a pattern that has been added to an object. You'll  | 
| 
3142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 just have to start over again. Adding a pattern is difficult enough,  | 
| 
3143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 I'd need a solid argument to convince me to add a C method.  | 
| 
3144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If you need to do this you should read the documentation for the  | 
| 
3145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C method.  | 
| 
3146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C does not (yet)? employ the C<(?E...)>  | 
| 
3148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 construct.  | 
| 
3149
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The module does not produce POSIX-style regular expressions. This  | 
| 
3151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 would be quite easy to add, if there was a demand for it.  | 
| 
3152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 BUGS  | 
| 
3154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Patterns that generate look-ahead assertions sometimes produce  | 
| 
3156
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 incorrect patterns in certain obscure corner cases. If you  | 
| 
3157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 suspect that this is occurring in your pattern, disable  | 
| 
3158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 lookaheads.  | 
| 
3159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3160
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Tracking doesn't really work at all with 5.6.0. It works better  | 
| 
3161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 in subsequent 5.6 releases. For maximum reliability, the use of  | 
| 
3162
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 a 5.8 release is strongly recommended. Tracking barely works with  | 
| 
3163
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 5.005_04. Of note, using C<\d>-style meta-characters invariably  | 
| 
3164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 causes panics. Tracking really comes into its own in Perl 5.10.  | 
| 
3165
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3166
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If you feed C patterns with nested parentheses,  | 
| 
3167
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 there is a chance that the resulting pattern will be uncompilable  | 
| 
3168
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 due to mismatched parentheses (not enough closing parentheses). This  | 
| 
3169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is normal, so long as the default lexer pattern is used. If you want  | 
| 
3170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 to find out which pattern among a list of 3000 patterns are to blame  | 
| 
3171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (speaking from experience here), the F script offers  | 
| 
3172
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 a strategy for pinpointing the pattern at fault. While you may not  | 
| 
3173
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 be able to use the script directly, the general approach is easy to  | 
| 
3174
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 implement.  | 
| 
3175
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3176
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The algorithm used to assemble the regular expressions makes extensive  | 
| 
3177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 use of mutually-recursive functions (that is, A calls B, B calls  | 
| 
3178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 A, ...) For deeply similar expressions, it may be possible to provoke  | 
| 
3179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 "Deep recursion" warnings.  | 
| 
3180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The module has been tested extensively, and has an extensive test  | 
| 
3182
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 suite (that achieves close to 100% statement coverage), but you  | 
| 
3183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 never know...  A bug may manifest itself in two ways: creating a  | 
| 
3184
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 pattern that cannot be compiled, such as C, or a pattern  | 
| 
3185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 that compiles correctly but that either matches things it shouldn't,  | 
| 
3186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 or doesn't match things it should. It is assumed that Such problems  | 
| 
3187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 will occur when the reduction algorithm encounters some sort of  | 
| 
3188
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 edge case. A temporary work-around is to disable reductions:  | 
| 
3189
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3190
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $pattern = $assembler->reduce(0)->re;  | 
| 
3191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 A discussion about implementation details and where bugs might lurk  | 
| 
3193
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 appears in the README file. If this file is not available locally,  | 
| 
3194
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 you should be able to find a copy on the Web at your nearest CPAN  | 
| 
3195
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 mirror.  | 
| 
3196
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Seriously, though, a number of people have been using this module to  | 
| 
3198
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 create expressions anywhere from 140Kb to 600Kb in size, and it seems to  | 
| 
3199
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 be working according to spec. Thus, I don't think there are any serious  | 
| 
3200
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 bugs remaining.  | 
| 
3201
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If you are feeling brave, extensive debugging traces are available to  | 
| 
3203
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 figure out where assembly goes wrong.  | 
| 
3204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Please report all bugs at  | 
| 
3206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L  | 
| 
3207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3208
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Make sure you include the output from the following two commands:  | 
| 
3209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   perl -MRegexp::Assemble -le 'print $Regexp::Assemble::VERSION'  | 
| 
3211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   perl -V  | 
| 
3212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 There is a mailing list for the discussion of C.  | 
| 
3214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Subscription details are available at  | 
| 
3215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L.  | 
| 
3216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 ACKNOWLEDGEMENTS  | 
| 
3218
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3219
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This module grew out of work I did building access maps for Postfix,  | 
| 
3220
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 a modern SMTP mail transfer agent. See L  | 
| 
3221
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 for more information. I used Perl to build large regular expressions  | 
| 
3222
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 for blocking dynamic/residential IP addresses to cut down on spam  | 
| 
3223
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 and viruses. Once I had the code running for this, it was easy to  | 
| 
3224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 start adding stuff to block really blatant spam subject lines, bogus  | 
| 
3225
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 HELO strings, spammer mailer-ids and more...  | 
| 
3226
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3227
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 I presented the work at the French Perl Workshop in 2004, and the  | 
| 
3228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 thing most people asked was whether the underlying mechanism for  | 
| 
3229
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 assembling the REs was available as a module. At that time it was  | 
| 
3230
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 nothing more that a twisty maze of scripts, all different. The  | 
| 
3231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 interest shown indicated that a module was called for. I'd like to  | 
| 
3232
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 thank the people who showed interest. Hey, it's going to make I  | 
| 
3233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 messy scripts smaller, in any case.  | 
| 
3234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3235
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Thomas Drugeon was a valuable sounding board for trying out  | 
| 
3236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 early ideas. Jean Forget and Philippe Blayo looked over an early  | 
| 
3237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 version. H.Merijn Brandt stopped over in Paris one evening, and  | 
| 
3238
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 discussed things over a few beers.  | 
| 
3239
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Nicholas Clark pointed out that while what this module does  | 
| 
3241
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (?:c|sh)ould be done in perl's core, as per the 2004 TODO, he  | 
| 
3242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 encouraged me to continue with the development of this module. In  | 
| 
3243
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 any event, this module allows one to gauge the difficulty of  | 
| 
3244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 undertaking the endeavour in C. I'd rather gouge my eyes out with  | 
| 
3245
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 a blunt pencil.  | 
| 
3246
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3247
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Paul Johnson settled the question as to whether this module should  | 
| 
3248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 live in the Regex:: namespace, or Regexp:: namespace. If you're  | 
| 
3249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 not convinced, try running the following one-liner:  | 
| 
3250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3251
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   perl -le 'print ref qr//'  | 
| 
3252
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3253
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Philippe Bruhat found a couple of corner cases where this module  | 
| 
3254
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 could produce incorrect results. Such feedback is invaluable,  | 
| 
3255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 and only improves the module's quality.  | 
| 
3256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 Machine-Readable Change Log  | 
| 
3258
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3259
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The file Changes was converted into Changelog.ini by L.  | 
| 
3260
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3261
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 AUTHOR  | 
| 
3262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3263
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 David Landgren  | 
| 
3264
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3265
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Copyright (C) 2004-2011. All rights reserved.  | 
| 
3266
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3267
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   http://www.landgren.net/perl/  | 
| 
3268
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3269
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If you use this module, I'd love to hear about what you're using  | 
| 
3270
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 it for. If you want to be informed of updates, send me a note.  | 
| 
3271
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3272
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Ron Savage is co-maint of the module, starting with V 0.36.  | 
| 
3273
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 Repository  | 
| 
3275
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3276
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L  | 
| 
3277
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3278
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 TODO  | 
| 
3279
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3280
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1. Tree equivalencies. Currently, /contend/ /content/ /resend/ /resent/  | 
| 
3281
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 produces (?:conten[dt]|resend[dt]) but it is possible to produce  | 
| 
3282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (?:cont|res)en[dt] if one can spot the common tail nodes (and walk back  | 
| 
3283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the equivalent paths). Or be by me my => /[bm][ey]/ in the simplest case.  | 
| 
3284
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3285
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 To do this requires a certain amount of restructuring of the code.  | 
| 
3286
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Currently, the algorithm uses a two-phase approach. In the first  | 
| 
3287
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 phase, the trie is traversed and reductions are performed. In the  | 
| 
3288
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 second phase, the reduced trie is traversed and the pattern is  | 
| 
3289
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 emitted.  | 
| 
3290
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 What has to occur is that the reduction and emission have to occur  | 
| 
3292
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 together. As a node is completed, it is replaced by its string  | 
| 
3293
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 representation. This then allows child nodes to be compared for  | 
| 
3294
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 equality with a simple 'eq'. Since there is only a single traversal,  | 
| 
3295
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the overall generation time might drop, even though the context  | 
| 
3296
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 baggage required to delve through the tree will be more expensive  | 
| 
3297
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 to carry along (a hash rather than a couple of scalars).  | 
| 
3298
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3299
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Actually, a simpler approach is to take on a secret sentinel  | 
| 
3300
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 atom at the end of every pattern, which gives the reduction  | 
| 
3301
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 algorithm sufficient traction to create a perfect trie.  | 
| 
3302
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3303
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 I'm rewriting the reduction code using this technique.  | 
| 
3304
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3305
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 2. Investigate how (?>foo) works. Can it be applied?  | 
| 
3306
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 5. How can a tracked pattern be serialised? (Add freeze and thaw methods).  | 
| 
3308
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3309
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 6. Store callbacks per tracked pattern.  | 
| 
3310
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3311
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 12. utf-8... hmmmm...  | 
| 
3312
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3313
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 14. Adding qr//'ed patterns. For example, consider  | 
| 
3314
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $r->add ( qr/^abc/i )  | 
| 
3315
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ->add( qr/^abd/ )  | 
| 
3316
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ->add( qr/^ab e/x );  | 
| 
3317
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     this should admit abc abC aBc aBC abd abe as matches  | 
| 
3318
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3319
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 16. Allow a fast, unsafe tracking mode, that can be used if a(?bc)?  | 
| 
3320
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     can't happen. (Possibly carp if it does appear during traversal)?  | 
| 
3321
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3322
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 17. given a-\d+-\d+-\d+-\d+-b, produce a(?:-\d+){4}-b. Something  | 
| 
3323
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     along the lines of (.{4))(\1+) would let the regexp engine  | 
| 
3324
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     itself be brought to bear on the matter, which is a rather  | 
| 
3325
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     appealing idea. Consider  | 
| 
3326
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3327
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       while(/(?!\+)(\S{2,}?)(\1+)/g) { ... $1, $2 ... }  | 
| 
3328
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3329
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     as a starting point.  | 
| 
3330
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3331
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 19. The reduction code has become unbelievably baroque. Adding code  | 
| 
3332
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     to handle (sting,singing,sing) => s(?:(?:ing)?|t)ing was far  | 
| 
3333
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     too difficult. Adding more stuff just breaks existing behaviour.  | 
| 
3334
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     And fixing the ^abcd$ ... bug broke stuff all over again.  | 
| 
3335
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     Now that the corner cases are more clearly identified, a full  | 
| 
3336
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     rewrite of the reduction code is needed. And would admit the  | 
| 
3337
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     possibility of implementing items 1 and 17.  | 
| 
3338
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3339
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 20. Handle debug unrev with a separate bit  | 
| 
3340
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3341
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 23. Japhy's http://www.perlmonks.org/index.pl?node_id=90876 list2range  | 
| 
3342
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     regexp  | 
| 
3343
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3344
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 24. Lookahead assertions contain serious bugs (as shown by  | 
| 
3345
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     assembling powersets. Need to save more context during reduction,  | 
| 
3346
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     which in turn will simplify the preparation of the lookahead  | 
| 
3347
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     classes. See also 19.  | 
| 
3348
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3349
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 26. _lex() swamps the overall run-time. It stems from the decision  | 
| 
3350
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     to use a single regexp to pull apart any pattern. A suite of  | 
| 
3351
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     simpler regexp to pick of parens, char classes, quantifiers  | 
| 
3352
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     and bare tokens may be faster. (This has been implemented as  | 
| 
3353
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	_fastlex(), but it's only marginally faster. Perhaps split-by-  | 
| 
3354
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	char and lex a la C?  | 
| 
3355
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3356
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 27. We don't, as yet, unroll_plus a paren e.g. (abc)+?  | 
| 
3357
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3358
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 28. We don't reroll unrolled a a* to a+ in indented or tracked  | 
| 
3359
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     output  | 
| 
3360
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3361
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 29. Use (*MARK n) in blead for tracked patterns, and use (*FAIL) for  | 
| 
3362
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     the unmatchable pattern.  | 
| 
3363
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3364
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 LICENSE  | 
| 
3365
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3366
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This library is free software; you can redistribute it and/or modify  | 
| 
3367
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 it under the same terms as Perl itself.  | 
| 
3368
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3369
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
3370
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3371
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Return a +ve value to tell Perl the module is ready to go.  | 
| 
3372
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3373
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'The Lusty Decadent Delights of Imperial Pompeii';  |