|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
7
 | 
 
 | 
 
 | 
  
7
  
 | 
 
 | 
7788
 | 
 use 5.008008;  | 
| 
 
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
24
 | 
    | 
| 
 
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
312
 | 
    | 
| 
2
 | 
7
 | 
 
 | 
 
 | 
  
7
  
 | 
 
 | 
37
 | 
 use strict;  | 
| 
 
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
    | 
| 
 
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
196
 | 
    | 
| 
3
 | 
7
 | 
 
 | 
 
 | 
  
7
  
 | 
 
 | 
34
 | 
 use warnings;  | 
| 
 
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
25
 | 
    | 
| 
 
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
187
 | 
    | 
| 
4
 | 
7
 | 
 
 | 
 
 | 
  
7
  
 | 
 
 | 
6640
 | 
 use integer;  | 
| 
 
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
70
 | 
    | 
| 
 
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
36
 | 
    | 
| 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package Regexp::ERE;  | 
| 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $VERSION = '0.02';  | 
| 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 BEGIN {  | 
| 
10
 | 
7
 | 
 
 | 
 
 | 
  
7
  
 | 
 
 | 
461
 | 
     use Exporter ();  | 
| 
 
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
    | 
| 
 
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
521
 | 
    | 
| 
11
 | 
7
 | 
 
 | 
 
 | 
  
7
  
 | 
 
 | 
16
 | 
     our (@ISA, @EXPORT_OK);  | 
| 
12
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
120
 | 
     @ISA = qw(Exporter);  | 
| 
13
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
522
 | 
     @EXPORT_OK = qw(  | 
| 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         &ere_to_nfa  | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         &ere_to_tree  | 
| 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         &ere_to_regex  | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         &ere_to_input_constraints  | 
| 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         &nfa_to_tree  | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         &nfa_to_regex  | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         &nfa_to_input_constraints  | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         &nfa_clone  | 
| 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         &nfa_concat  | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         &nfa_union  | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         &nfa_inter  | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         &nfa_match  | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         &nfa_quant  | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         &nfa_isomorph  | 
| 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         &nfa_to_dfa  | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         &dfa_to_min_dfa  | 
| 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         &nfa_to_min_dfa  | 
| 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         &tree_to_regex  | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         &tree_to_input_constraints  | 
| 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         &char_to_cc  | 
| 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         &interval_list_to_cc  | 
| 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         &cc_union  | 
| 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     );  | 
| 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =encoding utf8  | 
| 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 NAME  | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
43
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Regexp::ERE - extended regular expressions and finite automata  | 
| 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SYNOPSIS  | 
| 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
47
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   use Regexp::ERE qw(  | 
| 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       &ere_to_nfa  | 
| 
49
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       &nfa_inter  | 
| 
50
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       &nfa_to_regex  | 
| 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       &nfa_to_input_constraints  | 
| 
52
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       &nfa_to_dfa  | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       &dfa_to_min_dfa  | 
| 
54
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   );  | 
| 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
56
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # condition 1: begins with abc or def  | 
| 
57
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $nfa1 = ere_to_nfa('^(abc|def)');  | 
| 
58
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # condition 2: ends with 123 or 456  | 
| 
60
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $nfa2 = ere_to_nfa('(123|456)$');  | 
| 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
62
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # condition 1 and condition 2  | 
| 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $inter_nfa = nfa_inter($nfa1, $nfa2);  | 
| 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
65
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # compute extended regular expression (string)  | 
| 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $ere = nfa_to_regex($inter_nfa);  | 
| 
67
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # compute perl regular expression  | 
| 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $perlre = nfa_to_regex($inter_nfa, 1);  | 
| 
70
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # compute weaker input constraints suitable for widgets  | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my ($input_constraints, $split_perlre)  | 
| 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     = nfa_to_input_constraints($inter_nfa);  | 
| 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
75
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # minimal dfa (simpler regular expression happens to result)  | 
| 
76
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $nfa3 = ere_to_nfa('^(a|ab|b)*$');  | 
| 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $dfa3 = nfa_to_dfa($nfa3);  | 
| 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $min_dfa3 = dfa_to_min_dfa($dfa3);  | 
| 
79
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $ere3 = nfa_to_regex($min_dfa3);  | 
| 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 DESCRIPTION  | 
| 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
83
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Pure-perl module for:  | 
| 
84
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *  | 
| 
88
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Parsing POSIX Extended Regular Expressions (C<$ere>) into  | 
| 
90
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Non-Deterministic Finite Automata (C<$nfa>)  | 
| 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *  | 
| 
93
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Manipulating C<$nfa>s (concatenating, or-ing, and-ing)  | 
| 
95
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *  | 
| 
97
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
98
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Computing Deterministic Finite Automata (C<$dfa>s) from C<$nfa>s  | 
| 
99
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (powerset construction)  | 
| 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *  | 
| 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Computing minimal C<$dfa>s from C<$dfa>s (Hopcroft's algorithm)  | 
| 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *  | 
| 
106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Computing C<$ere>s or Perl Regular Expressions from C<$nfa> or C<$dfa>  | 
| 
108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (Warshall algorithm)  | 
| 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item *  | 
| 
111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Heuristically deriving (possibly weaker) constraints from a C<$nfa> or C<$dfa>  | 
| 
113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 suitable for display in a graphical user interface,  | 
| 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 i.e. a sequence of widgets of type 'free text' and 'drop down';  | 
| 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Example: '^(abc|def)' => $nfa => [['abc', 'def'], 'free text']  | 
| 
117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 GLOSSARY AND CONVERSIONS OVERVIEW  | 
| 
121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 Conversions overview  | 
| 
123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $ere -> $nfa -> $tree -> $regex  ($ere or $perlre)  | 
| 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         -> $input_constraints  | 
| 
126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   The second argument of -> $regex conversions is an optional boolean,  | 
| 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       true : conversion to a compiled perl regular expression  | 
| 
129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       false: conversion to an ere string  | 
| 
130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   The -> $input_constraints conversions return a pair (  | 
| 
132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       $input_constraints: aref as described at tree_to_input_constraints()  | 
| 
133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       $split_perlre     : a compiled perl regular expression  | 
| 
134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   )  | 
| 
135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 Glossary  | 
| 
138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item $char_class  | 
| 
142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 A set of unicode characters.  | 
| 
144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item $ere  | 
| 
146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Extended regular expression (string).  | 
| 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 See C for the exact syntax.  | 
| 
149
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item $perlre  | 
| 
151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Perl regular expression  | 
| 
153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item $nfa  | 
| 
155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
156
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Non-deterministic finite automaton  | 
| 
157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item $dfa  | 
| 
159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
160
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Deterministic finite automaton (special case of C<$nfa>)  | 
| 
161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
162
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item $tree  | 
| 
163
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Intermediate hierarchical representation of a regular expression  | 
| 
165
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (which still can be manipulated before stringification),  | 
| 
166
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 similar to a parse tree (but used for generating, not for parsing).  | 
| 
167
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
168
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item $input_constraints  | 
| 
169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Ad-hoc data structure representing a list of gui-widgets  | 
| 
171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (free text fields and drop-down lists),  | 
| 
172
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 a helper for entering inputs  | 
| 
173
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 conforming to a given C<$nfa>.  | 
| 
174
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
175
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
176
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ##############################################################################  | 
| 
181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Config  | 
| 
182
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ##############################################################################  | 
| 
183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
184
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # If true, nfa_to_tree() always expands concatned alternations.  | 
| 
185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Example: (ab|cd) (ef|gh)  -> (abef|abgh|cdef|cdgh)  | 
| 
186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $TREE_CONCAT_FULL_EXPAND = 0;  | 
| 
187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
188
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # If true, prefixes and suffixes are factorized out even for  | 
| 
189
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # trees with a single alternation.  | 
| 
190
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Example: (a1b|a2b) -> a(1|2)b  | 
| 
191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $FULL_FACTORIZE_FIXES = 0;  | 
| 
192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
193
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Should be 0. Else, traces nfa_to_tree() on STDERR.  | 
| 
194
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 use constant {  | 
| 
195
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
602
 | 
     TRACE_NFA_TO_TREE => 0  | 
| 
196
 | 
7
 | 
 
 | 
 
 | 
  
7
  
 | 
 
 | 
37
 | 
 };  | 
| 
 
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
    | 
| 
197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
198
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 use constant {  | 
| 
199
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2406
 | 
     MAX_CHAR   => 0x10FFFF  | 
| 
200
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   , CHAR_CLASS => 'cc' # for blessing $char_classes (label only, no methods)  | 
| 
201
 | 
7
 | 
 
 | 
 
 | 
  
7
  
 | 
 
 | 
33
 | 
 };  | 
| 
 
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
    | 
| 
202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
203
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 DATA STRUCTURES AND SUBROUTINES  | 
| 
205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Each of the documented subroutines can be imported,  | 
| 
207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 for instance C | 
| 
208
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ##############################################################################  | 
| 
213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # $char_class  | 
| 
214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ##############################################################################  | 
| 
215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 Character class  | 
| 
217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
218
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
219
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 WARNING: C<$char_class>es must be created exclusively by  | 
| 
220
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       char_to_cc()  | 
| 
221
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    or interval_list_to_cc()  | 
| 
222
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 for equivalent character classes to be always the same array reference.  | 
| 
223
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 For the same reason, C<$char_class>es must never be mutated.  | 
| 
224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
225
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 In this implementation, the state transitions of a C<$nfa> are based upon  | 
| 
226
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 character classes (not single characters). A character class is an ordered  | 
| 
227
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 list of disjunct, non-mergeable intervals (over unicode code points,  | 
| 
228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 i.e. positive integers).  | 
| 
229
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
230
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $char_class = [  | 
| 
231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       [ $low_0, $high_0 ] # $interval_0  | 
| 
232
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     , [ $low_1, $high_1 ] # $interval_1  | 
| 
233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     , ...  | 
| 
234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ]  | 
| 
235
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Constraints:  | 
| 
238
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
239
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     1:  0 <= $$char_class[$i][0]                          (0 <= low)  | 
| 
240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     2:  $$char_class[$i][1] <= MAX_CHAR                   (high <= MAX_CHAR)  | 
| 
241
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     3:  $$char_class[$i][0] <= $$char_class[$i][1]        (low <= high)  | 
| 
242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     4:  $$char_class[$i][1] + 1 <  $$char_class[$i+1][0]  (non mergeable)  | 
| 
243
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
245
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Exceptions (anchors used only in the parsing phase only):  | 
| 
246
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
247
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     begin         : [ -2, -1 ]  | 
| 
248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     end           : [ -3, -2 ]  | 
| 
249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     begin or end  : [ -3, -1 ]  | 
| 
250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
251
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Immediately after parsing, such pseudo-character classes  | 
| 
252
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 are removed by C.  | 
| 
253
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
254
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
258
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $ERE_litteral = qr/ [^.[\\()*+?{|^\$] /xms;  | 
| 
259
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $PERLRE_char_class_special = qr/ [\[\]\\\^\-] /xms;  | 
| 
260
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
261
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $cc_any = bless([[ 0, MAX_CHAR ]], CHAR_CLASS);  | 
| 
262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $cc_none = bless([], CHAR_CLASS);  | 
| 
263
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $cc_beg = bless([[ -2, -1]], CHAR_CLASS);  | 
| 
264
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $cc_end = bless([[ -3, -2]], CHAR_CLASS);  | 
| 
265
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
266
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
267
 | 
7
 | 
 
 | 
 
 | 
  
7
  
 | 
 
 | 
40
 | 
     no warnings qw(utf8); # in particular for 0x10FFFF  | 
| 
 
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
    | 
| 
 
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
126162
 | 
    | 
| 
268
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
269
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my %cc_cache;  | 
| 
270
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # keys: join(',',1,map{@$_}@{$char_class})  | 
| 
271
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
272
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     for ($cc_any, $cc_none, $cc_beg, $cc_end) {  | 
| 
273
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $cc_cache{ join(',', 1, map {@$_} @$_) } = $_;  | 
| 
274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
275
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
276
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item char_to_cc($c)  | 
| 
277
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
278
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns the unique $char_class equivalent to C<[[ord($c), ord($c)]]>.  | 
| 
279
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
280
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
281
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     sub char_to_cc {  | 
| 
283
 | 
196
 | 
 
 | 
  
100
  
 | 
  
196
  
 | 
  
1
  
 | 
8886
 | 
         return $cc_cache{ join(',', 1, (ord($_[0])) x 2) }  | 
| 
284
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          ||= bless([[ord($_[0]), ord($_[0])]], CHAR_CLASS);  | 
| 
285
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
286
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
287
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # $interval_list is the same data structure as $char_class.  | 
| 
288
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Constraints 1, 2 are assumed.  | 
| 
289
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Constraints 3, 4 are enforced.  | 
| 
290
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item interval_list_to_cc($interval_list)  | 
| 
292
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
293
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C<$interval_list> is an arbitrary list of intervals.  | 
| 
294
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns the unique C<$char_class> whose reunion of intervals  | 
| 
295
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is the same set as the reunion of the intervals of C<$interval_list>.  | 
| 
296
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
297
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Example:  | 
| 
298
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
299
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     interval_list_to_cc([[102, 112], [65, 90], [97, 102], [113, 122]])  | 
| 
300
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     returns [[65, 90], [97, 122]]  | 
| 
301
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     (i.e [f-p]|[A-Z]|[a-f]|[q-z] => [A-Z]|[a-z])  | 
| 
302
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
303
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Note that both $interval_list and $char_class are lists of intervals,  | 
| 
304
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 but only $char_class obeys the constraints above,  | 
| 
305
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 while $interval_list does not.  | 
| 
306
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Remark also that C is the identity  | 
| 
308
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (returns the same reference as given) on C<$char_class>es returned  | 
| 
309
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 by either C or C.  | 
| 
310
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
311
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
312
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
313
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     sub interval_list_to_cc {  | 
| 
314
 | 
4171
 | 
 
 | 
 
 | 
  
4171
  
 | 
  
1
  
 | 
4905
 | 
         my ($interval_list) =  @_;  | 
| 
315
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my @sorted  | 
| 
316
 | 
4384
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6261
 | 
           = sort { $$a[0] <=> $$b[0] }  | 
| 
 
 | 
6548
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16986
 | 
    | 
| 
317
 | 
4171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5462
 | 
             grep { $$_[0] <= $$_[1] }  | 
| 
318
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             @$interval_list  | 
| 
319
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ;  | 
| 
320
 | 
4171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7854
 | 
         my $char_class = bless([], CHAR_CLASS);  | 
| 
321
 | 
4171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5153
 | 
         my $i = 0;  | 
| 
322
 | 
4171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8812
 | 
         while ($i != @sorted) {  | 
| 
323
 | 
4519
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5415
 | 
             my $interval = $sorted[$i];  | 
| 
324
 | 
4519
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4465
 | 
             $i++;  | 
| 
325
 | 
4519
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
12929
 | 
             while ($i != @sorted && $$interval[1] + 1 >= $sorted[$i][0]) {  | 
| 
326
 | 
2029
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
4279
 | 
                 if ($$interval[1] < $sorted[$i][1]) {  | 
| 
327
 | 
2028
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2985
 | 
                     $$interval[1] = $sorted[$i][1];  | 
| 
328
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
329
 | 
2029
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7368
 | 
                 $i++;  | 
| 
330
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
331
 | 
4519
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11944
 | 
             push(@$char_class, $interval);  | 
| 
332
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
333
 | 
4171
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
6454
 | 
         return $cc_cache{ join(',', 1, map {@$_} @$char_class) }  | 
| 
 
 | 
4519
 | 
 
 | 
 
 | 
 
 | 
 
 | 
24913
 | 
    | 
| 
334
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          ||= $char_class;  | 
| 
335
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
336
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
337
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     sub cc_neg {  | 
| 
338
 | 
1051
 | 
 
 | 
 
 | 
  
1051
  
 | 
  
0
  
 | 
1239
 | 
         my ($char_class) = @_;  | 
| 
339
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
340
 | 
1051
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2152
 | 
         if (!@$char_class) { return $cc_any; }  | 
| 
 
 | 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
493
 | 
    | 
| 
341
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
342
 | 
970
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1672
 | 
         my $neg = bless([], CHAR_CLASS);  | 
| 
343
 | 
970
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2304
 | 
         if ($$char_class[0][0] != 0) {  | 
| 
344
 | 
932
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2373
 | 
             push(@$neg, [0, $$char_class[0][0] - 1]);  | 
| 
345
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
346
 | 
970
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1094
 | 
         my $i = 0;  | 
| 
347
 | 
970
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2397
 | 
         while ($i != $#$char_class) {  | 
| 
348
 | 
153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
478
 | 
             push(@$neg, [$$char_class[$i][1] + 1, $$char_class[$i+1][0] - 1]);  | 
| 
349
 | 
153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
342
 | 
             $i++;  | 
| 
350
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
351
 | 
970
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2134
 | 
         if ($$char_class[$i][1] != MAX_CHAR) {  | 
| 
352
 | 
932
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2223
 | 
             push(@$neg, [$$char_class[$i][1] + 1, MAX_CHAR]);  | 
| 
353
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
354
 | 
970
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
1503
 | 
         return $cc_cache{ join(',', 1, map{@$_} @$neg) } ||= $neg;  | 
| 
 
 | 
2017
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19610
 | 
    | 
| 
355
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
356
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
357
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     sub cc_inter2 {  | 
| 
358
 | 
354
 | 
 
 | 
 
 | 
  
354
  
 | 
  
0
  
 | 
489
 | 
         my ($char_class_0, $char_class_1) = @_;  | 
| 
359
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
360
 | 
354
 | 
 
 | 
 
 | 
 
 | 
 
 | 
632
 | 
         my $inter = bless([], CHAR_CLASS);  | 
| 
361
 | 
354
 | 
 
 | 
 
 | 
 
 | 
 
 | 
393
 | 
         my $i_0 = 0;  | 
| 
362
 | 
354
 | 
 
 | 
 
 | 
 
 | 
 
 | 
336
 | 
         my $i_1 = 0;  | 
| 
363
 | 
354
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
1472
 | 
         while ($i_0 < @$char_class_0 && $i_1 < @$char_class_1) {  | 
| 
364
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
365
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # skip interval_0 if interval_0 < interval_1  | 
| 
366
 | 
416
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
2577
 | 
             while (  | 
| 
 
 | 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
367
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $i_0 < @$char_class_0  | 
| 
368
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              && $i_1 < @$char_class_1  | 
| 
369
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              && $$char_class_0[$i_0][1] < $$char_class_1[$i_1][0]  | 
| 
370
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ) {  | 
| 
371
 | 
312
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1608
 | 
                 $i_0++;  | 
| 
372
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
373
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
374
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # skip interval_1 if interval_1 < interval_0  | 
| 
375
 | 
416
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
2243
 | 
             while (  | 
| 
 
 | 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
376
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $i_0 < @$char_class_0  | 
| 
377
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              && $i_1 < @$char_class_1  | 
| 
378
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              && $$char_class_1[$i_1][1] < $$char_class_0[$i_0][0]  | 
| 
379
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ) {  | 
| 
380
 | 
141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
601
 | 
                 $i_1++;  | 
| 
381
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
382
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
383
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # Check that the exit condition of the first while still holds.  | 
| 
384
 | 
416
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
2402
 | 
             if (  | 
| 
 
 | 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $i_0 < @$char_class_0  | 
| 
386
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              && $i_1 < @$char_class_1  | 
| 
387
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              && $$char_class_1[$i_1][0] <= $$char_class_0[$i_0][1]  | 
| 
388
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ) {  | 
| 
389
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # The exit conditions of both whiles hold:  | 
| 
390
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #  | 
| 
391
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #     $$char_class_0[$i_0][1] >= $$char_class_1[$i_1][0]  | 
| 
392
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #  && $$char_class_1[$i_1][1] >= $$char_class_0[$i_0][0]  | 
| 
393
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #  | 
| 
394
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # short:  | 
| 
395
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #     high_0 >= low_1  | 
| 
396
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #     high_1 >= low_0  | 
| 
397
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #  | 
| 
398
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # furthermore:  | 
| 
399
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #     high_0 >= low_0  | 
| 
400
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #     high_1 >= low_1  | 
| 
401
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #  | 
| 
402
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # with:  | 
| 
403
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #     min_high := min(high_0, high_1)  | 
| 
404
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #     max_low := max(low_0, low_1)  | 
| 
405
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #  | 
| 
406
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # holds:  | 
| 
407
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #     min_high >= max_low_0  | 
| 
408
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
409
 | 
213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
188
 | 
                 my ($interval_0_done, $interval_1_done);  | 
| 
410
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
411
 | 
213
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
502
 | 
                 my $max_low =  | 
| 
412
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     $$char_class_0[$i_0][0] > $$char_class_1[$i_1][0]  | 
| 
413
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   ? $$char_class_0[$i_0][0]  | 
| 
414
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   : $$char_class_1[$i_1][0]  | 
| 
415
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ;  | 
| 
416
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
417
 | 
213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
313
 | 
                 my $min_high;  | 
| 
418
 | 
213
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
488
 | 
                 if ($$char_class_0[$i_0][1] <= $$char_class_1[$i_1][1]) {  | 
| 
419
 | 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
186
 | 
                     $min_high = $$char_class_0[$i_0][1];  | 
| 
420
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     # interval_0 < next interval_1  | 
| 
421
 | 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
155
 | 
                     $interval_0_done = 1;  | 
| 
422
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
423
 | 
213
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
423
 | 
                 if ($$char_class_1[$i_1][1] <= $$char_class_0[$i_0][1]) {  | 
| 
424
 | 
157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
183
 | 
                     $min_high = $$char_class_1[$i_1][1];  | 
| 
425
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     # interval_1 < next interval_0  | 
| 
426
 | 
157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
156
 | 
                     $interval_1_done = 1;  | 
| 
427
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
428
 | 
213
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
374
 | 
                 if ($interval_0_done) { $i_0++; }  | 
| 
 
 | 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
152
 | 
    | 
| 
429
 | 
213
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
337
 | 
                 if ($interval_1_done) { $i_1++; }  | 
| 
 
 | 
157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
144
 | 
    | 
| 
430
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
431
 | 
213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1144
 | 
                 push(@$inter, [$max_low, $min_high]);  | 
| 
432
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
433
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
434
 | 
354
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
1567
 | 
         return $cc_cache{ join(',', 1, map{@$_} @$inter) } ||=$inter;  | 
| 
 
 | 
213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1493
 | 
    | 
| 
435
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
436
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
437
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
438
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub cc_match {  | 
| 
439
 | 
86306
 | 
 
 | 
 
 | 
  
86306
  
 | 
  
0
  
 | 
119665
 | 
     my ($char_class, $c) = @_;  | 
| 
440
 | 
86306
 | 
 
 | 
 
 | 
 
 | 
 
 | 
116617
 | 
     for my $interval (@$char_class) {  | 
| 
441
 | 
98032
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
245638
 | 
         if ($c < $$interval[0])  {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
442
 | 
34606
 | 
 
 | 
 
 | 
 
 | 
 
 | 
110937
 | 
             return 0;  | 
| 
443
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
444
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif ($c <= $$interval[1]) {  | 
| 
445
 | 
19484
 | 
 
 | 
 
 | 
 
 | 
 
 | 
61529
 | 
             return 1;  | 
| 
446
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
447
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
448
 | 
32216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
97559
 | 
     return 0;  | 
| 
449
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
450
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
451
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item cc_union(@char_classes)  | 
| 
452
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
453
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns the unique C<$char_class> containing all characters of all given  | 
| 
454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C<@char_classes>.  | 
| 
455
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
456
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
457
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
458
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub cc_union {  | 
| 
459
 | 
1234
 | 
 
 | 
 
 | 
  
1234
  
 | 
  
1
  
 | 
1879
 | 
     return interval_list_to_cc( [ map { map { [@$_] } @$_ } @_ ] );  | 
| 
 
 | 
3216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4083
 | 
    | 
| 
 
 | 
3380
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9697
 | 
    | 
| 
460
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
461
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
462
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub cc_is_subset {  | 
| 
463
 | 
53
 | 
 
 | 
 
 | 
  
53
  
 | 
  
0
  
 | 
71
 | 
     my ($char_class_0, $char_class_1) = @_;  | 
| 
464
 | 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
80
 | 
     for my $c ( map { @$_ } @$char_class_0 ) {  | 
| 
 
 | 
54
 | 
 
 | 
 
 | 
 
 | 
 
 | 
160
 | 
    | 
| 
465
 | 
86
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
252
 | 
         if (!cc_match($char_class_1, $c)) { return 0; }  | 
| 
 
 | 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
100
 | 
    | 
| 
466
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
467
 | 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
95
 | 
     return 1;  | 
| 
468
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
469
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
470
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # $to_perlre (boolean)  | 
| 
471
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #     true : perl syntax  | 
| 
472
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #     false: ere syntax  | 
| 
473
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub cc_to_regex {  | 
| 
474
 | 
554
 | 
 
 | 
 
 | 
  
554
  
 | 
  
0
  
 | 
752
 | 
     my ($char_class, $to_perlre) = (@_, 0);  | 
| 
475
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
476
 | 
554
 | 
 
 | 
 
 | 
 
 | 
 
 | 
532
 | 
     my @items;  | 
| 
477
 | 
554
 | 
  
 50
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
2166
 | 
     if (@$char_class && $$char_class[0][0] < 0) {  | 
| 
478
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         if ($$char_class[0][0] == -2) {  | 
| 
479
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             if ($$char_class[0][1] == -1) {  | 
| 
480
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 push(@items, '^');  | 
| 
481
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
482
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
483
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 push(@items, '^$');  | 
| 
484
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
485
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
486
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
487
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             if ($$char_class[0][1] == -2) {  | 
| 
488
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 push(@items, '$');  | 
| 
489
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
490
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
491
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 push(@items, '^', '$');  | 
| 
492
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
493
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
494
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $char_class = [@$char_class[1..$#$char_class]];  | 
| 
495
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
496
 | 
554
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
936
 | 
     if (@$char_class) {  | 
| 
497
 | 
514
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
2020
 | 
         if (  | 
| 
 
 | 
 
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
498
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             @$char_class == 1  | 
| 
499
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          && $$char_class[0][0] == $$char_class[0][1]  | 
| 
500
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ) {  | 
| 
501
 | 
500
 | 
 
 | 
 
 | 
 
 | 
 
 | 
809
 | 
             my $c = chr($$char_class[0][0]);  | 
| 
502
 | 
500
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
687
 | 
             if ($to_perlre) {  | 
| 
503
 | 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
84
 | 
                 push(@items, quotemeta($c))  | 
| 
504
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
505
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
506
 | 
459
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1548
 | 
                 push(@items,  | 
| 
507
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     $c =~ /$ERE_litteral/o  | 
| 
508
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   ? $c  | 
| 
509
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   : "\\$c"  | 
| 
510
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 );  | 
| 
511
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
512
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
513
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif (  | 
| 
514
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             @$char_class == 1  | 
| 
515
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          && $$char_class[0][0] == 0  | 
| 
516
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          && $$char_class[0][1] == MAX_CHAR  | 
| 
517
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ) {  | 
| 
518
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
             push(@items, '.');  | 
| 
519
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
520
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif ($$char_class[$#$char_class][1] == MAX_CHAR) {  | 
| 
521
 | 
6
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
15
 | 
             if ($to_perlre) {  | 
| 
522
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
                 push(@items,  | 
| 
523
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     '[^' . _cc_to_perlre(cc_neg($char_class)) . ']'  | 
| 
524
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 );  | 
| 
525
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
526
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
527
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
                 push(@items,  | 
| 
528
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     '[^' . _cc_to_ere(cc_neg($char_class)) . ']'  | 
| 
529
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 );  | 
| 
530
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
531
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
532
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
533
 | 
4
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
11
 | 
             if ($to_perlre) {  | 
| 
534
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
                 push(@items, '[' . _cc_to_perlre($char_class) . ']');  | 
| 
535
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
536
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
537
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
                 push(@items, '[' . _cc_to_ere($char_class) . ']');  | 
| 
538
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
539
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
540
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
541
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
542
 | 
554
 | 
 
 | 
 
 | 
 
 | 
 
 | 
564
 | 
     my $regex;  | 
| 
543
 | 
554
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1219
 | 
     if (@items == 0) {  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
544
 | 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
188
 | 
         return '';  | 
| 
545
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
546
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     elsif (@items == 1) {  | 
| 
547
 | 
514
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2885
 | 
         return $items[0];  | 
| 
548
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
549
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else {  | 
| 
550
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         if ($to_perlre) {  | 
| 
551
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             return '(?:' . join('|', @items) . ')';  | 
| 
552
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
553
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
554
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             return '(' . join('|', @items) . ')';  | 
| 
555
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
556
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
557
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
558
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
559
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _cc_to_ere {  | 
| 
560
 | 
8
 | 
 
 | 
 
 | 
  
8
  
 | 
 
 | 
14
 | 
     my ($char_class) = @_;  | 
| 
561
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
     my $has_minus;  | 
| 
562
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $has_r_bracket;  | 
| 
563
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $ere = join('',  | 
| 
564
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         map {  | 
| 
565
 | 
8
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
18
 | 
             if ($$_[0] == $$_[1]) {  | 
| 
 
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
33
 | 
    | 
| 
566
 | 
5
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
21
 | 
                 if ($$_[0] == ord('-')) {  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
567
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     $has_minus = 1;  | 
| 
568
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     '';  | 
| 
569
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
570
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 elsif ($$_[0] == ord(']')) {  | 
| 
571
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     $has_r_bracket = 1;  | 
| 
572
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     '';  | 
| 
573
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
574
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 else {  | 
| 
575
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
                     chr($$_[0]);  | 
| 
576
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
577
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
578
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
579
 | 
5
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
32
 | 
                 if (  | 
| 
580
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     $$_[0] == ord('-')  | 
| 
581
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                  || $$_[0] == ord(']')  | 
| 
582
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ) {  | 
| 
583
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     if ($$_[0] == ord('-')) {  | 
| 
584
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                         $has_minus = 1;  | 
| 
585
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
586
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     else {  | 
| 
587
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                         $has_r_bracket = 1;  | 
| 
588
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
589
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     if ($$_[1] == $$_[0] + 1) {  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
590
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                         chr($$_[1]);  | 
| 
591
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
592
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     elsif ($$_[1] == $$_[0] + 2) {  | 
| 
593
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                         chr($$_[0] + 1) . chr($$_[1]);  | 
| 
594
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
595
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     else {  | 
| 
596
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                         chr($$_[0] + 1) . '-' . chr($$_[1]);  | 
| 
597
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
598
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
599
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 else {  | 
| 
600
 | 
5
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
19
 | 
                     if ($$_[1] == $$_[0] + 1) {  | 
| 
601
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
                         chr($$_[0]) . chr($$_[1]);  | 
| 
602
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
603
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     else {  | 
| 
604
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
                         chr($$_[0]) . '-' . chr($$_[1]);  | 
| 
605
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
606
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
607
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
608
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
609
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         @$char_class  | 
| 
610
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     );  | 
| 
611
 | 
8
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
26
 | 
     if ($has_minus) { $ere .= '-'; }  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
612
 | 
8
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
20
 | 
     if ($has_r_bracket) { $ere = "]$ere"; }  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
613
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
30
 | 
     return $ere;  | 
| 
614
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
615
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
616
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _cc_to_perlre {  | 
| 
617
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
 
 | 
4
 | 
     my ($char_class) = @_;  | 
| 
618
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return join('',  | 
| 
619
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         map {  | 
| 
620
 | 
2
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
3
 | 
             if ($$_[0] == $$_[1]) {  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
    | 
| 
621
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
                 my $c = chr($$_[0]);  | 
| 
622
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
16
 | 
                 $c =~ /$PERLRE_char_class_special/o ?  "\\$c" : $c;  | 
| 
623
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
624
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
625
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
                 my ($c1, $c2) = (chr($$_[0]), chr($$_[1]));  | 
| 
626
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
22
 | 
                 ($c1 =~ /$PERLRE_char_class_special/o ?  "\\$c1" : $c1)  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
627
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               . ($$_[0] + 1 < $$_[1] ? '-' : '')  | 
| 
628
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               . ($c2 =~ /$PERLRE_char_class_special/o ?  "\\$c2" : $c2)  | 
| 
629
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
630
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         } @$char_class  | 
| 
631
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     );  | 
| 
632
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
633
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
634
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
635
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ##############################################################################  | 
| 
636
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # $nfa  | 
| 
637
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ##############################################################################  | 
| 
638
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
639
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
640
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
641
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 Nfa  | 
| 
642
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
643
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
644
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 WARNING: C routines are destructive,  | 
| 
645
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the C<$nfa> references given as arguments will not be valid C<$nfa> any more.  | 
| 
646
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Furthermore, the same C<$nfa> reference must be used only once as argument.  | 
| 
647
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 For instance, for concatenating a C<$nfa> with itself, C  | 
| 
648
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 does not work; instead, C must be used;  | 
| 
649
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 or even C if the original  | 
| 
650
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C<$nfa> is to be used further.  | 
| 
651
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
652
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $nfa = [ $state_0, $state_1, ... ]  | 
| 
653
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
654
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $state = [  | 
| 
655
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       $accepting  | 
| 
656
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     , $transitions  | 
| 
657
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ]  | 
| 
658
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
659
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $transitions = [  | 
| 
660
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       [ $char_class_0 => $state_ind_0 ]  | 
| 
661
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     , [ $char_class_1 => $state_ind_1 ]  | 
| 
662
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     , ...  | 
| 
663
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ]  | 
| 
664
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
665
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 In the same C<$transition>, C<$state_ind_i> are pairwise different and are  | 
| 
666
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 valid indexes of C<@$nfa>. There is exactly one initial state at index 0.  | 
| 
667
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
668
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
669
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
670
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item C  | 
| 
671
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
672
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Maps each of the given C<@nfas> to a clone.  | 
| 
673
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
674
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
675
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
676
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub nfa_clone {  | 
| 
677
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return  | 
| 
678
 | 
266
 | 
 
 | 
 
 | 
  
266
  
 | 
  
1
  
 | 
432
 | 
         map { [  | 
| 
 
 | 
541
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1666
 | 
    | 
| 
679
 | 
288
 | 
 
 | 
 
 | 
 
 | 
 
 | 
368
 | 
             map { [  | 
| 
680
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $$_[0]                         # accepting  | 
| 
681
 | 
711
 | 
 
 | 
 
 | 
 
 | 
 
 | 
996
 | 
               , [ map { [ @$_ ] } @{$$_[1]} ]  # transitions  | 
| 
 
 | 
711
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1758
 | 
    | 
| 
682
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ] }  | 
| 
683
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             @$_  # states of the $nfa  | 
| 
684
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ] } @_   # list of $nfas  | 
| 
685
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ;  | 
| 
686
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
687
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
688
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _transitions_is_subset {  | 
| 
689
 | 
604
 | 
 
 | 
 
 | 
  
604
  
 | 
 
 | 
855
 | 
     my ($transitions_0, $transitions_1, $state_ind_map) = @_;  | 
| 
690
 | 
705
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
4722
 | 
     my %state_ind_to_t_1  | 
| 
691
 | 
604
 | 
 
 | 
 
 | 
 
 | 
 
 | 
896
 | 
         = map {(  | 
| 
692
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $state_ind_map && exists($$state_ind_map{$$_[1]})  | 
| 
693
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               ? $$state_ind_map{$$_[1]}  | 
| 
694
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               : $$_[1]  | 
| 
695
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            => $_  | 
| 
696
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           )}  | 
| 
697
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           @$transitions_1  | 
| 
698
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ;  | 
| 
699
 | 
604
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1211
 | 
     for my $t_0 (@$transitions_0) {  | 
| 
700
 | 
184
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
921
 | 
         my $state_ind_0  | 
| 
701
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           = $state_ind_map && exists($$state_ind_map{$$t_0[1]})  | 
| 
702
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           ? $$state_ind_map{$$t_0[1]}  | 
| 
703
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           : $$t_0[1]  | 
| 
704
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ;  | 
| 
705
 | 
184
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
629
 | 
         if (!exists($state_ind_to_t_1{$state_ind_0})) { return 0; }  | 
| 
 
 | 
158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
680
 | 
    | 
| 
706
 | 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
33
 | 
         my $t_1 = $state_ind_to_t_1{$state_ind_0};  | 
| 
707
 | 
26
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
68
 | 
         if (!cc_is_subset($$t_0[0], $$t_1[0])) { return 0; }  | 
| 
 
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
63
 | 
    | 
| 
708
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
709
 | 
430
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1366
 | 
     return 1;  | 
| 
710
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
711
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
712
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # The keys of %$state_ind_to_equiv are state_inds of @$nfa to be removed.  | 
| 
713
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # State indexes in transitions are remapped following %$state_ind_to_equiv.  | 
| 
714
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # A state index mapped to itself denotes an unreachable state index.  | 
| 
715
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _nfa_shrink_equiv {  | 
| 
716
 | 
733
 | 
 
 | 
 
 | 
  
733
  
 | 
 
 | 
984
 | 
     my ($nfa, $state_ind_to_equiv) = @_;  | 
| 
717
 | 
733
 | 
 
 | 
 
 | 
 
 | 
 
 | 
847
 | 
     my $i = 0;  | 
| 
718
 | 
3099
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7130
 | 
     my %compact_map  | 
| 
719
 | 
4282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9459
 | 
       = map { ($_ => $i++) }  | 
| 
720
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my @active_state_inds  | 
| 
721
 | 
733
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1436
 | 
       = grep { !exists($$state_ind_to_equiv{$_}) }  | 
| 
722
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         (0..$#$nfa)  | 
| 
723
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ;  | 
| 
724
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
725
 | 
733
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1597
 | 
     my %equiv_index_to_char_classes;  | 
| 
726
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my %plain_index_to_char_class;  | 
| 
727
 | 
733
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3402
 | 
     for (@$nfa = @$nfa[@active_state_inds]) {  | 
| 
728
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
729
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # update $state_ind  | 
| 
730
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         #      -> $compact_map{$state_ind}  | 
| 
731
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         #      or $compact_map{$$state_ind_to_equiv{$state_ind}}  | 
| 
732
 | 
3099
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4619
 | 
         %equiv_index_to_char_classes = ();  | 
| 
733
 | 
3099
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3959
 | 
         %plain_index_to_char_class = ();  | 
| 
734
 | 
3099
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2905
 | 
         for (@{$$_[1]}) { # transition list  | 
| 
 
 | 
3099
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5160
 | 
    | 
| 
735
 | 
5788
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
10700
 | 
             if (exists($$state_ind_to_equiv{$$_[1]})) {  | 
| 
736
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 push(  | 
| 
737
 | 
868
 | 
 
 | 
 
 | 
 
 | 
 
 | 
847
 | 
                     @{$equiv_index_to_char_classes{  | 
| 
738
 | 
868
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3774
 | 
                         $$_[1]  | 
| 
739
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                       = $compact_map{$$state_ind_to_equiv{$$_[1]}}  | 
| 
740
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }}  | 
| 
741
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   , $$_[0]  | 
| 
742
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 );  | 
| 
743
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
744
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
745
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $plain_index_to_char_class{  | 
| 
746
 | 
4920
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12515
 | 
                     $$_[1]  | 
| 
747
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   = $compact_map{$$_[1]}  | 
| 
748
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 } = $$_[0];  | 
| 
749
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
750
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
751
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # merge char_classes to the same state index  | 
| 
752
 | 
3099
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
7524
 | 
         if (keys(%equiv_index_to_char_classes)) {  | 
| 
753
 | 
727
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3038
 | 
             @{$$_[1]} = ((  | 
| 
 
 | 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
203
 | 
    | 
| 
754
 | 
739
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2899
 | 
                 map {[  | 
| 
755
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     exists($equiv_index_to_char_classes{$_})  | 
| 
756
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                       ? cc_union(  | 
| 
757
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             $plain_index_to_char_class{$_}  | 
| 
758
 | 
579
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1905
 | 
                           , @{$equiv_index_to_char_classes{$_}}  | 
| 
759
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         )  | 
| 
760
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                       : $plain_index_to_char_class{$_}  | 
| 
761
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   , $_  | 
| 
762
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ]}  | 
| 
763
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 keys(%plain_index_to_char_class)  | 
| 
764
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ) , (  | 
| 
765
 | 
826
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1762
 | 
                 map {[  | 
| 
766
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
31
 | 
                     @{$equiv_index_to_char_classes{$_}} == 1  | 
| 
767
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                       ? $equiv_index_to_char_classes{$_}[0]  | 
| 
768
 | 
739
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
739
 | 
                       : cc_union(@{$equiv_index_to_char_classes{$_}})  | 
| 
769
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   , $_  | 
| 
770
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ]}  | 
| 
771
 | 
727
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1491
 | 
                 grep { !exists($plain_index_to_char_class{$_}) }  | 
| 
772
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 keys(%equiv_index_to_char_classes)  | 
| 
773
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ))  | 
| 
774
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
775
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
776
 | 
733
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6405
 | 
     return $nfa;  | 
| 
777
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
778
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
779
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item C  | 
| 
780
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
781
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Precondition: C<0 <= $min && ( $max eq '' || $min <= $max)>  | 
| 
782
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
783
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns C<$out_nfa>, a C<$nfa> computed from C<$in_nfa>.  | 
| 
784
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
785
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Let L be the language accepted by C<$in_nfa> and M the language accepted  | 
| 
786
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 by C<$out_nfa>. Then a word m belongs to M if and only if and ordered list  | 
| 
787
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (l_1, ..., l_r) of words belonging to L exists such that:  | 
| 
788
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
789
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $min <= r  | 
| 
790
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 and ($max eq '' or r <= $max)  | 
| 
791
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 and m is the concatenation of (l_1, ..., l_r)  | 
| 
792
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
793
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Examples with C<$in_nfa> being a C<$nfa> accepting C<'^a$'>:  | 
| 
794
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
795
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   nfa_quant($in_nfa, 2, 4 ) accepts '^a{2,4}$'  | 
| 
796
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   nfa_quant($in_nfa, 0, '') accepts '^a{0,}$' (i.e. '^a*$')  | 
| 
797
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
798
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
799
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
800
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub nfa_quant {  | 
| 
801
 | 
245
 | 
 
 | 
 
 | 
  
245
  
 | 
  
1
  
 | 
368
 | 
     my ($nfa, $min, $max) = @_;  | 
| 
802
 | 
245
 | 
 
 | 
 
 | 
 
 | 
 
 | 
244
 | 
     my @quant_parts;  | 
| 
803
 | 
245
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
473
 | 
     if ($min > 0) {  | 
| 
804
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
46
 | 
         push(@quant_parts, nfa_concat(nfa_clone(($nfa) x $min)));  | 
| 
805
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
806
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
807
 | 
245
 | 
 
 | 
 
 | 
 
 | 
 
 | 
270
 | 
     my $optional_part;  | 
| 
808
 | 
245
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
681
 | 
     if (  | 
| 
809
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         length($max) == 0  | 
| 
810
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      || $max > $min  | 
| 
811
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ) {  | 
| 
812
 | 
244
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
581
 | 
         if ($$nfa[0][0]) {  | 
| 
 
 | 
455
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1029
 | 
    | 
| 
813
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # initial state already accepting  | 
| 
814
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # (a*)?  | 
| 
815
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
             ($optional_part) = nfa_clone($nfa);  | 
| 
816
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
817
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif (  | 
| 
818
 | 
579
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1083
 | 
             !grep { $$_[1] == 0 }  | 
| 
819
 | 
579
 | 
 
 | 
 
 | 
 
 | 
 
 | 
513
 | 
             map { @{$$_[1]} }  | 
| 
820
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             @$nfa  | 
| 
821
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ) {  | 
| 
822
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # initial state not accepting and unreachable  | 
| 
823
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # (a)?  | 
| 
824
 | 
234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
536
 | 
             ($optional_part) = nfa_clone($nfa);  | 
| 
825
 | 
234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
424
 | 
             $$optional_part[0][0] = 1;  | 
| 
826
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
827
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
828
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # initial state not accepting and reachable  | 
| 
829
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # (a*b)?  | 
| 
830
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
             $optional_part = [  | 
| 
831
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # additional root initial state accepting state  | 
| 
832
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 [  | 
| 
833
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     1                                            # accepting  | 
| 
834
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
                   , [ map {[$$_[0] , $$_[1]+1]} @{$$nfa[0][1]} ] # transitions  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
    | 
| 
835
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ]  | 
| 
836
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # original states with offset 1  | 
| 
837
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
               , map { [  | 
| 
838
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     $$_[0]                                       # accepting  | 
| 
839
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
                   , [ map {[ $$_[0], $$_[1]+1 ]} @{$$_[1]} ]     # transitions  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
    | 
| 
840
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ] }  | 
| 
841
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 @$nfa  | 
| 
842
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              ];  | 
| 
843
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
844
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
845
 | 
245
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
464
 | 
     if (length($max) == 0) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
846
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
847
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # starify optional part  | 
| 
848
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
849
 | 
270
 | 
 
 | 
 
 | 
 
 | 
 
 | 
863
 | 
         my %root_index_to_char_class  | 
| 
850
 | 
232
 | 
 
 | 
 
 | 
 
 | 
 
 | 
374
 | 
           = map { ($$_[1] => $$_[0]) }  | 
| 
851
 | 
232
 | 
 
 | 
 
 | 
 
 | 
 
 | 
364
 | 
             @{$$optional_part[0][1]}  | 
| 
852
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ;  | 
| 
853
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
854
 | 
232
 | 
 
 | 
 
 | 
 
 | 
 
 | 
428
 | 
         my $state_ind_to_equiv = {};  | 
| 
855
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # loop over accepting state indexes  | 
| 
856
 | 
232
 | 
 
 | 
 
 | 
 
 | 
 
 | 
432
 | 
         for (grep { $$optional_part[$_][0] } (1..$#$optional_part)) {  | 
| 
 
 | 
325
 | 
 
 | 
 
 | 
 
 | 
 
 | 
640
 | 
    | 
| 
857
 | 
232
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
875
 | 
             if (  | 
| 
858
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 _transitions_is_subset(  | 
| 
859
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     $$optional_part[$_][1]  | 
| 
860
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   , $$optional_part[0][1]  | 
| 
861
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   , { $_ => 0 }  | 
| 
862
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 )  | 
| 
863
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ) {  | 
| 
864
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # Accepting states whose transitions are  | 
| 
865
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # a subset of the transitions of the initial state  | 
| 
866
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # are equivalent to the initial state.  | 
| 
867
 | 
228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
931
 | 
                 $$state_ind_to_equiv{$_} = 0;  | 
| 
868
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
869
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
870
 | 
4
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
7
 | 
                 if (  | 
| 
871
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
                     grep { exists($root_index_to_char_class{$_}) }  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
    | 
| 
872
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
25
 | 
                     map { $$_[1] }  | 
| 
873
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     @{$$optional_part[$_][1]}  | 
| 
874
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ) {  | 
| 
875
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     # merge char classes to the same state index  | 
| 
876
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
                     my %new_index_to_char_classes  | 
| 
877
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
                       = map { ($$_[1] => [$$_[0]]) }  | 
| 
878
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
                         @{$$optional_part[$_][1]}  | 
| 
879
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     ;  | 
| 
880
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
                     for (keys(%root_index_to_char_class)) {  | 
| 
881
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
                         push (  | 
| 
882
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
                             @{$new_index_to_char_classes{$_}}  | 
| 
883
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                           , $root_index_to_char_class{$_}  | 
| 
884
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         );  | 
| 
885
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
886
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
                     @{$$optional_part[$_][1]}  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
    | 
| 
887
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
                       = map {[  | 
| 
888
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
                             @{$new_index_to_char_classes{$_}} == 1  | 
| 
889
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                               ? $new_index_to_char_classes{$_}[0]  | 
| 
890
 | 
4
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
3
 | 
                               : cc_union(@{$new_index_to_char_classes{$_}})  | 
| 
891
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                           , $_  | 
| 
892
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         ]}  | 
| 
893
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         keys(%new_index_to_char_classes)  | 
| 
894
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     ;  | 
| 
895
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
896
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 else {  | 
| 
897
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
                     push(  | 
| 
898
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
                         @{$$optional_part[$_][1]}  | 
| 
899
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
                       , map { [@$_] } @{$$optional_part[0][1]}  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
    | 
| 
900
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     );  | 
| 
901
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
902
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
903
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
904
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
905
 | 
232
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
752
 | 
         push(@quant_parts,  | 
| 
906
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             keys(%$state_ind_to_equiv)  | 
| 
907
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           ?  _nfa_shrink_equiv($optional_part, $state_ind_to_equiv)  | 
| 
908
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           :  $optional_part  | 
| 
909
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         );  | 
| 
910
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
911
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     elsif ($max > $min) {  | 
| 
912
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
913
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # concatenate optional_part $max - $min times  | 
| 
914
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
915
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
31
 | 
         push(@quant_parts, _nfa_concat(1, nfa_clone(  | 
| 
916
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ($optional_part) x ($max - $min)  | 
| 
917
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         )));  | 
| 
918
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
919
 | 
245
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
788
 | 
     return @quant_parts == 1 ? $quant_parts[0] : nfa_concat(@quant_parts);  | 
| 
920
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
921
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
922
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item C  | 
| 
923
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
924
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns C<$out_nfa>, a C<$nfa> computed from C<@in_nfas>.  | 
| 
925
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
926
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Let r be the number of given C<@in_nfas>,  | 
| 
927
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L_i the language accepted by C<$in_nfas[$i]> and M the language accepted  | 
| 
928
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 by C<$out_nfa>. Then a word m belongs to M if and only if an ordered list  | 
| 
929
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (l_1, ..., l_r) of words exists, l_i belonging to L_i, such that  | 
| 
930
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 m is the concatenation of (l_1, ..., l_r).  | 
| 
931
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
932
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
933
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
934
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub nfa_concat {  | 
| 
935
 | 
245
 | 
 
 | 
 
 | 
  
245
  
 | 
  
1
  
 | 
531
 | 
     _nfa_concat(0, @_);  | 
| 
936
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
937
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
938
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _nfa_concat {  | 
| 
939
 | 
257
 | 
 
 | 
 
 | 
  
257
  
 | 
 
 | 
474
 | 
     my $starifying = shift(@_);  | 
| 
940
 | 
257
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
586
 | 
     if (!@_) {  | 
| 
941
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         return [[1, []]];  # neutral element: accepting empty string  | 
| 
942
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
943
 | 
257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
322
 | 
     my $concat = shift(@_);  | 
| 
944
 | 
257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
478
 | 
     my @accepting_state_inds = grep { $$concat[$_][0] } (0..$#$concat);  | 
| 
 
 | 
643
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1179
 | 
    | 
| 
945
 | 
257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
378
 | 
     my $state_ind_to_equiv = {};  | 
| 
946
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my (  | 
| 
947
 | 
257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
324
 | 
         $nfa  | 
| 
948
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       , $state  | 
| 
949
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       , $init_state_ind  | 
| 
950
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       , $init_reachable  | 
| 
951
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       , $init_equiv_reachable  | 
| 
952
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       , $init_skipped  | 
| 
953
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       , @new_accepting_state_inds  | 
| 
954
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     );  | 
| 
955
 | 
257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
591
 | 
     while (@_) {  | 
| 
956
 | 
346
 | 
 
 | 
 
 | 
 
 | 
 
 | 
400
 | 
         $nfa = shift(@_);  | 
| 
957
 | 
346
 | 
 
 | 
 
 | 
 
 | 
 
 | 
378
 | 
         $init_state_ind = @$concat;  | 
| 
958
 | 
346
 | 
 
 | 
 
 | 
 
 | 
 
 | 
314
 | 
         $init_reachable = 0;  | 
| 
959
 | 
346
 | 
 
 | 
 
 | 
 
 | 
 
 | 
521
 | 
         $init_equiv_reachable = 0;  | 
| 
960
 | 
346
 | 
 
 | 
 
 | 
 
 | 
 
 | 
354
 | 
         $init_skipped = 0;  | 
| 
961
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         @new_accepting_state_inds  | 
| 
962
 | 
385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
873
 | 
           = map { $_ + $init_state_ind }  | 
| 
 
 | 
823
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1210
 | 
    | 
| 
963
 | 
346
 | 
 
 | 
 
 | 
 
 | 
 
 | 
596
 | 
             grep { $$nfa[$_][0] }  | 
| 
964
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             (0..$#$nfa)  | 
| 
965
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ;  | 
| 
966
 | 
346
 | 
 
 | 
 
 | 
 
 | 
 
 | 
535
 | 
         for (map { @{$$_[1]} } @$nfa) {  | 
| 
 
 | 
823
 | 
 
 | 
 
 | 
 
 | 
 
 | 
760
 | 
    | 
| 
 
 | 
823
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1588
 | 
    | 
| 
967
 | 
813
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
2572
 | 
             ($$_[1] += $init_state_ind) == $init_state_ind  | 
| 
968
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          && ($init_reachable ||= 1);  | 
| 
969
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
970
 | 
346
 | 
 
 | 
 
 | 
 
 | 
 
 | 
607
 | 
         for my $acc_ind (@accepting_state_inds) {  | 
| 
971
 | 
417
 | 
 
 | 
 
 | 
 
 | 
 
 | 
604
 | 
             $state = $$concat[$acc_ind];  | 
| 
972
 | 
417
 | 
 
 | 
 
 | 
 
 | 
 
 | 
570
 | 
             $$state[0] = $$nfa[0][0];  | 
| 
973
 | 
417
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
392
 | 
             if (  | 
| 
 
 | 
 
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
974
 | 
417
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2051
 | 
                  @{$$state[1]} <= 1  | 
| 
975
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              && _transitions_is_subset(  | 
| 
976
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     $$state[1]   # transitions of the old accepting state  | 
| 
977
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   , $$nfa[0][1]  # transitions of the new initial state  | 
| 
978
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   , { $acc_ind => $init_state_ind }  | 
| 
979
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 )  | 
| 
980
 | 
245
 | 
 
 | 
 
 | 
 
 | 
 
 | 
924
 | 
             ) {  | 
| 
981
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
982
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # Old accepting states whose transitions are  | 
| 
983
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # a subset of the transitions of the new initial state  | 
| 
984
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # are equivalent to the initial state.  | 
| 
985
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #  | 
| 
986
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # Note that such an old accepting states can have either  | 
| 
987
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # no transition or one self-transition;  | 
| 
988
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # the case that the old accepting state has no transition  | 
| 
989
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # occurs very often.  | 
| 
990
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #  | 
| 
991
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # %$state_ind_to_equiv gets extended by  | 
| 
992
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #  | 
| 
993
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #     $acc_ind_ (old accepting state) => $init_state_ind  | 
| 
994
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #  | 
| 
995
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # But the keys and the values of %$state_ind_to_equiv  | 
| 
996
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # MUST remain disjoint (except for pairs key = val).  | 
| 
997
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #  | 
| 
998
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # Since $init_state_index are growing  | 
| 
999
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # and $acc_ind < $init_state_index:  | 
| 
1000
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #   - the new value does not belong the the keys  | 
| 
1001
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #   - the new key may belong to the vals,  | 
| 
1002
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #     such values must be updated.  | 
| 
1003
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #  | 
| 
1004
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # Example:  | 
| 
1005
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #     0 => 1   ( %$state_ind_to_equiv )  | 
| 
1006
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #     1 => 2   ( $acc_ind => $init_state_index )  | 
| 
1007
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # %$state_ind_to_equiv must be updated to  | 
| 
1008
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #     0 => 2  | 
| 
1009
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # before being extended by  | 
| 
1010
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #     1 => 2  | 
| 
1011
 | 
202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
556
 | 
                 for (grep { $_ == $acc_ind } values(%$state_ind_to_equiv)) {  | 
| 
 
 | 
111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
347
 | 
    | 
| 
1012
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
                     $_ = $init_state_ind;  | 
| 
1013
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
1014
 | 
202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
386
 | 
                 $$state_ind_to_equiv{$acc_ind} = $init_state_ind;  | 
| 
1015
 | 
202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
508
 | 
                 $init_equiv_reachable = 1;  | 
| 
1016
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1017
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             elsif (  | 
| 
1018
 | 
215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
403
 | 
                 (grep { $$_[1] == $init_state_ind } @{$$nfa[0][1]})  | 
| 
 
 | 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
53
 | 
    | 
| 
1019
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              && cc_is_subset(  | 
| 
1020
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1021
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     # char_class of the self-transition  | 
| 
1022
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     # of the new initial state  | 
| 
1023
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     (  | 
| 
1024
 | 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
62
 | 
                         map { $$_[0] }  | 
| 
1025
 | 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
50
 | 
                         grep { $$_[1] == $init_state_ind }  | 
| 
1026
 | 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
51
 | 
                         @{$$nfa[0][1]}  | 
| 
1027
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     )  | 
| 
1028
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1029
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     # char_class of the self-transition  | 
| 
1030
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     # of the old accepting state  | 
| 
1031
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   , (  | 
| 
1032
 | 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
91
 | 
                         map { $$_[0] }  | 
| 
1033
 | 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
40
 | 
                         grep { $$_[1] == $acc_ind }  | 
| 
1034
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         @{$$state[1]}  | 
| 
1035
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     )  | 
| 
1036
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 )  | 
| 
1037
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ) {  | 
| 
1038
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # If the self-transitions of the new init state are  | 
| 
1039
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # a subset of the transitions of the old accepting state,  | 
| 
1040
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # the new state is not needed for looping;  | 
| 
1041
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # the transition to the new init state can be skipped.  | 
| 
1042
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #  | 
| 
1043
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # Example 1:  | 
| 
1044
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #     [ab]*a*  | 
| 
1045
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #     the state for a* is superfluous.  | 
| 
1046
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # Example 2:  | 
| 
1047
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #     ( x[ab]* | y[ac]* | z[bc]* ) a* c  | 
| 
1048
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #     the state for a* is only needed after [bc]*  | 
| 
1049
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #     the regular expression is equivalent to:  | 
| 
1050
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #     [ab]*c | y[ac]*c | z[bc]*a*c  | 
| 
1051
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #  | 
| 
1052
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # Note that this one-letter-star optimization is  | 
| 
1053
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # probably not very useful for practical purposes;  | 
| 
1054
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # more general equivalences like (abc)*(abc)* ~ (abc)*  | 
| 
1055
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # are not catched up, while the focused use cases  | 
| 
1056
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # of prefix and suffix recognition need no star at all.  | 
| 
1057
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #  | 
| 
1058
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # It is merely a toy optimization for solving some exercices  | 
| 
1059
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # of an introductory course on regexes.  | 
| 
1060
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #  | 
| 
1061
 | 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
31
 | 
                 push(@{$$state[1]},  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
    | 
| 
1062
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
38
 | 
                     map { [ @$_ ] }  | 
| 
1063
 | 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
                     grep { $$_[1] != $init_state_ind}  | 
| 
1064
 | 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
                     @{$$nfa[0][1]})  | 
| 
1065
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ;  | 
| 
1066
 | 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
44
 | 
                 $init_skipped++;  | 
| 
1067
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1068
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
1069
 | 
201
 | 
 
 | 
 
 | 
 
 | 
 
 | 
374
 | 
                 push(@{$$state[1]},  | 
| 
 
 | 
229
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1113
 | 
    | 
| 
1070
 | 
201
 | 
 
 | 
 
 | 
 
 | 
 
 | 
334
 | 
                     map { [ @$_ ] }  | 
| 
1071
 | 
201
 | 
 
 | 
 
 | 
 
 | 
 
 | 
212
 | 
                     @{$$nfa[0][1]})  | 
| 
1072
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ;  | 
| 
1073
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1074
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1075
 | 
346
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
2068
 | 
         if (  | 
| 
 
 | 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1076
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             !$init_reachable && !$init_equiv_reachable  | 
| 
1077
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          || $init_skipped == @accepting_state_inds  | 
| 
1078
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ) {  | 
| 
1079
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # for being removed by _nfa_shrink_equiv()  | 
| 
1080
 | 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
311
 | 
             $$state_ind_to_equiv{$init_state_ind} = $init_state_ind;  | 
| 
1081
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1082
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1083
 | 
346
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1059
 | 
         if (!$$nfa[0][0]) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1084
 | 
168
 | 
 
 | 
 
 | 
 
 | 
 
 | 
309
 | 
             @accepting_state_inds = ();  | 
| 
1085
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1086
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif ($starifying) {  | 
| 
1087
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # $starifying set for optimizing x{n,m}.  | 
| 
1088
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # The old accepting states are redundant,  | 
| 
1089
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # since reacheble iff the newer ones are.  | 
| 
1090
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
             for (@accepting_state_inds[1..$#accepting_state_inds]) {  | 
| 
1091
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
                 $$concat[$_][0] = 0;  | 
| 
1092
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1093
 | 
5
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
14
 | 
             if (!$init_reachable) {  | 
| 
1094
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
                 $$nfa[0][0] = 0;  | 
| 
1095
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
                 shift(@new_accepting_state_inds);  | 
| 
1096
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1097
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
             @accepting_state_inds = (0);  | 
| 
1098
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1099
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
1100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             @accepting_state_inds  | 
| 
1101
 | 
173
 | 
 
 | 
 
 | 
 
 | 
 
 | 
242
 | 
               = grep { !exists($$state_ind_to_equiv{$_}) }  | 
| 
 
 | 
190
 | 
 
 | 
 
 | 
 
 | 
 
 | 
557
 | 
    | 
| 
1102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 @accepting_state_inds  | 
| 
1103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ;  | 
| 
1104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1106
 | 
346
 | 
 
 | 
 
 | 
 
 | 
 
 | 
810
 | 
         push(@$concat, @$nfa);  | 
| 
1107
 | 
346
 | 
 
 | 
 
 | 
 
 | 
 
 | 
781
 | 
         push(@accepting_state_inds, @new_accepting_state_inds);  | 
| 
1108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1109
 | 
257
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
599
 | 
     if (keys(%$state_ind_to_equiv)) {  | 
| 
1110
 | 
211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
420
 | 
         return _nfa_shrink_equiv($concat, $state_ind_to_equiv);  | 
| 
1111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else {  | 
| 
1113
 | 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
164
 | 
         return $concat;  | 
| 
1114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item C  | 
| 
1118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns C<$out_nfa>, a C<$nfa> computed from C<@in_nfas>.  | 
| 
1120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C<$out_nfa> accepts a word w if and only if at least one of C<@in_nfas>  | 
| 
1122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 accepts w.  | 
| 
1123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
1125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Adds the total number of states  | 
| 
1127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub nfa_union {  | 
| 
1128
 | 
125
 | 
 
 | 
 
 | 
  
125
  
 | 
  
1
  
 | 
294
 | 
     my $union = [[0, []]]; # root, neutral element: accepting nothing  | 
| 
1129
 | 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
185
 | 
     my $state_ind_to_equiv = {};  | 
| 
1130
 | 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
215
 | 
     my $first_trivial_accepting_state_ind;  | 
| 
1131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my (  | 
| 
1132
 | 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
142
 | 
         $nfa  | 
| 
1133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       , $init_state_ind  | 
| 
1134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       , $init_reachable  | 
| 
1135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       , $orig_state  | 
| 
1136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     );  | 
| 
1137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1138
 | 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
217
 | 
     for $nfa (@_) {  | 
| 
1139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # merge initial $accepting  | 
| 
1141
 | 
293
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
1243
 | 
         $$union[0][0] ||= $$nfa[0][0];  | 
| 
1142
 | 
293
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
651
 | 
         if (@$nfa == 1 && @{$$nfa[0][1]} == 0) {  | 
| 
 
 | 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
149
 | 
    | 
| 
1143
 | 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
48
 | 
             next;  | 
| 
1144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # Must be skipped because such a trivial state  | 
| 
1145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # would be removed below (!$init_reachable)  | 
| 
1146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # although it may be the $first_trivial_accepting state.  | 
| 
1147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             #  | 
| 
1148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # On the other side, a well defined $nfa  | 
| 
1149
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # with a single state and with a non-empty transition list  | 
| 
1150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # must loop to itself, thus $init_reachable.  | 
| 
1151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1153
 | 
262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
293
 | 
         $init_state_ind = @$union;  | 
| 
1154
 | 
262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
254
 | 
         $init_reachable = 0;  | 
| 
1155
 | 
262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
512
 | 
         for (0..$#$nfa) {  | 
| 
1156
 | 
1106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1404
 | 
             $orig_state = $$nfa[$_];  | 
| 
1157
 | 
1106
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
2549
 | 
             if (  | 
| 
1158
 | 
269
 | 
 
 | 
 
 | 
 
 | 
 
 | 
815
 | 
                 $$orig_state[0]          # accepting  | 
| 
1159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              && !@{$$orig_state[1]}      # trivial  | 
| 
1160
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ) {  | 
| 
1161
 | 
214
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
364
 | 
                 if (defined($first_trivial_accepting_state_ind)) {  | 
| 
1162
 | 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
342
 | 
                     $$state_ind_to_equiv{$_ + $init_state_ind}  | 
| 
1163
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                       = $first_trivial_accepting_state_ind;  | 
| 
1164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
1165
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 else {  | 
| 
1166
 | 
99
 | 
 
 | 
 
 | 
 
 | 
 
 | 
315
 | 
                     $first_trivial_accepting_state_ind  | 
| 
1167
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   = $_ + $init_state_ind;  | 
| 
1168
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
1169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
1171
 | 
892
 | 
 
 | 
 
 | 
 
 | 
 
 | 
783
 | 
                 for ( @{$$orig_state[1]} ) { # transition list  | 
| 
 
 | 
892
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1362
 | 
    | 
| 
1172
 | 
1124
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
3336
 | 
                     ($$_[1] += $init_state_ind) == $init_state_ind  | 
| 
1173
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                  && ($init_reachable ||= 1);  | 
| 
1174
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
1175
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1176
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         };  | 
| 
1177
 | 
262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
683
 | 
         push(@$union, @$nfa);  | 
| 
1178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # merge initial $transitions  | 
| 
1180
 | 
262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
273
 | 
         push(@{$$union[0][1]}, map { [ @$_ ] } @{$$nfa[0][1]});  | 
| 
 
 | 
262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
389
 | 
    | 
| 
 
 | 
294
 | 
 
 | 
 
 | 
 
 | 
 
 | 
812
 | 
    | 
| 
 
 | 
262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
394
 | 
    | 
| 
1181
 | 
262
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
712
 | 
         if (!$init_reachable) {  | 
| 
1182
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # for being removed by _nfa_shrink_equiv()  | 
| 
1183
 | 
237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
618
 | 
             $$state_ind_to_equiv{$init_state_ind} = $init_state_ind;  | 
| 
1184
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     };  | 
| 
1186
 | 
125
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
391
 | 
     if (keys(%$state_ind_to_equiv)) {  | 
| 
1187
 | 
116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
226
 | 
         return _nfa_shrink_equiv($union, $state_ind_to_equiv);  | 
| 
1188
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1189
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else {  | 
| 
1190
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
39
 | 
         return $union;  | 
| 
1191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1193
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1194
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
1195
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1196
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my %cached_cc_inter2;  | 
| 
1197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1198
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item C  | 
| 
1199
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1200
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns C<$out_nfa>, a $C<$nfa> computed from C<@in_nfas>.  | 
| 
1201
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C<$out_nfa> accepts a word w if and only if each of C<@in_nfas> accepts w.  | 
| 
1203
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
1205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     sub nfa_inter {  | 
| 
1207
 | 
16
 | 
 
 | 
 
 | 
  
16
  
 | 
  
1
  
 | 
804
 | 
         my ($inter, @nfas) = sort { @$a <=> @$b } @_;  | 
| 
 
 | 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
62
 | 
    | 
| 
1208
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
30
 | 
         for (@nfas) { $inter = nfa_inter2($inter, $_); }  | 
| 
 
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
58
 | 
    | 
| 
1209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         return  | 
| 
1210
 | 
16
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
123
 | 
             $inter  | 
| 
1211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          || [[1, [[$cc_any, 0]]]] # neutral element: accepting anything  | 
| 
1212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ;  | 
| 
1213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Multiplies the total number of states  | 
| 
1216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     sub nfa_inter2 {  | 
| 
1217
 | 
18
 | 
 
 | 
 
 | 
  
18
  
 | 
  
0
  
 | 
28
 | 
         my ($nfa_0, $nfa_1) = @_;  | 
| 
1218
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1219
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # computed states  | 
| 
1220
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
70
 | 
         my @todo = (0);  | 
| 
1221
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
26
 | 
         my %todo_seen; # set of state_inds  | 
| 
1222
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my %done;      # key-subset of %todo_seen (values are states)  | 
| 
1223
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # After the following while, %done are %todo_seen the same set.  | 
| 
1224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1225
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # dead end detection  | 
| 
1226
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         my %path_tr;  | 
| 
1227
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         my @cur_livings;  | 
| 
1228
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         my %livings;  | 
| 
1229
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1230
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # tmp variables  | 
| 
1231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my (  | 
| 
1232
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $from_state_ind, $to_state_ind  | 
| 
1233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           , $nfa_0_accepting, $nfa_0_transitions  | 
| 
1234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           , $nfa_1_accepting, $nfa_1_transitions  | 
| 
1235
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           , $t_0, $t_1  | 
| 
1236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           , $char_class  | 
| 
1237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           , $accepting  | 
| 
1238
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           , @keys_path_to_state_ind  | 
| 
1239
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         );  | 
| 
1240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1241
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
         my $nfa_1_len = @$nfa_1;  | 
| 
1242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1243
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
49
 | 
         while (@todo) {  | 
| 
1244
 | 
831
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1513
 | 
             $todo_seen{$from_state_ind} = $from_state_ind = pop(@todo);  | 
| 
1245
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1246
 | 
831
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1478
 | 
             ($nfa_0_accepting, $nfa_0_transitions)  | 
| 
1247
 | 
831
 | 
 
 | 
 
 | 
 
 | 
 
 | 
854
 | 
               = @{$$nfa_0[$from_state_ind / $nfa_1_len]}; # i-th state  | 
| 
1248
 | 
831
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1349
 | 
             ($nfa_1_accepting, $nfa_1_transitions)  | 
| 
1249
 | 
831
 | 
 
 | 
 
 | 
 
 | 
 
 | 
864
 | 
               = @{$$nfa_1[$from_state_ind % $nfa_1_len]}; # j-th state  | 
| 
1250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1251
 | 
831
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1142
 | 
             my $new_transitions = [];  | 
| 
1252
 | 
831
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1056
 | 
             for $t_0 (@$nfa_0_transitions) {  | 
| 
1253
 | 
1742
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2079
 | 
                 for $t_1 (@$nfa_1_transitions) {  | 
| 
1254
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1255
 | 
4160
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
18349
 | 
                     if (  | 
| 
1256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         (  | 
| 
1257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             $char_class  | 
| 
1258
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                               = $cached_cc_inter2{$$t_0[0]}{$$t_1[0]}  | 
| 
1259
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             ||= &cc_inter2($$t_0[0], $$t_1[0])  | 
| 
1260
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         ) != $cc_none  | 
| 
1261
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     ) {  | 
| 
1262
 | 
1942
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4736
 | 
                         push (@$new_transitions, [  | 
| 
1263
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             $char_class  | 
| 
1264
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                           , $to_state_ind = $$t_0[1] * $nfa_1_len + $$t_1[1]  | 
| 
1265
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         ]);  | 
| 
1266
 | 
1942
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
4093
 | 
                         if (!exists($todo_seen{$to_state_ind})) {  | 
| 
1267
 | 
813
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3309
 | 
                             push(@todo,  | 
| 
1268
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                 $todo_seen{$to_state_ind} = $to_state_ind);  | 
| 
1269
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         }  | 
| 
1270
 | 
1942
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5748
 | 
                         $path_tr{$to_state_ind}{$from_state_ind} = undef;  | 
| 
1271
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
1272
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
1273
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1274
 | 
831
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
2358
 | 
             if ($accepting = $nfa_0_accepting && $nfa_1_accepting) {  | 
| 
1275
 | 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
37
 | 
                 push(@cur_livings, $from_state_ind);  | 
| 
1276
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1277
 | 
831
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3085
 | 
             $done{$from_state_ind} = [  | 
| 
1278
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $accepting  | 
| 
1279
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               , $new_transitions  | 
| 
1280
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ];  | 
| 
1281
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # remove dead ends  | 
| 
1284
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
37
 | 
         %livings = map { ($_ => $_) } @cur_livings;  | 
| 
 
 | 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
89
 | 
    | 
| 
1285
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
53
 | 
         while (@cur_livings) {  | 
| 
1286
 | 
611
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1522
 | 
             push(@cur_livings,  | 
| 
1287
 | 
1518
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2670
 | 
                 map { $livings{$_} = $_ }  | 
| 
1288
 | 
630
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1437
 | 
                 grep { !exists($livings{$_}) }  | 
| 
1289
 | 
630
 | 
 
 | 
 
 | 
 
 | 
 
 | 
634
 | 
                 keys(%{$path_tr{pop(@cur_livings)}})  | 
| 
1290
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            );  | 
| 
1291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1292
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1293
 | 
18
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
59
 | 
         if (keys(%livings) == 0) {  | 
| 
1294
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             return [[0, []]];  | 
| 
1295
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1296
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1297
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # compact renumbering  | 
| 
1298
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
         my @sorted_keys;  | 
| 
1299
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my $inter = [@done{  | 
| 
1300
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
185
 | 
             @sorted_keys = sort { $a <=> $b } keys(%livings)  | 
| 
 
 | 
2879
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3145
 | 
    | 
| 
1301
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }];  | 
| 
1302
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
61
 | 
         my $i = 0;  | 
| 
1303
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
40
 | 
         my %compact_map = map { ($_ => $i++) } @sorted_keys;  | 
| 
 
 | 
630
 | 
 
 | 
 
 | 
 
 | 
 
 | 
947
 | 
    | 
| 
1304
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1305
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
74
 | 
         for (  | 
| 
1306
 | 
630
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1582
 | 
             map {  | 
| 
1307
 | 
1638
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3114
 | 
                 @{$$_[1]}  | 
| 
1308
 | 
630
 | 
 
 | 
 
 | 
 
 | 
 
 | 
841
 | 
               = grep { exists($compact_map{$$_[1]}) }  | 
| 
1309
 | 
630
 | 
 
 | 
 
 | 
 
 | 
 
 | 
559
 | 
                 @{$$_[1]}  | 
| 
1310
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1311
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             @$inter  | 
| 
1312
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ) {  | 
| 
1313
 | 
1518
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1898
 | 
             $$_[1] = $compact_map{$$_[1]};  | 
| 
1314
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1315
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1000
 | 
         return $inter;  | 
| 
1316
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1317
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1318
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1319
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub nfa_resolve_anchors {  | 
| 
1320
 | 
10
 | 
 
 | 
 
 | 
  
10
  
 | 
  
0
  
 | 
17
 | 
     my ($nfa) = @_;  | 
| 
1321
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1322
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # find state_inds reachable from the root by begin-anchor transitions  | 
| 
1323
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
26
 | 
     my %begs = (0 => undef);  | 
| 
1324
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
     my @todo = (0);  | 
| 
1325
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
30
 | 
     while (defined(my $beg = pop(@todo))) {  | 
| 
1326
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
         for (  | 
| 
1327
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
             map { $$_[1] }               # state_ind  | 
| 
 
 | 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
85
 | 
    | 
| 
1328
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
29
 | 
             grep { $$_[0][0][1] == -1 }  # begin-achor  | 
| 
1329
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             @{$$nfa[$beg][1]}  | 
| 
1330
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ) {  | 
| 
1331
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
7
 | 
             if (!exists($begs{$_})) {  | 
| 
1332
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
                 $begs{$_} = undef;  | 
| 
1333
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
                 push(@todo, $_);  | 
| 
1334
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1335
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1336
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1337
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1338
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # find state_inds leading to an accepting state by end-anchor transitions  | 
| 
1339
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
     my @cur_livings;  | 
| 
1340
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my %path_tr;  | 
| 
1341
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
29
 | 
     for my $from_state_ind (0..$#$nfa) {  | 
| 
1342
 | 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
41
 | 
         for (@{$$nfa[$from_state_ind][1]}) {  | 
| 
 
 | 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
68
 | 
    | 
| 
1343
 | 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
147
 | 
             $path_tr{$$_[1]}{$from_state_ind} = $$_[0];  | 
| 
1344
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1345
 | 
37
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
91
 | 
         if ($$nfa[$from_state_ind][0]) {  | 
| 
1346
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
24
 | 
             push(@cur_livings, $from_state_ind);  | 
| 
1347
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1348
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1349
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
     my %livings = map {($_ => undef)} @cur_livings;  | 
| 
 
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
32
 | 
    | 
| 
1350
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
35
 | 
     while (defined(my $end = pop(@cur_livings))) {  | 
| 
1351
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
         for (  | 
| 
1352
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
127
 | 
             grep {  | 
| 
1353
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
29
 | 
                 $path_tr{$end}{$_}[0][0] == -3;  # end-anchor  | 
| 
1354
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1355
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             keys(%{$path_tr{$end}})  | 
| 
1356
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ) {  | 
| 
1357
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             if (!exists($livings{$_})) {  | 
| 
1358
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 push(@cur_livings, $livings{$_} = undef);  | 
| 
1359
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 $$nfa[$_][0] = 1;  | 
| 
1360
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1361
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1362
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1363
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1364
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
     my $accept_empty;  | 
| 
1365
 | 
10
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
23
 | 
     if (!($accept_empty = scalar(grep {$$nfa[$_][0]} keys(%begs)) ? 1 : 0)) {  | 
| 
 
 | 
12
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
58
 | 
    | 
| 
1366
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # special case for $^ for and the like: empty string matches  | 
| 
1367
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
         my %begends;  | 
| 
1368
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
         my @todo = keys(%begs);  | 
| 
1369
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
134
 | 
         while (defined(my $begend = pop(@todo))) {  | 
| 
1370
 | 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
26
 | 
             for (  | 
| 
1371
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
35
 | 
                 map { $$_[1] }             # state_ind  | 
| 
 
 | 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
86
 | 
    | 
| 
1372
 | 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
43
 | 
                 grep { $$_[0][0][1] < 0 }  # achor  | 
| 
1373
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 @{$$nfa[$begend][1]}  | 
| 
1374
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ) {  | 
| 
1375
 | 
16
 | 
  
 50
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
66
 | 
                 if (!exists($begs{$_}) && !exists($begends{$_})) {  | 
| 
1376
 | 
14
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
43
 | 
                     if ($$nfa[$_][0]) {  | 
| 
1377
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
                         $accept_empty = 1;  | 
| 
1378
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
                         @todo = ();  | 
| 
1379
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
                         last;  | 
| 
1380
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
1381
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
                     $begends{$_} = undef;  | 
| 
1382
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
44
 | 
                     push(@todo, $_);  | 
| 
1383
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
1384
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1386
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1387
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1388
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # remove anchors  | 
| 
1389
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
25
 | 
     for my $from_state_ind (  | 
| 
 
 | 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
92
 | 
    | 
| 
1390
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         grep {  | 
| 
1391
 | 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
56
 | 
             grep { $$_[0][0][0] < 0 }  # anchor  | 
| 
1392
 | 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
39
 | 
             @{$$nfa[$_][1]}            # transitions  | 
| 
1393
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1394
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         (0..$#$nfa)  | 
| 
1395
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ) {  | 
| 
1396
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
27
 | 
         my $state = $$nfa[$from_state_ind];  | 
| 
1397
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $$state[1] = [  | 
| 
1398
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             map {  | 
| 
1399
 | 
30
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
61
 | 
                 if ($$_[0][0][0] >= 0) {  | 
| 
 
 | 
20
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
39
 | 
    | 
| 
 
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
29
 | 
    | 
| 
1400
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
23
 | 
                     $_;  | 
| 
1401
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
1402
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 elsif ( @{$$_[0]} == 1 ) {  | 
| 
1403
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
119
 | 
                     delete($path_tr{$$_[1]}{$from_state_ind});  | 
| 
1404
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
64
 | 
                     ();  | 
| 
1405
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
1406
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 else {  | 
| 
1407
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     $path_tr{$$_[1]}{$from_state_ind}  | 
| 
1408
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   = $$_[0]  | 
| 
1409
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                   = interval_list_to_cc(@{$$_[0]}[1..$#{$$_[0]}]);  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
1410
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     $_;  | 
| 
1411
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
1412
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1413
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
             @{$$state[1]}  # transitions  | 
| 
1414
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ];  | 
| 
1415
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1416
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1417
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # ensure that the initial state cannot be reached  | 
| 
1418
 | 
10
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
26
 | 
     if (@{$$nfa[0][1]}) {  | 
| 
 
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
29
 | 
    | 
| 
1419
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # proper init transitions (clone of the initial state needed)  | 
| 
1420
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1421
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # replace transitions to the initial state  | 
| 
1422
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # with transitions to the cloned initial state  | 
| 
1423
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
67
 | 
         my $new_state_ind = @$nfa;  | 
| 
1424
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
         my $clone_reachable;  | 
| 
1425
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
         for my $transition (  | 
| 
 
 | 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
46
 | 
    | 
| 
1426
 | 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
49
 | 
             grep { $$_[1] == 0 }  # to initial state  | 
| 
1427
 | 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
26
 | 
             map { @{$$_[1]} }     # transitions  | 
| 
1428
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             @$nfa  | 
| 
1429
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ) {  | 
| 
1430
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
             $$transition[1] = $new_state_ind;  | 
| 
1431
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
             $clone_reachable = 1;  | 
| 
1432
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1433
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1434
 | 
8
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
25
 | 
         if ($clone_reachable) {  | 
| 
1435
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
             my $new_state = [  | 
| 
1436
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $$nfa[0][0]  | 
| 
1437
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
               , [@{$$nfa[0][1]}]  | 
| 
1438
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ];  | 
| 
1439
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
             push(@$nfa, $new_state);  | 
| 
1440
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
             $path_tr{$new_state_ind} = $path_tr{0};  | 
| 
1441
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
             for (@{$$nfa[0][1]}) {  | 
| 
 
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
    | 
| 
1442
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
40
 | 
                 $path_tr{$$_[1]}{$new_state_ind} = $$_[0];  | 
| 
1443
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1444
 | 
8
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
30
 | 
             if ($$nfa[0][0]) {  | 
| 
1445
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 $livings{$new_state_ind} = undef;  | 
| 
1446
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1447
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1448
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1449
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else {  | 
| 
1450
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # no proper init transitions  | 
| 
1451
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1452
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # drop transitions to the initial state  | 
| 
1453
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
         for my $state (@$nfa) {  | 
| 
1454
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
             @{$$state[1]} = grep { $$_[1] != 0 } @{$$state[1]};  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
    | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
    | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
    | 
| 
1455
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1456
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1457
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
     delete($path_tr{0});  | 
| 
1458
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1459
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # extend intial state (merge all initial states of %begs)  | 
| 
1460
 | 
10
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
26
 | 
     if (keys(%begs) > 1) {  | 
| 
1461
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
         my %state_ind_to_char_classes;  | 
| 
1462
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
         for ( map { @{$$nfa[$_][1]} } keys(%begs) ) {  | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
 
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
    | 
| 
1463
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
             push(@{$state_ind_to_char_classes{$$_[1]}}, $$_[0]);  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
23
 | 
    | 
| 
1464
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1465
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
         @{$$nfa[0][1]}  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
    | 
| 
1466
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
           = map { [  | 
| 
1467
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
                 $path_tr{$_}{0} = cc_union(@{$state_ind_to_char_classes{$_}})  | 
| 
1468
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               , int($_)  | 
| 
1469
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ] }  | 
| 
1470
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             keys(%state_ind_to_char_classes)  | 
| 
1471
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ;  | 
| 
1472
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1473
 | 
10
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
34
 | 
     if ($$nfa[0][0] = $accept_empty) {  | 
| 
1474
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
         $livings{0} = undef;  | 
| 
1475
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1476
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1477
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # remove unreachable states  | 
| 
1478
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
     my @cur_reachables = (0);  | 
| 
1479
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
     my %reachables = (0 => 0);  | 
| 
1480
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
31
 | 
     while (@cur_reachables) {  | 
| 
1481
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
34
 | 
         my $from_state_ind = shift(@cur_reachables);  | 
| 
1482
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
25
 | 
         for (  | 
| 
1483
 | 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
50
 | 
             map { $$_[1] }  | 
| 
 
 | 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
64
 | 
    | 
| 
1484
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             @{$$nfa[$from_state_ind][1]}  | 
| 
1485
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ) {  | 
| 
1486
 | 
24
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
72
 | 
             if (!exists($reachables{$_})) {  | 
| 
1487
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
54
 | 
                 push(@cur_reachables, $reachables{$_} = $_);  | 
| 
1488
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1489
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1490
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1491
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1492
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # remove dead ends  | 
| 
1493
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
23
 | 
     delete(@livings{grep { !exists($reachables{$_}) } keys(%livings)});  | 
| 
 
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
49
 | 
    | 
| 
1494
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
24
 | 
     @cur_livings = keys(%livings);  | 
| 
1495
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
27
 | 
     while (@cur_livings) {  | 
| 
1496
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
         for (  | 
| 
1497
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
32
 | 
             grep { exists($reachables{$_}) }  | 
| 
 
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
43
 | 
    | 
| 
1498
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             keys(%{$path_tr{pop(@cur_livings)}})  | 
| 
1499
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ) {  | 
| 
1500
 | 
8
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
24
 | 
             if (!exists($livings{$_})) {  | 
| 
1501
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
                 push(@cur_livings, $_);  | 
| 
1502
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
                 $livings{$_} = undef;  | 
| 
1503
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1504
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1505
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1506
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1507
 | 
10
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
41
 | 
     if (keys(%livings) == 0) {  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1508
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
46
 | 
         return [[0, []]];  | 
| 
1509
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1510
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     elsif (keys(%livings) == @$nfa) {  | 
| 
1511
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         return $nfa;  | 
| 
1512
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1513
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1514
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # compact renumbering  | 
| 
1515
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
     my @sorted_keys = sort { $a <=> $b } keys(%livings);  | 
| 
 
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
    | 
| 
1516
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
     my $i = 0;  | 
| 
1517
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
25
 | 
     my %compact_map = map { ($_ => $i++) } @sorted_keys;  | 
| 
 
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
42
 | 
    | 
| 
1518
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1519
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return [  | 
| 
1520
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
         map {  | 
| 
1521
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
             @{$$_[1]}  | 
| 
 
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
    | 
| 
1522
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               = map {  | 
| 
1523
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
                     $$_[1] = $compact_map{$$_[1]};  | 
| 
1524
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
                     $_;  | 
| 
1525
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
1526
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
                 grep { exists($compact_map{$$_[1]}) }  | 
| 
1527
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
                 @{$$_[1]}  | 
| 
1528
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ;  | 
| 
1529
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
76
 | 
             $_;  | 
| 
1530
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1531
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         @$nfa[@sorted_keys]  | 
| 
1532
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ];  | 
| 
1533
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1534
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1535
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item C  | 
| 
1536
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1537
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns true if and only if C<$in_nfa> accepts C<$str>.  | 
| 
1538
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1539
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
1540
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1541
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub nfa_match {  | 
| 
1542
 | 
19
 | 
 
 | 
 
 | 
  
19
  
 | 
  
1
  
 | 
7373
 | 
     my ($nfa, $str) = @_;  | 
| 
1543
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1544
 | 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
48
 | 
     my %state_inds = (0 => 0);  | 
| 
1545
 | 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
74
 | 
     for my $c ( map { ord($_) } split('', $str) ) {  | 
| 
 
 | 
119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
152
 | 
    | 
| 
1546
 | 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
292
 | 
         %state_inds  | 
| 
1547
 | 
105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
170
 | 
           = map { $$_[1] => $$_[1] }  | 
| 
1548
 | 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
168
 | 
             grep { cc_match($$_[0], $c) }  # matching transition list  | 
| 
1549
 | 
119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
218
 | 
             map { @{$$_[1]} }              # all transition list  | 
| 
 
 | 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
77
 | 
    | 
| 
1550
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             @$nfa[values(%state_inds)]     # current states  | 
| 
1551
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ;  | 
| 
1552
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1553
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1554
 | 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
112
 | 
     return grep { $$_[0] } @$nfa[values(%state_inds)];  | 
| 
 
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
70
 | 
    | 
| 
1555
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1556
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1557
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub nfa_dump {  | 
| 
1558
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
0
  
 | 
0
 | 
     my ($nfa) = @_;  | 
| 
1559
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $dump = '';  | 
| 
1560
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     for my $i (0..$#$nfa) {  | 
| 
1561
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         $dump  | 
| 
1562
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          .= "$i:"  | 
| 
1563
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           . ($$nfa[$i][0] ? " (accepting)" : "")  | 
| 
1564
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           . "\n"  | 
| 
1565
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ;  | 
| 
1566
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         for my $transition (@{$$nfa[$i][1]}) {  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
1567
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $dump  | 
| 
1568
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          .= "    "  | 
| 
1569
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           . cc_to_regex($$transition[0]) . " => $$transition[1]\n";  | 
| 
1570
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1571
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1572
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return $dump;  | 
| 
1573
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1574
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1575
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item C  | 
| 
1576
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1577
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns true if and only if the labeled graphs represented by C<$nfa1>  | 
| 
1578
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 and C<$nfa2> are isomorph. While isomorph C<$nfa>s accept the same language,  | 
| 
1579
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the converse is not true.  | 
| 
1580
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1581
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
1582
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1583
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub nfa_isomorph {  | 
| 
1584
 | 
89
 | 
 
 | 
 
 | 
  
89
  
 | 
  
1
  
 | 
1085
 | 
     my ($nfa1, $nfa2) = @_;  | 
| 
1585
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1586
 | 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
229
 | 
     my %nfa1_nfa2_indexes = (0 => 0);  | 
| 
1587
 | 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
181
 | 
     my %nfa2_nfa1_indexes = (0 => 0);  | 
| 
1588
 | 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
182
 | 
     my @nfa1_index_todo = (0);  | 
| 
1589
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1590
 | 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
255
 | 
     while (defined(my $nfa1_index = pop(@nfa1_index_todo))) {  | 
| 
1591
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1592
 | 
470
 | 
 
 | 
 
 | 
 
 | 
 
 | 
731
 | 
         my $state1 = $$nfa1[$nfa1_index];  | 
| 
1593
 | 
470
 | 
 
 | 
 
 | 
 
 | 
 
 | 
672
 | 
         my $state2 = $$nfa2[$nfa1_nfa2_indexes{$nfa1_index}];  | 
| 
1594
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1595
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # accepting  | 
| 
1596
 | 
470
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
989
 | 
         if ($$state1[0] != $$state2[0]) {  | 
| 
1597
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             return 0;  | 
| 
1598
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1599
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1600
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # transitions  | 
| 
1601
 | 
470
 | 
 
 | 
 
 | 
 
 | 
 
 | 
449
 | 
         my $transitions1 = [sort { $$a[0] <=> $$b[0] } @{$$state1[1]}];  | 
| 
 
 | 
1847
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2885
 | 
    | 
| 
 
 | 
470
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1415
 | 
    | 
| 
1602
 | 
470
 | 
 
 | 
 
 | 
 
 | 
 
 | 
656
 | 
         my $transitions2 = [sort { $$a[0] <=> $$b[0] } @{$$state2[1]}];  | 
| 
 
 | 
1851
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2785
 | 
    | 
| 
 
 | 
470
 | 
 
 | 
 
 | 
 
 | 
 
 | 
898
 | 
    | 
| 
1603
 | 
470
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
1073
 | 
         if (@$transitions1 != @$transitions2) {  | 
| 
1604
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             return 0;  | 
| 
1605
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1606
 | 
470
 | 
 
 | 
 
 | 
 
 | 
 
 | 
888
 | 
         for my $i (0..$#$transitions1) {  | 
| 
1607
 | 
1393
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1390
 | 
             my ($cc1, $next_index1) = @{$$transitions1[$i]};  | 
| 
 
 | 
1393
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2691
 | 
    | 
| 
1608
 | 
1393
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1465
 | 
             my ($cc2, $next_index2) = @{$$transitions2[$i]};  | 
| 
 
 | 
1393
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2039
 | 
    | 
| 
1609
 | 
1393
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
3502
 | 
             if ($cc1 ne $cc2) {  | 
| 
1610
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 return 0;  | 
| 
1611
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1612
 | 
1393
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2640
 | 
             if (exists($nfa1_nfa2_indexes{$next_index1})) {  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1613
 | 
1012
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
4028
 | 
                 if ($nfa1_nfa2_indexes{$next_index1} != $next_index2) {  | 
| 
1614
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     return 0;  | 
| 
1615
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
1616
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1617
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             elsif (exists($nfa2_nfa1_indexes{$next_index2})) {  | 
| 
1618
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # $nfa2_nfa1_indexes{$next_index2} != $next_index1  | 
| 
1619
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # because  | 
| 
1620
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #   - !exists($nfa1_nfa2_indexes{$next_index1})  | 
| 
1621
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #   - $nfa1_nfa2_indexes and $nfa2_nfa1_indexes  | 
| 
1622
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #     are reverse to each other by construction  | 
| 
1623
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 return 0;  | 
| 
1624
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1625
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
1626
 | 
381
 | 
 
 | 
 
 | 
 
 | 
 
 | 
769
 | 
                 $nfa1_nfa2_indexes{$next_index1} = $next_index2;  | 
| 
1627
 | 
381
 | 
 
 | 
 
 | 
 
 | 
 
 | 
591
 | 
                 $nfa2_nfa1_indexes{$next_index2} = $next_index1;  | 
| 
1628
 | 
381
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1135
 | 
                 push(@nfa1_index_todo, $next_index1);  | 
| 
1629
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1630
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1631
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1632
 | 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1179
 | 
     return 1;  | 
| 
1633
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1634
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1635
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1636
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ##############################################################################  | 
| 
1637
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # $dfa  | 
| 
1638
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ##############################################################################  | 
| 
1639
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1640
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # input X:  | 
| 
1641
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #     Arbitrary list of intervals.  | 
| 
1642
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # output Y:  | 
| 
1643
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #     List of paarwise disjoint intervals spanning the same subset such that  | 
| 
1644
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #     for any intersections/unions of intervals of X  | 
| 
1645
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #     an equal union of intervals of Y exists.  | 
| 
1646
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #     In short, all boundaries of X are preserved.  | 
| 
1647
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
1648
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Motivation:  | 
| 
1649
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #     nfas use character classes as alphabet (instead of single code points).  | 
| 
1650
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #     dfa operations needs a common refinement of sets of character classes.  | 
| 
1651
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
1652
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Example:  | 
| 
1653
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #     interval_cases( [ [0, 5], [2, 8] ] )  | 
| 
1654
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #   = [ [0, 1], [2, 5], [6, 8] ]  | 
| 
1655
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
1656
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #     X: |0 1 2 3 4 5|  | 
| 
1657
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #            |2 3 4 5 6 7 8|  | 
| 
1658
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #     Y: |0 1|2 3 4 5|6 7 8|  | 
| 
1659
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
1660
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub interval_cases {  | 
| 
1661
 | 
1209
 | 
 
 | 
 
 | 
  
1209
  
 | 
  
0
  
 | 
1419
 | 
     my ($interval_list) = @_;  | 
| 
1662
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my @sorted  | 
| 
1663
 | 
31795
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
58266
 | 
       = sort {  | 
| 
1664
 | 
1209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2974
 | 
             $$a[0] <=> $$b[0]  | 
| 
1665
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          || $$b[1] <=> $$a[1]  | 
| 
1666
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1667
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         @$interval_list  | 
| 
1668
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ;  | 
| 
1669
 | 
1209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1229
 | 
     my %los;  | 
| 
1670
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my %his;  | 
| 
1671
 | 
1209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1399
 | 
     my $i = 0;  | 
| 
1672
 | 
1209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2482
 | 
     while ($i < @sorted) {  | 
| 
1673
 | 
3404
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6801
 | 
         $los{$sorted[$i][0]} = undef;  | 
| 
1674
 | 
3404
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5292
 | 
         $his{$sorted[$i][1]} = undef;  | 
| 
1675
 | 
3404
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3864
 | 
         my $j = $i + 1;  | 
| 
1676
 | 
3404
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
20574
 | 
         while (  | 
| 
 
 | 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1677
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $j < @sorted  | 
| 
1678
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          && $sorted[$j][0] == $sorted[$i][0]  | 
| 
1679
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          && $sorted[$j][1] == $sorted[$i][1]  | 
| 
1680
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ) {  | 
| 
1681
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # $sorted[$i]  ---------  | 
| 
1682
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # $sorted[$j]  ---------  | 
| 
1683
 | 
1347
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6644
 | 
             $j++;  | 
| 
1684
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1685
 | 
3404
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
14800
 | 
         while (  | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1686
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $j < @sorted  | 
| 
1687
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          && $sorted[$j][0] == $sorted[$i][0]  | 
| 
1688
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          && $sorted[$j][1] < $sorted[$i][1]  | 
| 
1689
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ) {  | 
| 
1690
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # $sorted[$i]  ---------  | 
| 
1691
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # $sorted[$j]  -----  | 
| 
1692
 | 
1240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2069
 | 
             $his{$sorted[$j][1]} = undef;  | 
| 
1693
 | 
1240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1886
 | 
             $los{$sorted[$j][1]+1} = undef;  | 
| 
1694
 | 
1240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7359
 | 
             $j++;  | 
| 
1695
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1696
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # $sorted[$j][0] > $sorted[$i][0]  | 
| 
1697
 | 
3404
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
14141
 | 
         while (  | 
| 
1698
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $j < @sorted  | 
| 
1699
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          && $sorted[$j][1] < $sorted[$i][1]  | 
| 
1700
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ) {  | 
| 
1701
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # $sorted[$i]  ---------  | 
| 
1702
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # $sorted[$j]    -----  | 
| 
1703
 | 
2674
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4201
 | 
             $his{$sorted[$j][0]-1} = undef;  | 
| 
1704
 | 
2674
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3394
 | 
             $los{$sorted[$j][0]} = undef;  | 
| 
1705
 | 
2674
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3183
 | 
             $his{$sorted[$j][1]} = undef;  | 
| 
1706
 | 
2674
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3617
 | 
             $los{$sorted[$j][1]+1} = undef;  | 
| 
1707
 | 
2674
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11252
 | 
             $j++;  | 
| 
1708
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1709
 | 
3404
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
11948
 | 
         if (  | 
| 
1710
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             $j < @sorted  | 
| 
1711
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          && $sorted[$j][0] <= $sorted[$i][1]  | 
| 
1712
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ) {  | 
| 
1713
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             #     $sorted[$j][0] > $sorted[$i][0]  | 
| 
1714
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             #  && $sorted[$j][0] <= $sorted[$i][1]  | 
| 
1715
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             #  && $sorted[$j][1] >= $sorted[$i][1]  | 
| 
1716
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             #  | 
| 
1717
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # $sorted[$i]  ---------  | 
| 
1718
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # $sorted[$j]         -----  | 
| 
1719
 | 
343
 | 
 
 | 
 
 | 
 
 | 
 
 | 
544
 | 
             $his{$sorted[$j][0]-1} = undef;  | 
| 
1720
 | 
343
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
786
 | 
             if ($sorted[$i][1] != $sorted[$j][1]) {  | 
| 
1721
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 $los{$sorted[$i][1]+1} = undef;  | 
| 
1722
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1723
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1724
 | 
3404
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7809
 | 
         $i = $j;  | 
| 
1725
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1726
 | 
1209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3859
 | 
     my @sorted_los = sort( { $a <=> $b } keys(%los));  | 
| 
 
 | 
5992
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8546
 | 
    | 
| 
1727
 | 
1209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3488
 | 
     my @sorted_his = sort( { $a <=> $b } keys(%his));  | 
| 
 
 | 
6003
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7953
 | 
    | 
| 
1728
 | 
1209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2880
 | 
     return [ map { [$sorted_los[$_], $sorted_his[$_]] } (0..$#sorted_los) ];  | 
| 
 
 | 
3863
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13160
 | 
    | 
| 
1729
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1730
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1731
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item C  | 
| 
1732
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1733
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Compute a deterministic finite automaton from C<$in_nfa>  | 
| 
1734
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (powerset construction).  | 
| 
1735
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1736
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The data structure of a deterministic finite automaton (dfa) is  | 
| 
1737
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the same as that of a non-deterministic one, but it is further constrained:  | 
| 
1738
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 For each state and each unicode character there exist exactly one transition  | 
| 
1739
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (i.e. a pair C<(char_class, $state_index)>) matching this character.  | 
| 
1740
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1741
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Note that the following constraint hold for both a C<$dfa> and a C<$nfa>:  | 
| 
1742
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 For each pair of state p1 and p2, there exists at most one transition  | 
| 
1743
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 from p1 to p2 (artefact of this implementation).  | 
| 
1744
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1745
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
1746
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1747
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub nfa_to_dfa {  | 
| 
1748
 | 
178
 | 
 
 | 
 
 | 
  
178
  
 | 
  
1
  
 | 
223
 | 
     my ($nfa) = @_;  | 
| 
1749
 | 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
295
 | 
     my $dfa = [];  | 
| 
1750
 | 
178
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
434
 | 
     if (!@$nfa) {  | 
| 
1751
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         return [[0, [$cc_any, 0]]];  | 
| 
1752
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1753
 | 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
225
 | 
     my $trap_needed = 0;  | 
| 
1754
 | 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
221
 | 
     my $dfa_size = 0;  | 
| 
1755
 | 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
439
 | 
     my %dfa_indexes = ("0" => $dfa_size++);  | 
| 
1756
 | 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
446
 | 
     my @todo = ([0]);  | 
| 
1757
 | 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
406
 | 
     while (@todo) {  | 
| 
1758
 | 
1037
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1525
 | 
         my $nfa_indexes = pop(@todo);  | 
| 
1759
 | 
1037
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2224
 | 
         my $dfa_index = $dfa_indexes{join('.', @$nfa_indexes)};  | 
| 
1760
 | 
1037
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2759
 | 
         my @nfa_states = @$nfa[@$nfa_indexes];  | 
| 
1761
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1762
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # accepting  | 
| 
1763
 | 
1037
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1346
 | 
         $$dfa[$dfa_index][0] = scalar(grep { $$_[0] } @nfa_states) ? 1 : 0;  | 
| 
 
 | 
1391
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4532
 | 
    | 
| 
1764
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1765
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # transitions  | 
| 
1766
 | 
3348
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7401
 | 
         my $cases = interval_cases([  | 
| 
1767
 | 
3348
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3167
 | 
             map { @{$$_[0]} }  | 
| 
 
 | 
1391
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3651
 | 
    | 
| 
1768
 | 
1037
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1394
 | 
             map { @{$$_[1]} }  | 
| 
 
 | 
1391
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1391
 | 
    | 
| 
1769
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             @nfa_states  | 
| 
1770
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ]);  | 
| 
1771
 | 
1037
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4409
 | 
         my %dfa_index_to_intervals;  | 
| 
1772
 | 
1037
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1604
 | 
         for my $interval (@$cases) {  | 
| 
1773
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             my @next_nfa_indexes  | 
| 
1774
 | 
3020
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3278
 | 
               = sort(keys(%{{  | 
| 
 
 | 
3735
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17588
 | 
    | 
| 
1775
 | 
18666
 | 
 
 | 
 
 | 
 
 | 
 
 | 
38751
 | 
                     map { ($$_[1] => undef) }  | 
| 
1776
 | 
3899
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9783
 | 
                     grep { cc_match($$_[0], $$interval[0]) }  | 
| 
1777
 | 
3020
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4161
 | 
                     map { @{$$_[1]} }  | 
| 
 
 | 
3899
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3453
 | 
    | 
| 
1778
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     @nfa_states  | 
| 
1779
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }}))  | 
| 
1780
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ;  | 
| 
1781
 | 
3020
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8976
 | 
             my $next_index_key = join('.', @next_nfa_indexes);  | 
| 
1782
 | 
3020
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
7160
 | 
             if (!exists($dfa_indexes{$next_index_key})) {  | 
| 
1783
 | 
859
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1526
 | 
                 $dfa_indexes{$next_index_key} = $dfa_size++;  | 
| 
1784
 | 
859
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1465
 | 
                 push(@todo, \@next_nfa_indexes);  | 
| 
1785
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1786
 | 
3020
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3029
 | 
              push(@{$dfa_index_to_intervals{$dfa_indexes{$next_index_key}}},  | 
| 
 
 | 
3020
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10767
 | 
    | 
| 
1787
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                  $interval  | 
| 
1788
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              );  | 
| 
1789
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1790
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1791
 | 
1037
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1713
 | 
         my @any_ccs;  | 
| 
1792
 | 
2855
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5603
 | 
         $$dfa[$dfa_index][1] = [  | 
| 
1793
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             map {  | 
| 
1794
 | 
1037
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2681
 | 
                 my $cc = interval_list_to_cc($dfa_index_to_intervals{$_});  | 
| 
1795
 | 
2855
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4243
 | 
                 push(@any_ccs, $cc);  | 
| 
1796
 | 
2855
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7361
 | 
                 [$cc, $_ ];  | 
| 
1797
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1798
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             sort(keys(%dfa_index_to_intervals))  | 
| 
1799
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ];  | 
| 
1800
 | 
1037
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2276
 | 
         if ((my $all_cc = cc_union(@any_ccs)) != $cc_any) {  | 
| 
1801
 | 
990
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1072
 | 
             $trap_needed = 1;  | 
| 
1802
 | 
990
 | 
 
 | 
 
 | 
 
 | 
 
 | 
879
 | 
             push(@{$$dfa[$dfa_index][1]},  | 
| 
 
 | 
990
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2362
 | 
    | 
| 
1803
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 [ cc_neg($all_cc), -1 ]  | 
| 
1804
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             );  | 
| 
1805
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1806
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1807
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1808
 | 
178
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
409
 | 
     if ($trap_needed) {  | 
| 
1809
 | 
168
 | 
 
 | 
 
 | 
 
 | 
 
 | 
319
 | 
         for (  | 
| 
1810
 | 
3735
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6604
 | 
             grep { $$_[1] == -1 }  | 
| 
 
 | 
999
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2279
 | 
    | 
| 
1811
 | 
999
 | 
 
 | 
 
 | 
 
 | 
 
 | 
862
 | 
             map { @{$$_[1]} }  | 
| 
1812
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             @$dfa  | 
| 
1813
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ) {  | 
| 
1814
 | 
990
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1225
 | 
             $$_[1] = $dfa_size;  | 
| 
1815
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1816
 | 
168
 | 
 
 | 
 
 | 
 
 | 
 
 | 
745
 | 
         $$dfa[$dfa_size] = [0, [[$cc_any, $dfa_size]]];  | 
| 
1817
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1818
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1819
 | 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
833
 | 
     return $dfa;  | 
| 
1820
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1821
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1822
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1823
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item C  | 
| 
1824
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1825
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1826
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Computes a minimal deterministic C<$dfa> from the given C<$in_dfa>  | 
| 
1827
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (Hopcroft's algorithm).  | 
| 
1828
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1829
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Note that the given C<$in_dfa> must be a C<$dfa>, as  | 
| 
1830
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 returned from C, and not a mere C<$nfa>.  | 
| 
1831
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1832
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Myhill-Nerode theorem: two minimal dfa accepting  | 
| 
1833
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the same language are isomorph (i.e. C returns true).  | 
| 
1834
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1835
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
1836
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1837
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub dfa_to_min_dfa {  | 
| 
1838
 | 
178
 | 
 
 | 
 
 | 
  
178
  
 | 
  
1
  
 | 
267
 | 
     my ($dfa) = @_;  | 
| 
1839
 | 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
277
 | 
     my @acceptings;  | 
| 
1840
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my @non_acceptings;  | 
| 
1841
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my @intervals;  | 
| 
1842
 | 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
470
 | 
     for my $index (0..$#$dfa) {  | 
| 
1843
 | 
1205
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2179
 | 
         if ($$dfa[$index][0]) {  | 
| 
1844
 | 
245
 | 
 
 | 
 
 | 
 
 | 
 
 | 
336
 | 
             push(@acceptings, $index);  | 
| 
1845
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1846
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
1847
 | 
960
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1131
 | 
             push(@non_acceptings, $index);  | 
| 
1848
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1849
 | 
1205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1130
 | 
         push(@intervals, map { @{$$_[0]} } @{$$dfa[$index][1]})  | 
| 
 
 | 
4013
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3424
 | 
    | 
| 
 
 | 
4013
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8077
 | 
    | 
| 
 
 | 
1205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1855
 | 
    | 
| 
1850
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1851
 | 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
256
 | 
     my $partition;  | 
| 
1852
 | 
178
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
325
 | 
     if (@non_acceptings) {  | 
| 
1853
 | 
172
 | 
 
 | 
 
 | 
 
 | 
 
 | 
375
 | 
         $partition = [\@non_acceptings, \@acceptings];  | 
| 
1854
 | 
172
 | 
 
 | 
 
 | 
 
 | 
 
 | 
882
 | 
         my %todo = (join('.', @non_acceptings) => \@non_acceptings);  | 
| 
1855
 | 
172
 | 
 
 | 
 
 | 
 
 | 
 
 | 
347
 | 
         my $cases = interval_cases(\@intervals);  | 
| 
1856
 | 
172
 | 
 
 | 
 
 | 
 
 | 
 
 | 
661
 | 
         while (my ($todo_key) = keys(%todo)) {  | 
| 
1857
 | 
764
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1017
 | 
             my %indexes = map { ($_ => undef) } @{delete($todo{$todo_key})};  | 
| 
 
 | 
1786
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4236
 | 
    | 
| 
 
 | 
764
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1776
 | 
    | 
| 
1858
 | 
764
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1658
 | 
             for my $interval (@$cases) {  | 
| 
1859
 | 
15626
 | 
 
 | 
 
 | 
 
 | 
 
 | 
30673
 | 
                 my %prev_inds = (  | 
| 
1860
 | 
229606
 | 
 
 | 
 
 | 
 
 | 
 
 | 
234666
 | 
                     map { ($_ => undef) }  | 
| 
1861
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     grep {  | 
| 
1862
 | 
7419
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21761
 | 
                         my $i = $_;  | 
| 
1863
 | 
796851
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2052263
 | 
                         grep {  | 
| 
1864
 | 
229606
 | 
 
 | 
 
 | 
 
 | 
 
 | 
339323
 | 
                             exists($indexes{$$_[1]})  | 
| 
1865
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                          && cc_match($$_[0], $$interval[0])  | 
| 
1866
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         }  | 
| 
1867
 | 
229606
 | 
 
 | 
 
 | 
 
 | 
 
 | 
205777
 | 
                         @{$$dfa[$i][1]}  | 
| 
1868
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
1869
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     (0..$#$dfa)  | 
| 
1870
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 );  | 
| 
1871
 | 
7419
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18473
 | 
                 my $refined_partition;  | 
| 
1872
 | 
7419
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10048
 | 
                 for my $partition_indexes (@$partition) {  | 
| 
1873
 | 
105893
 | 
 
 | 
 
 | 
 
 | 
 
 | 
105026
 | 
                     my (@inter, @diff);  | 
| 
1874
 | 
105893
 | 
 
 | 
 
 | 
 
 | 
 
 | 
136095
 | 
                     for (@$partition_indexes) {  | 
| 
1875
 | 
229606
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
338336
 | 
                         if (exists($prev_inds{$_})) {  | 
| 
1876
 | 
15626
 | 
 
 | 
 
 | 
 
 | 
 
 | 
26582
 | 
                             push(@inter, $_);  | 
| 
1877
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         }  | 
| 
1878
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         else {  | 
| 
1879
 | 
213980
 | 
 
 | 
 
 | 
 
 | 
 
 | 
354311
 | 
                             push(@diff, $_);  | 
| 
1880
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         }  | 
| 
1881
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
1882
 | 
105893
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
272952
 | 
                     if (!@inter || !@diff) {  | 
| 
1883
 | 
105301
 | 
 
 | 
 
 | 
 
 | 
 
 | 
234937
 | 
                         push(@$refined_partition, $partition_indexes);  | 
| 
1884
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
1885
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     else {  | 
| 
1886
 | 
592
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1389
 | 
                         push(@$refined_partition, \@inter, \@diff);  | 
| 
1887
 | 
592
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3354
 | 
                         my $prev_inds_key = join('.', sort(keys(%prev_inds)));  | 
| 
1888
 | 
592
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
2005
 | 
                         if ($todo{$prev_inds_key}) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1889
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                             delete($todo{$prev_inds_key});  | 
| 
1890
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                             $todo{join('.', @diff)} = \@diff;  | 
| 
1891
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                             $todo{join('.', @inter)} = \@inter;  | 
| 
1892
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         }  | 
| 
1893
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         elsif (@diff < @inter) {  | 
| 
1894
 | 
147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
587
 | 
                             $todo{join('.', @diff)} = \@diff;  | 
| 
1895
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         }  | 
| 
1896
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         else {  | 
| 
1897
 | 
445
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1563
 | 
                             $todo{join('.', @inter)} = \@inter;  | 
| 
1898
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         }  | 
| 
1899
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
1900
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
1901
 | 
7419
 | 
 
 | 
 
 | 
 
 | 
 
 | 
37875
 | 
                 $partition = $refined_partition;  | 
| 
1902
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
1903
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1904
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1905
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else {  | 
| 
1906
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
         $partition = [\@acceptings];  | 
| 
1907
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1908
 | 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
246
 | 
     my $state_ind_to_equiv;  | 
| 
1909
 | 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
322
 | 
     for (grep { @$_ != 1 } @$partition) {  | 
| 
 
 | 
942
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1589
 | 
    | 
| 
1910
 | 
124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
677
 | 
         @$state_ind_to_equiv{@$_[1..$#$_]} = ($$_[0]) x $#$_;  | 
| 
1911
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1912
 | 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
586
 | 
     return _nfa_shrink_equiv($dfa, $state_ind_to_equiv);  | 
| 
1913
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1914
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1915
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1916
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ##############################################################################  | 
| 
1917
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # $tree  | 
| 
1918
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ##############################################################################  | 
| 
1919
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1920
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
1921
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1922
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 Tree  | 
| 
1923
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1924
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $tree = [ $star, [ $alt_0, $alt_1, ... ] ]  | 
| 
1925
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        or $char_class # ref($char_class) eq CHAR_CLASS  | 
| 
1926
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        or undef # accepting nothing  | 
| 
1927
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $alt = [ $tree_0, $tree_1, ... ]  | 
| 
1928
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1929
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 A C<$tree> is a hierarchical data structure used as intermediate form for  | 
| 
1930
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 regular expression generation routines.  | 
| 
1931
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1932
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Similar to a parse tree, except that the C<$tree>s described here are not the  | 
| 
1933
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 direct result of the parsing routines C; indeed, the parsing  | 
| 
1934
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 routines generate a C<$nfa>, which then can be converted to a C<$tree>.  | 
| 
1935
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1936
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 A string is spanned by C<$tree = [$star, [ $alt_0, $alt_1, ... ] ]> if it is  | 
| 
1937
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 spanned by one of the C<$alt_i> (if C<$star> is false) of a repetition thereof  | 
| 
1938
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (if C<$star> is true).  | 
| 
1939
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1940
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 A string is spanned by C<$alt = [ $tree_0, $tree_1, ...]> if it is the  | 
| 
1941
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 concatenation of C<@substrings>, each C<$substrings[$i]> being spanned by  | 
| 
1942
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C<$$alt[$i]>.  | 
| 
1943
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1944
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
1945
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1946
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item C  | 
| 
1947
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1948
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Converts a C<$nfa> to a C<$tree>.  | 
| 
1949
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns C if the C<$nfa> accepts nothing (not even the empty string).  | 
| 
1950
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1951
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
1952
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1953
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub nfa_to_tree {  | 
| 
1954
 | 
107
 | 
 
 | 
 
 | 
  
107
  
 | 
  
1
  
 | 
175
 | 
     my ($nfa) = @_;  | 
| 
1955
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1956
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Warshall algorithm (Kleen's theorem)  | 
| 
1957
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # with preliminary computations:  | 
| 
1958
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #   - words-pathes (unbranched pathes) are shrinked  | 
| 
1959
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #   - unique accepting state is ensured  | 
| 
1960
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #   - branches (with single parent) are skipped  | 
| 
1961
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1962
 | 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
169
 | 
     my $path = {};  | 
| 
1963
 | 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
162
 | 
     my $path_tr = {};  | 
| 
1964
 | 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
122
 | 
     my %accepting_state_inds;  | 
| 
1965
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1966
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Initialization of the pathes  | 
| 
1967
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1968
 | 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
313
 | 
     for my $i (0..$#$nfa) {  | 
| 
1969
 | 
605
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1246
 | 
         if ($$nfa[$i][0]) {  | 
| 
1970
 | 
127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
341
 | 
             $accepting_state_inds{$i} = $i;  | 
| 
1971
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1972
 | 
605
 | 
 
 | 
 
 | 
 
 | 
 
 | 
587
 | 
         for (@{$$nfa[$i][1]}) {  | 
| 
 
 | 
605
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1147
 | 
    | 
| 
1973
 | 
922
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3980
 | 
             $$path{$i}{$$_[1]}  | 
| 
1974
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           = $$path_tr{$$_[1]}{$i}  | 
| 
1975
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           = $$_[0];  | 
| 
1976
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
1977
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1978
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1979
 | 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
158
 | 
 if (TRACE_NFA_TO_TREE) {  | 
| 
1980
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print STDERR "before word shrink\n";  | 
| 
1981
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     for my $i (sort {$a <=> $b} (keys(%$path))) {  | 
| 
1982
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     for my $j (sort {$a <=> $b} (keys(%{$$path{$i}}))) {  | 
| 
1983
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         print STDERR "$i $j: " . cc_to_regex($$path{$i}{$j}) . "\n";  | 
| 
1984
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }}  | 
| 
1985
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1986
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1987
 | 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
150
 | 
     my @tree_list;  | 
| 
1988
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my @state_ind_path;  | 
| 
1989
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1990
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # word-pathes (unbranched pathes) are shrinked  | 
| 
1991
 | 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
244
 | 
     for my $first (0..$#$nfa) {  | 
| 
1992
 | 
605
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1282
 | 
         if (!exists($$path{$first})) { next; }  | 
| 
 
 | 
190
 | 
 
 | 
 
 | 
 
 | 
 
 | 
218
 | 
    | 
| 
1993
 | 
415
 | 
 
 | 
 
 | 
 
 | 
 
 | 
396
 | 
         my @todo = keys(%{$$path{$first}});  | 
| 
 
 | 
415
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1327
 | 
    | 
| 
1994
 | 
415
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5048
 | 
         my %todo_ctrl;  | 
| 
1995
 | 
415
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2334
 | 
         while (@todo) {  | 
| 
1996
 | 
782
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1236
 | 
             $todo_ctrl{my $i = pop(@todo)} = undef;  | 
| 
1997
 | 
782
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
837
 | 
             if (  | 
| 
1998
 | 
782
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2757
 | 
                 keys(%{$$path_tr{$i}}) != 1  | 
| 
1999
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              || $i == $first  | 
| 
2000
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ) {  | 
| 
2001
 | 
591
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1604
 | 
                 next;  | 
| 
2002
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
2003
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2004
 | 
191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
409
 | 
             my @tree_list = ($$path{$first}{$i});  | 
| 
2005
 | 
191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
279
 | 
             my @state_ind_path = ($i);  | 
| 
2006
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2007
 | 
191
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
200
 | 
             while (  | 
| 
2008
 | 
318
 | 
 
 | 
 
 | 
 
 | 
 
 | 
968
 | 
                 keys(%{$$path{$i}}) == 1  | 
| 
 
 | 
180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
694
 | 
    | 
| 
2009
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              && (my $j = (keys(%{$$path{$i}}))[0]) != $first  | 
| 
2010
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ) {  | 
| 
2011
 | 
180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
317
 | 
                 push(@tree_list, $$path{$i}{$j});  | 
| 
2012
 | 
180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
229
 | 
                 push(@state_ind_path, $i = $j);  | 
| 
2013
 | 
180
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
206
 | 
                 if (keys(%{$$path_tr{$j}}) != 1) {  | 
| 
 
 | 
180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
547
 | 
    | 
| 
2014
 | 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
94
 | 
                     last;  | 
| 
2015
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
2016
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
2017
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2018
 | 
191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
219
 | 
 if (TRACE_NFA_TO_TREE) {  | 
| 
2019
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print STDERR "first, state_ind_path: $first, @state_ind_path\n";  | 
| 
2020
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
2021
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2022
 | 
191
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
651
 | 
             if (@state_ind_path > 1) {  | 
| 
2023
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2024
 | 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
65
 | 
 if (TRACE_NFA_TO_TREE) {  | 
| 
2025
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print STDERR "delete head $first -> $state_ind_path[0]\n";  | 
| 
2026
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
2027
 | 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
138
 | 
                 delete($$path{$first}{$state_ind_path[0]});  | 
| 
2028
 | 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
183
 | 
                 for (@state_ind_path[0..$#state_ind_path-1]) {  | 
| 
2029
 | 
180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
303
 | 
                     delete($$path{$_});  | 
| 
2030
 | 
180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
291
 | 
                     delete($$path_tr{$_});  | 
| 
2031
 | 
180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
277
 | 
 if (TRACE_NFA_TO_TREE) {  | 
| 
2032
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print STDERR "delete path $_ -> *\n";  | 
| 
2033
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print STDERR "delete path * <- $_\n";  | 
| 
2034
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
2035
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
2036
 | 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
200
 | 
                 delete($$path_tr{$state_ind_path[-1]}{$state_ind_path[-2]});  | 
| 
2037
 | 
61
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
154
 | 
                 if (!exists($todo_ctrl{$state_ind_path[-1]})) {  | 
| 
2038
 | 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
65
 | 
                     $todo_ctrl{$state_ind_path[-1]} = undef;  | 
| 
2039
 | 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
67
 | 
                     push(@todo, $state_ind_path[-1]);  | 
| 
2040
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
2041
 | 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
64
 | 
 if (TRACE_NFA_TO_TREE) {  | 
| 
2042
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print STDERR "delete tail $state_ind_path[-1] <- $state_ind_path[-2]\n";  | 
| 
2043
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
2044
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2045
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2046
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # $first -> $last  | 
| 
2047
 | 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
80
 | 
                 my $last = $state_ind_path[-1];  | 
| 
2048
 | 
61
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
300
 | 
                 $$path{$first}{$last}  | 
| 
2049
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               = $$path_tr{$last}{$first}  | 
| 
2050
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               = exists($$path{$first}{$last})  | 
| 
2051
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   ? tree_alt(  | 
| 
2052
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         $$path{$first}{$last}  | 
| 
2053
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                       , tree_concat(@tree_list)  | 
| 
2054
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     )  | 
| 
2055
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   : tree_concat(@tree_list)  | 
| 
2056
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ;  | 
| 
2057
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2058
 | 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
106
 | 
 if (TRACE_NFA_TO_TREE) {  | 
| 
2059
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print STDERR  | 
| 
2060
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         "$first -> $last created (first ->last): "  | 
| 
2061
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       . join('', map {_tree_to_regex($_)} @tree_list)  . "\n";  | 
| 
2062
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
2063
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2064
 | 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
141
 | 
                 for (0..$#state_ind_path-1) {  | 
| 
2065
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2066
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     # $first -> accepting  | 
| 
2067
 | 
180
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
643
 | 
                     if ($accepting_state_inds{  | 
| 
2068
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         my $state_ind = $state_ind_path[$_]  | 
| 
2069
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }) {  | 
| 
2070
 | 
30
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
108
 | 
                         $$path{$first}{$state_ind}  | 
| 
2071
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                       = $$path_tr{$state_ind}{$first}  | 
| 
2072
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                       = exists($$path{$first}{$state_ind})  | 
| 
2073
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                           ? tree_alt(  | 
| 
2074
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                 $$path{$first}{$state_ind}  | 
| 
2075
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                               , tree_concat(@tree_list[0..$_])  | 
| 
2076
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             )  | 
| 
2077
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                           : tree_concat(@tree_list[0..$_])  | 
| 
2078
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         ;  | 
| 
2079
 | 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
191
 | 
 if (TRACE_NFA_TO_TREE) {  | 
| 
2080
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print STDERR  | 
| 
2081
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         "$first -> $state_ind created (first -> accepting): "  | 
| 
2082
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       . join('', map {_tree_to_regex($_)} @tree_list[0..$_])  . "\n";  | 
| 
2083
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
2084
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
2085
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
2086
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
2087
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2088
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2089
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2090
 | 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
160
 | 
 if (TRACE_NFA_TO_TREE) {  | 
| 
2091
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print STDERR "after word shrink\n";  | 
| 
2092
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     for my $i (sort {$a <=> $b} (keys(%$path))) {  | 
| 
2093
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     for my $j (sort {$a <=> $b} (keys(%{$$path{$i}}))) {  | 
| 
2094
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         print STDERR "$i $j: " . tree_dump($$path{$i}{$j}) . "\n";  | 
| 
2095
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }}  | 
| 
2096
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     for my $j (sort {$a <=> $b} (keys(%$path_tr))) {  | 
| 
2097
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     for my $i (sort {$a <=> $b} (keys(%{$$path_tr{$j}}))) {  | 
| 
2098
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         print STDERR "$j <- $i: " . tree_dump($$path_tr{$j}{$i}) . "\n";  | 
| 
2099
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }}  | 
| 
2100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
2101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # unique accepting state is ensured  | 
| 
2103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # (pseudo-unique: the initial state may additionally be accepting)  | 
| 
2104
 | 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
196
 | 
     my $unique_accepting_state_ind = @$nfa;  | 
| 
2105
 | 
107
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
338
 | 
     if (  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         keys(%accepting_state_inds) == 1  | 
| 
2107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ) {  | 
| 
2108
 | 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
220
 | 
         $unique_accepting_state_ind = (keys(%accepting_state_inds))[0];  | 
| 
2109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     elsif (  | 
| 
2111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         keys(%accepting_state_inds) == 2  | 
| 
2112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      && exists($accepting_state_inds{0})  | 
| 
2113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ) {  | 
| 
2114
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
         $unique_accepting_state_ind  | 
| 
2115
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
       = (grep {$_} keys(%accepting_state_inds))[0];  | 
| 
2116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else {  | 
| 
2118
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
         $unique_accepting_state_ind = @$nfa;  | 
| 
2119
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
35
 | 
         for my $to_state_ind (keys(%accepting_state_inds)) {  | 
| 
2120
 | 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
33
 | 
             for my $from_state_ind (keys(%{$$path_tr{$to_state_ind}})) {  | 
| 
 
 | 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
82
 | 
    | 
| 
2121
 | 
50
 | 
 
 | 
 
 | 
 
 | 
 
 | 
161
 | 
                 push(  | 
| 
2122
 | 
50
 | 
 
 | 
 
 | 
 
 | 
 
 | 
49
 | 
                     @{$$path_tr{$unique_accepting_state_ind}{$from_state_ind}}  | 
| 
2123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   , $$path_tr{$to_state_ind}{$from_state_ind}  | 
| 
2124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 );  | 
| 
2125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
2126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2127
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
         for my $from_state_ind (  | 
| 
 
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
41
 | 
    | 
| 
2128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             keys(%{$$path_tr{$unique_accepting_state_ind}})  | 
| 
2129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ) {  | 
| 
2130
 | 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
133
 | 
             $$path_tr{$unique_accepting_state_ind}{$from_state_ind}  | 
| 
2131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           = $$path{$from_state_ind}{$unique_accepting_state_ind}  | 
| 
2132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           = tree_alt(  | 
| 
2133
 | 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
67
 | 
                 @{$$path_tr{$unique_accepting_state_ind}{$from_state_ind}}  | 
| 
2134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             );  | 
| 
2135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2138
 | 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
135
 | 
 if (TRACE_NFA_TO_TREE) {  | 
| 
2139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print STDERR "after unique state addition\n";  | 
| 
2140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     for my $i (sort {$a <=> $b} (keys(%$path))) {  | 
| 
2141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     for my $j (sort {$a <=> $b} (keys(%{$$path{$i}}))) {  | 
| 
2142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         print STDERR "$i $j: " . tree_dump($$path{$i}{$j}) . "\n";  | 
| 
2143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }}  | 
| 
2144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     for my $j (sort {$a <=> $b} (keys(%$path_tr))) {  | 
| 
2145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     for my $i (sort {$a <=> $b} (keys(%{$$path_tr{$j}}))) {  | 
| 
2146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         print STDERR "$j <- $i: " . tree_dump($$path_tr{$j}{$i}) . "\n";  | 
| 
2147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }}  | 
| 
2148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
2149
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2150
 | 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
224
 | 
     for my $reversed (0, 1) {  | 
| 
2151
 | 
214
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
494
 | 
         my ($tmp_path, $tmp_path_tr)  | 
| 
2152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           = $reversed  | 
| 
2153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           ? ($path_tr, $path)  | 
| 
2154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           : ($path, $path_tr)  | 
| 
2155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ;  | 
| 
2156
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # branches (with single parent) are skipped  | 
| 
2158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my @branch_inds  | 
| 
2159
 | 
339
 | 
 
 | 
 
 | 
 
 | 
 
 | 
529
 | 
           = $reversed  | 
| 
2160
 | 
664
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1002
 | 
           ? sort {$a <=> $b} (keys(%$tmp_path))  | 
| 
2161
 | 
214
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
964
 | 
           : sort {$b <=> $a} (keys(%$tmp_path))  | 
| 
2162
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ;  | 
| 
2163
 | 
214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
524
 | 
         while (@branch_inds) {  | 
| 
2164
 | 
924
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10855
 | 
             my $branch = pop(@branch_inds);  | 
| 
2165
 | 
924
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
4952
 | 
             if  (  | 
| 
 
 | 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2166
 | 
510
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1809
 | 
                 !exists($$tmp_path{$branch})  | 
| 
2167
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # root cannot be un-branched  | 
| 
2168
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              || $branch == 0  | 
| 
2169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # accepting states cannot be un-branched  | 
| 
2170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              || $branch == $unique_accepting_state_ind  | 
| 
2171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # single parent (non-root have one or more parents)  | 
| 
2172
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              || keys(%{$$tmp_path_tr{$branch}}) != 1  | 
| 
2173
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ) {  | 
| 
2174
 | 
674
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1439
 | 
                 next;  | 
| 
2175
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
2176
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2177
 | 
250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
324
 | 
 if (TRACE_NFA_TO_TREE) {  | 
| 
2178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print STDERR "branch at $branch\n";  | 
| 
2179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
2180
 | 
250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
254
 | 
             my ($parent) = keys(%{$$tmp_path_tr{$branch}}); # single parent  | 
| 
 
 | 
250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
487
 | 
    | 
| 
2181
 | 
250
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
899
 | 
             if (  | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2182
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ref($$tmp_path{$parent}{$branch}) ne CHAR_CLASS  | 
| 
2183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              && (  | 
| 
2184
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     # starified parent  | 
| 
2185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     $$tmp_path{$parent}{$branch}[0]  | 
| 
2186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     # parent containing several pathes  | 
| 
2187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                  || @{$$tmp_path{$parent}{$branch}[1]} > 1  | 
| 
2188
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 )  | 
| 
2189
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ) {  | 
| 
2190
 | 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
80
 | 
                 next;  | 
| 
2191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
2192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2193
 | 
213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
225
 | 
             my (@children) = keys(%{$$tmp_path{$branch}});  | 
| 
 
 | 
213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
590
 | 
    | 
| 
2194
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2195
 | 
213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
480
 | 
             for my $child (@children) {  | 
| 
2196
 | 
478
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2078
 | 
                 $$tmp_path{$parent}{$child}  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               = $$tmp_path_tr{$child}{$parent}  | 
| 
2198
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               = exists($$tmp_path{$parent}{$child})  | 
| 
2199
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   ? tree_alt(  | 
| 
2200
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                          $$tmp_path{$parent}{$child}  | 
| 
2201
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                       , tree_concat2(  | 
| 
2202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             $reversed  | 
| 
2203
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                           ? (  | 
| 
2204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                 $$tmp_path{$branch}{$child}  | 
| 
2205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                               , $$tmp_path{$parent}{$branch}  | 
| 
2206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             )  | 
| 
2207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                           : (  | 
| 
2208
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                 $$tmp_path{$parent}{$branch}  | 
| 
2209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                               , $$tmp_path{$branch}{$child}  | 
| 
2210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             )  | 
| 
2211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         )  | 
| 
2212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     )  | 
| 
2213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   : tree_concat2(  | 
| 
2214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             $reversed  | 
| 
2215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                           ? (  | 
| 
2216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                 $$tmp_path{$branch}{$child}  | 
| 
2217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                               , $$tmp_path{$parent}{$branch}  | 
| 
2218
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             )  | 
| 
2219
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                           : (  | 
| 
2220
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                 $$tmp_path{$parent}{$branch}  | 
| 
2221
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                               , $$tmp_path{$branch}{$child}  | 
| 
2222
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             )  | 
| 
2223
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     )  | 
| 
2224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ;  | 
| 
2225
 | 
478
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1216
 | 
                 delete($$tmp_path_tr{$child}{$branch});  | 
| 
2226
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2227
 | 
478
 | 
 
 | 
 
 | 
 
 | 
 
 | 
733
 | 
 if (TRACE_NFA_TO_TREE) {  | 
| 
2228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print STDERR  | 
| 
2229
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         "parent -> branch: "  | 
| 
2230
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       . tree_dump($$tmp_path{$parent}{$branch}) . "\n";  | 
| 
2231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print STDERR  | 
| 
2232
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         "branch -> child : "  | 
| 
2233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       . tree_dump($$tmp_path{$branch}{$child}) . "\n";  | 
| 
2234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print STDERR  | 
| 
2235
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         "$parent -> $child created (un-branch): "  | 
| 
2236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       . tree_dump($$tmp_path{$parent}{$child})  | 
| 
2237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       . ($reversed ? " (reversed)" : "" )  . "\n";  | 
| 
2238
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print STDERR  | 
| 
2239
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         "delete $child <- $branch\n";  | 
| 
2240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
2241
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
2243
 | 
213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
395
 | 
             delete($$tmp_path{$parent}{$branch});  | 
| 
2244
 | 
213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
578
 | 
             delete($$tmp_path{$branch});  | 
| 
2245
 | 
213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
533
 | 
             delete($$tmp_path_tr{$branch});  | 
| 
2246
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2247
 | 
213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
197
 | 
 if (TRACE_NFA_TO_TREE) {  | 
| 
2248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print STDERR "delete $parent -> $branch\n";  | 
| 
2249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print STDERR "delete $branch -> *\n";  | 
| 
2250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print STDERR "delete $branch <- *\n";  | 
| 
2251
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
2252
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2253
 | 
213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
648
 | 
             push(@branch_inds, $parent);  | 
| 
2254
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2256
 | 
214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
597
 | 
 if (TRACE_NFA_TO_TREE) {  | 
| 
2257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print STDERR "after branch skip\n";  | 
| 
2258
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     for my $i (sort {$a <=> $b} (keys(%$tmp_path))) {  | 
| 
2259
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     for my $j (sort {$a <=> $b} (keys(%{$$tmp_path{$i}}))) {  | 
| 
2260
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         if ($reversed) {  | 
| 
2261
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             print STDERR "$j $i: " . tree_dump($$tmp_path{$i}{$j}) . "\n";  | 
| 
2262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2263
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
2264
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             print STDERR "$i $j: " . tree_dump($$tmp_path{$i}{$j}) . "\n";  | 
| 
2265
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2266
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }}  | 
| 
2267
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     for my $j (sort {$a <=> $b} (keys(%$tmp_path_tr))) {  | 
| 
2268
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     for my $i (sort {$a <=> $b} (keys(%{$$tmp_path_tr{$j}}))) {  | 
| 
2269
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         print STDERR  | 
| 
2270
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ($reversed ? "$i <- $j: " : "$j <- $i:")  | 
| 
2271
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           . tree_dump($$tmp_path_tr{$j}{$i}) . "\n";  | 
| 
2272
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }}  | 
| 
2273
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
2274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2275
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2276
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2277
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2278
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # starify diagonal  | 
| 
2279
 | 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
271
 | 
     for (grep { exists($$path{$_}{$_}) } keys(%$path)) {  | 
| 
 
 | 
204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
676
 | 
    | 
| 
2280
 | 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
275
 | 
         $$path{$_}{$_}  | 
| 
2281
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       = $$path_tr{$_}{$_}  | 
| 
2282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       = tree_starify($$path{$_}{$_});  | 
| 
2283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2284
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2285
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # Warshall algorithm (Kleene's theorem)  | 
| 
2286
 | 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
370
 | 
     my %updates;  | 
| 
2287
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # strarified first  | 
| 
2288
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my @ks  | 
| 
2289
 | 
178
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
508
 | 
         = sort {  | 
| 
2290
 | 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
325
 | 
               exists($$path{$b}{$b}) <=> exists($$path{$a}{$a})  | 
| 
2291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
            || $a <=> $b  | 
| 
2292
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           }  | 
| 
2293
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           keys(%$path)  | 
| 
2294
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           # note that keys(%$path_tr) are not additionally needed  | 
| 
2295
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           # case i == k && k == j: nothing to do  | 
| 
2296
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           # case i != k && k != j: $$path{$k}{$j} must exist  | 
| 
2297
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           # case i == k && k != j: $$path{$k}{$k} must exist  | 
| 
2298
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           # case i != k && k == j: $$path{$k}{$k} must exist  | 
| 
2299
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ;  | 
| 
2300
 | 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
217
 | 
     for my $k (@ks) {  | 
| 
2301
 | 
204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
215
 | 
         for my $i (keys(%{$$path_tr{$k}})) {   # i -> k  | 
| 
 
 | 
204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1069
 | 
    | 
| 
2302
 | 
369
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1096
 | 
             for my $j (keys(%{$$path{$k}})) {  # k -> j  | 
| 
 
 | 
369
 | 
 
 | 
 
 | 
 
 | 
 
 | 
987
 | 
    | 
| 
2303
 | 
1004
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
2886
 | 
                 if ($i == $k && $k == $j) { next; }  | 
| 
 
 | 
93
 | 
 
 | 
 
 | 
 
 | 
 
 | 
224
 | 
    | 
| 
2304
 | 
911
 | 
 
 | 
 
 | 
 
 | 
 
 | 
819
 | 
                 my @trees;  | 
| 
2305
 | 
911
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
4159
 | 
                 if (  | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2306
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     exists($$path{$i}{$j})  | 
| 
2307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                  && ($i != $k && $k != $j)  | 
| 
2308
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ) {  | 
| 
2309
 | 
412
 | 
 
 | 
 
 | 
 
 | 
 
 | 
668
 | 
                     push(@trees, $$path{$i}{$j});  | 
| 
2310
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
2311
 | 
911
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
4004
 | 
                 my $new_tree  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2312
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   = exists($$path{$k}{$k})  | 
| 
2313
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   ? tree_concat(  | 
| 
2314
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         (  | 
| 
2315
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             $i != $k  | 
| 
2316
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                           ? $$path{$i}{$k}  | 
| 
2317
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                           : ()  | 
| 
2318
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         )  | 
| 
2319
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                       , $$path{$k}{$k}  | 
| 
2320
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                       , (  | 
| 
2321
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             $k != $j  | 
| 
2322
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                           ? $$path{$k}{$j}  | 
| 
2323
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                           : ()  | 
| 
2324
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         )  | 
| 
2325
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     )  | 
| 
2326
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   : tree_concat2($$path{$i}{$k}, $$path{$k}{$j})  | 
| 
2327
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ;  | 
| 
2328
 | 
911
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
4226
 | 
                 push(@trees, $i == $j ? tree_starify($new_tree) : $new_tree);  | 
| 
2329
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2330
 | 
911
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1641
 | 
                 if (@trees == 1) {  | 
| 
2331
 | 
499
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1523
 | 
                     $updates{$i}{$j} = $trees[0];  | 
| 
2332
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
2333
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 else {  | 
| 
2334
 | 
412
 | 
 
 | 
 
 | 
 
 | 
 
 | 
739
 | 
                     $updates{$i}{$j} = tree_alt(@trees);  | 
| 
2335
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
2336
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
2337
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2338
 | 
204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
767
 | 
         for my $i (keys(%updates)) {  | 
| 
2339
 | 
289
 | 
 
 | 
 
 | 
 
 | 
 
 | 
290
 | 
             for my $j (keys(%{$updates{$i}})) {  | 
| 
 
 | 
289
 | 
 
 | 
 
 | 
 
 | 
 
 | 
736
 | 
    | 
| 
2340
 | 
911
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2266
 | 
                 $$path{$i}{$j} = $$path_tr{$j}{$i} = $updates{$i}{$j};  | 
| 
2341
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
2342
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2343
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2344
 | 
204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
258
 | 
 if (TRACE_NFA_TO_TREE) {  | 
| 
2345
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my $num_of_updates = map {keys(%{$updates{$_}})} keys(%updates);  | 
| 
2346
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print STDERR "k = $k ($num_of_updates updates)\n";  | 
| 
2347
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if ($num_of_updates) {  | 
| 
2348
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         for my $i (sort {$a <=> $b} (keys(%$path))) {  | 
| 
2349
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         for my $j (sort {$a <=> $b} (keys(%{$$path{$i}}))) {  | 
| 
2350
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             print STDERR "$i $j: ";  | 
| 
2351
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             print STDERR tree_dump($$path{$i}{$j}) . "\n";  | 
| 
2352
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }}  | 
| 
2353
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2354
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
2355
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2356
 | 
204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
617
 | 
         %updates = ();  | 
| 
2357
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2358
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2359
 | 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
141
 | 
     my $tree;  | 
| 
2360
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2361
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # accepting emtpy init  | 
| 
2362
 | 
107
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
492
 | 
     if ($$nfa[0][0]) {  | 
| 
2363
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2364
 | 
43
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
139
 | 
         my $path_0_0 = exists($$path{0}{0}) ? $$path{0}{0} : $cc_none;  | 
| 
2365
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2366
 | 
43
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
91
 | 
         if ($unique_accepting_state_ind == 0) {  | 
| 
2367
 | 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
63
 | 
             $tree = $path_0_0;  | 
| 
2368
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2369
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
2370
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
             my $path_0_end = $$path{0}{$unique_accepting_state_ind};  | 
| 
2371
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2372
 | 
5
 | 
  
 50
  
 | 
  
100
  
 | 
 
 | 
 
 | 
40
 | 
             if (  | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2373
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $path_0_0 == $cc_none  | 
| 
2374
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              && ref($path_0_end) ne CHAR_CLASS  | 
| 
2375
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              && $$path_0_end[0]  | 
| 
2376
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ) {  | 
| 
2377
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # starified expression e* does not need (|e*)  | 
| 
2378
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 $tree = $path_0_end;  | 
| 
2379
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
2380
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
2381
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # non-starified expression e needs (|e)  | 
| 
2382
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
                 $tree = tree_alt($path_0_0, $path_0_end);  | 
| 
2383
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
2384
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2386
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else {  | 
| 
2387
 | 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
162
 | 
         $tree = $$path{0}{$unique_accepting_state_ind};  | 
| 
2388
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2389
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2390
 | 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
121
 | 
 if (TRACE_NFA_TO_TREE) {  | 
| 
2391
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print STDERR "tree: " . tree_dump($tree) . "\n";  | 
| 
2392
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
2393
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2394
 | 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
296
 | 
     _tree_factorize_fixes($tree);  | 
| 
2395
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2396
 | 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
140
 | 
 if (TRACE_NFA_TO_TREE) {  | 
| 
2397
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     print STDERR "tree (after factorization): " . tree_dump($tree) . "\n";  | 
| 
2398
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
2399
 | 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1140
 | 
     return $tree;  | 
| 
2400
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
2401
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2402
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2403
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Recursively (bottom up) factorizes prefixes and suffixes out from  | 
| 
2404
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # alternations if at least one of them contains a sub-tree.  | 
| 
2405
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
2406
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Example 1: (ab1cd|ab2cd|ab3*cd) -> ab(1|2|3*)cd  | 
| 
2407
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Example 2: (ab1cd|ab2cd|ab3cd) remains the same (no sub-tree)  | 
| 
2408
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
2409
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Example 2 does not need to be factorized  | 
| 
2410
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # because it can be represented by a drop-down list,  | 
| 
2411
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # which is the primary purpose of this module;  | 
| 
2412
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # in this case, a factorization may lead to counter-intuitive results,  | 
| 
2413
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # like words cut in the middle.  | 
| 
2414
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
2415
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # But example 1 (less common) could only be represented as mere free-text  | 
| 
2416
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # if the common pre- and suf-fixes were not factorized out,  | 
| 
2417
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # thus loosing information for the input helper (xxx_to_input_constraints).  | 
| 
2418
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
2419
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # This behavior can be changed by setting our $FULL_FACTORIZE_FIXES = 1;  | 
| 
2420
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # in this case, Example 2 would produce ab(1|2|3)cd.  | 
| 
2421
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
2422
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Modifies $tree in place  | 
| 
2423
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  | 
| 
2424
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _tree_factorize_fixes {  | 
| 
2425
 | 
690
 | 
 
 | 
 
 | 
  
690
  
 | 
 
 | 
915
 | 
     my ($tree) = @_;  | 
| 
2426
 | 
690
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
3167
 | 
     if (  | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2427
 | 
398
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1881
 | 
         !defined($tree)  | 
| 
2428
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      || ref($tree) eq CHAR_CLASS  | 
| 
2429
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      || @{$$tree[1]} == 0  | 
| 
2430
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      || !$FULL_FACTORIZE_FIXES  | 
| 
2431
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      && (  | 
| 
2432
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             @{$$tree[1]} == 1  | 
| 
2433
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          || !grep { ref($_) ne CHAR_CLASS } map { @$_ } @{$$tree[1]}  | 
| 
2434
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         )  | 
| 
2435
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ) {  | 
| 
2436
 | 
575
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1161
 | 
         return $tree;  | 
| 
2437
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2438
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else {  | 
| 
2439
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2440
 | 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
137
 | 
         for (grep { grep { ref($_) ne CHAR_CLASS } @$_ } @{$$tree[1]} ) {  | 
| 
 
 | 
289
 | 
 
 | 
 
 | 
 
 | 
 
 | 
367
 | 
    | 
| 
 
 | 
875
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1586
 | 
    | 
| 
 
 | 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
181
 | 
    | 
| 
2441
 | 
583
 | 
 
 | 
 
 | 
 
 | 
 
 | 
980
 | 
             my $tmp_tree =  | 
| 
2442
 | 
161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
210
 | 
                 tree_concat(map { _tree_factorize_fixes($_) } @$_)  | 
| 
2443
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ;  | 
| 
2444
 | 
161
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
747
 | 
             if (  | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2445
 | 
160
 | 
 
 | 
 
 | 
 
 | 
 
 | 
469
 | 
                 ref($tmp_tree) eq CHAR_CLASS  | 
| 
2446
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              || $$tmp_tree[0]  | 
| 
2447
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              || @{$$tmp_tree[1]} > 1  | 
| 
2448
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ) {  | 
| 
2449
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
                 $_ = [$tmp_tree];  | 
| 
2450
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
2451
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
2452
 | 
160
 | 
 
 | 
 
 | 
 
 | 
 
 | 
440
 | 
                 $_ = $$tmp_tree[1][0];  | 
| 
2453
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
2454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2455
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2456
 | 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
164
 | 
         my $fst_len = @{$$tree[1][0]};  | 
| 
 
 | 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
189
 | 
    | 
| 
2457
 | 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
156
 | 
         my ($pre_len, $suf_len) = (0, 0);  | 
| 
2458
 | 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
154
 | 
         for (1, 0) {  | 
| 
2459
 | 
282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
410
 | 
             my ($len_ref, @range)  | 
| 
2460
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               = $_  | 
| 
2461
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               ? (\$pre_len, (0..$fst_len-1))  | 
| 
2462
 | 
230
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
610
 | 
               : (\$suf_len, map {-$_} (1..$fst_len-$pre_len))  | 
| 
2463
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ;  | 
| 
2464
 | 
230
 | 
 
 | 
 
 | 
 
 | 
 
 | 
365
 | 
             for my $i (@range) {  | 
| 
2465
 | 
262
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
257
 | 
                 if (  | 
| 
2466
 | 
653
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
4011
 | 
                     grep {  | 
| 
2467
 | 
262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
368
 | 
                         $i >= @$_  | 
| 
2468
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                      || ref($$_[$i]) ne CHAR_CLASS  | 
| 
2469
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                      || $$tree[1][0][$i] != $$_[$i]  | 
| 
2470
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
2471
 | 
262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
399
 | 
                     @{$$tree[1]}[0..$#{$$tree[1]}]  | 
| 
2472
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ) {  | 
| 
2473
 | 
185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
431
 | 
                     last;  | 
| 
2474
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
2475
 | 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
166
 | 
                 $$len_ref++;  | 
| 
2476
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
2477
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2478
 | 
115
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
426
 | 
         if ($pre_len == 0 && $suf_len == 0) {  | 
| 
2479
 | 
60
 | 
 
 | 
 
 | 
 
 | 
 
 | 
172
 | 
             return $tree;  | 
| 
2480
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2481
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2482
 | 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
111
 | 
         my $empty_seen = 0;  | 
| 
2483
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my $mid_tree = [  | 
| 
2484
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             0  | 
| 
2485
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           , [  | 
| 
2486
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 map {  | 
| 
2487
 | 
120
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
286
 | 
                     if ($pre_len <= $#$_ - $suf_len) {  | 
| 
 
 | 
55
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
99
 | 
    | 
| 
2488
 | 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
293
 | 
                         [ @$_[$pre_len..$#$_-$suf_len] ];  | 
| 
2489
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
2490
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     elsif (!$empty_seen++) {  | 
| 
2491
 | 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
61
 | 
                         [];  | 
| 
2492
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
2493
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     else {  | 
| 
2494
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                         ();  | 
| 
2495
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
2496
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
2497
 | 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
74
 | 
                 @{$$tree[1]}  | 
| 
2498
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ]  | 
| 
2499
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ];  | 
| 
2500
 | 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
94
 | 
         $$tree[1] = [[  | 
| 
2501
 | 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
141
 | 
               @{$$tree[1][0]}[0..$pre_len-1]  | 
| 
2502
 | 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
149
 | 
             , $empty_seen == @{$$tree[1]} ? () : $mid_tree  | 
| 
2503
 | 
55
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
94
 | 
             , @{$$tree[1][0]}[$fst_len-$suf_len..$fst_len-1]  | 
| 
2504
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ]];  | 
| 
2505
 | 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
187
 | 
         return $tree;  | 
| 
2506
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2507
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
2508
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2509
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item C  | 
| 
2510
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2511
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Converts a C<$tree> to an C<$ere> (if C<$to_perlre> is false)  | 
| 
2512
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 or to a C<$perlre> (if C<$to_perlre> is true).  | 
| 
2513
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2514
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
2515
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2516
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub tree_to_regex {  | 
| 
2517
 | 
102
 | 
  
100
  
 | 
 
 | 
  
102
  
 | 
  
1
  
 | 
312
 | 
     my $re = defined($_[0]) ? &_tree_to_regex : '$.';  | 
| 
2518
 | 
102
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
867
 | 
     return $_[1] ? qr/\A$re\z/ms : "^$re\$";  | 
| 
2519
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
2520
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2521
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
2522
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my %cc_to_regex_cache;  | 
| 
2523
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2524
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     sub _tree_to_regex {  | 
| 
2525
 | 
657
 | 
 
 | 
 
 | 
  
657
  
 | 
 
 | 
969
 | 
         my ($tree, $to_perlre) = (@_, 0);  | 
| 
2526
 | 
657
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
1083
 | 
         if (ref($tree) eq CHAR_CLASS) {  | 
| 
 
 | 
636
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
1245
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2527
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             return  | 
| 
2528
 | 
21
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
123
 | 
                 $cc_to_regex_cache{$tree.$to_perlre}  | 
| 
2529
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ||= cc_to_regex($tree, $to_perlre)  | 
| 
2530
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ;  | 
| 
2531
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2532
 | 
636
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1865
 | 
         elsif (@{$$tree[1]} == 0) {  | 
| 
2533
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             return '';  | 
| 
2534
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2535
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif (  | 
| 
2536
 | 
365
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1173
 | 
             @{$$tree[1]} == 1       # single alteration  | 
| 
2537
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          && @{$$tree[1][0]} == 1    # single atom  | 
| 
2538
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ) {  | 
| 
2539
 | 
185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
329
 | 
             my $atom = $$tree[1][0][0];  | 
| 
2540
 | 
185
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
331
 | 
             if (ref($atom) eq CHAR_CLASS) {  | 
| 
2541
 | 
185
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
1142
 | 
                 return join('',  | 
| 
2542
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     $cc_to_regex_cache{$atom.$to_perlre}  | 
| 
2543
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ||= cc_to_regex($atom, $to_perlre)  | 
| 
2544
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   , $$tree[0] ? '*' : ()  | 
| 
2545
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 );  | 
| 
2546
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
2547
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
2548
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 return _tree_to_regex([$$tree[0], $$atom[1]], $to_perlre);  | 
| 
2549
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
2550
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2551
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
2552
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             my $needs_parenthesis  | 
| 
2553
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               = @{$$tree[1]} > 1                  # (a|...)  | 
| 
2554
 | 
451
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
431
 | 
              || $$tree[0] && @{$$tree[1][0]} > 1  # (ab...)*  | 
| 
2555
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ;  | 
| 
2556
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2557
 | 
1947
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
9092
 | 
             return join(''  | 
| 
2558
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               , ($needs_parenthesis ? ($to_perlre ? '(?:' : '(') : ())  | 
| 
2559
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               , (  | 
| 
2560
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     join('|',  | 
| 
2561
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         map {  | 
| 
2562
 | 
451
 | 
 
 | 
 
 | 
 
 | 
 
 | 
700
 | 
                             join('',  | 
| 
2563
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                 map {  | 
| 
2564
 | 
804
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1095
 | 
                                     ref($_) eq CHAR_CLASS  | 
| 
2565
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                   ? $cc_to_regex_cache{$_.$to_perlre}  | 
| 
2566
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                     ||= cc_to_regex($_, $to_perlre)  | 
| 
2567
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                   : _tree_to_regex($_, $to_perlre)  | 
| 
2568
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                 }  | 
| 
2569
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                 @$_ # alternation  | 
| 
2570
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             )  | 
| 
2571
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         }  | 
| 
2572
 | 
451
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
975
 | 
                         @{$$tree[1]}  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2573
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     )  | 
| 
2574
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 )  | 
| 
2575
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               , ($needs_parenthesis ? ')' : ())  | 
| 
2576
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               , ($$tree[0] ? '*' : ())  | 
| 
2577
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             );  | 
| 
2578
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2579
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2580
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
2581
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2582
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # starification (regex)*  | 
| 
2583
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub tree_starify {  | 
| 
2584
 | 
173
 | 
 
 | 
 
 | 
  
173
  
 | 
  
0
  
 | 
214
 | 
     my ($tree) = @_;  | 
| 
2585
 | 
173
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
456
 | 
     if (ref($tree) eq CHAR_CLASS) {  | 
| 
2586
 | 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
430
 | 
         return [1, [[$tree]]];  | 
| 
2587
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2588
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else {  | 
| 
2589
 | 
118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
367
 | 
         return [1, $$tree[1]];  | 
| 
2590
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2591
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
2592
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2593
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # The behavior of tree_concat2 can be altered  | 
| 
2594
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # by setting $TREE_CONCAT_FULL_EXPAND = 1;  | 
| 
2595
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub tree_concat2 {  | 
| 
2596
 | 
2214
 | 
 
 | 
 
 | 
  
2214
  
 | 
  
0
  
 | 
16628
 | 
     my ($tree_0, $tree_1) = @_;  | 
| 
2597
 | 
2214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2194
 | 
     my $concat;  | 
| 
2598
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2599
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # main criteria:  | 
| 
2600
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #     CHAR_CLASS  | 
| 
2601
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #     @{$$tree_n[1]} == 0  | 
| 
2602
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #     $$tree_n[0]  | 
| 
2603
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     #     @{$$tree_n[1]} == 1  | 
| 
2604
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2605
 | 
2214
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
3781
 | 
     if (ref($tree_0) eq CHAR_CLASS) {  | 
| 
 
 | 
1649
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
4332
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2606
 | 
565
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1595
 | 
         if (@$tree_0 == 0) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2607
 | 
5
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
45
 | 
             if (  | 
| 
2608
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
                 ref($tree_1) ne CHAR_CLASS  | 
| 
2609
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
              && @{$$tree_1[1]} == 0  | 
| 
2610
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ) {  | 
| 
2611
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #  ()  ->  empty  | 
| 
2612
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
                 $concat = $cc_none;  | 
| 
2613
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
2614
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
2615
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 #    ->    | 
| 
2616
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
                 $concat = $tree_1;  | 
| 
2617
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
2618
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2619
 | 
275
 | 
 
 | 
 
 | 
 
 | 
 
 | 
766
 | 
         elsif (ref($tree_1) eq CHAR_CLASS) {  | 
| 
2620
 | 
285
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
458
 | 
             if (@$tree_1 == 0) {  | 
| 
2621
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # a   ->  a  | 
| 
2622
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
                 $concat = $tree_0;  | 
| 
2623
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
2624
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
2625
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # a b  ->  (ab)  | 
| 
2626
 | 
284
 | 
 
 | 
 
 | 
 
 | 
 
 | 
926
 | 
                 $concat = [0, [[ $tree_0, $tree_1 ]]];  | 
| 
2627
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
2628
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2629
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif (@{$$tree_1[1]} == 0) {  | 
| 
2630
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # a ()  ->  a  | 
| 
2631
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
             $concat = $tree_0;  | 
| 
2632
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2633
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif ($$tree_1[0]) {  | 
| 
2634
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # a (b)*  ->  (a(b)*)  | 
| 
2635
 | 
194
 | 
 
 | 
 
 | 
 
 | 
 
 | 
534
 | 
             $concat = [0, [[ $tree_0, $tree_1 ]]];  | 
| 
2636
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2637
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
2638
 | 
80
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
207
 | 
             if (  | 
| 
2639
 | 
247
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
838
 | 
                 $FULL_FACTORIZE_FIXES  | 
| 
2640
 | 
145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
242
 | 
              || grep { ref($_) ne CHAR_CLASS && $$_[0] }  | 
| 
2641
 | 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
130
 | 
                 map {@$_} @{$$tree_1[1]}  | 
| 
2642
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ) {  | 
| 
2643
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # a (bc|de) -> (a(bc|de))  | 
| 
2644
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # one of bcde is starified  | 
| 
2645
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
37
 | 
                 $concat = [0, [[ $tree_0, $tree_1 ]]];  | 
| 
2646
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
2647
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
2648
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # a (bc|de) -> (abc|ade)  | 
| 
2649
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # none of bcde is starified  | 
| 
2650
 | 
127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
673
 | 
                 $concat = [  | 
| 
2651
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     0  | 
| 
2652
 | 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
99
 | 
                   , [ map { [ $tree_0, @$_ ] } @{$$tree_1[1]} ]  | 
| 
 
 | 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
133
 | 
    | 
| 
2653
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ];  | 
| 
2654
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
2655
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2656
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2657
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     elsif (@{$$tree_0[1]} == 0) {  | 
| 
2658
 | 
5
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
19
 | 
         if (  | 
| 
2659
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
             ref($tree_1) ne CHAR_CLASS  | 
| 
2660
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          && @{$$tree_1[1]} == 0  | 
| 
2661
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ) {  | 
| 
2662
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # () ()  ->  empty  | 
| 
2663
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
             $concat = $cc_none;  | 
| 
2664
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2665
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
2666
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # ()   ->    | 
| 
2667
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
             $concat = $tree_1;  | 
| 
2668
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2669
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2670
 | 
1475
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2528
 | 
     elsif ($$tree_0[0]) {  | 
| 
2671
 | 
169
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
299
 | 
         if (ref($tree_1) eq CHAR_CLASS) {  | 
| 
 
 | 
122
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
301
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2672
 | 
47
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
156
 | 
             if (@$tree_1 == 0) {  | 
| 
2673
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # (a)*   -> (a)*  | 
| 
2674
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1
 | 
                 $concat = $tree_0;  | 
| 
2675
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
2676
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
2677
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # (a)* b  -> ((a)*b)  | 
| 
2678
 | 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
127
 | 
                 $concat = [0, [[ $tree_0, $tree_1 ]]];  | 
| 
2679
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
2680
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2681
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif (@{$$tree_1[1]} == 0) {  | 
| 
2682
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # (a)* ()  ->  (a)*  | 
| 
2683
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
             $concat = $tree_0;  | 
| 
2684
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2685
 | 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
215
 | 
         elsif ($$tree_1[0]) {  | 
| 
2686
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # (a)* (b)*  ->  ((a)*(b)*)  | 
| 
2687
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
             $concat = [0, [[ $tree_0, $tree_1 ]]];  | 
| 
2688
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2689
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif (@{$$tree_1[1]} == 1) {  | 
| 
2690
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # (a)* (bcd)  -> ((a)*bcd)  | 
| 
2691
 | 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
224
 | 
             $concat = [  | 
| 
2692
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 0  | 
| 
2693
 | 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
75
 | 
               , [[ $tree_0, @{$$tree_1[1][0]} ]]  | 
| 
2694
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ];  | 
| 
2695
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2696
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
2697
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # (a)* (b|c)  -> ((a)*(b|c))  | 
| 
2698
 | 
54
 | 
 
 | 
 
 | 
 
 | 
 
 | 
163
 | 
             $concat = [0, [[ $tree_0, $tree_1 ]]];  | 
| 
2699
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2700
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2701
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     elsif (@{$$tree_0[1]} == 1) {  | 
| 
2702
 | 
1106
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1831
 | 
         if (ref($tree_1) eq CHAR_CLASS) {  | 
| 
 
 | 
859
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1855
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2703
 | 
247
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
399
 | 
             if (@$tree_1 == 0) {  | 
| 
2704
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # (ab)   -> (ab)  | 
| 
2705
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
                 $concat = $tree_0;  | 
| 
2706
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
2707
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
2708
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # (ab) c  -> (abc)  | 
| 
2709
 | 
246
 | 
 
 | 
 
 | 
 
 | 
 
 | 
801
 | 
                 $concat = [  | 
| 
2710
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     0  | 
| 
2711
 | 
246
 | 
 
 | 
 
 | 
 
 | 
 
 | 
247
 | 
                   , [[ @{$$tree_0[1][0]}, $tree_1 ]]  | 
| 
2712
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ];  | 
| 
2713
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
2714
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2715
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif (@{$$tree_1[1]} == 0) {  | 
| 
2716
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # (ab) ()  ->  (ab)  | 
| 
2717
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
             $concat = $tree_0;  | 
| 
2718
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2719
 | 
564
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1101
 | 
         elsif ($$tree_1[0]) {  | 
| 
2720
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # (ab) (c)*  ->  (ab(c)*)  | 
| 
2721
 | 
294
 | 
 
 | 
 
 | 
 
 | 
 
 | 
304
 | 
             $concat = [0, [[@{$$tree_0[1][0]}, $tree_1]]];  | 
| 
 
 | 
294
 | 
 
 | 
 
 | 
 
 | 
 
 | 
912
 | 
    | 
| 
2722
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2723
 | 
896
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1856
 | 
         elsif (@{$$tree_1[1]} == 1) {  | 
| 
2724
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # (ab) (cd)  -> (abcd)  | 
| 
2725
 | 
231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
305
 | 
             $concat = [  | 
| 
2726
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 0  | 
| 
2727
 | 
231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
238
 | 
               , [[ @{$$tree_0[1][0]}, @{$$tree_1[1][0]} ]]  | 
| 
 
 | 
231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
845
 | 
    | 
| 
2728
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ];  | 
| 
2729
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2730
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif (  | 
| 
2731
 | 
333
 | 
 
 | 
 
 | 
 
 | 
 
 | 
544
 | 
             !grep { ref($_) ne CHAR_CLASS } @{$$tree_0[1][0]}  | 
| 
2732
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ) {  | 
| 
2733
 | 
6
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
35
 | 
             if (  | 
| 
2734
 | 
18
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
59
 | 
                 $FULL_FACTORIZE_FIXES  | 
| 
2735
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
              || grep { ref($_) ne CHAR_CLASS && $$_[0] }  | 
| 
2736
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
                 map {@$_} @{$$tree_1[1]}  | 
| 
2737
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ) {  | 
| 
2738
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # (ab) (cd|ef)  -> (ab(cd|ef))  | 
| 
2739
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # neither a nor b is a tree  | 
| 
2740
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # one of cdef is starified  | 
| 
2741
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 $concat = [0, [[@{$$tree_0[1][0]}, $tree_1]]];  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
2742
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
2743
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
2744
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # (ab) (cd|ef)  -> (abcd|abef)  | 
| 
2745
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # neither a nor b is a tree  | 
| 
2746
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # none of cdef is starified  | 
| 
2747
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
47
 | 
                 $concat = [  | 
| 
2748
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     0  | 
| 
2749
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
                   , [ map { [ @{$$tree_0[1][0]}, @$_ ] } @{$$tree_1[1]} ]  | 
| 
 
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
    | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
    | 
| 
2750
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ];  | 
| 
2751
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
2752
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2753
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
2754
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # (ab) (cd|ef)  -> (ab(cd|ef))  | 
| 
2755
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # a or b is a tree  | 
| 
2756
 | 
327
 | 
 
 | 
 
 | 
 
 | 
 
 | 
431
 | 
             $concat = [0, [[@{$$tree_0[1][0]} , $tree_1 ]]];  | 
| 
 
 | 
327
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1078
 | 
    | 
| 
2757
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2758
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2759
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else {  | 
| 
2760
 | 
369
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
628
 | 
         if (ref($tree_1) eq CHAR_CLASS) {  | 
| 
 
 | 
330
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
803
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2761
 | 
39
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
82
 | 
             if (@$tree_1 == 0) {  | 
| 
2762
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # (ab|cd)   -> (ab|cd)  | 
| 
2763
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 $concat = $tree_0;  | 
| 
2764
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
2765
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
2766
 | 
39
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
121
 | 
                 if (  | 
| 
2767
 | 
226
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
633
 | 
                     $FULL_FACTORIZE_FIXES  | 
| 
2768
 | 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
235
 | 
                  || grep { ref($_) ne CHAR_CLASS && $$_[0] }  | 
| 
2769
 | 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
71
 | 
                     map {@$_} @{$$tree_0[1]}  | 
| 
2770
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ) {  | 
| 
2771
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     # (ab|cd) e  -> ((ab|cd)e)  | 
| 
2772
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     # one of abcd is starified  | 
| 
2773
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
                     $concat = [0, [[ $tree_0, $tree_1 ]]];  | 
| 
2774
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
2775
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 else {  | 
| 
2776
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     # (ab|cd) e  -> (abe|cde)  | 
| 
2777
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     # none of abcd is starified  | 
| 
2778
 | 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
294
 | 
                     $concat = [  | 
| 
2779
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         0  | 
| 
2780
 | 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
42
 | 
                       , [ map { [@$_, $tree_1] } @{$$tree_0[1]} ]  | 
| 
 
 | 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
55
 | 
    | 
| 
2781
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     ];  | 
| 
2782
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
2783
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
2784
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2785
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif (@{$$tree_1[1]} == 0) {  | 
| 
2786
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # (ab|cd) ()  ->  (ab|cd)  | 
| 
2787
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $concat = $tree_0;  | 
| 
2788
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2789
 | 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
259
 | 
         elsif ($$tree_1[0]) {  | 
| 
2790
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # (ab|cd) (e)*  ->  ((ab|cd)(e)*)  | 
| 
2791
 | 
250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
654
 | 
             $concat = [0, [[ $tree_0, $tree_1 ]]];  | 
| 
2792
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2793
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif (  | 
| 
2794
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             @{$$tree_1[1]} == 1  | 
| 
2795
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ) {  | 
| 
2796
 | 
8
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
9
 | 
             if (!grep { ref($_) ne CHAR_CLASS } @{$$tree_1[1][0]}) {  | 
| 
 
 | 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
43
 | 
    | 
| 
 
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
    | 
| 
2797
 | 
7
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
31
 | 
                 if (  | 
| 
2798
 | 
18
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
74
 | 
                     $FULL_FACTORIZE_FIXES  | 
| 
2799
 | 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
25
 | 
                  || grep { ref($_) ne CHAR_CLASS && $$_[0] }  | 
| 
2800
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
                     map {@$_} @{$$tree_0[1]}  | 
| 
2801
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ) {  | 
| 
2802
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     # (ab|cd) (ef)  -> ((ab|cd)ef)  | 
| 
2803
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     # e and f both CHAR_CLASS  | 
| 
2804
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     # one of abcd is starified  | 
| 
2805
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
                     $concat = [0, [[$tree_0, @{$$tree_1[1][0]}]]];  | 
| 
 
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
34
 | 
    | 
| 
2806
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
2807
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 else {  | 
| 
2808
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     # (ab|cd) (ef)  -> (acef|cdef)  | 
| 
2809
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     # e and f both CHAR_CLASS  | 
| 
2810
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     # none of abcd is starified  | 
| 
2811
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
                     $concat = [  | 
| 
2812
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         0  | 
| 
2813
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
                       , [ map { [@$_, @{$$tree_1[1][0]}] } @{$$tree_0[1]} ]  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
2814
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     ];  | 
| 
2815
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
2816
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
2817
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
2818
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # (ab|cd) (ef)  -> ((ab|cd)ef)  | 
| 
2819
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # e or f is a tree  | 
| 
2820
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
                 $concat = [0, [[$tree_0, @{$$tree_1[1][0]}]]];  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
2821
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
2822
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2823
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif ($TREE_CONCAT_FULL_EXPAND) {  | 
| 
2824
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # (ab|cd) (ef|gh)  -> (abef|abgh|cdef|cdgh)  | 
| 
2825
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $concat = [  | 
| 
2826
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 0  | 
| 
2827
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               , [  | 
| 
2828
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     map {  | 
| 
2829
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                        my $alt_0 = $_;  | 
| 
2830
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                        map { [@$alt_0, @$_] }  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
2831
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                        @{$$tree_1[1]}  | 
| 
2832
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
2833
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     @{$$tree_0[1]}  | 
| 
2834
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ]  | 
| 
2835
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ];  | 
| 
2836
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2837
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
2838
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # (ab|cd) (ef|gh)  -> ((ab|cd)(ef|gh))  | 
| 
2839
 | 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
210
 | 
             $concat = [0, [[ $tree_0, $tree_1 ]]];  | 
| 
2840
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2841
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2842
 | 
2214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5902
 | 
     return $concat;  | 
| 
2843
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
2844
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2845
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # concatenation regex0regex1...  | 
| 
2846
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub tree_concat {  | 
| 
2847
 | 
994
 | 
  
 50
  
 | 
 
 | 
  
994
  
 | 
  
0
  
 | 
2589
 | 
     if (@_ == 0) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2848
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         return $cc_none; # neutral element: accepting empty string  | 
| 
2849
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2850
 | 
2767
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4992
 | 
     elsif (@_ == 1) {  | 
| 
2851
 | 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
113
 | 
         return $_[0];  | 
| 
2852
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2853
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     elsif (grep {!defined($_)} @_) {  | 
| 
2854
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         return undef; # one accepting nothing -> concat accepting nothing  | 
| 
2855
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2856
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2857
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # resolve words first  | 
| 
2858
 | 
963
 | 
 
 | 
 
 | 
 
 | 
 
 | 
968
 | 
     my @word;  | 
| 
2859
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my @trees;  | 
| 
2860
 | 
963
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1404
 | 
     for (@_) {  | 
| 
2861
 | 
2767
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
4526
 | 
         if (ref($_) eq CHAR_CLASS) {  | 
| 
2862
 | 
752
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1272
 | 
             push(@word, $_);  | 
| 
2863
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2864
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
2865
 | 
2015
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
4259
 | 
             if (@word > 1) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2866
 | 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
81
 | 
                 push(@trees, [0, [[ @word ]] ] );  | 
| 
2867
 | 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
57
 | 
                 @word = ();  | 
| 
2868
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
2869
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             elsif (@word) {  | 
| 
2870
 | 
220
 | 
 
 | 
 
 | 
 
 | 
 
 | 
256
 | 
                 push(@trees, $word[0]);  | 
| 
2871
 | 
220
 | 
 
 | 
 
 | 
 
 | 
 
 | 
294
 | 
                 @word = ();  | 
| 
2872
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
2873
 | 
2015
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3181
 | 
             push(@trees, $_);  | 
| 
2874
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2875
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2876
 | 
963
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2220
 | 
     if (@word > 1) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2877
 | 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
264
 | 
         push(@trees, [0, [[ @word ]] ] );  | 
| 
2878
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2879
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     elsif (@word) {  | 
| 
2880
 | 
146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
186
 | 
         push(@trees, $word[0]);  | 
| 
2881
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2882
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2883
 | 
963
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1287
 | 
     my $concat = $trees[0];  | 
| 
2884
 | 
963
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1666
 | 
     for my $tree (@trees[1..$#trees]) {  | 
| 
2885
 | 
1542
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2184
 | 
         $concat = tree_concat2($concat, $tree);  | 
| 
2886
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2887
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2888
 | 
963
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2212
 | 
     return $concat;  | 
| 
2889
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
2890
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2891
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # alternation regex0|regex1|...  | 
| 
2892
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub tree_alt {  | 
| 
2893
 | 
819
 | 
 
 | 
 
 | 
  
819
  
 | 
  
0
  
 | 
823
 | 
     my @starified_alts;  | 
| 
2894
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my @non_starified_alts;  | 
| 
2895
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     my $has_empty;  | 
| 
2896
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2897
 | 
819
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1014
 | 
     for (grep { defined($_) } @_) {  | 
| 
 
 | 
1604
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3048
 | 
    | 
| 
2898
 | 
1604
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2721
 | 
         if (ref($_) eq CHAR_CLASS) {  | 
| 
 
 | 
1256
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
5696
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2899
 | 
348
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1675
 | 
             push(@non_starified_alts, [$_]);  | 
| 
2900
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2901
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif (!@{$$_[1]}) {  | 
| 
2902
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $has_empty = 1;  | 
| 
2903
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2904
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif ($$_[0]) {  | 
| 
2905
 | 
143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
127
 | 
             push(@starified_alts, @{$$_[1]});  | 
| 
 
 | 
143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
288
 | 
    | 
| 
2906
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2907
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
2908
 | 
1113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1326
 | 
             push(@non_starified_alts, @{$$_[1]});  | 
| 
 
 | 
1113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2586
 | 
    | 
| 
2909
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2910
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2911
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2912
 | 
819
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1795
 | 
     if (!@starified_alts) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2913
 | 
747
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
2223
 | 
         if (  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2914
 | 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
122
 | 
             @non_starified_alts > 1  | 
| 
2915
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          || $has_empty  | 
| 
2916
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          || @non_starified_alts && @{$non_starified_alts[0]} > 1  | 
| 
2917
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ) {  | 
| 
2918
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             return [  | 
| 
2919
 | 
712
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
3773
 | 
                 0  | 
| 
2920
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               , [  | 
| 
2921
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     @non_starified_alts  | 
| 
2922
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   , ($has_empty ? [[0, []]] : ())  | 
| 
2923
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ]  | 
| 
2924
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ];  | 
| 
2925
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2926
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif (!@non_starified_alts) {  | 
| 
2927
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             return undef; # neutral element: accepting nothing  | 
| 
2928
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2929
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
2930
 | 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
163
 | 
             return $non_starified_alts[0][0];  | 
| 
2931
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
2932
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2933
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2934
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     elsif (!@non_starified_alts) {  | 
| 
2935
 | 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
351
 | 
         return [1, \@starified_alts];  | 
| 
2936
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2937
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else {  | 
| 
2938
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         return [  | 
| 
2939
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
             0  | 
| 
2940
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           , [  | 
| 
2941
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 @non_starified_alts  | 
| 
2942
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               , [[1, \@starified_alts]]  | 
| 
2943
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ]  | 
| 
2944
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ];  | 
| 
2945
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2946
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
2947
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2948
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2949
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # returns an unachored $ere having exactly the same structure  | 
| 
2950
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # as the given $tree. Intended for tracing/debugging.  | 
| 
2951
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub tree_dump {  | 
| 
2952
 | 
131
 | 
 
 | 
 
 | 
  
131
  
 | 
  
0
  
 | 
294
 | 
     my ($tree) = @_;  | 
| 
2953
 | 
131
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
265
 | 
     if (!defined($_[0])) {  | 
| 
2954
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # nothing accepted (not even the empty string)  | 
| 
2955
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         return '$.';  | 
| 
2956
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2957
 | 
131
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
281
 | 
     if (ref($tree) eq CHAR_CLASS) {  | 
| 
 
 | 
83
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
181
 | 
    | 
| 
2958
 | 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
83
 | 
         return cc_to_regex($tree);  | 
| 
2959
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2960
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     elsif (@{$$tree[1]} == 0) {  | 
| 
2961
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
76
 | 
         return '()';  | 
| 
2962
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2963
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else {  | 
| 
2964
 | 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
71
 | 
         return join(''  | 
| 
2965
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           , '('  | 
| 
2966
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           , (  | 
| 
2967
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 join('|',  | 
| 
2968
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     map {  | 
| 
2969
 | 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
107
 | 
                         my $alt = $_;  | 
| 
2970
 | 
132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
148
 | 
                         join('',  | 
| 
2971
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             map {  | 
| 
2972
 | 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
92
 | 
                                 my $atom = $_;  | 
| 
2973
 | 
132
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
265
 | 
                                 if (ref($atom) eq CHAR_CLASS) {  | 
| 
2974
 | 
126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
219
 | 
                                     cc_to_regex($atom);  | 
| 
2975
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                 }  | 
| 
2976
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                 else {  | 
| 
2977
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
                                     tree_dump($atom);  | 
| 
2978
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                 }  | 
| 
2979
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             }  | 
| 
2980
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             @$alt  | 
| 
2981
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         )  | 
| 
2982
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
2983
 | 
63
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
94
 | 
                     @{$$tree[1]}  | 
| 
2984
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 )  | 
| 
2985
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             )  | 
| 
2986
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           , ')'  | 
| 
2987
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           , ($$tree[0] ? '*' : ())  | 
| 
2988
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         );  | 
| 
2989
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2990
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
2991
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2992
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2993
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ##############################################################################  | 
| 
2994
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # $input_constraints  | 
| 
2995
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ##############################################################################  | 
| 
2996
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2997
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 use constant {  | 
| 
2998
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
33596
 | 
     FREE_TEXT => 'free text'  | 
| 
2999
 | 
7
 | 
 
 | 
 
 | 
  
7
  
 | 
 
 | 
217
 | 
 };  | 
| 
 
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
30
 | 
    | 
| 
3000
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3001
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
3002
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3003
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 Input constraints  | 
| 
3004
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3005
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $input_constraints = [ $input_constraint_0, $input_constraint_1, ... ]  | 
| 
3006
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $input_constraint  = [ 'word_0', 'word_1', ... ]  (drop down)  | 
| 
3007
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     or 'free_text'                  (free text)  | 
| 
3008
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3009
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3010
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
3011
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3012
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item C  | 
| 
3013
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3014
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Converts a C<$tree> to a pair C<($input_constraints, $split_str)>.  | 
| 
3015
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3016
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C<$split_perlre> is a compiled perl regular expression splitting a string  | 
| 
3017
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 according to C<$input_constraints>. This C<$perlre> matches if and only if  | 
| 
3018
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 each drop down can be assigned a value; then C<$str =~ $perlre> in list  | 
| 
3019
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 context returns as many values as C<@$input_constraints>.  | 
| 
3020
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3021
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
3022
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3023
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub tree_to_input_constraints {  | 
| 
3024
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
  
1
  
 | 
13
 | 
     my ($input_constraints, $perlres) = &_tree_to_input_constraints;  | 
| 
3025
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3026
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # concat free texts and stronger underlying regexs  | 
| 
3027
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
     my @previous_undefs;  | 
| 
3028
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my @kept;  | 
| 
3029
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
     for my $i (0..$#$input_constraints) {  | 
| 
3030
 | 
15
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
30
 | 
         if ($$input_constraints[$i] eq FREE_TEXT) {  | 
| 
3031
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
             push(@previous_undefs, $i);  | 
| 
3032
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
3033
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
3034
 | 
11
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
30
 | 
             if (@previous_undefs) {  | 
| 
3035
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
                 push(@kept, $i-1);  | 
| 
3036
 | 
4
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
8
 | 
                 if (@previous_undefs > 1) {  | 
| 
3037
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     $$perlres[$i-1] = join('',  | 
| 
3038
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                         map { '(?:' . $$perlres[$_] . ')' }  | 
| 
3039
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         @previous_undefs  | 
| 
3040
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     );  | 
| 
3041
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
3042
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
                 @previous_undefs = ();  | 
| 
3043
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
3044
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
26
 | 
             push(@kept, $i);  | 
| 
3045
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
3046
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3047
 | 
5
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
14
 | 
     if (@previous_undefs) {  | 
| 
3048
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         push(@kept, $#$input_constraints);  | 
| 
3049
 | 
0
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         if (@previous_undefs > 1) {  | 
| 
3050
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $$perlres[$#$input_constraints] = join('',  | 
| 
3051
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 map { '(?:' . $$perlres[$_] . ')' }  | 
| 
3052
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 @previous_undefs  | 
| 
3053
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             );  | 
| 
3054
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
3055
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3056
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
     @$input_constraints = @$input_constraints[@kept];  | 
| 
3057
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
     @$perlres = @$perlres[@kept];  | 
| 
3058
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3059
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # sort words, remove duplicates  | 
| 
3060
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
     for (grep { $_ ne FREE_TEXT } @$input_constraints) {  | 
| 
 
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
32
 | 
    | 
| 
3061
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
         $_ = [ sort(keys(%{ { map { ($_ => $_) } @$_ } })) ];  | 
| 
 
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
    | 
| 
 
 | 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
103
 | 
    | 
| 
3062
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3063
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3064
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # remove empty words  | 
| 
3065
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # concat single words  | 
| 
3066
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
     my @previous_singles;  | 
| 
3067
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
     @kept = ();  | 
| 
3068
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
     for my $i (0..$#$input_constraints) {  | 
| 
3069
 | 
15
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
40
 | 
         if (  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3070
 | 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
39
 | 
             $$input_constraints[$i] eq FREE_TEXT  | 
| 
3071
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          || @{$$input_constraints[$i]} > 1  | 
| 
3072
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
25
 | 
         ) {  | 
| 
3073
 | 
10
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
20
 | 
             if (@previous_singles) {  | 
| 
3074
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
                 push(@kept, $i-1);  | 
| 
3075
 | 
3
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
8
 | 
                 if (@previous_singles > 1) {  | 
| 
3076
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     $$perlres[$i-1] = join('',  | 
| 
3077
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                         map { $$perlres[$_] }  | 
| 
3078
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         @previous_singles  | 
| 
3079
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     );  | 
| 
3080
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     $$input_constraints[$i-1] = join('',  | 
| 
3081
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                         map { $$input_constraints[$_][0] }  | 
| 
3082
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         @previous_singles  | 
| 
3083
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     );  | 
| 
3084
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
3085
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
                 @previous_singles = ();  | 
| 
3086
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
3087
 | 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
             push(@kept, $i);  | 
| 
3088
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
3089
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif (  | 
| 
3090
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             @{$$input_constraints[$i]} == 1  | 
| 
3091
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          && length($$input_constraints[$i][0])  | 
| 
3092
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ) {  | 
| 
3093
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
             push(@previous_singles, $i);  | 
| 
3094
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
3095
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3096
 | 
5
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
18
 | 
     if (@previous_singles) {  | 
| 
3097
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
         push(@kept, $#$input_constraints);  | 
| 
3098
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
8
 | 
         if (@previous_singles > 1) {  | 
| 
3099
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $$perlres[$#$input_constraints] = join('',  | 
| 
3100
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 map { $$perlres[$_] }  | 
| 
3101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 @previous_singles  | 
| 
3102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             );  | 
| 
3103
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $$input_constraints[$#$input_constraints] = join('',  | 
| 
3104
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                 map { $$input_constraints[$_][0] }  | 
| 
3105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 @previous_singles  | 
| 
3106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             );  | 
| 
3107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
3108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3109
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
26
 | 
     @$input_constraints = @$input_constraints[@kept];  | 
| 
3110
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
     @$perlres = @$perlres[@kept];  | 
| 
3111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3112
 | 
5
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
12
 | 
     if (!@$input_constraints) {  | 
| 
3113
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         @$input_constraints = (['']);  | 
| 
3114
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         @$perlres = ('');  | 
| 
3115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3117
 | 
15
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
59
 | 
     my $split_perlre  | 
| 
3118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       = join('',  | 
| 
3119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             map {  | 
| 
3120
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
                 $$input_constraints[$_] eq FREE_TEXT  | 
| 
3121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   ? "($$perlres[$_]|.*?)"  | 
| 
3122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   : "($$perlres[$_])"  | 
| 
3123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
3124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             (0..$#$perlres)  | 
| 
3125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         )  | 
| 
3126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ;  | 
| 
3127
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
391
 | 
     return ($input_constraints, qr/\A$split_perlre\z/ms);  | 
| 
3128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
3129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
3131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my %cc_to_input_constraint_cache;  | 
| 
3133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # returns ($input_constraints, $perlres)  | 
| 
3135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # two references to arrays of the same size.  | 
| 
3136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     sub _tree_to_input_constraints {  | 
| 
3137
 | 
14
 | 
 
 | 
 
 | 
  
14
  
 | 
 
 | 
19
 | 
         my ($tree) = @_;  | 
| 
3138
 | 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
         my $input_constraints;  | 
| 
3139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my $perlres;  | 
| 
3140
 | 
14
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
42
 | 
         if (!defined($tree)) {  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # regex accepting nothing -> free text (always rejected)  | 
| 
3142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3143
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $input_constraints = [FREE_TEXT];  | 
| 
3144
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $perlres = ['$.'];  | 
| 
3145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
3146
 | 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
41
 | 
         elsif (ref($tree) eq CHAR_CLASS) {  | 
| 
3147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # single character class -> drop down  | 
| 
3148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3149
 | 
0
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
             $input_constraints = [  | 
| 
3150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 $cc_to_input_constraint_cache{$tree}  | 
| 
3151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ||= cc_to_input_constraint($tree)  | 
| 
3152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ];  | 
| 
3153
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $perlres = [_tree_to_regex($tree, 1)];  | 
| 
3154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
3155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif (@{$$tree[1]} == 0) {  | 
| 
3156
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # no top-level alternation  | 
| 
3157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3158
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $input_constraints = [['']];  | 
| 
3159
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $perlres = [_tree_to_regex($tree, 1)];  | 
| 
3160
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
3161
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
27
 | 
         elsif ($$tree[0]) {  | 
| 
3162
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # starified regex -> free text  | 
| 
3163
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3164
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
             $input_constraints = [FREE_TEXT];  | 
| 
3165
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
             $perlres = [_tree_to_regex($tree, 1)];  | 
| 
3166
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
3167
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif (@{$$tree[1]} == 1) {  | 
| 
3168
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # single top-level alternation -> mixed results  | 
| 
3169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # example: ab*c(d|e)f  | 
| 
3170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3171
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
             $input_constraints = [];  | 
| 
3172
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
             $perlres = [];  | 
| 
3173
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3174
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
             my $i = 0;  | 
| 
3175
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
             while ($i != @{$$tree[1][0]}) {  | 
| 
 
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
37
 | 
    | 
| 
3176
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
                 my $beg = $i;  | 
| 
3177
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
                 my @expanded_words = ('');  | 
| 
3178
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
                 my $cc;  | 
| 
3179
 | 
12
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
11
 | 
                 while (  | 
| 
 
 | 
 
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3180
 | 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4896
 | 
                     $i != @{$$tree[1][0]}  | 
| 
3181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                  && ref($cc = $$tree[1][0][$i]) eq CHAR_CLASS  | 
| 
3182
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                  && (!@$cc || $$cc[-1][1] != MAX_CHAR)  | 
| 
3183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ) {  | 
| 
3184
 | 
17
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
52
 | 
                     my $input_constraint  | 
| 
3185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                       = $cc_to_input_constraint_cache{$cc}  | 
| 
3186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     ||= cc_to_input_constraint($cc)  | 
| 
3187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     ;  | 
| 
3188
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3189
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     @expanded_words  | 
| 
3190
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
                       = map {  | 
| 
3191
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
27
 | 
                             my $letter = $_;  | 
| 
3192
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
                             map { $_ . $letter }  | 
| 
 
 | 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
60
 | 
    | 
| 
3193
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             @expanded_words  | 
| 
3194
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         }  | 
| 
3195
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         @$input_constraint  | 
| 
3196
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     ;  | 
| 
3197
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
27
 | 
                     $i++;  | 
| 
3198
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
3199
 | 
12
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
53
 | 
                 if ($beg < $i && length($expanded_words[0])) {  | 
| 
3200
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
24
 | 
                     my $wrd_perlre = _tree_to_regex(  | 
| 
3201
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         [  | 
| 
3202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             0  | 
| 
3203
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
                           , [[ @{$$tree[1][0]}[$beg..$i-1] ]]  | 
| 
3204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         ]  | 
| 
3205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                       , 1  | 
| 
3206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     );  | 
| 
3207
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
                     push(@$input_constraints, \@expanded_words);  | 
| 
3208
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
                     push(@$perlres, $wrd_perlre);  | 
| 
3209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
3210
 | 
12
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
14
 | 
                 if ($i < @{$$tree[1][0]}) {  | 
| 
 
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
30
 | 
    | 
| 
3211
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
39
 | 
                     my ($sub_input_constraints, $sub_perlres)  | 
| 
3212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                       = _tree_to_input_constraints($$tree[1][0][$i]);  | 
| 
3213
 | 
9
 | 
  
 50
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
67
 | 
                     if (  | 
| 
 
 | 
 
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         @$sub_input_constraints  | 
| 
3215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                      && (  | 
| 
3216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             $$sub_input_constraints[0] eq FREE_TEXT  | 
| 
3217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                          || length($$sub_input_constraints[0][0])  | 
| 
3218
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         )  | 
| 
3219
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     ) {  | 
| 
3220
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
                         push(@$input_constraints, @$sub_input_constraints);  | 
| 
3221
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
                         push(@$perlres, @$sub_perlres);  | 
| 
3222
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
3223
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
                     $i++;  | 
| 
3224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
3225
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
3226
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
3227
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
3228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             # multiple top-level alternations  | 
| 
3229
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3230
 | 
7
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
7
 | 
             if (  | 
| 
3231
 | 
107
 | 
  
100
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
476
 | 
                 grep { grep {  | 
| 
 
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
    | 
| 
3232
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
                     ref($_) ne CHAR_CLASS  | 
| 
3233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                  || (@$_ && $$_[$#$_][1] == MAX_CHAR)  | 
| 
3234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 } @$_ }  | 
| 
3235
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 @{$$tree[1]}  | 
| 
3236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             ) {  | 
| 
3237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # some alternation contains a sub-tree -> mixed results  | 
| 
3238
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # example: abd|ab*d  | 
| 
3239
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # common pre/suf-fixes are factorized out  | 
| 
3240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # example: a(bd|b*)d  | 
| 
3241
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3242
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
                 my $fst_len = @{$$tree[1][0]};  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
    | 
| 
3243
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
                 my ($pre_len, $suf_len) = (0, 0);  | 
| 
3244
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
                 for (1, 0) {  | 
| 
3245
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
                     my ($len_ref, @range)  | 
| 
3246
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                       = $_  | 
| 
3247
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                       ? (\$pre_len, (0..$fst_len-1))  | 
| 
3248
 | 
4
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
14
 | 
                       : (\$suf_len, map {-$_} (1..$fst_len-$pre_len))  | 
| 
3249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     ;  | 
| 
3250
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
                     for my $i (@range) {  | 
| 
3251
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
3
 | 
                         if (  | 
| 
3252
 | 
6
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
40
 | 
                             grep {  | 
| 
3253
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
                                 $i >= @$_  | 
| 
3254
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                              || ref($$_[$i]) ne CHAR_CLASS  | 
| 
3255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                              || $$tree[1][0][$i] != $$_[$i]  | 
| 
3256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             }  | 
| 
3257
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
                             @{$$tree[1]}[0..$#{$$tree[1]}]  | 
| 
3258
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         ) {  | 
| 
3259
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
                             last;  | 
| 
3260
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         }  | 
| 
3261
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                         $$len_ref++;  | 
| 
3262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
3263
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
3264
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
5
 | 
                 if ($pre_len) {  | 
| 
3265
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     my ($pre_input_constraints, $pre_perlres)  | 
| 
3266
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   = _tree_to_input_constraints(  | 
| 
3267
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         [  | 
| 
3268
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             0  | 
| 
3269
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                           , [[ @{$$tree[1][0]}[0..$pre_len-1] ]]  | 
| 
3270
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         ]  | 
| 
3271
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     );  | 
| 
3272
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     push(@$input_constraints, @$pre_input_constraints);  | 
| 
3273
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     push(@$perlres, @$pre_perlres);  | 
| 
3274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
3275
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3276
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
4
 | 
                 if (  | 
| 
3277
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     my @mid_alts  | 
| 
3278
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
                   = map { [ @$_[$pre_len..$#$_-$suf_len] ] }  | 
| 
 
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
3279
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     @{$$tree[1]}  | 
| 
3280
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 ) {  | 
| 
3281
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
                     push(@$input_constraints, FREE_TEXT);  | 
| 
3282
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
                     push(@$perlres, _tree_to_regex([ 0, \@mid_alts ] , 1));  | 
| 
3283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
3284
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3285
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
9
 | 
                 if ($suf_len) {  | 
| 
3286
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     my ($suf_input_constraints, $suf_perlres)  | 
| 
3287
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   = _tree_to_input_constraints(  | 
| 
3288
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         [  | 
| 
3289
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             0  | 
| 
3290
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                           , [[  | 
| 
3291
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                                 @{$$tree[1][0]}  | 
| 
3292
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                 [$fst_len-$suf_len..$fst_len-1]  | 
| 
3293
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             ]]  | 
| 
3294
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         ]  | 
| 
3295
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     );  | 
| 
3296
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     push(@$input_constraints, @$suf_input_constraints);  | 
| 
3297
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     push(@$perlres, @$suf_perlres);  | 
| 
3298
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
3299
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
3300
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
3301
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # each alternation contains only non negated char classes  | 
| 
3302
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 # -> drop down  | 
| 
3303
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3304
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
                 $perlres = [_tree_to_regex($tree, 1)];  | 
| 
3305
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
                 for my $word (@{$$tree[1]}) {  | 
| 
 
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
    | 
| 
3306
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
                     my @expanded_words = ('');  | 
| 
3307
 | 
12
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
20
 | 
                     for my $input_constraint (  | 
| 
 
 | 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
243
 | 
    | 
| 
3308
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         map {  | 
| 
3309
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             $cc_to_input_constraint_cache{$_}  | 
| 
3310
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         ||= cc_to_input_constraint($_);  | 
| 
3311
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         }  | 
| 
3312
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         @$word  | 
| 
3313
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     ) {  | 
| 
3314
 | 
85
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
115
 | 
                         if (@$input_constraint == 1) {  | 
| 
3315
 | 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
95
 | 
                             for (@expanded_words) {  | 
| 
3316
 | 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
164
 | 
                                 $_ .= $$input_constraint[0];  | 
| 
3317
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             }  | 
| 
3318
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         }  | 
| 
3319
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         else {  | 
| 
3320
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             @expanded_words  | 
| 
3321
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                               = map {  | 
| 
3322
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                                     my $letter = $_;  | 
| 
3323
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                                     map { $_ . $letter }  | 
| 
 
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
3324
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                     @expanded_words  | 
| 
3325
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                 }  | 
| 
3326
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                 @$input_constraint  | 
| 
3327
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             ;  | 
| 
3328
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         }  | 
| 
3329
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
3330
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
                     push(@{$$input_constraints[0]}, @expanded_words);  | 
| 
 
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
32
 | 
    | 
| 
3331
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
3332
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
3333
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
3334
 | 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
33
 | 
         return ($input_constraints, $perlres);  | 
| 
3335
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3336
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
3337
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3338
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub cc_to_input_constraint {  | 
| 
3339
 | 
26
 | 
 
 | 
 
 | 
  
26
  
 | 
  
0
  
 | 
32
 | 
     my ($cc) = @_;  | 
| 
3340
 | 
26
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
67
 | 
     if (@$cc == 0) {  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3341
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         return [''];  | 
| 
3342
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3343
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     elsif ($$cc[$#$cc][1] == MAX_CHAR) {  | 
| 
3344
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         return FREE_TEXT;  | 
| 
3345
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3346
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else {  | 
| 
3347
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         return [  | 
| 
3348
 | 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
32
 | 
             map { map { chr($_) } ($$_[0]..$$_[1]) }  | 
| 
 
 | 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
38
 | 
    | 
| 
 
 | 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
139
 | 
    | 
| 
3349
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             @$cc  | 
| 
3350
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ];  | 
| 
3351
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3352
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
3353
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3354
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3355
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ##############################################################################  | 
| 
3356
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # $ere  | 
| 
3357
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ##############################################################################  | 
| 
3358
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3359
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
3360
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3361
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 Ere  | 
| 
3362
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3363
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 An C<$ere> is a perl string.  | 
| 
3364
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3365
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The syntax an C<$ere> is assumed to follow is based on POSIX ERE  | 
| 
3366
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (else the C routines will C).  | 
| 
3367
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3368
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Unsupported POSIX features:  | 
| 
3369
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 back-references,  | 
| 
3370
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 equivalence classes C<[[=a=]]>,  | 
| 
3371
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 character class C<[[:digit:]]>,  | 
| 
3372
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 collating symbols C<[[.ch.]]>.  | 
| 
3373
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3374
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C<)> is always a special character. POSIX says that C<)> is a normal  | 
| 
3375
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 character if there is no matching C<(>.  | 
| 
3376
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3377
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 There is no escape sequences such as C<\t> for tab or C<\n> for line feed.  | 
| 
3378
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 POSIX does not specify such escape sequences neither.  | 
| 
3379
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3380
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C<\> before a non-special character is ignored  | 
| 
3381
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (except in bracket expressions). POSIX does not allow it.  | 
| 
3382
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3383
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The empty string is legal in alternations (C<(|a)> is equivalent to C<(a?)>).  | 
| 
3384
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 POSIX does not allow it.  | 
| 
3385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The C<(|a)> form is generated by the C routines  | 
| 
3386
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (avoiding quantifiers other than C<*>).  | 
| 
3387
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3388
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C<[a-l-z]> is interpreted as C<([a-l] | - | z)> (but it is discouraged to  | 
| 
3389
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 rely upon this implementation artefact). POSIX says that the interpretation  | 
| 
3390
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 of this construct is undefined.  | 
| 
3391
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3392
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 In bracket expressions, C<\> is a normal character,  | 
| 
3393
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 thus C<]> as character must occur first, or second after a C<^>  | 
| 
3394
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (POSIX compliant, but possibly surprising for perl programmers).  | 
| 
3395
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3396
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 All unicode characters supported by perl are allowed as litteral characters.  | 
| 
3397
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3398
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
3399
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3400
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item C  | 
| 
3401
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3402
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Parses an C<$ere> to a C<$nfa>.  | 
| 
3403
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3404
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 WARNING: the parsing routines, in particular C,  | 
| 
3405
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C on syntax errors; thus the caller may want to eval-trap such errors.  | 
| 
3406
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3407
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
3408
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3409
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub ere_to_nfa {  | 
| 
3410
 | 
187
 | 
 
 | 
 
 | 
  
187
  
 | 
  
1
  
 | 
767628
 | 
     my ($ere, $has_anchor_ref) = @_;  | 
| 
3411
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3412
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # optimize very first and very last anchors  | 
| 
3413
 | 
187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1108
 | 
     my $has_beg_anchor = $ere =~ s/^\^+//;  | 
| 
3414
 | 
187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1064
 | 
     my $has_end_anchor = $ere =~ s/\$+$//;  | 
| 
3415
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3416
 | 
187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
295
 | 
     $$has_anchor_ref = 0;  | 
| 
3417
 | 
187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
210
 | 
     my @alternation_nfas;  | 
| 
3418
 | 
187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
328
 | 
     do {  | 
| 
3419
 | 
187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
473
 | 
         push(@alternation_nfas, parse_alternation(\$ere, $has_anchor_ref));  | 
| 
3420
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } while($ere =~ /\G \| /xmsgc);  | 
| 
3421
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3422
 | 
187
 | 
  
 50
  
 | 
  
100
  
 | 
 
 | 
 
 | 
848
 | 
     if ((pos($ere) || 0) != length($ere)) {  | 
| 
3423
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         parse_die("unexpected character", \$ere);  | 
| 
3424
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3425
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3426
 | 
187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
202
 | 
     my $nfa;  | 
| 
3427
 | 
187
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
545
 | 
     if (!$has_beg_anchor && !$has_end_anchor) {  | 
| 
3428
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         # a|b|c => ^.*(a|b|c).*$  | 
| 
3429
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3430
 | 
10
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
83
 | 
         $nfa = nfa_concat(  | 
| 
3431
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             [[1, [[$cc_any, 0]]]]  | 
| 
3432
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           , @alternation_nfas == 1  | 
| 
3433
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               ? $alternation_nfas[0]  | 
| 
3434
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               : nfa_union(@alternation_nfas)  | 
| 
3435
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           , [[1, [[$cc_any, 0]]]]  | 
| 
3436
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         );  | 
| 
3437
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3438
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else {  | 
| 
3439
 | 
177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
615
 | 
         for my $alternation_nfa (@alternation_nfas[1..$#alternation_nfas-1]) {  | 
| 
3440
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             $alternation_nfa = nfa_concat(  | 
| 
3441
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 [[1, [[$cc_any, 0]]]]  | 
| 
3442
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               , $alternation_nfa  | 
| 
3443
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               , [[1, [[$cc_any, 0]]]]  | 
| 
3444
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             );  | 
| 
3445
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
3446
 | 
177
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
930
 | 
         if (!$has_beg_anchor || @alternation_nfas > 1) {  | 
| 
3447
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
50
 | 
             $alternation_nfas[0] = nfa_concat(  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3448
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 !$has_beg_anchor ? [[1, [[$cc_any, 0]]]] : ()  | 
| 
3449
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               , $alternation_nfas[0]  | 
| 
3450
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               , @alternation_nfas > 1 ? [[1, [[$cc_any, 0]]]] : ()  | 
| 
3451
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             );  | 
| 
3452
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
3453
 | 
177
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
881
 | 
         if (!$has_end_anchor || @alternation_nfas > 1) {  | 
| 
3454
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
15
 | 
             $alternation_nfas[-1] = nfa_concat(  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3455
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 @alternation_nfas > 1 ? [[1, [[$cc_any, 0]]]] : ()  | 
| 
3456
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               , $alternation_nfas[-1]  | 
| 
3457
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               , !$has_end_anchor ? [[1, [[$cc_any, 0]]]] : ()  | 
| 
3458
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             );  | 
| 
3459
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
3460
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         $nfa  | 
| 
3461
 | 
177
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
380
 | 
           = @alternation_nfas == 1  | 
| 
3462
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           ? $alternation_nfas[0]  | 
| 
3463
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           : nfa_union(@alternation_nfas)  | 
| 
3464
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         ;  | 
| 
3465
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3466
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3467
 | 
187
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
895
 | 
     return $$has_anchor_ref ? nfa_resolve_anchors($nfa) : $nfa;  | 
| 
3468
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
3469
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3470
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub _ere_to_nfa {  | 
| 
3471
 | 
218
 | 
 
 | 
 
 | 
  
218
  
 | 
 
 | 
299
 | 
     my ($str_ref, $has_anchor_ref) = @_;  | 
| 
3472
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3473
 | 
218
 | 
 
 | 
 
 | 
 
 | 
 
 | 
239
 | 
     my @alternation_nfas;  | 
| 
3474
 | 
218
 | 
 
 | 
 
 | 
 
 | 
 
 | 
260
 | 
     do {  | 
| 
3475
 | 
371
 | 
 
 | 
 
 | 
 
 | 
 
 | 
780
 | 
         push(@alternation_nfas, parse_alternation($str_ref, $has_anchor_ref));  | 
| 
3476
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } while($$str_ref =~ /\G \| /xmsgc);  | 
| 
3477
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3478
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return  | 
| 
3479
 | 
218
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
657
 | 
         @alternation_nfas == 1  | 
| 
3480
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       ? $alternation_nfas[0]  | 
| 
3481
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       : nfa_union(@alternation_nfas)  | 
| 
3482
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ;  | 
| 
3483
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
3484
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3485
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub bracket_expression_to_cc {  | 
| 
3486
 | 
82
 | 
 
 | 
 
 | 
  
82
  
 | 
  
0
  
 | 
121
 | 
     my ($str_ref) = @_;  | 
| 
3487
 | 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
262
 | 
     my $neg = $$str_ref =~ /\G \^/xmsgc;  | 
| 
3488
 | 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
115
 | 
     my $interval_list = [];  | 
| 
3489
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3490
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     # anything is allowd a first char, in particular ']' and '-'  | 
| 
3491
 | 
82
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
331
 | 
     if ($$str_ref =~ /\G (.) - ([^]]) /xmsgc) {  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3492
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
42
 | 
         push(@$interval_list, [ord($1), ord($2)]);  | 
| 
3493
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3494
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     elsif ($$str_ref =~ /\G (.) /xmsgc) {  | 
| 
3495
 | 
70
 | 
 
 | 
 
 | 
 
 | 
 
 | 
266
 | 
         push(@$interval_list, [ord($1), ord($1)]);  | 
| 
3496
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3497
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3498
 | 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
106
 | 
     my $loop = 1;  | 
| 
3499
 | 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
153
 | 
     while ($loop) {  | 
| 
3500
 | 
148
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
567
 | 
         if ($$str_ref =~ /\G ([^]]) - ([^]]) /xmsgc) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3501
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             push(@$interval_list, [ord($1), ord($2)]);  | 
| 
3502
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
3503
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif ($$str_ref =~ /\G ([^]]) /xmsgc) {  | 
| 
3504
 | 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
203
 | 
             push(@$interval_list, [ord($1), ord($1)]);  | 
| 
3505
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
3506
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
3507
 | 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
184
 | 
             $loop = 0;  | 
| 
3508
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
3509
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3510
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3511
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return  | 
| 
3512
 | 
82
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
202
 | 
         $neg  | 
| 
3513
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       ? cc_neg(interval_list_to_cc($interval_list))  | 
| 
3514
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       : interval_list_to_cc($interval_list)  | 
| 
3515
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ;  | 
| 
3516
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
3517
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3518
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Returns:  | 
| 
3519
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #   - the empty list iff no quantification has been parsed  | 
| 
3520
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #   - a 2-tuple ($min, $max)  | 
| 
3521
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #         either $max is the empty string  | 
| 
3522
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #         or $min <= $max  | 
| 
3523
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub parse_quant {  | 
| 
3524
 | 
245
 | 
 
 | 
 
 | 
  
245
  
 | 
  
0
  
 | 
405
 | 
     my ($str_ref) = @_;  | 
| 
3525
 | 
245
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
809
 | 
     if ($$str_ref =~ /\G \* /xmsgc) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3526
 | 
223
 | 
 
 | 
 
 | 
 
 | 
 
 | 
558
 | 
         return (0, '');  | 
| 
3527
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3528
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     elsif ($$str_ref =~ /\G \+ /xmsgc) {  | 
| 
3529
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
         return (1, '');  | 
| 
3530
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3531
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     elsif ($$str_ref =~ /\G \? /xmsgc) {  | 
| 
3532
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
         return (0, 1);  | 
| 
3533
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3534
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     elsif ($$str_ref =~ /\G \{ /xmsgc) {  | 
| 
3535
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
11
 | 
         my ($min, $max);  | 
| 
3536
 | 
7
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
25
 | 
         if ($$str_ref =~ /\G ( [0-9]+ ) /xmsgc) {  | 
| 
3537
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
218
 | 
             $min = $1;  | 
| 
3538
 | 
7
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
23
 | 
             if ($$str_ref =~ /\G , ([0-9]*) /xmsgc) {  | 
| 
3539
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
                 $max = $1; # may be ''  | 
| 
3540
 | 
6
 | 
  
 50
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
33
 | 
                 if (length($max) && $min > $max) {  | 
| 
3541
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     parse_die("$min > $max", $str_ref);  | 
| 
3542
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
3543
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
3544
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             else {  | 
| 
3545
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
                 $max = $min;  | 
| 
3546
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
3547
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
3548
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
3549
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             parse_die('number expected', $str_ref);  | 
| 
3550
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
3551
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3552
 | 
7
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
25
 | 
         if ($$str_ref !~ /\G \} /xmsgc) {  | 
| 
3553
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
             parse_die('} expected', $str_ref);  | 
| 
3554
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
3555
 | 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
         return ($min, $max);  | 
| 
3556
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3557
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else {  | 
| 
3558
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
         return;  | 
| 
3559
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3560
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
3561
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3562
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
3563
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     my %char_to_cc_cache;  | 
| 
3564
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     sub parse_alternation {  | 
| 
3565
 | 
558
 | 
 
 | 
 
 | 
  
558
  
 | 
  
0
  
 | 
693
 | 
         my ($str_ref, $has_anchor_ref) = @_;  | 
| 
3566
 | 
558
 | 
 
 | 
 
 | 
 
 | 
 
 | 
574
 | 
         my @all_nfas;  | 
| 
3567
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         my $loop;  | 
| 
3568
 | 
558
 | 
 
 | 
 
 | 
 
 | 
 
 | 
570
 | 
         do {  | 
| 
3569
 | 
929
 | 
 
 | 
 
 | 
 
 | 
 
 | 
936
 | 
             $loop = 0;  | 
| 
3570
 | 
929
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1265
 | 
             my $nfa = [];  | 
| 
3571
 | 
929
 | 
 
 | 
 
 | 
 
 | 
 
 | 
994
 | 
             my $next_state_index = 1;  | 
| 
3572
 | 
929
 | 
 
 | 
 
 | 
 
 | 
 
 | 
972
 | 
             while (1) {  | 
| 
3573
 | 
1507
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
8480
 | 
                 if ($$str_ref =~ /\G ( $ERE_litteral + ) /xmsogc) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3574
 | 
944
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
5085
 | 
                     push(@$nfa,  | 
| 
3575
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         map {  | 
| 
3576
 | 
440
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1387
 | 
                             [ 0, [[  | 
| 
3577
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                 $char_to_cc_cache{$_} ||= char_to_cc($_)  | 
| 
3578
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                               , $next_state_index++  | 
| 
3579
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             ]]]  | 
| 
3580
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         }  | 
| 
3581
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         split('', $1)  | 
| 
3582
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     );  | 
| 
3583
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
3584
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 elsif ($$str_ref =~ /\G ( \. + ) /xmsgc) {  | 
| 
3585
 | 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
127
 | 
                     push(@$nfa,  | 
| 
3586
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         map {  | 
| 
3587
 | 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
87
 | 
                             [ 0, [[  | 
| 
3588
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                 $cc_any  | 
| 
3589
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                               , $next_state_index++  | 
| 
3590
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             ]]]  | 
| 
3591
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         }  | 
| 
3592
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         (1..length($1))  | 
| 
3593
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     );  | 
| 
3594
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
3595
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 elsif ($$str_ref =~ /\G ( \[ ) /xmsgc) {  | 
| 
3596
 | 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8807
 | 
                     push(@$nfa,  | 
| 
3597
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         [ 0, [[  | 
| 
3598
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             bracket_expression_to_cc($str_ref)  | 
| 
3599
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                           , $next_state_index++  | 
| 
3600
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         ]]]  | 
| 
3601
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     );  | 
| 
3602
 | 
82
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
523
 | 
                     if ($$str_ref !~ /\G ] /xmsgc) {  | 
| 
3603
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                         parse_die('] expected', $str_ref);  | 
| 
3604
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
3605
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
3606
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 elsif ($$str_ref =~ /\G \\ (.) /xmsgc) {  | 
| 
3607
 | 
7
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
45
 | 
                     push(@$nfa,  | 
| 
3608
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         [ 0, [[  | 
| 
3609
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             $char_to_cc_cache{$1} ||= char_to_cc($1)  | 
| 
3610
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                           , $next_state_index++  | 
| 
3611
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         ]]]  | 
| 
3612
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     );  | 
| 
3613
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
3614
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 elsif ($$str_ref =~ /\G \^ /xmsgc) {  | 
| 
3615
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
33
 | 
                     push(@$nfa,  | 
| 
3616
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         [ 0, [[  | 
| 
3617
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             $cc_beg  | 
| 
3618
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                           , $next_state_index++  | 
| 
3619
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         ]]]  | 
| 
3620
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     );  | 
| 
3621
 | 
9
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
38
 | 
                     $$has_anchor_ref ||= 1;  | 
| 
3622
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
3623
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 elsif ($$str_ref =~ /\G \$ /xmsgc) {  | 
| 
3624
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
46
 | 
                     push(@$nfa,  | 
| 
3625
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         [ 0, [[  | 
| 
3626
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                             $cc_end  | 
| 
3627
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                           , $next_state_index++  | 
| 
3628
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         ]]]  | 
| 
3629
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     );  | 
| 
3630
 | 
9
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
46
 | 
                     $$has_anchor_ref ||= 1;  | 
| 
3631
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
3632
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 else {  | 
| 
3633
 | 
929
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1327
 | 
                     last;  | 
| 
3634
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
3635
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
3636
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3637
 | 
929
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
2432
 | 
             if (@$nfa) {  | 
| 
3638
 | 
488
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1184
 | 
                 if ($$str_ref =~ /\G (?= [*+?{] ) /xmsgc) {  | 
| 
3639
 | 
161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
345
 | 
                     my ($min, $max) = parse_quant($str_ref);  | 
| 
3640
 | 
161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
505
 | 
                     my $last_char_class = $$nfa[$#$nfa][1][0][0];  | 
| 
3641
 | 
161
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
355
 | 
                     if (@$nfa > 1) {  | 
| 
3642
 | 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
119
 | 
                         @{$$nfa[$#$nfa]} = (1, []);  | 
| 
 
 | 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
229
 | 
    | 
| 
3643
 | 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
130
 | 
                         push(@all_nfas, $nfa);  | 
| 
3644
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     }  | 
| 
3645
 | 
161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
732
 | 
                     push(@all_nfas, nfa_quant(  | 
| 
3646
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                         [ [0, [[$last_char_class, 1 ]]], [1, []] ]  | 
| 
3647
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                       , $min, $max  | 
| 
3648
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                     ));  | 
| 
3649
 | 
161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
664
 | 
                     $loop = 1;  | 
| 
3650
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
3651
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 else {  | 
| 
3652
 | 
327
 | 
 
 | 
 
 | 
 
 | 
 
 | 
750
 | 
                     push(@$nfa, [1, []]);  | 
| 
3653
 | 
327
 | 
 
 | 
 
 | 
 
 | 
 
 | 
522
 | 
                     push(@all_nfas, $nfa);  | 
| 
3654
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
3655
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
3656
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3657
 | 
929
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
3705
 | 
             if ($$str_ref =~ /\G \( /xmsgc) {  | 
| 
3658
 | 
218
 | 
 
 | 
 
 | 
 
 | 
 
 | 
464
 | 
                 $nfa = _ere_to_nfa($str_ref, $has_anchor_ref);  | 
| 
3659
 | 
218
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
950
 | 
                 if ($$str_ref !~ /\G \) /xmsgc) {  | 
| 
3660
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
                     parse_die(') expected', $str_ref);  | 
| 
3661
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
3662
 | 
218
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
571
 | 
                 if ($$str_ref =~ /\G (?= [*+?{] ) /xmsgc) {  | 
| 
3663
 | 
84
 | 
 
 | 
 
 | 
 
 | 
 
 | 
212
 | 
                     my ($min, $max) = parse_quant($str_ref);  | 
| 
3664
 | 
84
 | 
 
 | 
 
 | 
 
 | 
 
 | 
205
 | 
                     push(@all_nfas, nfa_quant($nfa, $min, $max));  | 
| 
3665
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
3666
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 else {  | 
| 
3667
 | 
134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
203
 | 
                     push(@all_nfas, $nfa);  | 
| 
3668
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                 }  | 
| 
3669
 | 
218
 | 
 
 | 
 
 | 
 
 | 
 
 | 
825
 | 
                 $loop = 1;  | 
| 
3670
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
             }  | 
| 
3671
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         } while ($loop);  | 
| 
3672
 | 
558
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
1504
 | 
         if (@all_nfas > 1) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3673
 | 
177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
399
 | 
             return nfa_concat(@all_nfas);  | 
| 
3674
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
3675
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         elsif (@all_nfas) {  | 
| 
3676
 | 
326
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1680
 | 
             return $all_nfas[0];  | 
| 
3677
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
3678
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         else {  | 
| 
3679
 | 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
320
 | 
             return [[1, []]];  | 
| 
3680
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         }  | 
| 
3681
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3682
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
3683
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3684
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub parse_die {  | 
| 
3685
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
0
  
 | 
0
 | 
     my ($msg, $str_ref) = @_;  | 
| 
3686
 | 
0
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
     die("malformed regex: $msg at "  | 
| 
3687
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       . (pos($$str_ref) || 0) . " in $$str_ref");  | 
| 
3688
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
3689
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3690
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3691
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ##############################################################################  | 
| 
3692
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # Shorthands  | 
| 
3693
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ##############################################################################  | 
| 
3694
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3695
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
3696
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3697
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 Shorthands  | 
| 
3698
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3699
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
3700
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3701
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item C  | 
| 
3702
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  := C  | 
| 
3703
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3704
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
3705
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3706
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub ere_to_tree {  | 
| 
3707
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
     my ($ere) = @_;  | 
| 
3708
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return nfa_to_tree(ere_to_nfa($ere));  | 
| 
3709
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
3710
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3711
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item C  | 
| 
3712
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  := C  | 
| 
3713
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3714
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
3715
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3716
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub ere_to_regex {  | 
| 
3717
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
     my ($ere, $to_perlre) = (@_, 0);  | 
| 
3718
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return tree_to_regex(ere_to_tree($ere), $to_perlre);  | 
| 
3719
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
3720
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3721
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item C  | 
| 
3722
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  := C  | 
| 
3723
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3724
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
3725
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3726
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub nfa_to_regex {  | 
| 
3727
 | 
102
 | 
 
 | 
 
 | 
  
102
  
 | 
  
1
  
 | 
310
 | 
     my ($nfa, $to_perlre) = (@_, 0);  | 
| 
3728
 | 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
271
 | 
     return tree_to_regex(nfa_to_tree($nfa), $to_perlre);  | 
| 
3729
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
3730
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3731
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item C  | 
| 
3732
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  := C  | 
| 
3733
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3734
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
3735
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3736
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub ere_to_input_constraints {  | 
| 
3737
 | 
0
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
     my ($ere) = @_;  | 
| 
3738
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
     return tree_to_input_constraints(ere_to_tree($ere));  | 
| 
3739
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
3740
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3741
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item C  | 
| 
3742
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  := C  | 
| 
3743
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3744
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
3745
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3746
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub nfa_to_input_constraints {  | 
| 
3747
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
  
1
  
 | 
78
 | 
     my ($nfa) = @_;  | 
| 
3748
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
     return tree_to_input_constraints(nfa_to_tree($nfa));  | 
| 
3749
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
3750
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3751
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item C  | 
| 
3752
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  := C  | 
| 
3753
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3754
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
3755
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3756
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub nfa_to_min_dfa {  | 
| 
3757
 | 
178
 | 
 
 | 
 
 | 
  
178
  
 | 
  
1
  
 | 
21243
 | 
     my ($nfa) = @_;  | 
| 
3758
 | 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
518
 | 
     return dfa_to_min_dfa(nfa_to_dfa($nfa));  | 
| 
3759
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
3760
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3761
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1;  | 
| 
3762
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3763
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
3764
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3765
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 AUTHOR  | 
| 
3766
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3767
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Loïc Jonas Etienne   | 
| 
3768
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3769
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 COPYRIGHT and LICENSE  | 
| 
3770
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3771
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Artistic License 2.0  | 
| 
3772
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 http://www.perlfoundation.org/artistic_license_2_0  |