|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
  
 
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Extended regular expression matching and search library.  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    Copyright (C) 2002-2014 Free Software Foundation, Inc.  | 
| 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    This file is part of the GNU C Library.  | 
| 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    Contributed by Isamu Hasegawa .  | 
| 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    The GNU C Library is free software; you can redistribute it and/or  | 
| 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    modify it under the terms of the GNU Lesser General Public  | 
| 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    License as published by the Free Software Foundation; either  | 
| 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    version 2.1 of the License, or (at your option) any later version.  | 
| 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    The GNU C Library is distributed in the hope that it will be useful,  | 
| 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    but WITHOUT ANY WARRANTY; without even the implied warranty of  | 
| 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU  | 
| 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    Lesser General Public License for more details.  | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    You should have received a copy of the GNU Lesser General Public  | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    License along with the GNU C Library; if not, see  | 
| 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    .  */  | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* memset calls are left as they are: they are ok from Perl memory pool point of view IMHO */  | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* static */ reg_errcode_t re_compile_internal (pTHX_ regex_t *preg, const char * pattern,  | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                                                 size_t length, reg_syntax_t syntax, bool is_utf8);  | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static void re_compile_fastmap_iter (pTHX_ regex_t *bufp,  | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				     const re_dfastate_t *init_state,  | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				     char *fastmap);  | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t init_dfa (pTHX_ re_dfa_t *dfa, size_t pat_len, bool is_utf8);  | 
| 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static void free_charset (pTHX_ re_charset_t *cset);  | 
| 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif /* RE_ENABLE_I18N */  | 
| 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static void free_workarea_compile (pTHX_ regex_t *preg);  | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t create_initial_state (pTHX_ re_dfa_t *dfa);  | 
| 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static void optimize_utf8 (pTHX_ re_dfa_t *dfa);  | 
| 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif  | 
| 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t analyze (pTHX_ regex_t *preg);  | 
| 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t preorder (pTHX_ bin_tree_t *root,  | 
| 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			       reg_errcode_t (fn (pTHX_ void *, bin_tree_t *)),  | 
| 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			       void *extra);  | 
| 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t postorder (pTHX_ bin_tree_t *root,  | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				reg_errcode_t (fn (pTHX_ void *, bin_tree_t *)),  | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				void *extra);  | 
| 
43
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t optimize_subexps (pTHX_ void *extra, bin_tree_t *node);  | 
| 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t lower_subexps (pTHX_ void *extra, bin_tree_t *node);  | 
| 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static bin_tree_t *lower_subexp (pTHX_ reg_errcode_t *err, regex_t *preg,  | 
| 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				 bin_tree_t *node);  | 
| 
47
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t calc_first (pTHX_ void *extra, bin_tree_t *node);  | 
| 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t calc_next (pTHX_ void *extra, bin_tree_t *node);  | 
| 
49
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t link_nfa_nodes (pTHX_ void *extra, bin_tree_t *node);  | 
| 
50
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static Idx duplicate_node (pTHX_ re_dfa_t *dfa, Idx org_idx, unsigned int constraint);  | 
| 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static Idx search_duplicated_node (pTHX_ const re_dfa_t *dfa, Idx org_node,  | 
| 
52
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				   unsigned int constraint);  | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t calc_eclosure (pTHX_ re_dfa_t *dfa);  | 
| 
54
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t calc_eclosure_iter (pTHX_ re_node_set *new_set, re_dfa_t *dfa,  | 
| 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					 Idx node, bool root);  | 
| 
56
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t calc_inveclosure (pTHX_ re_dfa_t *dfa);  | 
| 
57
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static Idx fetch_number (pTHX_ re_string_t *input, re_token_t *token,  | 
| 
58
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			 reg_syntax_t syntax);  | 
| 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static int peek_token (pTHX_ re_token_t *token, re_string_t *input,  | 
| 
60
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			reg_syntax_t syntax) internal_function;  | 
| 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static bin_tree_t *parse (pTHX_ re_string_t *regexp, regex_t *preg,  | 
| 
62
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			  reg_syntax_t syntax, reg_errcode_t *err);  | 
| 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static bin_tree_t *parse_reg_exp (pTHX_ re_string_t *regexp, regex_t *preg,  | 
| 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				  re_token_t *token, reg_syntax_t syntax,  | 
| 
65
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				  Idx nest, reg_errcode_t *err);  | 
| 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static bin_tree_t *parse_branch (pTHX_ re_string_t *regexp, regex_t *preg,  | 
| 
67
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				 re_token_t *token, reg_syntax_t syntax,  | 
| 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				 Idx nest, reg_errcode_t *err);  | 
| 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static bin_tree_t *parse_expression (pTHX_ re_string_t *regexp, regex_t *preg,  | 
| 
70
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				     re_token_t *token, reg_syntax_t syntax,  | 
| 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				     Idx nest, reg_errcode_t *err);  | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static bin_tree_t *parse_sub_exp (pTHX_ re_string_t *regexp, regex_t *preg,  | 
| 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				  re_token_t *token, reg_syntax_t syntax,  | 
| 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				  Idx nest, reg_errcode_t *err);  | 
| 
75
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static bin_tree_t *parse_dup_op (pTHX_ bin_tree_t *dup_elem, re_string_t *regexp,  | 
| 
76
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				 re_dfa_t *dfa, re_token_t *token,  | 
| 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				 reg_syntax_t syntax, reg_errcode_t *err);  | 
| 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static bin_tree_t *parse_bracket_exp (pTHX_ re_string_t *regexp, re_dfa_t *dfa,  | 
| 
79
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				      re_token_t *token, reg_syntax_t syntax,  | 
| 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				      reg_errcode_t *err);  | 
| 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t parse_bracket_element (pTHX_ bracket_elem_t *elem,  | 
| 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					    re_string_t *regexp,  | 
| 
83
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					    re_token_t *token, int token_len,  | 
| 
84
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					    re_dfa_t *dfa,  | 
| 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					    reg_syntax_t syntax,  | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					    bool accept_hyphen);  | 
| 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t parse_bracket_symbol (pTHX_ bracket_elem_t *elem,  | 
| 
88
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					  re_string_t *regexp,  | 
| 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					  re_token_t *token);  | 
| 
90
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t build_equiv_class (pTHX_ bitset_t sbcset,  | 
| 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					re_charset_t *mbcset,  | 
| 
93
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					Idx *equiv_class_alloc,  | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					const unsigned char *name);  | 
| 
95
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t build_charclass (pTHX_ RE_TRANSLATE_TYPE trans,  | 
| 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				      bitset_t sbcset,  | 
| 
97
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				      re_charset_t *mbcset,  | 
| 
98
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				      Idx *char_class_alloc,  | 
| 
99
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				      const char *class_name,  | 
| 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				      reg_syntax_t syntax);  | 
| 
101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #else  /* not RE_ENABLE_I18N */  | 
| 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t build_equiv_class (pTHX_ bitset_t sbcset,  | 
| 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					const unsigned char *name);  | 
| 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t build_charclass (pTHX_ RE_TRANSLATE_TYPE trans,  | 
| 
105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				      bitset_t sbcset,  | 
| 
106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				      const char *class_name,  | 
| 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				      reg_syntax_t syntax);  | 
| 
108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif /* not RE_ENABLE_I18N */  | 
| 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static bin_tree_t *build_charclass_op (pTHX_ re_dfa_t *dfa,  | 
| 
110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				       RE_TRANSLATE_TYPE trans,  | 
| 
111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				       const char *class_name,  | 
| 
112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				       const char *extra,  | 
| 
113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				       bool non_match, reg_errcode_t *err);  | 
| 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static bin_tree_t *create_tree (pTHX_ re_dfa_t *dfa,  | 
| 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				bin_tree_t *left, bin_tree_t *right,  | 
| 
116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				re_token_type_t type);  | 
| 
117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static bin_tree_t *create_token_tree (pTHX_ re_dfa_t *dfa,  | 
| 
118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				      bin_tree_t *left, bin_tree_t *right,  | 
| 
119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				      const re_token_t *token);  | 
| 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static bin_tree_t *duplicate_tree (pTHX_ const bin_tree_t *src, re_dfa_t *dfa);  | 
| 
121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static void free_token (pTHX_ re_token_t *node);  | 
| 
122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t free_tree (pTHX_ void *extra, bin_tree_t *node);  | 
| 
123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t mark_opt_subexp (pTHX_ void *extra, bin_tree_t *node);  | 
| 
124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   | 
| 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* This table gives an error message for each of the error codes listed  | 
| 
126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    in regex.h.  Obviously the order here has to be same as there.  | 
| 
127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    POSIX doesn't require that we do anything for REG_NOERROR,  | 
| 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    but why not be nice?  */  | 
| 
129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static const char __re_error_msgid[] =  | 
| 
131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   {  | 
| 
132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #define REG_NOERROR_IDX	0  | 
| 
133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     gettext_noop ("Success")	/* REG_NOERROR */  | 
| 
134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\0"  | 
| 
135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")  | 
| 
136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     gettext_noop ("No match")	/* REG_NOMATCH */  | 
| 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\0"  | 
| 
138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #define REG_BADPAT_IDX	(REG_NOMATCH_IDX + sizeof "No match")  | 
| 
139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     gettext_noop ("Invalid regular expression") /* REG_BADPAT */  | 
| 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\0"  | 
| 
141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")  | 
| 
142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     gettext_noop ("Invalid collation character") /* REG_ECOLLATE */  | 
| 
143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\0"  | 
| 
144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #define REG_ECTYPE_IDX	(REG_ECOLLATE_IDX + sizeof "Invalid collation character")  | 
| 
145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     gettext_noop ("Invalid character class name") /* REG_ECTYPE */  | 
| 
146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\0"  | 
| 
147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #define REG_EESCAPE_IDX	(REG_ECTYPE_IDX + sizeof "Invalid character class name")  | 
| 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     gettext_noop ("Trailing backslash") /* REG_EESCAPE */  | 
| 
149
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\0"  | 
| 
150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #define REG_ESUBREG_IDX	(REG_EESCAPE_IDX + sizeof "Trailing backslash")  | 
| 
151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     gettext_noop ("Invalid back reference") /* REG_ESUBREG */  | 
| 
152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\0"  | 
| 
153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #define REG_EBRACK_IDX	(REG_ESUBREG_IDX + sizeof "Invalid back reference")  | 
| 
154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     gettext_noop ("Unmatched [ or [^")	/* REG_EBRACK */  | 
| 
155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\0"  | 
| 
156
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #define REG_EPAREN_IDX	(REG_EBRACK_IDX + sizeof "Unmatched [ or [^")  | 
| 
157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */  | 
| 
158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\0"  | 
| 
159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #define REG_EBRACE_IDX	(REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")  | 
| 
160
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     gettext_noop ("Unmatched \\{") /* REG_EBRACE */  | 
| 
161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\0"  | 
| 
162
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #define REG_BADBR_IDX	(REG_EBRACE_IDX + sizeof "Unmatched \\{")  | 
| 
163
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     gettext_noop ("Invalid content of \\{\\}") /* REG_BADBR */  | 
| 
164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\0"  | 
| 
165
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #define REG_ERANGE_IDX	(REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")  | 
| 
166
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     gettext_noop ("Invalid range end")	/* REG_ERANGE */  | 
| 
167
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\0"  | 
| 
168
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #define REG_ESPACE_IDX	(REG_ERANGE_IDX + sizeof "Invalid range end")  | 
| 
169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     gettext_noop ("Memory exhausted") /* REG_ESPACE */  | 
| 
170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\0"  | 
| 
171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #define REG_BADRPT_IDX	(REG_ESPACE_IDX + sizeof "Memory exhausted")  | 
| 
172
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     gettext_noop ("Invalid preceding regular expression") /* REG_BADRPT */  | 
| 
173
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\0"  | 
| 
174
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #define REG_EEND_IDX	(REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")  | 
| 
175
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     gettext_noop ("Premature end of regular expression") /* REG_EEND */  | 
| 
176
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\0"  | 
| 
177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #define REG_ESIZE_IDX	(REG_EEND_IDX + sizeof "Premature end of regular expression")  | 
| 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     gettext_noop ("Regular expression too big") /* REG_ESIZE */  | 
| 
179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     "\0"  | 
| 
180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #define REG_ERPAREN_IDX	(REG_ESIZE_IDX + sizeof "Regular expression too big")  | 
| 
181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */  | 
| 
182
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   };  | 
| 
183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
184
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static const size_t __re_error_msgid_idx[] =  | 
| 
185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   {  | 
| 
186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     REG_NOERROR_IDX,  | 
| 
187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     REG_NOMATCH_IDX,  | 
| 
188
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     REG_BADPAT_IDX,  | 
| 
189
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     REG_ECOLLATE_IDX,  | 
| 
190
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     REG_ECTYPE_IDX,  | 
| 
191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     REG_EESCAPE_IDX,  | 
| 
192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     REG_ESUBREG_IDX,  | 
| 
193
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     REG_EBRACK_IDX,  | 
| 
194
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     REG_EPAREN_IDX,  | 
| 
195
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     REG_EBRACE_IDX,  | 
| 
196
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     REG_BADBR_IDX,  | 
| 
197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     REG_ERANGE_IDX,  | 
| 
198
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     REG_ESPACE_IDX,  | 
| 
199
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     REG_BADRPT_IDX,  | 
| 
200
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     REG_EEND_IDX,  | 
| 
201
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     REG_ESIZE_IDX,  | 
| 
202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     REG_ERPAREN_IDX  | 
| 
203
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   };  | 
| 
204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   | 
| 
205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Entry points for GNU code.  */  | 
| 
206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* re_compile_pattern is the GNU regular expression compiler: it  | 
| 
208
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    compiles PATTERN (of length LENGTH) and puts the result in BUFP.  | 
| 
209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    Returns 0 if the pattern was valid, otherwise an error string.  | 
| 
210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    Assumes the 'allocated' (and perhaps 'buffer') and 'translate' fields  | 
| 
212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    are set in BUFP on entry.  */  | 
| 
213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef _LIBC  | 
| 
215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 const char *  | 
| 
216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 re_compile_pattern (pTHX_ pattern, length, bufp, is_utf8)  | 
| 
217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     const char *pattern;  | 
| 
218
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     size_t length;  | 
| 
219
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     struct re_pattern_buffer *bufp;  | 
| 
220
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     bool is_utf8;  | 
| 
221
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #else /* size_t might promote */  | 
| 
222
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 const char *  | 
| 
223
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 re_compile_pattern (pTHX_ const char *pattern, size_t length,  | 
| 
224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		    struct re_pattern_buffer *bufp, bool is_utf8)  | 
| 
225
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif  | 
| 
226
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
227
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   reg_errcode_t ret;  | 
| 
228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
229
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* And GNU code determines whether or not to get register information  | 
| 
230
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      by passing null for the REGS argument to re_match, etc., not by  | 
| 
231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      setting no_sub, unless RE_NO_SUB is set.  */  | 
| 
232
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   bufp->no_sub = !!(re_syntax_options & RE_NO_SUB);  | 
| 
233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* Match anchors at newline.  */  | 
| 
235
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   bufp->newline_anchor = 1;  | 
| 
236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
237
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ret = re_compile_internal (aTHX_ bufp, pattern, length, re_syntax_options, is_utf8);  | 
| 
238
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
239
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (!ret)  | 
| 
240
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return NULL;  | 
| 
241
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);  | 
| 
242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
243
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef _LIBC  | 
| 
244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 weak_alias (__re_compile_pattern, re_compile_pattern)  | 
| 
245
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif  | 
| 
246
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
247
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Set by 're_set_syntax' to the current regexp syntax to recognize.  Can  | 
| 
248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    also be assigned to arbitrarily: each pattern buffer stores its own  | 
| 
249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    syntax, so it can be changed between regex compilations.  */  | 
| 
250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* This has no initializer because initialized variables in Emacs  | 
| 
251
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    become read-only after dumping.  */  | 
| 
252
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 reg_syntax_t re_syntax_options;  | 
| 
253
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
254
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Specify the precise syntax of regexps for compilation.  This provides  | 
| 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    for compatibility for various utilities which historically have  | 
| 
257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    different, incompatible syntaxes.  | 
| 
258
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
259
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    The argument SYNTAX is a bit mask comprised of the various bits  | 
| 
260
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    defined in regex.h.  We return the old syntax.  */  | 
| 
261
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 reg_syntax_t  | 
| 
263
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 re_set_syntax (pTHX_ reg_syntax_t syntax)  | 
| 
264
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
265
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   reg_syntax_t ret = re_syntax_options;  | 
| 
266
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
267
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_syntax_options = syntax;  | 
| 
268
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return ret;  | 
| 
269
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
270
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef _LIBC  | 
| 
271
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 weak_alias (__re_set_syntax, re_set_syntax)  | 
| 
272
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif  | 
| 
273
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 int  | 
| 
275
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 re_compile_fastmap (pTHX_ struct re_pattern_buffer *bufp)  | 
| 
276
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
277
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_dfa_t *dfa = bufp->buffer;  | 
| 
278
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   char *fastmap = bufp->fastmap;  | 
| 
279
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
280
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   memset (fastmap, '\0', sizeof (char) * SBC_MAX);  | 
| 
281
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_compile_fastmap_iter (aTHX_ bufp, dfa->init_state, fastmap);  | 
| 
282
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (dfa->init_state != dfa->init_state_word)  | 
| 
283
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     re_compile_fastmap_iter (aTHX_ bufp, dfa->init_state_word, fastmap);  | 
| 
284
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (dfa->init_state != dfa->init_state_nl)  | 
| 
285
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     re_compile_fastmap_iter (aTHX_ bufp, dfa->init_state_nl, fastmap);  | 
| 
286
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (dfa->init_state != dfa->init_state_begbuf)  | 
| 
287
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     re_compile_fastmap_iter (aTHX_ bufp, dfa->init_state_begbuf, fastmap);  | 
| 
288
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   bufp->fastmap_accurate = 1;  | 
| 
289
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return 0;  | 
| 
290
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef _LIBC  | 
| 
292
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 weak_alias (__re_compile_fastmap, re_compile_fastmap)  | 
| 
293
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif  | 
| 
294
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
295
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static inline void  | 
| 
296
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 __attribute__ ((always_inline))  | 
| 
297
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 re_set_fastmap (pTHX_ char *fastmap, bool icase, int ch)  | 
| 
298
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
299
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   fastmap[ch] = 1;  | 
| 
300
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (icase)  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
301
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     fastmap[rpl__tolower (ch)] = 1;  | 
| 
302
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
303
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
304
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Helper function for re_compile_fastmap.  | 
| 
305
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    Compile fastmap for the initial_state INIT_STATE.  */  | 
| 
306
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static void  | 
| 
308
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 re_compile_fastmap_iter (pTHX_ regex_t *bufp, const re_dfastate_t *init_state,  | 
| 
309
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			 char *fastmap)  | 
| 
310
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
311
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_dfa_t *dfa = bufp->buffer;  | 
| 
312
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Idx node_cnt;  | 
| 
313
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   bool icase = (dfa->mb_cur_max == 1 && (bufp->syntax & RE_ICASE));  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
314
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   for (node_cnt = 0; node_cnt < init_state->nodes.nelem; ++node_cnt)  | 
| 
315
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
316
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       Idx node = init_state->nodes.elems[node_cnt];  | 
| 
317
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       re_token_type_t type = dfa->nodes[node].type;  | 
| 
318
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
319
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (type == CHARACTER)  | 
| 
320
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
321
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  re_set_fastmap (aTHX_ fastmap, icase, dfa->nodes[node].opr.c);  | 
| 
322
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
323
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
324
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
325
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      unsigned char buf[MB_LEN_MAX];  | 
| 
326
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      unsigned char *p;  | 
| 
327
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      rpl__wchar_t wc;  | 
| 
328
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      rpl__mbstate_t state;  | 
| 
329
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
330
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      p = buf;  | 
| 
331
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      *p++ = dfa->nodes[node].opr.c;  | 
| 
332
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      while (++node < dfa->nodes_len  | 
| 
333
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		     &&	dfa->nodes[node].type == CHARACTER  | 
| 
334
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		     && dfa->nodes[node].mb_partial)  | 
| 
335
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		*p++ = dfa->nodes[node].opr.c;  | 
| 
336
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      memset (&state, '\0', sizeof (state));  | 
| 
337
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      if (rpl__mbrtowc (&wc, (const char *) buf, p - buf,  | 
| 
338
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			     &state) == p - buf  | 
| 
339
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  && (rpl__wcrtomb ((char *) buf, rpl__towlower (wc), &state)  | 
| 
340
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		      != (size_t) -1))  | 
| 
341
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		re_set_fastmap (aTHX_ fastmap, false, buf[0]);  | 
| 
342
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
343
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif  | 
| 
344
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
345
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       else if (type == SIMPLE_BRACKET)  | 
| 
346
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
347
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  int i, ch;  | 
| 
348
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  for (i = 0, ch = 0; i < BITSET_WORDS; ++i)  | 
| 
349
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
350
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      int j;  | 
| 
351
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      bitset_word_t w = dfa->nodes[node].opr.sbcset[i];  | 
| 
352
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)  | 
| 
353
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		if (w & ((bitset_word_t) 1 << j))  | 
| 
354
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  re_set_fastmap (aTHX_ fastmap, icase, ch);  | 
| 
355
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
356
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
357
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
358
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       else if (type == COMPLEX_BRACKET)  | 
| 
359
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
360
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  re_charset_t *cset = dfa->nodes[node].opr.mbcset;  | 
| 
361
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  Idx i;  | 
| 
362
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
363
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # ifdef _LIBC  | 
| 
364
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  /* See if we have to try all bytes which start multiple collation  | 
| 
365
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	     elements.  | 
| 
366
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	     e.g. In da_DK, we want to catch 'a' since "aa" is a valid  | 
| 
367
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  collation element, and don't catch 'b' since 'b' is  | 
| 
368
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  the only collation element which starts from 'b' (and  | 
| 
369
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  it is caught by SIMPLE_BRACKET).  */  | 
| 
370
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != 0  | 
| 
371
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  && (cset->ncoll_syms || cset->nranges))  | 
| 
372
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
373
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  const int32_t *table = (const int32_t *)  | 
| 
374
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);  | 
| 
375
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  for (i = 0; i < SBC_MAX; ++i)  | 
| 
376
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		    if (table[i] < 0)  | 
| 
377
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		      re_set_fastmap (aTHX_ fastmap, icase, i);  | 
| 
378
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
379
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # else  | 
| 
380
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
               /* For _PERL_I18N we do not use the fastmap -; */  | 
| 
381
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # endif /* _LIBC */  | 
| 
382
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
383
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  /* See if we have to start the match at all multibyte characters,  | 
| 
384
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	     i.e. where we would not find an invalid sequence.  This only  | 
| 
385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	     applies to multibyte character sets; for single byte character  | 
| 
386
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	     sets, the SIMPLE_BRACKET again suffices.  */  | 
| 
387
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (dfa->mb_cur_max > 1  | 
| 
388
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      && (cset->nchar_classes || cset->non_match || cset->nranges  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
389
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # if (defined(_LIBC) || defined(_PERL_I18N))  | 
| 
390
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  || cset->nequiv_classes  | 
| 
391
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # endif /* _LIBC */  | 
| 
392
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		 ))  | 
| 
393
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
394
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      unsigned char c = 0;  | 
| 
395
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      do  | 
| 
396
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
397
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  rpl__mbstate_t mbs;  | 
| 
398
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  memset (&mbs, 0, sizeof (mbs));  | 
| 
399
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  if (rpl__mbrtowc (NULL, (char *) &c, 1, &mbs) == (size_t) -2)  | 
| 
400
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		    re_set_fastmap (aTHX_ fastmap, false, (int) c);  | 
| 
401
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
402
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      while (++c != 0);  | 
| 
403
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
404
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
405
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  else  | 
| 
406
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
407
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      /* ... Else catch all bytes which can start the mbchars.  */  | 
| 
408
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      for (i = 0; i < cset->nmbchars; ++i)  | 
| 
409
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
410
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  char buf[256];  | 
| 
411
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  rpl__mbstate_t state;  | 
| 
412
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  memset (&state, '\0', sizeof (state));  | 
| 
413
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  if (rpl__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -1)  | 
| 
414
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		    re_set_fastmap (aTHX_ fastmap, icase, *(unsigned char *) buf);  | 
| 
415
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > 1)  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
416
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		    {  | 
| 
417
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		      if (rpl__wcrtomb (buf, rpl__towlower (cset->mbchars[i]), &state)  | 
| 
418
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			  != (size_t) -1)  | 
| 
419
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			re_set_fastmap (aTHX_ fastmap, false, *(unsigned char *) buf);  | 
| 
420
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		    }  | 
| 
421
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
422
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
423
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
424
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif /* RE_ENABLE_I18N */  | 
| 
425
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       else if (type == OP_PERIOD  | 
| 
426
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
427
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	       || type == OP_UTF8_PERIOD  | 
| 
428
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif /* RE_ENABLE_I18N */  | 
| 
429
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	       || type == END_OF_RE)  | 
| 
430
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
431
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  memset (fastmap, '\1', sizeof (char) * SBC_MAX);  | 
| 
432
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (type == END_OF_RE)  | 
| 
433
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    bufp->can_be_null = 1;  | 
| 
434
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  return;  | 
| 
435
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
436
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
437
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
438
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   | 
| 
439
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Entry point for POSIX code.  */  | 
| 
440
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* regcomp takes a regular expression as a string and compiles it.  | 
| 
441
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
442
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    PREG is a regex_t *.  We do not expect any fields to be initialized,  | 
| 
443
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    since POSIX says we shouldn't.  Thus, we set  | 
| 
444
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
445
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      'buffer' to the compiled pattern;  | 
| 
446
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      'used' to the length of the compiled pattern;  | 
| 
447
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      'syntax' to RE_SYNTAX_POSIX_EXTENDED if the  | 
| 
448
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        REG_EXTENDED bit in CFLAGS is set; otherwise, to  | 
| 
449
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        RE_SYNTAX_POSIX_BASIC;  | 
| 
450
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      'newline_anchor' to REG_NEWLINE being set in CFLAGS;  | 
| 
451
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      'fastmap' to an allocated space for the fastmap;  | 
| 
452
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      'fastmap_accurate' to zero;  | 
| 
453
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      're_nsub' to the number of subexpressions in PATTERN.  | 
| 
454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
455
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    PATTERN is the address of the pattern string.  | 
| 
456
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
457
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    CFLAGS is a series of bits which affect compilation.  | 
| 
458
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
459
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we  | 
| 
460
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      use POSIX basic syntax.  | 
| 
461
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
462
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      If REG_NEWLINE is set, then . and [^...] don't match newline.  | 
| 
463
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      Also, regexec will try a match beginning after every newline.  | 
| 
464
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
465
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      If REG_ICASE is set, then we considers upper- and lowercase  | 
| 
466
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      versions of letters to be equivalent when matching.  | 
| 
467
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
468
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      If REG_NOSUB is set, then when PREG is passed to regexec, that  | 
| 
469
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      routine will report only success or failure, and nothing about the  | 
| 
470
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      registers.  | 
| 
471
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
472
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for  | 
| 
473
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    the return codes and their meanings.)  */  | 
| 
474
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
475
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 int  | 
| 
476
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 regcomp (pTHX_ regex_t *_Restrict_ preg, const char *_Restrict_ pattern, int cflags, bool is_utf8)  | 
| 
477
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
478
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   reg_errcode_t ret;  | 
| 
479
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED  | 
| 
480
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			 : RE_SYNTAX_POSIX_BASIC);  | 
| 
481
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
482
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   preg->buffer = NULL;  | 
| 
483
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   preg->allocated = 0;  | 
| 
484
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   preg->used = 0;  | 
| 
485
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
486
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* Try to allocate space for the fastmap.  */  | 
| 
487
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_malloc (preg->fastmap, char, SBC_MAX);  | 
| 
488
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (preg->fastmap == NULL, 0))  | 
| 
489
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return REG_ESPACE;  | 
| 
490
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
491
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   syntax |= (cflags & REG_ICASE) ? RE_ICASE : 0;  | 
| 
492
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
493
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* If REG_NEWLINE is set, newlines are treated differently.  */  | 
| 
494
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (cflags & REG_NEWLINE)  | 
| 
495
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     { /* REG_NEWLINE implies neither . nor [^...] match newline.  */  | 
| 
496
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       syntax &= ~RE_DOT_NEWLINE;  | 
| 
497
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       syntax |= RE_HAT_LISTS_NOT_NEWLINE;  | 
| 
498
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* It also changes the matching behavior.  */  | 
| 
499
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       preg->newline_anchor = 1;  | 
| 
500
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
501
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   else  | 
| 
502
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     preg->newline_anchor = 0;  | 
| 
503
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   preg->no_sub = !!(cflags & REG_NOSUB);  | 
| 
504
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   preg->translate = NULL;  | 
| 
505
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
506
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ret = re_compile_internal (aTHX_ preg, pattern, strlen (pattern), syntax, is_utf8);  | 
| 
507
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
508
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* POSIX doesn't distinguish between an unmatched open-group and an  | 
| 
509
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      unmatched close-group: both are REG_EPAREN.  */  | 
| 
510
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (ret == REG_ERPAREN)  | 
| 
511
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     ret = REG_EPAREN;  | 
| 
512
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
513
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* We have already checked preg->fastmap != NULL.  */  | 
| 
514
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (ret == REG_NOERROR, 1))  | 
| 
515
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     /* Compute the fastmap now, since regexec cannot modify the pattern  | 
| 
516
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        buffer.  This function never fails in this implementation.  */  | 
| 
517
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     (void) re_compile_fastmap (aTHX_ preg);  | 
| 
518
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   else  | 
| 
519
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
520
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* Some error occurred while compiling the expression.  */  | 
| 
521
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       re_free (preg->fastmap);  | 
| 
522
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       preg->fastmap = NULL;  | 
| 
523
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
524
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
525
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return (int) ret;  | 
| 
526
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
527
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef _LIBC  | 
| 
528
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 weak_alias (__regcomp, regcomp)  | 
| 
529
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif  | 
| 
530
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
531
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Returns a message corresponding to an error code, ERRCODE, returned  | 
| 
532
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    from either regcomp or regexec.   We don't use PREG here.  */  | 
| 
533
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
534
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 size_t  | 
| 
535
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 regerror (pTHX_ int errcode, const regex_t *_Restrict_ preg, char *_Restrict_ errbuf, size_t errbuf_size)  | 
| 
536
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
537
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   const char *msg;  | 
| 
538
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   size_t msg_size;  | 
| 
539
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
540
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (errcode < 0  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
541
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  || errcode >= (int) (sizeof (__re_error_msgid_idx)  | 
| 
542
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			       / sizeof (__re_error_msgid_idx[0])), 0))  | 
| 
543
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     /* Only error codes returned by the rest of the code should be passed  | 
| 
544
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        to this routine.  If we are given anything else, or if other regex  | 
| 
545
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        code generates an invalid error code, then the program has a bug.  | 
| 
546
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        Dump core so we can fix it.  */  | 
| 
547
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     abort ();  | 
| 
548
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
549
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]);  | 
| 
550
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
551
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   msg_size = strlen (msg) + 1; /* Includes the null.  */  | 
| 
552
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
553
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (errbuf_size != 0, 1))  | 
| 
554
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
555
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       size_t cpy_size = msg_size;  | 
| 
556
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (BE (msg_size > errbuf_size, 0))  | 
| 
557
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
558
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  cpy_size = errbuf_size - 1;  | 
| 
559
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  errbuf[cpy_size] = '\0';  | 
| 
560
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
561
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       Copy (msg, errbuf, cpy_size, char);  | 
| 
562
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
563
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
564
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return msg_size;  | 
| 
565
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
566
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef _LIBC  | 
| 
567
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 weak_alias (__regerror, regerror)  | 
| 
568
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif  | 
| 
569
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
570
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
571
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
572
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* This static array is used for the map to single-byte characters when  | 
| 
573
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    UTF-8 is used.  Otherwise we would allocate memory just to initialize  | 
| 
574
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    it the same all the time.  UTF-8 is the preferred encoding so this is  | 
| 
575
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    a worthwhile optimization.  */  | 
| 
576
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static const bitset_t utf8_sb_map =  | 
| 
577
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
578
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* Set the first 128 bits.  */  | 
| 
579
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # if defined __GNUC__ && !defined __STRICT_ANSI__  | 
| 
580
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   [0 ... 0x80 / BITSET_WORD_BITS - 1] = BITSET_WORD_MAX  | 
| 
581
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # else  | 
| 
582
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  if 4 * BITSET_WORD_BITS < ASCII_CHARS  | 
| 
583
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #   error "bitset_word_t is narrower than 32 bits"  | 
| 
584
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  elif 3 * BITSET_WORD_BITS < ASCII_CHARS  | 
| 
585
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   BITSET_WORD_MAX, BITSET_WORD_MAX, BITSET_WORD_MAX,  | 
| 
586
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  elif 2 * BITSET_WORD_BITS < ASCII_CHARS  | 
| 
587
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   BITSET_WORD_MAX, BITSET_WORD_MAX,  | 
| 
588
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  elif 1 * BITSET_WORD_BITS < ASCII_CHARS  | 
| 
589
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   BITSET_WORD_MAX,  | 
| 
590
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #  endif  | 
| 
591
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   (BITSET_WORD_MAX  | 
| 
592
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    >> (SBC_MAX % BITSET_WORD_BITS == 0  | 
| 
593
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        ? 0  | 
| 
594
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        : BITSET_WORD_BITS - SBC_MAX % BITSET_WORD_BITS))  | 
| 
595
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # endif  | 
| 
596
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 };  | 
| 
597
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif  | 
| 
598
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
599
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
600
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static void  | 
| 
601
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 free_dfa_content (pTHX_ re_dfa_t *dfa)  | 
| 
602
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
603
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Idx i, j;  | 
| 
604
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
605
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (dfa->nodes)  | 
| 
606
 | 
54
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     for (i = 0; i < dfa->nodes_len; ++i)  | 
| 
607
 | 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       free_token (aTHX_ dfa->nodes + i);  | 
| 
608
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_free (dfa->nexts);  | 
| 
609
 | 
54
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   for (i = 0; i < dfa->nodes_len; ++i)  | 
| 
610
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
611
 | 
48
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (dfa->eclosures != NULL)  | 
| 
612
 | 
48
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	re_node_set_free (dfa->eclosures + i);  | 
| 
613
 | 
48
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (dfa->inveclosures != NULL)  | 
| 
614
 | 
15
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	re_node_set_free (dfa->inveclosures + i);  | 
| 
615
 | 
48
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (dfa->edests != NULL)  | 
| 
616
 | 
48
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	re_node_set_free (dfa->edests + i);  | 
| 
617
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
618
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_free (dfa->edests);  | 
| 
619
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_free (dfa->eclosures);  | 
| 
620
 | 
6
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_free (dfa->inveclosures);  | 
| 
621
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_free (dfa->nodes);  | 
| 
622
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
623
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (dfa->state_table)  | 
| 
624
 | 
102
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     for (i = 0; i <= dfa->state_hash_mask; ++i)  | 
| 
625
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       {  | 
| 
626
 | 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	struct re_state_table_entry *entry = dfa->state_table + i;  | 
| 
627
 | 
126
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	for (j = 0; j < entry->num; ++j)  | 
| 
628
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  {  | 
| 
629
 | 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    re_dfastate_t *state = entry->array[j];  | 
| 
630
 | 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    free_state (aTHX_ state);  | 
| 
631
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  }  | 
| 
632
 | 
96
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	re_free (entry->array);  | 
| 
633
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
634
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_free (dfa->state_table);  | 
| 
635
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
636
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (dfa->sb_char != utf8_sb_map)  | 
| 
637
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     re_free (dfa->sb_char);  | 
| 
638
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif  | 
| 
639
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_free (dfa->subexp_map);  | 
| 
640
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef DEBUG  | 
| 
641
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_free (dfa->re_str);  | 
| 
642
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif  | 
| 
643
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
644
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_free (dfa);  | 
| 
645
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
646
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
647
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
648
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Free dynamically allocated space used by PREG.  */  | 
| 
649
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
650
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 void  | 
| 
651
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 regfree (pTHX_ regex_t *preg)  | 
| 
652
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
653
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_dfa_t *dfa = preg->buffer;  | 
| 
654
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (dfa != NULL, 1))  | 
| 
655
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
656
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       lock_fini (dfa->lock);  | 
| 
657
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       free_dfa_content (aTHX_ dfa);  | 
| 
658
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
659
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   preg->buffer = NULL;  | 
| 
660
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   preg->allocated = 0;  | 
| 
661
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
662
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_free (preg->fastmap);  | 
| 
663
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   preg->fastmap = NULL;  | 
| 
664
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
665
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_free (preg->translate);  | 
| 
666
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   preg->translate = NULL;  | 
| 
667
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
668
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef _LIBC  | 
| 
669
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 weak_alias (__regfree, regfree)  | 
| 
670
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif  | 
| 
671
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   | 
| 
672
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Entry points compatible with 4.2 BSD regex library.  We don't define  | 
| 
673
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    them unless specifically requested.  */  | 
| 
674
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
675
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #if defined _REGEX_RE_COMP || defined _LIBC  | 
| 
676
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
677
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* BSD has one and only one pattern buffer.  */  | 
| 
678
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static struct re_pattern_buffer re_comp_buf;  | 
| 
679
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
680
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 char *  | 
| 
681
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # ifdef _LIBC  | 
| 
682
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Make these definitions weak in libc, so POSIX programs can redefine  | 
| 
683
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    these names if they don't use our functions, and still use  | 
| 
684
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    regcomp/regexec above without link errors.  */  | 
| 
685
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 weak_function  | 
| 
686
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # endif  | 
| 
687
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 re_comp (s)  | 
| 
688
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      const char *s;  | 
| 
689
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
690
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   reg_errcode_t ret;  | 
| 
691
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   char *fastmap;  | 
| 
692
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
693
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (!s)  | 
| 
694
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
695
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (!re_comp_buf.buffer)  | 
| 
696
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return gettext ("No previous regular expression");  | 
| 
697
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       return 0;  | 
| 
698
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
699
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
700
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (re_comp_buf.buffer)  | 
| 
701
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
702
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       fastmap = re_comp_buf.fastmap;  | 
| 
703
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       re_comp_buf.fastmap = NULL;  | 
| 
704
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       __regfree (&re_comp_buf);  | 
| 
705
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       memset (&re_comp_buf, '\0', sizeof (re_comp_buf));  | 
| 
706
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       re_comp_buf.fastmap = fastmap;  | 
| 
707
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
708
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
709
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (re_comp_buf.fastmap == NULL)  | 
| 
710
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
711
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* re_comp_buf.fastmap = (char *) malloc (SBC_MAX); */  | 
| 
712
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       Newx(re_comp_buf.fastmap, SBC_MAX, char);  | 
| 
713
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (re_comp_buf.fastmap == NULL)  | 
| 
714
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return (char *) gettext (__re_error_msgid  | 
| 
715
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				 + __re_error_msgid_idx[(int) REG_ESPACE]);  | 
| 
716
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
717
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
718
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* Since 're_exec' always passes NULL for the 'regs' argument, we  | 
| 
719
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      don't need to initialize the pattern buffer fields which affect it.  */  | 
| 
720
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
721
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* Match anchors at newlines.  */  | 
| 
722
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_comp_buf.newline_anchor = 1;  | 
| 
723
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
724
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ret = re_compile_internal (aTHX_ &re_comp_buf, s, strlen (s), re_syntax_options, is_utf8);  | 
| 
725
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
726
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (!ret)  | 
| 
727
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return NULL;  | 
| 
728
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
729
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* Yes, we're discarding 'const' here if !HAVE_LIBINTL.  */  | 
| 
730
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return (char *) gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);  | 
| 
731
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
732
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
733
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef _LIBC  | 
| 
734
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 libc_freeres_fn (free_mem)  | 
| 
735
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
736
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   __regfree (&re_comp_buf);  | 
| 
737
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
738
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif  | 
| 
739
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
740
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif /* _REGEX_RE_COMP */  | 
| 
741
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   | 
| 
742
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Internal entry point.  | 
| 
743
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    Compile the regular expression PATTERN, whose length is LENGTH.  | 
| 
744
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    SYNTAX indicate regular expression's syntax.  */  | 
| 
745
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
746
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* static */ reg_errcode_t  | 
| 
747
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 re_compile_internal (pTHX_ regex_t *preg, const char * pattern, size_t length,  | 
| 
748
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		     reg_syntax_t syntax, bool is_utf8)  | 
| 
749
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
750
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   reg_errcode_t err = REG_NOERROR;  | 
| 
751
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_dfa_t *dfa;  | 
| 
752
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_string_t regexp;  | 
| 
753
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
754
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* Initialize the pattern buffer.  */  | 
| 
755
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   preg->fastmap_accurate = 0;  | 
| 
756
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   preg->syntax = syntax;  | 
| 
757
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   preg->not_bol = preg->not_eol = 0;  | 
| 
758
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   preg->used = 0;  | 
| 
759
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   preg->re_nsub = 0;  | 
| 
760
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   preg->can_be_null = 0;  | 
| 
761
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   preg->regs_allocated = REGS_UNALLOCATED;  | 
| 
762
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
763
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* Initialize the dfa.  */  | 
| 
764
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   dfa = preg->buffer;  | 
| 
765
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (preg->allocated < sizeof (re_dfa_t), 0))  | 
| 
766
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
767
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* If zero allocated, but buffer is non-null, try to realloc  | 
| 
768
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	 enough space.  This loses if buffer's address is bogus, but  | 
| 
769
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	 that is the user's responsibility.  If ->buffer is NULL this  | 
| 
770
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	 is a simple allocation.  */  | 
| 
771
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (preg->buffer == NULL) {  | 
| 
772
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         re_malloc (preg->buffer, re_dfa_t, 1);  | 
| 
773
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       } else {  | 
| 
774
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         re_realloc (preg->buffer, re_dfa_t, 1);  | 
| 
775
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
776
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       dfa = preg->buffer;  | 
| 
777
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       preg->allocated = sizeof (re_dfa_t);  | 
| 
778
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
779
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   preg->used = sizeof (re_dfa_t);  | 
| 
780
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
781
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   err = init_dfa (aTHX_ dfa, length, is_utf8);  | 
| 
782
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (err == REG_NOERROR && lock_init (dfa->lock) != 0, 0))  | 
| 
783
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     err = REG_ESPACE;  | 
| 
784
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (err != REG_NOERROR, 0))  | 
| 
785
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
786
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       free_dfa_content (aTHX_ dfa);  | 
| 
787
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       preg->buffer = NULL;  | 
| 
788
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       preg->allocated = 0;  | 
| 
789
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       return err;  | 
| 
790
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
791
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef DEBUG  | 
| 
792
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* Note: length+1 will not overflow since it is checked in init_dfa.  */  | 
| 
793
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_malloc (dfa->re_str, char, length + 1);  | 
| 
794
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   strncpy (dfa->re_str, pattern, length + 1);  | 
| 
795
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif  | 
| 
796
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
797
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   err = re_string_construct (aTHX_ ®exp, pattern, length, preg->translate,  | 
| 
798
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			     (syntax & RE_ICASE) != 0, dfa);  | 
| 
799
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (err != REG_NOERROR, 0))  | 
| 
800
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
801
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     re_compile_internal_free_return:  | 
| 
802
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       free_workarea_compile (aTHX_ preg);  | 
| 
803
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       re_string_destruct (aTHX_ ®exp);  | 
| 
804
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       lock_fini (dfa->lock);  | 
| 
805
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       free_dfa_content (aTHX_ dfa);  | 
| 
806
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       preg->buffer = NULL;  | 
| 
807
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       preg->allocated = 0;  | 
| 
808
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       return err;  | 
| 
809
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
810
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
811
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* Parse the regular expression, and build a structure tree.  */  | 
| 
812
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   preg->re_nsub = 0;  | 
| 
813
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   dfa->str_tree = parse (aTHX_ ®exp, preg, syntax, &err);  | 
| 
814
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (dfa->str_tree == NULL, 0))  | 
| 
815
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     goto re_compile_internal_free_return;  | 
| 
816
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
817
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* Analyze the tree and create the nfa.  */  | 
| 
818
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   err = analyze (aTHX_ preg);  | 
| 
819
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (err != REG_NOERROR, 0))  | 
| 
820
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     goto re_compile_internal_free_return;  | 
| 
821
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
822
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
823
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* If possible, do searching in single byte encoding to speed things up.  */  | 
| 
824
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL)  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
825
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     optimize_utf8 (aTHX_ dfa);  | 
| 
826
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif  | 
| 
827
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
828
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* Then create the initial state of the dfa.  */  | 
| 
829
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   err = create_initial_state (aTHX_ dfa);  | 
| 
830
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
831
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* Release work areas.  */  | 
| 
832
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   free_workarea_compile (aTHX_ preg);  | 
| 
833
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_string_destruct (aTHX_ ®exp);  | 
| 
834
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
835
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (err != REG_NOERROR, 0))  | 
| 
836
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
837
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       lock_fini (dfa->lock);  | 
| 
838
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       free_dfa_content (aTHX_ dfa);  | 
| 
839
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       preg->buffer = NULL;  | 
| 
840
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       preg->allocated = 0;  | 
| 
841
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
842
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
843
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return err;  | 
| 
844
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
845
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
846
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Initialize DFA.  We use the length of the regular expression PAT_LEN  | 
| 
847
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    as the initial length of some arrays.  */  | 
| 
848
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
849
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t  | 
| 
850
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 init_dfa (pTHX_ re_dfa_t *dfa, size_t pat_len, bool is_utf8)  | 
| 
851
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
852
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   __re_size_t table_size;  | 
| 
853
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifndef _LIBC  | 
| 
854
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifndef _PERL_I18N  | 
| 
855
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   const char *codeset_name;  | 
| 
856
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif  | 
| 
857
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif  | 
| 
858
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
859
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   size_t max_i18n_object_size = MAX (sizeof (rpl__wchar_t), sizeof (rpl__wctype_t));  | 
| 
860
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #else  | 
| 
861
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   size_t max_i18n_object_size = 0;  | 
| 
862
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif  | 
| 
863
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   size_t max_object_size =  | 
| 
864
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     MAX (sizeof (struct re_state_table_entry),  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
865
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	 MAX (sizeof (re_token_t),  | 
| 
866
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      MAX (sizeof (re_node_set),  | 
| 
867
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		   MAX (sizeof (regmatch_t),  | 
| 
868
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			max_i18n_object_size))));  | 
| 
869
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
870
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   memset (dfa, '\0', sizeof (re_dfa_t));  | 
| 
871
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
872
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* Force allocation of str_tree_storage the first time.  */  | 
| 
873
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;  | 
| 
874
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
875
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* Avoid overflows.  The extra "/ 2" is for the table_size doubling  | 
| 
876
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      calculation below, and for similar doubling calculations  | 
| 
877
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      elsewhere.  And it's <= rather than <, because some of the  | 
| 
878
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      doubling calculations add 1 afterwards.  */  | 
| 
879
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) / 2 <= pat_len, 0))  | 
| 
880
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return REG_ESPACE;  | 
| 
881
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
882
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   dfa->nodes_alloc = pat_len + 1;  | 
| 
883
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_malloc (dfa->nodes, re_token_t, dfa->nodes_alloc);  | 
| 
884
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
885
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /*  table_size = 2 ^ ceil(log pat_len) */  | 
| 
886
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   for (table_size = 1; ; table_size <<= 1)  | 
| 
887
 | 
30
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if (table_size > pat_len)  | 
| 
888
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       break;  | 
| 
889
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
890
 | 
30
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_calloc(dfa->state_table, struct re_state_table_entry, table_size);  | 
| 
891
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* Note: isn't the original mixing number of elements and element size ? */  | 
| 
892
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size); */  | 
| 
893
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   dfa->state_hash_mask = table_size - 1;  | 
| 
894
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
895
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   dfa->mb_cur_max = rpl__MB_CUR_MAX;  | 
| 
896
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef _LIBC  | 
| 
897
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (dfa->mb_cur_max == 6  | 
| 
898
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       && strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0)  | 
| 
899
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     dfa->is_utf8 = 1;  | 
| 
900
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)  | 
| 
901
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		       != 0);  | 
| 
902
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #else  | 
| 
903
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* We require that input coming from perl will say if it Perl's utf-8 or not */  | 
| 
904
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* Note that perl's utf-8 is not official UTF-8, but a superset of the latest */  | 
| 
905
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifndef _PERL_I18N  | 
| 
906
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   codeset_name = nl_langinfo (CODESET);  | 
| 
907
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if ((codeset_name[0] == 'U' || codeset_name[0] == 'u')  | 
| 
908
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       && (codeset_name[1] == 'T' || codeset_name[1] == 't')  | 
| 
909
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       && (codeset_name[2] == 'F' || codeset_name[2] == 'f')  | 
| 
910
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       && strcmp (codeset_name + 3 + (codeset_name[3] == '-'), "8") == 0)  | 
| 
911
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif  | 
| 
912
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     dfa->is_utf8 = is_utf8;  | 
| 
913
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
914
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* We check exhaustively in the loop below if this charset is a  | 
| 
915
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      superset of ASCII.  */  | 
| 
916
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   dfa->map_notascii = 0;  | 
| 
917
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif  | 
| 
918
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
919
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
920
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (dfa->mb_cur_max > 1)  | 
| 
921
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
922
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (dfa->is_utf8)  | 
| 
923
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;  | 
| 
924
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       else  | 
| 
925
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
926
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  int i, j, ch;  | 
| 
927
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
928
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
           re_calloc(dfa->sb_char, bitset_word_t, BITSET_WORDS);  | 
| 
929
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  /* dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); */  | 
| 
930
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (BE (dfa->sb_char == NULL, 0))  | 
| 
931
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    return REG_ESPACE;  | 
| 
932
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
933
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  /* Set the bits corresponding to single byte chars.  */  | 
| 
934
 | 
30
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  for (i = 0, ch = 0; i < BITSET_WORDS; ++i)  | 
| 
935
 | 
1560
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)  | 
| 
936
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      {  | 
| 
937
 | 
1536
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		rpl__wint_t wch = rpl__btowc (ch);  | 
| 
938
 | 
1536
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		if (wch != rpl__WEOF)  | 
| 
939
 | 
768
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  dfa->sb_char[i] |= (bitset_word_t) 1 << j;  | 
| 
940
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # ifndef _LIBC  | 
| 
941
 | 
1536
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		if (rpl__isascii (ch) && wch != ch)  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
942
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  dfa->map_notascii = 1;  | 
| 
943
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # endif  | 
| 
944
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      }  | 
| 
945
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
946
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
947
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif  | 
| 
948
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
949
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (dfa->nodes == NULL || dfa->state_table == NULL, 0))  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
950
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return REG_ESPACE;  | 
| 
951
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return REG_NOERROR;  | 
| 
952
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
953
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
954
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Initialize WORD_CHAR table, which indicate which character is  | 
| 
955
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    "word".  In this case "word" means that it is the word construction  | 
| 
956
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    character used by some operators like "\<", "\>", etc.  */  | 
| 
957
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
958
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static void  | 
| 
959
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 internal_function  | 
| 
960
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 init_word_char (pTHX_ re_dfa_t *dfa)  | 
| 
961
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
962
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   int i = 0;  | 
| 
963
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   int j;  | 
| 
964
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   int ch = 0;  | 
| 
965
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   dfa->word_ops_used = 1;  | 
| 
966
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (dfa->map_notascii == 0, 1))  | 
| 
967
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
968
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       bitset_word_t bits0 = 0x00000000;  | 
| 
969
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       bitset_word_t bits1 = 0x03ff0000;  | 
| 
970
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       bitset_word_t bits2 = 0x87fffffe;  | 
| 
971
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       bitset_word_t bits3 = 0x07fffffe;  | 
| 
972
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (BITSET_WORD_BITS == 64)  | 
| 
973
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
974
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  dfa->word_char[0] = bits1 << 31 << 1 | bits0;  | 
| 
975
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  dfa->word_char[1] = bits3 << 31 << 1 | bits2;  | 
| 
976
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  i = 2;  | 
| 
977
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
978
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       else if (BITSET_WORD_BITS == 32)  | 
| 
979
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
980
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  dfa->word_char[0] = bits0;  | 
| 
981
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  dfa->word_char[1] = bits1;  | 
| 
982
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  dfa->word_char[2] = bits2;  | 
| 
983
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  dfa->word_char[3] = bits3;  | 
| 
984
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  i = 4;  | 
| 
985
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
986
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       else  | 
| 
987
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         goto general_case;  | 
| 
988
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       ch = 128;  | 
| 
989
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
990
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (BE (dfa->is_utf8, 1))  | 
| 
991
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
992
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  memset (&dfa->word_char[i], '\0', (SBC_MAX - ch) / 8);  | 
| 
993
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  return;  | 
| 
994
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
995
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
996
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
997
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  general_case:  | 
| 
998
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   for (; i < BITSET_WORDS; ++i)  | 
| 
999
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     for (j = 0; j < BITSET_WORD_BITS; ++j, ++ch)  | 
| 
1000
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (rpl__isalnum (ch) || ch == '_')  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1001
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	dfa->word_char[i] |= (bitset_word_t) 1 << j;  | 
| 
1002
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1003
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1004
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Free the work area which are only used while compiling.  */  | 
| 
1005
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1006
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static void  | 
| 
1007
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 free_workarea_compile (pTHX_ regex_t *preg)  | 
| 
1008
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
1009
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_dfa_t *dfa = preg->buffer;  | 
| 
1010
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   bin_tree_storage_t *storage, *next;  | 
| 
1011
 | 
14
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   for (storage = dfa->str_tree_storage; storage; storage = next)  | 
| 
1012
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
1013
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       next = storage->next;  | 
| 
1014
 | 
8
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       re_free (storage);  | 
| 
1015
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1016
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   dfa->str_tree_storage = NULL;  | 
| 
1017
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;  | 
| 
1018
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   dfa->str_tree = NULL;  | 
| 
1019
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_free (dfa->org_indices);  | 
| 
1020
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   dfa->org_indices = NULL;  | 
| 
1021
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1022
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1023
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Create initial states for all contexts.  */  | 
| 
1024
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1025
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t  | 
| 
1026
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 create_initial_state (pTHX_ re_dfa_t *dfa)  | 
| 
1027
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
1028
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Idx first, i;  | 
| 
1029
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   reg_errcode_t err;  | 
| 
1030
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_node_set init_nodes;  | 
| 
1031
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1032
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* Initial states have the epsilon closure of the node which is  | 
| 
1033
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      the first node of the regular expression.  */  | 
| 
1034
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   first = dfa->str_tree->first->node_idx;  | 
| 
1035
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   dfa->init_node = first;  | 
| 
1036
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   err = re_node_set_init_copy (aTHX_ &init_nodes, dfa->eclosures + first);  | 
| 
1037
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (err != REG_NOERROR, 0))  | 
| 
1038
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return err;  | 
| 
1039
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1040
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* The back-references which are in initial states can epsilon transit,  | 
| 
1041
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      since in this case all of the subexpressions can be null.  | 
| 
1042
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      Then we add epsilon closures of the nodes which are the next nodes of  | 
| 
1043
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      the back-references.  */  | 
| 
1044
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (dfa->nbackref > 0)  | 
| 
1045
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     for (i = 0; i < init_nodes.nelem; ++i)  | 
| 
1046
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       {  | 
| 
1047
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	Idx node_idx = init_nodes.elems[i];  | 
| 
1048
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	re_token_type_t type = dfa->nodes[node_idx].type;  | 
| 
1049
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1050
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	Idx clexp_idx;  | 
| 
1051
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	if (type != OP_BACK_REF)  | 
| 
1052
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  continue;  | 
| 
1053
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx)  | 
| 
1054
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  {  | 
| 
1055
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    re_token_t *clexp_node;  | 
| 
1056
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];  | 
| 
1057
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    if (clexp_node->type == OP_CLOSE_SUBEXP  | 
| 
1058
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		&& clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx)  | 
| 
1059
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      break;  | 
| 
1060
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  }  | 
| 
1061
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	if (clexp_idx == init_nodes.nelem)  | 
| 
1062
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  continue;  | 
| 
1063
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1064
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	if (type == OP_BACK_REF)  | 
| 
1065
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  {  | 
| 
1066
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    Idx dest_idx = dfa->edests[node_idx].elems[0];  | 
| 
1067
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    if (!re_node_set_contains (aTHX_ &init_nodes, dest_idx))  | 
| 
1068
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      {  | 
| 
1069
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		reg_errcode_t merge_err  | 
| 
1070
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                   = re_node_set_merge (aTHX_ &init_nodes, dfa->eclosures + dest_idx);  | 
| 
1071
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		if (merge_err != REG_NOERROR)  | 
| 
1072
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  return merge_err;  | 
| 
1073
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		i = 0;  | 
| 
1074
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      }  | 
| 
1075
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  }  | 
| 
1076
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
1077
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1078
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* It must be the first time to invoke acquire_state.  */  | 
| 
1079
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   dfa->init_state = re_acquire_state_context (aTHX_ &err, dfa, &init_nodes, 0);  | 
| 
1080
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* We don't check ERR here, since the initial state must not be NULL.  */  | 
| 
1081
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (dfa->init_state == NULL, 0))  | 
| 
1082
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return err;  | 
| 
1083
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (dfa->init_state->has_constraint)  | 
| 
1084
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
1085
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       dfa->init_state_word = re_acquire_state_context (aTHX_ &err, dfa, &init_nodes,  | 
| 
1086
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 						       CONTEXT_WORD);  | 
| 
1087
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       dfa->init_state_nl = re_acquire_state_context (aTHX_ &err, dfa, &init_nodes,  | 
| 
1088
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 						     CONTEXT_NEWLINE);  | 
| 
1089
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       dfa->init_state_begbuf = re_acquire_state_context (aTHX_ &err, dfa,  | 
| 
1090
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 							 &init_nodes,  | 
| 
1091
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 							 CONTEXT_NEWLINE  | 
| 
1092
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 							 | CONTEXT_BEGBUF);  | 
| 
1093
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (BE (dfa->init_state_word == NULL || dfa->init_state_nl == NULL  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1094
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      || dfa->init_state_begbuf == NULL, 0))  | 
| 
1095
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return err;  | 
| 
1096
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1097
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   else  | 
| 
1098
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     dfa->init_state_word = dfa->init_state_nl  | 
| 
1099
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       = dfa->init_state_begbuf = dfa->init_state;  | 
| 
1100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1101
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_node_set_free (&init_nodes);  | 
| 
1102
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return REG_NOERROR;  | 
| 
1103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   | 
| 
1105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
1106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* If it is possible to do searching in single byte encoding instead of UTF-8  | 
| 
1107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change  | 
| 
1108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    DFA nodes where needed.  */  | 
| 
1109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static void  | 
| 
1111
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 optimize_utf8 (pTHX_ re_dfa_t *dfa)  | 
| 
1112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
1113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Idx node;  | 
| 
1114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   int i;  | 
| 
1115
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   bool mb_chars = false;  | 
| 
1116
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   bool has_period = false;  | 
| 
1117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1118
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   for (node = 0; node < dfa->nodes_len; ++node)  | 
| 
1119
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     switch (dfa->nodes[node].type)  | 
| 
1120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       {  | 
| 
1121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       case CHARACTER:  | 
| 
1122
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	if (dfa->nodes[node].opr.c >= ASCII_CHARS)  | 
| 
1123
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  mb_chars = true;  | 
| 
1124
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	break;  | 
| 
1125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       case ANCHOR:  | 
| 
1126
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	switch (dfa->nodes[node].opr.ctx_type)  | 
| 
1127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  {  | 
| 
1128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  case LINE_FIRST:  | 
| 
1129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  case LINE_LAST:  | 
| 
1130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  case BUF_FIRST:  | 
| 
1131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  case BUF_LAST:  | 
| 
1132
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    break;  | 
| 
1133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  default:  | 
| 
1134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    /* Word anchors etc. cannot be handled.  It's okay to test  | 
| 
1135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	       opr.ctx_type since constraints (for all DFA nodes) are  | 
| 
1136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	       created by ORing one or more opr.ctx_type values.  */  | 
| 
1137
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    return;  | 
| 
1138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  }  | 
| 
1139
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	break;  | 
| 
1140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       case OP_PERIOD:  | 
| 
1141
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	has_period = true;  | 
| 
1142
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	break;  | 
| 
1143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       case OP_BACK_REF:  | 
| 
1144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       case OP_ALT:  | 
| 
1145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       case END_OF_RE:  | 
| 
1146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       case OP_DUP_ASTERISK:  | 
| 
1147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       case OP_OPEN_SUBEXP:  | 
| 
1148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       case OP_CLOSE_SUBEXP:  | 
| 
1149
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	break;  | 
| 
1150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       case COMPLEX_BRACKET:  | 
| 
1151
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return;  | 
| 
1152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       case SIMPLE_BRACKET:  | 
| 
1153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	/* Just double check.  */  | 
| 
1154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
1155
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  int rshift = (ASCII_CHARS % BITSET_WORD_BITS == 0  | 
| 
1156
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			? 0  | 
| 
1157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			: BITSET_WORD_BITS - ASCII_CHARS % BITSET_WORD_BITS);  | 
| 
1158
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  for (i = ASCII_CHARS / BITSET_WORD_BITS; i < BITSET_WORDS; ++i)  | 
| 
1159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
1160
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      if (dfa->nodes[node].opr.sbcset[i] >> rshift != 0)  | 
| 
1161
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		return;  | 
| 
1162
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      rshift = 0;  | 
| 
1163
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
1164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1165
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	break;  | 
| 
1166
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       default:  | 
| 
1167
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	abort ();  | 
| 
1168
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
1169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1170
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (mb_chars || has_period)  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1171
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     for (node = 0; node < dfa->nodes_len; ++node)  | 
| 
1172
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       {  | 
| 
1173
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	if (dfa->nodes[node].type == CHARACTER  | 
| 
1174
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    && dfa->nodes[node].opr.c >= ASCII_CHARS)  | 
| 
1175
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  dfa->nodes[node].mb_partial = 0;  | 
| 
1176
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	else if (dfa->nodes[node].type == OP_PERIOD)  | 
| 
1177
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  dfa->nodes[node].type = OP_UTF8_PERIOD;  | 
| 
1178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
1179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* The search can be in single byte locale.  */  | 
| 
1181
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   dfa->mb_cur_max = 1;  | 
| 
1182
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   dfa->is_utf8 = 0;  | 
| 
1183
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   dfa->has_mb_node = dfa->nbackref > 0 || has_period;  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1184
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif  | 
| 
1186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   | 
| 
1187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Analyze the structure tree, and calculate "first", "next", "edest",  | 
| 
1188
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    "eclosure", and "inveclosure".  */  | 
| 
1189
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1190
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t  | 
| 
1191
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 analyze (pTHX_ regex_t *preg)  | 
| 
1192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
1193
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_dfa_t *dfa = preg->buffer;  | 
| 
1194
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   reg_errcode_t ret;  | 
| 
1195
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1196
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* Allocate arrays.  */  | 
| 
1197
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_malloc (dfa->nexts, Idx, dfa->nodes_alloc);  | 
| 
1198
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_malloc (dfa->org_indices, Idx, dfa->nodes_alloc);  | 
| 
1199
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_malloc (dfa->edests, re_node_set, dfa->nodes_alloc);  | 
| 
1200
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_malloc (dfa->eclosures, re_node_set, dfa->nodes_alloc);  | 
| 
1201
 | 
6
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (preg->re_nsub > 0) {  | 
| 
1202
 | 
5
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     re_malloc (dfa->subexp_map, Idx, preg->re_nsub);  | 
| 
1203
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } else {  | 
| 
1204
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     dfa->subexp_map = NULL;  | 
| 
1205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
1206
 | 
6
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (dfa->subexp_map != NULL)  | 
| 
1207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
1208
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       Idx i;  | 
| 
1209
 | 
11
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       for (i = 0; i < preg->re_nsub; i++)  | 
| 
1210
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	dfa->subexp_map[i] = i;  | 
| 
1211
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       preorder (aTHX_ dfa->str_tree, optimize_subexps, dfa);  | 
| 
1212
 | 
11
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       for (i = 0; i < preg->re_nsub; i++)  | 
| 
1213
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	if (dfa->subexp_map[i] != i)  | 
| 
1214
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  break;  | 
| 
1215
 | 
5
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (i == preg->re_nsub)  | 
| 
1216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
1217
 | 
5
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  re_free (dfa->subexp_map);  | 
| 
1218
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  dfa->subexp_map = NULL;  | 
| 
1219
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1220
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1221
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1222
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ret = postorder (aTHX_ dfa->str_tree, lower_subexps, preg);  | 
| 
1223
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (ret != REG_NOERROR, 0))  | 
| 
1224
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return ret;  | 
| 
1225
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ret = postorder (aTHX_ dfa->str_tree, calc_first, dfa);  | 
| 
1226
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (ret != REG_NOERROR, 0))  | 
| 
1227
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return ret;  | 
| 
1228
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   preorder (aTHX_ dfa->str_tree, calc_next, dfa);  | 
| 
1229
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ret = preorder (aTHX_ dfa->str_tree, link_nfa_nodes, dfa);  | 
| 
1230
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (ret != REG_NOERROR, 0))  | 
| 
1231
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return ret;  | 
| 
1232
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ret = calc_eclosure (aTHX_ dfa);  | 
| 
1233
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (ret != REG_NOERROR, 0))  | 
| 
1234
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return ret;  | 
| 
1235
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* We only need this during the prune_impossible_nodes pass in regexec.c;  | 
| 
1237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      skip it if p_i_n will not run, as calc_inveclosure can be quadratic.  */  | 
| 
1238
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if ((!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match)  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1239
 | 
4
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       || dfa->nbackref)  | 
| 
1240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
1241
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       re_malloc (dfa->inveclosures, re_node_set, dfa->nodes_len);  | 
| 
1242
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       ret = calc_inveclosure (aTHX_ dfa);  | 
| 
1243
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1245
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return ret;  | 
| 
1246
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1247
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Our parse trees are very unbalanced, so we cannot use a stack to  | 
| 
1249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    implement parse tree visits.  Instead, we use parent pointers and  | 
| 
1250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    some hairy code in these two functions.  */  | 
| 
1251
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t  | 
| 
1252
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 postorder (pTHX_ bin_tree_t *root, reg_errcode_t (fn (pTHX_ void *, bin_tree_t *)),  | 
| 
1253
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   void *extra)  | 
| 
1254
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
1255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   bin_tree_t *node, *prev;  | 
| 
1256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1257
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   for (node = root; ; )  | 
| 
1258
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
1259
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* Descend down the tree, preferably to the left (or to the right  | 
| 
1260
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	 if that's the only child).  */  | 
| 
1261
 | 
154
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       while (node->left || node->right)  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1262
 | 
74
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	if (node->left)  | 
| 
1263
 | 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  node = node->left;  | 
| 
1264
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	else  | 
| 
1265
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  node = node->right;  | 
| 
1266
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1267
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       do  | 
| 
1268
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
1269
 | 
154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  reg_errcode_t err = fn (aTHX_ extra, node);  | 
| 
1270
 | 
154
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (BE (err != REG_NOERROR, 0))  | 
| 
1271
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    return err;  | 
| 
1272
 | 
154
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (node->parent == NULL)  | 
| 
1273
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    return REG_NOERROR;  | 
| 
1274
 | 
142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  prev = node;  | 
| 
1275
 | 
142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  node = node->parent;  | 
| 
1276
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1277
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* Go up while we have a node that is reached from the right.  */  | 
| 
1278
 | 
142
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       while (node->right == prev || node->right == NULL);  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1279
 | 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       node = node->right;  | 
| 
1280
 | 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1281
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t  | 
| 
1284
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 preorder (pTHX_ bin_tree_t *root, reg_errcode_t (fn (pTHX_ void *, bin_tree_t *)),  | 
| 
1285
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  void *extra)  | 
| 
1286
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
1287
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   bin_tree_t *node;  | 
| 
1288
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1289
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   for (node = root; ; )  | 
| 
1290
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
1291
 | 
217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       reg_errcode_t err = fn (aTHX_ extra, node);  | 
| 
1292
 | 
217
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (BE (err != REG_NOERROR, 0))  | 
| 
1293
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return err;  | 
| 
1294
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1295
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* Go to the left node, or up and to the right.  */  | 
| 
1296
 | 
217
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (node->left)  | 
| 
1297
 | 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	node = node->left;  | 
| 
1298
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       else  | 
| 
1299
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
1300
 | 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  bin_tree_t *prev = NULL;  | 
| 
1301
 | 
314
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  while (node->right == prev || node->right == NULL)  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1302
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
1303
 | 
217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      prev = node;  | 
| 
1304
 | 
217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      node = node->parent;  | 
| 
1305
 | 
217
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      if (!node)  | 
| 
1306
 | 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		return REG_NOERROR;  | 
| 
1307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
1308
 | 
97
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  node = node->right;  | 
| 
1309
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1310
 | 
200
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1311
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1312
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1313
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Optimization pass: if a SUBEXP is entirely contained, strip it and tell  | 
| 
1314
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    re_search_internal to map the inner one's opr.idx to this one's.  Adjust  | 
| 
1315
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    backreferences as well.  Requires a preorder visit.  */  | 
| 
1316
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t  | 
| 
1317
 | 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 optimize_subexps (pTHX_ void *extra, bin_tree_t *node)  | 
| 
1318
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
1319
 | 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_dfa_t *dfa = (re_dfa_t *) extra;  | 
| 
1320
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1321
 | 
45
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (node->token.type == OP_BACK_REF && dfa->subexp_map)  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1322
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
1323
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       Idx idx = node->token.opr.idx;  | 
| 
1324
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       node->token.opr.idx = dfa->subexp_map[idx];  | 
| 
1325
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       dfa->used_bkref_map |= 1 << node->token.opr.idx;  | 
| 
1326
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1327
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1328
 | 
45
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   else if (node->token.type == SUBEXP  | 
| 
1329
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   && node->left && node->left->token.type == SUBEXP)  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1330
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
1331
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       Idx other_idx = node->left->token.opr.idx;  | 
| 
1332
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1333
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       node->left = node->left->left;  | 
| 
1334
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (node->left)  | 
| 
1335
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	node->left->parent = node;  | 
| 
1336
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1337
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];  | 
| 
1338
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (other_idx < BITSET_WORD_BITS)  | 
| 
1339
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	dfa->used_bkref_map &= ~((bitset_word_t) 1 << other_idx);  | 
| 
1340
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1341
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1342
 | 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return REG_NOERROR;  | 
| 
1343
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1344
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1345
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Lowering pass: Turn each SUBEXP node into the appropriate concatenation  | 
| 
1346
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP.  */  | 
| 
1347
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t  | 
| 
1348
 | 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 lower_subexps (pTHX_ void *extra, bin_tree_t *node)  | 
| 
1349
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
1350
 | 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   regex_t *preg = (regex_t *) extra;  | 
| 
1351
 | 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   reg_errcode_t err = REG_NOERROR;  | 
| 
1352
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1353
 | 
68
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (node->left && node->left->token.type == SUBEXP)  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1354
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
1355
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       node->left = lower_subexp (aTHX_ &err, preg, node->left);  | 
| 
1356
 | 
5
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (node->left)  | 
| 
1357
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	node->left->parent = node;  | 
| 
1358
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1359
 | 
68
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (node->right && node->right->token.type == SUBEXP)  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1360
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
1361
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       node->right = lower_subexp (aTHX_ &err, preg, node->right);  | 
| 
1362
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (node->right)  | 
| 
1363
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	node->right->parent = node;  | 
| 
1364
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1365
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1366
 | 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return err;  | 
| 
1367
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1368
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1369
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static bin_tree_t *  | 
| 
1370
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 lower_subexp (pTHX_ reg_errcode_t *err, regex_t *preg, bin_tree_t *node)  | 
| 
1371
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
1372
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_dfa_t *dfa = preg->buffer;  | 
| 
1373
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   bin_tree_t *body = node->left;  | 
| 
1374
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   bin_tree_t *op, *cls, *tree1, *tree;  | 
| 
1375
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1376
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (preg->no_sub  | 
| 
1377
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* We do not optimize empty subexpressions, because otherwise we may  | 
| 
1378
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	 have bad CONCAT nodes with NULL children.  This is obviously not  | 
| 
1379
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	 very common, so we do not lose much.  An example that triggers  | 
| 
1380
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	 this case is the sed "script" /\(\)/x.  */  | 
| 
1381
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       && node->left != NULL  | 
| 
1382
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       && (node->token.opr.idx >= BITSET_WORD_BITS  | 
| 
1383
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  || !(dfa->used_bkref_map  | 
| 
1384
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	       & ((bitset_word_t) 1 << node->token.opr.idx))))  | 
| 
1385
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return node->left;  | 
| 
1386
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1387
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* Convert the SUBEXP node to the concatenation of an  | 
| 
1388
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      OP_OPEN_SUBEXP, the contents, and an OP_CLOSE_SUBEXP.  */  | 
| 
1389
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   op = create_tree (aTHX_ dfa, NULL, NULL, OP_OPEN_SUBEXP);  | 
| 
1390
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   cls = create_tree (aTHX_ dfa, NULL, NULL, OP_CLOSE_SUBEXP);  | 
| 
1391
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   tree1 = body ? create_tree (aTHX_ dfa, body, cls, CONCAT) : cls;  | 
| 
1392
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   tree = create_tree (aTHX_ dfa, op, tree1, CONCAT);  | 
| 
1393
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (tree == NULL || tree1 == NULL || op == NULL || cls == NULL, 0))  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1394
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
1395
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       *err = REG_ESPACE;  | 
| 
1396
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       return NULL;  | 
| 
1397
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1398
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1399
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   op->token.opr.idx = cls->token.opr.idx = node->token.opr.idx;  | 
| 
1400
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   op->token.opt_subexp = cls->token.opt_subexp = node->token.opt_subexp;  | 
| 
1401
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return tree;  | 
| 
1402
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1403
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1404
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Pass 1 in building the NFA: compute FIRST and create unlinked automaton  | 
| 
1405
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    nodes.  Requires a postorder visit.  */  | 
| 
1406
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t  | 
| 
1407
 | 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 calc_first (pTHX_ void *extra, bin_tree_t *node)  | 
| 
1408
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
1409
 | 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_dfa_t *dfa = (re_dfa_t *) extra;  | 
| 
1410
 | 
86
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (node->token.type == CONCAT)  | 
| 
1411
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
1412
 | 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       node->first = node->left->first;  | 
| 
1413
 | 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       node->node_idx = node->left->node_idx;  | 
| 
1414
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1415
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   else  | 
| 
1416
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
1417
 | 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       node->first = node;  | 
| 
1418
 | 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       node->node_idx = re_dfa_add_node (aTHX_ dfa, node->token);  | 
| 
1419
 | 
48
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (BE (node->node_idx == REG_MISSING, 0))  | 
| 
1420
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return REG_ESPACE;  | 
| 
1421
 | 
48
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (node->token.type == ANCHOR)  | 
| 
1422
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	dfa->nodes[node->node_idx].constraint = node->token.opr.ctx_type;  | 
| 
1423
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1424
 | 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return REG_NOERROR;  | 
| 
1425
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1426
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1427
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Pass 2: compute NEXT on the tree.  Preorder visit.  */  | 
| 
1428
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t  | 
| 
1429
 | 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 calc_next (pTHX_ void *extra, bin_tree_t *node)  | 
| 
1430
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
1431
 | 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   switch (node->token.type)  | 
| 
1432
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
1433
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case OP_DUP_ASTERISK:  | 
| 
1434
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       node->left->next = node;  | 
| 
1435
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       break;  | 
| 
1436
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case CONCAT:  | 
| 
1437
 | 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       node->left->next = node->right->first;  | 
| 
1438
 | 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       node->right->next = node->next;  | 
| 
1439
 | 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       break;  | 
| 
1440
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     default:  | 
| 
1441
 | 
48
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (node->left)  | 
| 
1442
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	node->left->next = node->next;  | 
| 
1443
 | 
48
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (node->right)  | 
| 
1444
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	node->right->next = node->next;  | 
| 
1445
 | 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       break;  | 
| 
1446
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1447
 | 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return REG_NOERROR;  | 
| 
1448
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1449
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1450
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Pass 3: link all DFA nodes to their NEXT node (any order will do).  */  | 
| 
1451
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t  | 
| 
1452
 | 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 link_nfa_nodes (pTHX_ void *extra, bin_tree_t *node)  | 
| 
1453
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
1454
 | 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_dfa_t *dfa = (re_dfa_t *) extra;  | 
| 
1455
 | 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Idx idx = node->node_idx;  | 
| 
1456
 | 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   reg_errcode_t err = REG_NOERROR;  | 
| 
1457
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1458
 | 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   switch (node->token.type)  | 
| 
1459
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
1460
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case CONCAT:  | 
| 
1461
 | 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       break;  | 
| 
1462
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1463
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case END_OF_RE:  | 
| 
1464
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       assert (node->next == NULL);  | 
| 
1465
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       break;  | 
| 
1466
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1467
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case OP_DUP_ASTERISK:  | 
| 
1468
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case OP_ALT:  | 
| 
1469
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       {  | 
| 
1470
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	Idx left, right;  | 
| 
1471
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	dfa->has_plural_match = 1;  | 
| 
1472
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	if (node->left != NULL)  | 
| 
1473
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  left = node->left->first->node_idx;  | 
| 
1474
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	else  | 
| 
1475
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  left = node->next->node_idx;  | 
| 
1476
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	if (node->right != NULL)  | 
| 
1477
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  right = node->right->first->node_idx;  | 
| 
1478
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	else  | 
| 
1479
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  right = node->next->node_idx;  | 
| 
1480
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	assert (REG_VALID_INDEX (left));  | 
| 
1481
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	assert (REG_VALID_INDEX (right));  | 
| 
1482
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	err = re_node_set_init_2 (aTHX_ dfa->edests + idx, left, right);  | 
| 
1483
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
1484
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       break;  | 
| 
1485
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1486
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case ANCHOR:  | 
| 
1487
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case OP_OPEN_SUBEXP:  | 
| 
1488
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case OP_CLOSE_SUBEXP:  | 
| 
1489
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       err = re_node_set_init_1 (aTHX_ dfa->edests + idx, node->next->node_idx);  | 
| 
1490
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       break;  | 
| 
1491
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1492
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case OP_BACK_REF:  | 
| 
1493
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       dfa->nexts[idx] = node->next->node_idx;  | 
| 
1494
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (node->token.type == OP_BACK_REF)  | 
| 
1495
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	err = re_node_set_init_1 (aTHX_ dfa->edests + idx, dfa->nexts[idx]);  | 
| 
1496
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       break;  | 
| 
1497
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1498
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     default:  | 
| 
1499
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       assert (!IS_EPSILON_NODE (node->token.type));  | 
| 
1500
 | 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       dfa->nexts[idx] = node->next->node_idx;  | 
| 
1501
 | 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       break;  | 
| 
1502
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1503
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1504
 | 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return err;  | 
| 
1505
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1506
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1507
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Duplicate the epsilon closure of the node ROOT_NODE.  | 
| 
1508
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    Note that duplicated nodes have constraint INIT_CONSTRAINT in addition  | 
| 
1509
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    to their own constraint.  */  | 
| 
1510
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1511
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t  | 
| 
1512
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 internal_function  | 
| 
1513
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 duplicate_node_closure (pTHX_ re_dfa_t *dfa, Idx top_org_node, Idx top_clone_node,  | 
| 
1514
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			Idx root_node, unsigned int init_constraint)  | 
| 
1515
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
1516
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Idx org_node, clone_node;  | 
| 
1517
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   bool ok;  | 
| 
1518
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   unsigned int constraint = init_constraint;  | 
| 
1519
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   for (org_node = top_org_node, clone_node = top_clone_node;;)  | 
| 
1520
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
1521
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       Idx org_dest, clone_dest;  | 
| 
1522
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (dfa->nodes[org_node].type == OP_BACK_REF)  | 
| 
1523
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
1524
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  /* If the back reference epsilon-transit, its destination must  | 
| 
1525
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	     also have the constraint.  Then duplicate the epsilon closure  | 
| 
1526
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	     of the destination of the back reference, and store it in  | 
| 
1527
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	     edests of the back reference.  */  | 
| 
1528
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  org_dest = dfa->nexts[org_node];  | 
| 
1529
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  re_node_set_empty (dfa->edests + clone_node);  | 
| 
1530
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  clone_dest = duplicate_node (aTHX_ dfa, org_dest, constraint);  | 
| 
1531
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (BE (clone_dest == REG_MISSING, 0))  | 
| 
1532
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    return REG_ESPACE;  | 
| 
1533
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  dfa->nexts[clone_node] = dfa->nexts[org_node];  | 
| 
1534
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  ok = re_node_set_insert (aTHX_ dfa->edests + clone_node, clone_dest);  | 
| 
1535
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (BE (! ok, 0))  | 
| 
1536
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    return REG_ESPACE;  | 
| 
1537
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1538
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       else if (dfa->edests[org_node].nelem == 0)  | 
| 
1539
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
1540
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  /* In case of the node can't epsilon-transit, don't duplicate the  | 
| 
1541
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	     destination and store the original destination as the  | 
| 
1542
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	     destination of the node.  */  | 
| 
1543
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  dfa->nexts[clone_node] = dfa->nexts[org_node];  | 
| 
1544
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  break;  | 
| 
1545
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1546
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       else if (dfa->edests[org_node].nelem == 1)  | 
| 
1547
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
1548
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  /* In case of the node can epsilon-transit, and it has only one  | 
| 
1549
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	     destination.  */  | 
| 
1550
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  org_dest = dfa->edests[org_node].elems[0];  | 
| 
1551
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  re_node_set_empty (dfa->edests + clone_node);  | 
| 
1552
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  /* If the node is root_node itself, it means the epsilon closure  | 
| 
1553
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	     has a loop.  Then tie it to the destination of the root_node.  */  | 
| 
1554
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (org_node == root_node && clone_node != org_node)  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1555
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
1556
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      ok = re_node_set_insert (aTHX_ dfa->edests + clone_node, org_dest);  | 
| 
1557
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      if (BE (! ok, 0))  | 
| 
1558
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	        return REG_ESPACE;  | 
| 
1559
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      break;  | 
| 
1560
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
1561
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  /* In case the node has another constraint, append it.  */  | 
| 
1562
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  constraint |= dfa->nodes[org_node].constraint;  | 
| 
1563
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  clone_dest = duplicate_node (aTHX_ dfa, org_dest, constraint);  | 
| 
1564
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (BE (clone_dest == REG_MISSING, 0))  | 
| 
1565
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    return REG_ESPACE;  | 
| 
1566
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  ok = re_node_set_insert (aTHX_ dfa->edests + clone_node, clone_dest);  | 
| 
1567
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (BE (! ok, 0))  | 
| 
1568
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    return REG_ESPACE;  | 
| 
1569
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1570
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       else /* dfa->edests[org_node].nelem == 2 */  | 
| 
1571
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
1572
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  /* In case of the node can epsilon-transit, and it has two  | 
| 
1573
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	     destinations. In the bin_tree_t and DFA, that's '|' and '*'.   */  | 
| 
1574
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  org_dest = dfa->edests[org_node].elems[0];  | 
| 
1575
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  re_node_set_empty (dfa->edests + clone_node);  | 
| 
1576
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  /* Search for a duplicated node which satisfies the constraint.  */  | 
| 
1577
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  clone_dest = search_duplicated_node (aTHX_ dfa, org_dest, constraint);  | 
| 
1578
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (clone_dest == REG_MISSING)  | 
| 
1579
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
1580
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      /* There is no such duplicated node, create a new one.  */  | 
| 
1581
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      reg_errcode_t err;  | 
| 
1582
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      clone_dest = duplicate_node (aTHX_ dfa, org_dest, constraint);  | 
| 
1583
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      if (BE (clone_dest == REG_MISSING, 0))  | 
| 
1584
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		return REG_ESPACE;  | 
| 
1585
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      ok = re_node_set_insert (aTHX_ dfa->edests + clone_node, clone_dest);  | 
| 
1586
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      if (BE (! ok, 0))  | 
| 
1587
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		return REG_ESPACE;  | 
| 
1588
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      err = duplicate_node_closure (aTHX_ dfa, org_dest, clone_dest,  | 
| 
1589
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					    root_node, constraint);  | 
| 
1590
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      if (BE (err != REG_NOERROR, 0))  | 
| 
1591
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		return err;  | 
| 
1592
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
1593
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  else  | 
| 
1594
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
1595
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      /* There is a duplicated node which satisfies the constraint,  | 
| 
1596
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		 use it to avoid infinite loop.  */  | 
| 
1597
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      ok = re_node_set_insert (aTHX_ dfa->edests + clone_node, clone_dest);  | 
| 
1598
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      if (BE (! ok, 0))  | 
| 
1599
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		return REG_ESPACE;  | 
| 
1600
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
1601
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1602
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  org_dest = dfa->edests[org_node].elems[1];  | 
| 
1603
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  clone_dest = duplicate_node (aTHX_ dfa, org_dest, constraint);  | 
| 
1604
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (BE (clone_dest == REG_MISSING, 0))  | 
| 
1605
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    return REG_ESPACE;  | 
| 
1606
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  ok = re_node_set_insert (aTHX_ dfa->edests + clone_node, clone_dest);  | 
| 
1607
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (BE (! ok, 0))  | 
| 
1608
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    return REG_ESPACE;  | 
| 
1609
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1610
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       org_node = org_dest;  | 
| 
1611
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       clone_node = clone_dest;  | 
| 
1612
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1613
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return REG_NOERROR;  | 
| 
1614
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1615
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1616
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Search for a node which is duplicated from the node ORG_NODE, and  | 
| 
1617
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    satisfies the constraint CONSTRAINT.  */  | 
| 
1618
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1619
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static Idx  | 
| 
1620
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 search_duplicated_node (pTHX_ const re_dfa_t *dfa, Idx org_node,  | 
| 
1621
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			unsigned int constraint)  | 
| 
1622
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
1623
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Idx idx;  | 
| 
1624
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   for (idx = dfa->nodes_len - 1; dfa->nodes[idx].duplicated && idx > 0; --idx)  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1625
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
1626
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (org_node == dfa->org_indices[idx]  | 
| 
1627
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  && constraint == dfa->nodes[idx].constraint)  | 
| 
1628
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return idx; /* Found.  */  | 
| 
1629
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1630
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return REG_MISSING; /* Not found.  */  | 
| 
1631
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1632
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1633
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.  | 
| 
1634
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    Return the index of the new node, or REG_MISSING if insufficient storage is  | 
| 
1635
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    available.  */  | 
| 
1636
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1637
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static Idx  | 
| 
1638
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 duplicate_node (pTHX_ re_dfa_t *dfa, Idx org_idx, unsigned int constraint)  | 
| 
1639
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
1640
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Idx dup_idx = re_dfa_add_node (aTHX_ dfa, dfa->nodes[org_idx]);  | 
| 
1641
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (dup_idx != REG_MISSING, 1))  | 
| 
1642
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
1643
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       dfa->nodes[dup_idx].constraint = constraint;  | 
| 
1644
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].constraint;  | 
| 
1645
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       dfa->nodes[dup_idx].duplicated = 1;  | 
| 
1646
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1647
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* Store the index of the original node.  */  | 
| 
1648
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       dfa->org_indices[dup_idx] = org_idx;  | 
| 
1649
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1650
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return dup_idx;  | 
| 
1651
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1652
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1653
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t  | 
| 
1654
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 calc_inveclosure (pTHX_ re_dfa_t *dfa)  | 
| 
1655
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
1656
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Idx src, idx;  | 
| 
1657
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   bool ok;  | 
| 
1658
 | 
17
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   for (idx = 0; idx < dfa->nodes_len; ++idx)  | 
| 
1659
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     re_node_set_init_empty (dfa->inveclosures + idx);  | 
| 
1660
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1661
 | 
17
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   for (src = 0; src < dfa->nodes_len; ++src)  | 
| 
1662
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
1663
 | 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       Idx *elems = dfa->eclosures[src].elems;  | 
| 
1664
 | 
45
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx)  | 
| 
1665
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
1666
 | 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  ok = re_node_set_insert_last (aTHX_ dfa->inveclosures + elems[idx], src);  | 
| 
1667
 | 
30
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (BE (! ok, 0))  | 
| 
1668
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    return REG_ESPACE;  | 
| 
1669
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1670
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1671
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1672
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return REG_NOERROR;  | 
| 
1673
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1674
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1675
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Calculate "eclosure" for all the node in DFA.  */  | 
| 
1676
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1677
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t  | 
| 
1678
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 calc_eclosure (pTHX_ re_dfa_t *dfa)  | 
| 
1679
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
1680
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Idx node_idx;  | 
| 
1681
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   bool incomplete;  | 
| 
1682
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef DEBUG  | 
| 
1683
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   assert (dfa->nodes_len > 0);  | 
| 
1684
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif  | 
| 
1685
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   incomplete = false;  | 
| 
1686
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* For each nodes, calculate epsilon closure.  */  | 
| 
1687
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   for (node_idx = 0; ; ++node_idx)  | 
| 
1688
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
1689
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       reg_errcode_t err;  | 
| 
1690
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       re_node_set eclosure_elem;  | 
| 
1691
 | 
54
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (node_idx == dfa->nodes_len)  | 
| 
1692
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
1693
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (!incomplete)  | 
| 
1694
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    break;  | 
| 
1695
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  incomplete = false;  | 
| 
1696
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  node_idx = 0;  | 
| 
1697
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1698
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1699
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef DEBUG  | 
| 
1700
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       assert (dfa->eclosures[node_idx].nelem != REG_MISSING);  | 
| 
1701
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif  | 
| 
1702
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1703
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* If we have already calculated, skip it.  */  | 
| 
1704
 | 
48
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (dfa->eclosures[node_idx].nelem != 0)  | 
| 
1705
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	continue;  | 
| 
1706
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* Calculate epsilon closure of 'node_idx'.  */  | 
| 
1707
 | 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       err = calc_eclosure_iter (aTHX_ &eclosure_elem, dfa, node_idx, true);  | 
| 
1708
 | 
32
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (BE (err != REG_NOERROR, 0))  | 
| 
1709
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return err;  | 
| 
1710
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1711
 | 
32
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (dfa->eclosures[node_idx].nelem == 0)  | 
| 
1712
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
1713
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  incomplete = true;  | 
| 
1714
 | 
32
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  re_node_set_free (&eclosure_elem);  | 
| 
1715
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1716
 | 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1717
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return REG_NOERROR;  | 
| 
1718
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1719
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1720
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Calculate epsilon closure of NODE.  */  | 
| 
1721
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1722
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t  | 
| 
1723
 | 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 calc_eclosure_iter (pTHX_ re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root)  | 
| 
1724
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
1725
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   reg_errcode_t err;  | 
| 
1726
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Idx i;  | 
| 
1727
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_node_set eclosure;  | 
| 
1728
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   bool ok;  | 
| 
1729
 | 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   bool incomplete = false;  | 
| 
1730
 | 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   err = re_node_set_alloc (aTHX_ &eclosure, dfa->edests[node].nelem + 1);  | 
| 
1731
 | 
48
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (err != REG_NOERROR, 0))  | 
| 
1732
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return err;  | 
| 
1733
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1734
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* This indicates that we are calculating this node now.  | 
| 
1735
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      We reference this value to avoid infinite loop.  */  | 
| 
1736
 | 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   dfa->eclosures[node].nelem = REG_MISSING;  | 
| 
1737
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1738
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* If the current node has constraints, duplicate all nodes  | 
| 
1739
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      since they must inherit the constraints.  */  | 
| 
1740
 | 
48
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (dfa->nodes[node].constraint  | 
| 
1741
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       && dfa->edests[node].nelem  | 
| 
1742
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       && !dfa->nodes[dfa->edests[node].elems[0]].duplicated)  | 
| 
1743
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
1744
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       err = duplicate_node_closure (aTHX_ dfa, node, node, node,  | 
| 
1745
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				    dfa->nodes[node].constraint);  | 
| 
1746
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (BE (err != REG_NOERROR, 0))  | 
| 
1747
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return err;  | 
| 
1748
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1749
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1750
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* Expand each epsilon destination nodes.  */  | 
| 
1751
 | 
48
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (IS_EPSILON_NODE(dfa->nodes[node].type))  | 
| 
1752
 | 
30
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     for (i = 0; i < dfa->edests[node].nelem; ++i)  | 
| 
1753
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       {  | 
| 
1754
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	re_node_set eclosure_elem;  | 
| 
1755
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	Idx edest = dfa->edests[node].elems[i];  | 
| 
1756
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	/* If calculating the epsilon closure of 'edest' is in progress,  | 
| 
1757
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   return intermediate result.  */  | 
| 
1758
 | 
16
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	if (dfa->eclosures[edest].nelem == REG_MISSING)  | 
| 
1759
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  {  | 
| 
1760
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    incomplete = true;  | 
| 
1761
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    continue;  | 
| 
1762
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  }  | 
| 
1763
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	/* If we haven't calculated the epsilon closure of 'edest' yet,  | 
| 
1764
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   calculate now. Otherwise use calculated epsilon closure.  */  | 
| 
1765
 | 
16
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	if (dfa->eclosures[edest].nelem == 0)  | 
| 
1766
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  {  | 
| 
1767
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    err = calc_eclosure_iter (aTHX_ &eclosure_elem, dfa, edest, false);  | 
| 
1768
 | 
16
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    if (BE (err != REG_NOERROR, 0))  | 
| 
1769
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      return err;  | 
| 
1770
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  }  | 
| 
1771
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	else  | 
| 
1772
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  eclosure_elem = dfa->eclosures[edest];  | 
| 
1773
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	/* Merge the epsilon closure of 'edest'.  */  | 
| 
1774
 | 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	err = re_node_set_merge (aTHX_ &eclosure, &eclosure_elem);  | 
| 
1775
 | 
16
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	if (BE (err != REG_NOERROR, 0))  | 
| 
1776
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  return err;  | 
| 
1777
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	/* If the epsilon closure of 'edest' is incomplete,  | 
| 
1778
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   the epsilon closure of this node is also incomplete.  */  | 
| 
1779
 | 
16
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	if (dfa->eclosures[edest].nelem == 0)  | 
| 
1780
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  {  | 
| 
1781
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    incomplete = true;  | 
| 
1782
 | 
16
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    re_node_set_free (&eclosure_elem);  | 
| 
1783
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  }  | 
| 
1784
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
1785
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1786
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* An epsilon closure includes itself.  */  | 
| 
1787
 | 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ok = re_node_set_insert (aTHX_ &eclosure, node);  | 
| 
1788
 | 
48
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (! ok, 0))  | 
| 
1789
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return REG_ESPACE;  | 
| 
1790
 | 
48
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (incomplete && !root)  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1791
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     dfa->eclosures[node].nelem = 0;  | 
| 
1792
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   else  | 
| 
1793
 | 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     dfa->eclosures[node] = eclosure;  | 
| 
1794
 | 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   *new_set = eclosure;  | 
| 
1795
 | 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return REG_NOERROR;  | 
| 
1796
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1797
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   | 
| 
1798
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Functions for token which are used in the parser.  */  | 
| 
1799
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1800
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Fetch a token from INPUT.  | 
| 
1801
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    We must not use this function inside bracket expressions.  */  | 
| 
1802
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1803
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static void  | 
| 
1804
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 internal_function  | 
| 
1805
 | 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 fetch_token (pTHX_ re_token_t *result, re_string_t *input, reg_syntax_t syntax)  | 
| 
1806
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
1807
 | 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_string_skip_bytes (aTHX_ input, peek_token (aTHX_ result, input, syntax));  | 
| 
1808
 | 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1809
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1810
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Peek a token from INPUT, and return the length of the token.  | 
| 
1811
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    We must not use this function inside bracket expressions.  */  | 
| 
1812
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1813
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static int  | 
| 
1814
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 internal_function  | 
| 
1815
 | 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 peek_token (pTHX_ re_token_t *token, re_string_t *input, reg_syntax_t syntax)  | 
| 
1816
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
1817
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   unsigned char c;  | 
| 
1818
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1819
 | 
44
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (re_string_eoi (input))  | 
| 
1820
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
1821
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       token->type = END_OF_RE;  | 
| 
1822
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       return 0;  | 
| 
1823
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1824
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1825
 | 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   c = re_string_peek_byte (aTHX_ input, 0);  | 
| 
1826
 | 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   token->opr.c = c;  | 
| 
1827
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1828
 | 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   token->word_char = 0;  | 
| 
1829
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
1830
 | 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   token->mb_partial = 0;  | 
| 
1831
 | 
38
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (input->mb_cur_max > 1 &&  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1832
 | 
38
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       !re_string_first_byte (input, re_string_cur_idx (input)))  | 
| 
1833
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
1834
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       token->type = CHARACTER;  | 
| 
1835
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       token->mb_partial = 1;  | 
| 
1836
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       return 1;  | 
| 
1837
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1838
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif  | 
| 
1839
 | 
38
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (c == '\\')  | 
| 
1840
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
1841
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       unsigned char c2;  | 
| 
1842
 | 
12
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (re_string_cur_idx (input) + 1 >= re_string_length (aTHX_ input))  | 
| 
1843
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
1844
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  token->type = BACK_SLASH;  | 
| 
1845
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  return 1;  | 
| 
1846
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1847
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1848
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       c2 = re_string_peek_byte_case (aTHX_ input, 1);  | 
| 
1849
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       token->opr.c = c2;  | 
| 
1850
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       token->type = CHARACTER;  | 
| 
1851
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
1852
 | 
12
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (input->mb_cur_max > 1)  | 
| 
1853
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
1854
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  rpl__wint_t wc = re_string_wchar_at (aTHX_ input,  | 
| 
1855
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					  re_string_cur_idx (input) + 1);  | 
| 
1856
 | 
12
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1857
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1858
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       else  | 
| 
1859
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif  | 
| 
1860
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	token->word_char = IS_WORD_CHAR (c2) != 0;  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1861
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1862
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       switch (c2)  | 
| 
1863
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
1864
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	case '|':  | 
| 
1865
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR))  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1866
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    token->type = OP_ALT;  | 
| 
1867
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  break;  | 
| 
1868
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	case '1': case '2': case '3': case '4': case '5':  | 
| 
1869
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	case '6': case '7': case '8': case '9':  | 
| 
1870
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (!(syntax & RE_NO_BK_REFS))  | 
| 
1871
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
1872
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      token->type = OP_BACK_REF;  | 
| 
1873
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      token->opr.idx = c2 - '1';  | 
| 
1874
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
1875
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  break;  | 
| 
1876
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	case '<':  | 
| 
1877
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (!(syntax & RE_NO_GNU_OPS))  | 
| 
1878
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
1879
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      token->type = ANCHOR;  | 
| 
1880
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      token->opr.ctx_type = WORD_FIRST;  | 
| 
1881
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
1882
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  break;  | 
| 
1883
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	case '>':  | 
| 
1884
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (!(syntax & RE_NO_GNU_OPS))  | 
| 
1885
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
1886
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      token->type = ANCHOR;  | 
| 
1887
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      token->opr.ctx_type = WORD_LAST;  | 
| 
1888
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
1889
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  break;  | 
| 
1890
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	case 'b':  | 
| 
1891
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (!(syntax & RE_NO_GNU_OPS))  | 
| 
1892
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
1893
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      token->type = ANCHOR;  | 
| 
1894
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      token->opr.ctx_type = WORD_DELIM;  | 
| 
1895
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
1896
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  break;  | 
| 
1897
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	case 'B':  | 
| 
1898
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (!(syntax & RE_NO_GNU_OPS))  | 
| 
1899
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
1900
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      token->type = ANCHOR;  | 
| 
1901
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      token->opr.ctx_type = NOT_WORD_DELIM;  | 
| 
1902
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
1903
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  break;  | 
| 
1904
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	case 'w':  | 
| 
1905
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (!(syntax & RE_NO_GNU_OPS))  | 
| 
1906
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    token->type = OP_WORD;  | 
| 
1907
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  break;  | 
| 
1908
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	case 'W':  | 
| 
1909
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (!(syntax & RE_NO_GNU_OPS))  | 
| 
1910
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    token->type = OP_NOTWORD;  | 
| 
1911
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  break;  | 
| 
1912
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	case 's':  | 
| 
1913
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (!(syntax & RE_NO_GNU_OPS))  | 
| 
1914
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    token->type = OP_SPACE;  | 
| 
1915
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  break;  | 
| 
1916
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	case 'S':  | 
| 
1917
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (!(syntax & RE_NO_GNU_OPS))  | 
| 
1918
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    token->type = OP_NOTSPACE;  | 
| 
1919
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  break;  | 
| 
1920
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	case '`':  | 
| 
1921
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (!(syntax & RE_NO_GNU_OPS))  | 
| 
1922
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
1923
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      token->type = ANCHOR;  | 
| 
1924
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      token->opr.ctx_type = BUF_FIRST;  | 
| 
1925
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
1926
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  break;  | 
| 
1927
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	case '\'':  | 
| 
1928
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (!(syntax & RE_NO_GNU_OPS))  | 
| 
1929
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
1930
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      token->type = ANCHOR;  | 
| 
1931
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      token->opr.ctx_type = BUF_LAST;  | 
| 
1932
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
1933
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  break;  | 
| 
1934
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	case '(':  | 
| 
1935
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (!(syntax & RE_NO_BK_PARENS))  | 
| 
1936
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    token->type = OP_OPEN_SUBEXP;  | 
| 
1937
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  break;  | 
| 
1938
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	case ')':  | 
| 
1939
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (!(syntax & RE_NO_BK_PARENS))  | 
| 
1940
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    token->type = OP_CLOSE_SUBEXP;  | 
| 
1941
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  break;  | 
| 
1942
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	case '+':  | 
| 
1943
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1944
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    token->type = OP_DUP_PLUS;  | 
| 
1945
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  break;  | 
| 
1946
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	case '?':  | 
| 
1947
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1948
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    token->type = OP_DUP_QUESTION;  | 
| 
1949
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  break;  | 
| 
1950
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	case '{':  | 
| 
1951
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1952
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    token->type = OP_OPEN_DUP_NUM;  | 
| 
1953
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  break;  | 
| 
1954
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	case '}':  | 
| 
1955
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1956
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    token->type = OP_CLOSE_DUP_NUM;  | 
| 
1957
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  break;  | 
| 
1958
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	default:  | 
| 
1959
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  break;  | 
| 
1960
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1961
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       return 2;  | 
| 
1962
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1963
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1964
 | 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   token->type = CHARACTER;  | 
| 
1965
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
1966
 | 
26
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (input->mb_cur_max > 1)  | 
| 
1967
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
1968
 | 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       rpl__wint_t wc = re_string_wchar_at (aTHX_ input, re_string_cur_idx (input));  | 
| 
1969
 | 
26
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1970
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
1971
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   else  | 
| 
1972
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif  | 
| 
1973
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     token->word_char = IS_WORD_CHAR (token->opr.c);  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1974
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1975
 | 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   switch (c)  | 
| 
1976
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
1977
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case '\n':  | 
| 
1978
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (syntax & RE_NEWLINE_ALT)  | 
| 
1979
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	token->type = OP_ALT;  | 
| 
1980
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       break;  | 
| 
1981
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case '|':  | 
| 
1982
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR))  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1983
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	token->type = OP_ALT;  | 
| 
1984
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       break;  | 
| 
1985
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case '*':  | 
| 
1986
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       token->type = OP_DUP_ASTERISK;  | 
| 
1987
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       break;  | 
| 
1988
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case '+':  | 
| 
1989
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1990
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	token->type = OP_DUP_PLUS;  | 
| 
1991
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       break;  | 
| 
1992
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case '?':  | 
| 
1993
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1994
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	token->type = OP_DUP_QUESTION;  | 
| 
1995
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       break;  | 
| 
1996
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case '{':  | 
| 
1997
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1998
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	token->type = OP_OPEN_DUP_NUM;  | 
| 
1999
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       break;  | 
| 
2000
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case '}':  | 
| 
2001
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2002
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	token->type = OP_CLOSE_DUP_NUM;  | 
| 
2003
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       break;  | 
| 
2004
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case '(':  | 
| 
2005
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (syntax & RE_NO_BK_PARENS)  | 
| 
2006
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	token->type = OP_OPEN_SUBEXP;  | 
| 
2007
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       break;  | 
| 
2008
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case ')':  | 
| 
2009
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (syntax & RE_NO_BK_PARENS)  | 
| 
2010
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	token->type = OP_CLOSE_SUBEXP;  | 
| 
2011
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       break;  | 
| 
2012
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case '[':  | 
| 
2013
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       token->type = OP_OPEN_BRACKET;  | 
| 
2014
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       break;  | 
| 
2015
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case '.':  | 
| 
2016
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       token->type = OP_PERIOD;  | 
| 
2017
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       break;  | 
| 
2018
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case '^':  | 
| 
2019
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS | RE_CARET_ANCHORS_HERE)) &&  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2020
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  re_string_cur_idx (input) != 0)  | 
| 
2021
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
2022
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  char prev = re_string_peek_byte (aTHX_ input, -1);  | 
| 
2023
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (!(syntax & RE_NEWLINE_ALT) || prev != '\n')  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2024
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    break;  | 
| 
2025
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2026
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       token->type = ANCHOR;  | 
| 
2027
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       token->opr.ctx_type = LINE_FIRST;  | 
| 
2028
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       break;  | 
| 
2029
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case '$':  | 
| 
2030
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) &&  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2031
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  re_string_cur_idx (input) + 1 != re_string_length (aTHX_ input))  | 
| 
2032
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
2033
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  re_token_t next;  | 
| 
2034
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  re_string_skip_bytes (aTHX_ input, 1);  | 
| 
2035
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  peek_token (aTHX_ &next, input, syntax);  | 
| 
2036
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  re_string_skip_bytes (aTHX_ input, -1);  | 
| 
2037
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP)  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2038
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    break;  | 
| 
2039
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2040
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       token->type = ANCHOR;  | 
| 
2041
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       token->opr.ctx_type = LINE_LAST;  | 
| 
2042
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       break;  | 
| 
2043
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     default:  | 
| 
2044
 | 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       break;  | 
| 
2045
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2046
 | 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return 1;  | 
| 
2047
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
2048
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2049
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Peek a token from INPUT, and return the length of the token.  | 
| 
2050
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    We must not use this function out of bracket expressions.  */  | 
| 
2051
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2052
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static int  | 
| 
2053
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 internal_function  | 
| 
2054
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 peek_token_bracket (pTHX_ re_token_t *token, re_string_t *input, reg_syntax_t syntax)  | 
| 
2055
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
2056
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   unsigned char c;  | 
| 
2057
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (re_string_eoi (input))  | 
| 
2058
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
2059
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       token->type = END_OF_RE;  | 
| 
2060
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       return 0;  | 
| 
2061
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2062
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   c = re_string_peek_byte (aTHX_ input, 0);  | 
| 
2063
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   token->opr.c = c;  | 
| 
2064
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2065
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
2066
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (input->mb_cur_max > 1 &&  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2067
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       !re_string_first_byte (input, re_string_cur_idx (input)))  | 
| 
2068
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
2069
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       token->type = CHARACTER;  | 
| 
2070
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       return 1;  | 
| 
2071
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2072
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif /* RE_ENABLE_I18N */  | 
| 
2073
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2074
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (c == '\\' && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS)  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2075
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       && re_string_cur_idx (input) + 1 < re_string_length (input))  | 
| 
2076
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
2077
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* In this case, '\' escape a character.  */  | 
| 
2078
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       unsigned char c2;  | 
| 
2079
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       re_string_skip_bytes (aTHX_ input, 1);  | 
| 
2080
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       c2 = re_string_peek_byte (aTHX_ input, 0);  | 
| 
2081
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       token->opr.c = c2;  | 
| 
2082
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       token->type = CHARACTER;  | 
| 
2083
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       return 1;  | 
| 
2084
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2085
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (c == '[') /* '[' is a special char in a bracket exps.  */  | 
| 
2086
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
2087
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       unsigned char c2;  | 
| 
2088
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       int token_len;  | 
| 
2089
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (re_string_cur_idx (input) + 1 < re_string_length (input))  | 
| 
2090
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	c2 = re_string_peek_byte (aTHX_ input, 1);  | 
| 
2091
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       else  | 
| 
2092
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	c2 = 0;  | 
| 
2093
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       token->opr.c = c2;  | 
| 
2094
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       token_len = 2;  | 
| 
2095
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       switch (c2)  | 
| 
2096
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
2097
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	case '.':  | 
| 
2098
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  token->type = OP_OPEN_COLL_ELEM;  | 
| 
2099
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  break;  | 
| 
2100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	case '=':  | 
| 
2101
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  token->type = OP_OPEN_EQUIV_CLASS;  | 
| 
2102
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  break;  | 
| 
2103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	case ':':  | 
| 
2104
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (syntax & RE_CHAR_CLASSES)  | 
| 
2105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
2106
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      token->type = OP_OPEN_CHAR_CLASS;  | 
| 
2107
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      break;  | 
| 
2108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
2109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  /* else fall through.  */  | 
| 
2110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	default:  | 
| 
2111
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  token->type = CHARACTER;  | 
| 
2112
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  token->opr.c = c;  | 
| 
2113
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  token_len = 1;  | 
| 
2114
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  break;  | 
| 
2115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2116
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       return token_len;  | 
| 
2117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2118
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   switch (c)  | 
| 
2119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
2120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case '-':  | 
| 
2121
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       token->type = OP_CHARSET_RANGE;  | 
| 
2122
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       break;  | 
| 
2123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case ']':  | 
| 
2124
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       token->type = OP_CLOSE_BRACKET;  | 
| 
2125
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       break;  | 
| 
2126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case '^':  | 
| 
2127
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       token->type = OP_NON_MATCH_LIST;  | 
| 
2128
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       break;  | 
| 
2129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     default:  | 
| 
2130
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       token->type = CHARACTER;  | 
| 
2131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2132
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return 1;  | 
| 
2133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
2134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   | 
| 
2135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Functions for parser.  */  | 
| 
2136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Entry point of the parser.  | 
| 
2138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    Parse the regular expression REGEXP and return the structure tree.  | 
| 
2139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    If an error occurs, ERR is set by error code, and return NULL.  | 
| 
2140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    This function build the following tree, from regular expression :  | 
| 
2141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   CAT  | 
| 
2142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   / \  | 
| 
2143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  /   \  | 
| 
2144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      EOR  | 
| 
2145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    CAT means concatenation.  | 
| 
2147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    EOR means end of regular expression.  */  | 
| 
2148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2149
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static bin_tree_t *  | 
| 
2150
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 parse (pTHX_ re_string_t *regexp, regex_t *preg, reg_syntax_t syntax,  | 
| 
2151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        reg_errcode_t *err)  | 
| 
2152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
2153
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_dfa_t *dfa = preg->buffer;  | 
| 
2154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   bin_tree_t *tree, *eor, *root;  | 
| 
2155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_token_t current_token;  | 
| 
2156
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   dfa->syntax = syntax;  | 
| 
2157
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   fetch_token (aTHX_ ¤t_token, regexp, syntax | RE_CARET_ANCHORS_HERE);  | 
| 
2158
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   tree = parse_reg_exp (aTHX_ regexp, preg, ¤t_token, syntax, 0, err);  | 
| 
2159
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (*err != REG_NOERROR && tree == NULL, 0))  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2160
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return NULL;  | 
| 
2161
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   eor = create_tree (aTHX_ dfa, NULL, NULL, END_OF_RE);  | 
| 
2162
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (tree != NULL)  | 
| 
2163
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     root = create_tree (aTHX_ dfa, tree, eor, CONCAT);  | 
| 
2164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   else  | 
| 
2165
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     root = eor;  | 
| 
2166
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (eor == NULL || root == NULL, 0))  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2167
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
2168
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       *err = REG_ESPACE;  | 
| 
2169
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       return NULL;  | 
| 
2170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2171
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return root;  | 
| 
2172
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
2173
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2174
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* This function build the following tree, from regular expression  | 
| 
2175
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    |:  | 
| 
2176
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   ALT  | 
| 
2177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   / \  | 
| 
2178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  /   \  | 
| 
2179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
2180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    ALT means alternative, which represents the operator '|'.  */  | 
| 
2182
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static bin_tree_t *  | 
| 
2184
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 parse_reg_exp (pTHX_ re_string_t *regexp, regex_t *preg, re_token_t *token,  | 
| 
2185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	       reg_syntax_t syntax, Idx nest, reg_errcode_t *err)  | 
| 
2186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
2187
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_dfa_t *dfa = preg->buffer;  | 
| 
2188
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   bin_tree_t *tree, *branch = NULL;  | 
| 
2189
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   tree = parse_branch (aTHX_ regexp, preg, token, syntax, nest, err);  | 
| 
2190
 | 
12
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (*err != REG_NOERROR && tree == NULL, 0))  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2191
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return NULL;  | 
| 
2192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2193
 | 
12
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   while (token->type == OP_ALT)  | 
| 
2194
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
2195
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       fetch_token (aTHX_ token, regexp, syntax | RE_CARET_ANCHORS_HERE);  | 
| 
2196
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (token->type != OP_ALT && token->type != END_OF_RE  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2197
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  && (nest == 0 || token->type != OP_CLOSE_SUBEXP))  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2198
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
2199
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  branch = parse_branch (aTHX_ regexp, preg, token, syntax, nest, err);  | 
| 
2200
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (BE (*err != REG_NOERROR && branch == NULL, 0))  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2201
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    return NULL;  | 
| 
2202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2203
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       else  | 
| 
2204
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	branch = NULL;  | 
| 
2205
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       tree = create_tree (aTHX_ dfa, tree, branch, OP_ALT);  | 
| 
2206
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (BE (tree == NULL, 0))  | 
| 
2207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
2208
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  *err = REG_ESPACE;  | 
| 
2209
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  return NULL;  | 
| 
2210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2212
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return tree;  | 
| 
2213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
2214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* This function build the following tree, from regular expression  | 
| 
2216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    :  | 
| 
2217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	CAT  | 
| 
2218
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	/ \  | 
| 
2219
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        /   \  | 
| 
2220
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
2221
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2222
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    CAT means concatenation.  */  | 
| 
2223
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static bin_tree_t *  | 
| 
2225
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 parse_branch (pTHX_ re_string_t *regexp, regex_t *preg, re_token_t *token,  | 
| 
2226
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      reg_syntax_t syntax, Idx nest, reg_errcode_t *err)  | 
| 
2227
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
2228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   bin_tree_t *tree, *expr;  | 
| 
2229
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_dfa_t *dfa = preg->buffer;  | 
| 
2230
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   tree = parse_expression (aTHX_ regexp, preg, token, syntax, nest, err);  | 
| 
2231
 | 
12
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (*err != REG_NOERROR && tree == NULL, 0))  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2232
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return NULL;  | 
| 
2233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2234
 | 
32
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   while (token->type != OP_ALT && token->type != END_OF_RE  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2235
 | 
26
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	 && (nest == 0 || token->type != OP_CLOSE_SUBEXP))  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
2237
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       expr = parse_expression (aTHX_ regexp, preg, token, syntax, nest, err);  | 
| 
2238
 | 
20
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (BE (*err != REG_NOERROR && expr == NULL, 0))  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2239
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
2240
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (tree != NULL)  | 
| 
2241
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    postorder (aTHX_ tree, free_tree, NULL);  | 
| 
2242
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  return NULL;  | 
| 
2243
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2244
 | 
20
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (tree != NULL && expr != NULL)  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2245
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
2246
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  bin_tree_t *newtree = create_tree (aTHX_ dfa, tree, expr, CONCAT);  | 
| 
2247
 | 
20
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (newtree == NULL)  | 
| 
2248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
2249
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      postorder (aTHX_ expr, free_tree, NULL);  | 
| 
2250
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      postorder (aTHX_ tree, free_tree, NULL);  | 
| 
2251
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      *err = REG_ESPACE;  | 
| 
2252
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      return NULL;  | 
| 
2253
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
2254
 | 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  tree = newtree;  | 
| 
2255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2256
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       else if (tree == NULL)  | 
| 
2257
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	tree = expr;  | 
| 
2258
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* Otherwise expr == NULL, we don't need to create new tree.  */  | 
| 
2259
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2260
 | 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return tree;  | 
| 
2261
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
2262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2263
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* This function build the following tree, from regular expression a*:  | 
| 
2264
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	 *  | 
| 
2265
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	 |  | 
| 
2266
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	 a  | 
| 
2267
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 */  | 
| 
2268
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2269
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static bin_tree_t *  | 
| 
2270
 | 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 parse_expression (pTHX_ re_string_t *regexp, regex_t *preg, re_token_t *token,  | 
| 
2271
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  reg_syntax_t syntax, Idx nest, reg_errcode_t *err)  | 
| 
2272
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
2273
 | 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_dfa_t *dfa = preg->buffer;  | 
| 
2274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   bin_tree_t *tree;  | 
| 
2275
 | 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   switch (token->type)  | 
| 
2276
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
2277
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case CHARACTER:  | 
| 
2278
 | 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       tree = create_token_tree (aTHX_ dfa, NULL, NULL, token);  | 
| 
2279
 | 
24
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (BE (tree == NULL, 0))  | 
| 
2280
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
2281
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  *err = REG_ESPACE;  | 
| 
2282
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  return NULL;  | 
| 
2283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2284
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
2285
 | 
24
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (dfa->mb_cur_max > 1)  | 
| 
2286
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
2287
 | 
24
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  while (!re_string_eoi (regexp)  | 
| 
2288
 | 
20
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		 && !re_string_first_byte (regexp, re_string_cur_idx (regexp)))  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2289
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
2290
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      bin_tree_t *mbc_remain;  | 
| 
2291
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      fetch_token (aTHX_ token, regexp, syntax);  | 
| 
2292
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      mbc_remain = create_token_tree (aTHX_ dfa, NULL, NULL, token);  | 
| 
2293
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      tree = create_tree (aTHX_ dfa, tree, mbc_remain, CONCAT);  | 
| 
2294
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      if (BE (mbc_remain == NULL || tree == NULL, 0))  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2295
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
2296
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  *err = REG_ESPACE;  | 
| 
2297
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  return NULL;  | 
| 
2298
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
2299
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
2300
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2301
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif  | 
| 
2302
 | 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       break;  | 
| 
2303
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case OP_OPEN_SUBEXP:  | 
| 
2304
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       tree = parse_sub_exp (aTHX_ regexp, preg, token, syntax, nest + 1, err);  | 
| 
2305
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (BE (*err != REG_NOERROR && tree == NULL, 0))  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2306
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return NULL;  | 
| 
2307
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       break;  | 
| 
2308
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case OP_OPEN_BRACKET:  | 
| 
2309
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       tree = parse_bracket_exp (aTHX_ regexp, dfa, token, syntax, err);  | 
| 
2310
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (BE (*err != REG_NOERROR && tree == NULL, 0))  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2311
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return NULL;  | 
| 
2312
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       break;  | 
| 
2313
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case OP_BACK_REF:  | 
| 
2314
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (!BE (dfa->completed_bkref_map & (1 << token->opr.idx), 1))  | 
| 
2315
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
2316
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  *err = REG_ESUBREG;  | 
| 
2317
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  return NULL;  | 
| 
2318
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2319
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       dfa->used_bkref_map |= 1 << token->opr.idx;  | 
| 
2320
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       tree = create_token_tree (aTHX_ dfa, NULL, NULL, token);  | 
| 
2321
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (BE (tree == NULL, 0))  | 
| 
2322
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
2323
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  *err = REG_ESPACE;  | 
| 
2324
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  return NULL;  | 
| 
2325
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2326
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       ++dfa->nbackref;  | 
| 
2327
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       dfa->has_mb_node = 1;  | 
| 
2328
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       break;  | 
| 
2329
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case OP_OPEN_DUP_NUM:  | 
| 
2330
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (syntax & RE_CONTEXT_INVALID_DUP)  | 
| 
2331
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
2332
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  *err = REG_BADRPT;  | 
| 
2333
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  return NULL;  | 
| 
2334
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2335
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* FALLTHROUGH */  | 
| 
2336
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case OP_DUP_ASTERISK:  | 
| 
2337
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case OP_DUP_PLUS:  | 
| 
2338
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case OP_DUP_QUESTION:  | 
| 
2339
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (syntax & RE_CONTEXT_INVALID_OPS)  | 
| 
2340
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
2341
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  *err = REG_BADRPT;  | 
| 
2342
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  return NULL;  | 
| 
2343
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2344
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       else if (syntax & RE_CONTEXT_INDEP_OPS)  | 
| 
2345
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
2346
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  fetch_token (aTHX_ token, regexp, syntax);  | 
| 
2347
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  return parse_expression (aTHX_ regexp, preg, token, syntax, nest, err);  | 
| 
2348
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2349
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* else fall through  */  | 
| 
2350
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case OP_CLOSE_SUBEXP:  | 
| 
2351
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if ((token->type == OP_CLOSE_SUBEXP) &&  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2352
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  !(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD))  | 
| 
2353
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
2354
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  *err = REG_ERPAREN;  | 
| 
2355
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  return NULL;  | 
| 
2356
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2357
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* else fall through  */  | 
| 
2358
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case OP_CLOSE_DUP_NUM:  | 
| 
2359
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* We treat it as a normal character.  */  | 
| 
2360
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2361
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* Then we can these characters as normal characters.  */  | 
| 
2362
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       token->type = CHARACTER;  | 
| 
2363
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* mb_partial and word_char bits should be initialized already  | 
| 
2364
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	 by peek_token.  */  | 
| 
2365
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       tree = create_token_tree (aTHX_ dfa, NULL, NULL, token);  | 
| 
2366
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (BE (tree == NULL, 0))  | 
| 
2367
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
2368
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  *err = REG_ESPACE;  | 
| 
2369
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  return NULL;  | 
| 
2370
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2371
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       break;  | 
| 
2372
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case ANCHOR:  | 
| 
2373
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if ((token->opr.ctx_type  | 
| 
2374
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST))  | 
| 
2375
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  && dfa->word_ops_used == 0)  | 
| 
2376
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	init_word_char (aTHX_ dfa);  | 
| 
2377
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (token->opr.ctx_type == WORD_DELIM  | 
| 
2378
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  || token->opr.ctx_type == NOT_WORD_DELIM)  | 
| 
2379
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
2380
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  bin_tree_t *tree_first, *tree_last;  | 
| 
2381
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (token->opr.ctx_type == WORD_DELIM)  | 
| 
2382
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
2383
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      token->opr.ctx_type = WORD_FIRST;  | 
| 
2384
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      tree_first = create_token_tree (aTHX_ dfa, NULL, NULL, token);  | 
| 
2385
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      token->opr.ctx_type = WORD_LAST;  | 
| 
2386
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
2387
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  else  | 
| 
2388
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
2389
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      token->opr.ctx_type = INSIDE_WORD;  | 
| 
2390
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      tree_first = create_token_tree (aTHX_ dfa, NULL, NULL, token);  | 
| 
2391
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      token->opr.ctx_type = INSIDE_NOTWORD;  | 
| 
2392
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
2393
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  tree_last = create_token_tree (aTHX_ dfa, NULL, NULL, token);  | 
| 
2394
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  tree = create_tree (aTHX_ dfa, tree_first, tree_last, OP_ALT);  | 
| 
2395
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0))  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2396
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
2397
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      *err = REG_ESPACE;  | 
| 
2398
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      return NULL;  | 
| 
2399
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
2400
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2401
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       else  | 
| 
2402
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
2403
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  tree = create_token_tree (aTHX_ dfa, NULL, NULL, token);  | 
| 
2404
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (BE (tree == NULL, 0))  | 
| 
2405
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
2406
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      *err = REG_ESPACE;  | 
| 
2407
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      return NULL;  | 
| 
2408
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
2409
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2410
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* We must return here, since ANCHORs can't be followed  | 
| 
2411
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	 by repetition operators.  | 
| 
2412
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	 eg. RE"^*" is invalid or "",  | 
| 
2413
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	     it must not be "".  */  | 
| 
2414
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       fetch_token (aTHX_ token, regexp, syntax);  | 
| 
2415
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       return tree;  | 
| 
2416
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case OP_PERIOD:  | 
| 
2417
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       tree = create_token_tree (aTHX_ dfa, NULL, NULL, token);  | 
| 
2418
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (BE (tree == NULL, 0))  | 
| 
2419
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
2420
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  *err = REG_ESPACE;  | 
| 
2421
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  return NULL;  | 
| 
2422
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2423
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (dfa->mb_cur_max > 1)  | 
| 
2424
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	dfa->has_mb_node = 1;  | 
| 
2425
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       break;  | 
| 
2426
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case OP_WORD:  | 
| 
2427
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case OP_NOTWORD:  | 
| 
2428
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       tree = build_charclass_op (aTHX_ dfa, regexp->trans,  | 
| 
2429
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				 "alnum",  | 
| 
2430
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				 "_",  | 
| 
2431
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				 token->type == OP_NOTWORD, err);  | 
| 
2432
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (BE (*err != REG_NOERROR && tree == NULL, 0))  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2433
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return NULL;  | 
| 
2434
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       break;  | 
| 
2435
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case OP_SPACE:  | 
| 
2436
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case OP_NOTSPACE:  | 
| 
2437
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       tree = build_charclass_op (aTHX_ dfa, regexp->trans,  | 
| 
2438
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				 "space",  | 
| 
2439
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				 "",  | 
| 
2440
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				 token->type == OP_NOTSPACE, err);  | 
| 
2441
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (BE (*err != REG_NOERROR && tree == NULL, 0))  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2442
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return NULL;  | 
| 
2443
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       break;  | 
| 
2444
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case OP_ALT:  | 
| 
2445
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case END_OF_RE:  | 
| 
2446
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       return NULL;  | 
| 
2447
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case BACK_SLASH:  | 
| 
2448
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       *err = REG_EESCAPE;  | 
| 
2449
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       return NULL;  | 
| 
2450
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     default:  | 
| 
2451
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* Must not happen?  */  | 
| 
2452
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef DEBUG  | 
| 
2453
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       assert (0);  | 
| 
2454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif  | 
| 
2455
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       return NULL;  | 
| 
2456
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2457
 | 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   fetch_token (aTHX_ token, regexp, syntax);  | 
| 
2458
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2459
 | 
32
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   while (token->type == OP_DUP_ASTERISK || token->type == OP_DUP_PLUS  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2460
 | 
32
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	 || token->type == OP_DUP_QUESTION || token->type == OP_OPEN_DUP_NUM)  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2461
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
2462
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       tree = parse_dup_op (aTHX_ tree, regexp, dfa, token, syntax, err);  | 
| 
2463
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (BE (*err != REG_NOERROR && tree == NULL, 0))  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2464
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return NULL;  | 
| 
2465
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* In BRE consecutive duplications are not allowed.  */  | 
| 
2466
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if ((syntax & RE_CONTEXT_INVALID_DUP)  | 
| 
2467
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  && (token->type == OP_DUP_ASTERISK  | 
| 
2468
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      || token->type == OP_OPEN_DUP_NUM))  | 
| 
2469
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
2470
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  *err = REG_BADRPT;  | 
| 
2471
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  return NULL;  | 
| 
2472
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2473
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2474
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2475
 | 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return tree;  | 
| 
2476
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
2477
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2478
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* This function build the following tree, from regular expression  | 
| 
2479
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    ():  | 
| 
2480
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	 SUBEXP  | 
| 
2481
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    |  | 
| 
2482
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
2483
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 */  | 
| 
2484
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2485
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static bin_tree_t *  | 
| 
2486
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 parse_sub_exp (pTHX_ re_string_t *regexp, regex_t *preg, re_token_t *token,  | 
| 
2487
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	       reg_syntax_t syntax, Idx nest, reg_errcode_t *err)  | 
| 
2488
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
2489
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_dfa_t *dfa = preg->buffer;  | 
| 
2490
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   bin_tree_t *tree;  | 
| 
2491
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   size_t cur_nsub;  | 
| 
2492
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   cur_nsub = preg->re_nsub++;  | 
| 
2493
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2494
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   fetch_token (aTHX_ token, regexp, syntax | RE_CARET_ANCHORS_HERE);  | 
| 
2495
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2496
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* The subexpression may be a null string.  */  | 
| 
2497
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (token->type == OP_CLOSE_SUBEXP)  | 
| 
2498
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     tree = NULL;  | 
| 
2499
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   else  | 
| 
2500
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
2501
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       tree = parse_reg_exp (aTHX_ regexp, preg, token, syntax, nest, err);  | 
| 
2502
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0))  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2503
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
2504
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (tree != NULL)  | 
| 
2505
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    postorder (aTHX_ tree, free_tree, NULL);  | 
| 
2506
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  *err = REG_EPAREN;  | 
| 
2507
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2508
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (BE (*err != REG_NOERROR, 0))  | 
| 
2509
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return NULL;  | 
| 
2510
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2511
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2512
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (cur_nsub <= '9' - '1')  | 
| 
2513
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     dfa->completed_bkref_map |= 1 << cur_nsub;  | 
| 
2514
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2515
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   tree = create_tree (aTHX_ dfa, tree, NULL, SUBEXP);  | 
| 
2516
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (tree == NULL, 0))  | 
| 
2517
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
2518
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       *err = REG_ESPACE;  | 
| 
2519
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       return NULL;  | 
| 
2520
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2521
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   tree->token.opr.idx = cur_nsub;  | 
| 
2522
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return tree;  | 
| 
2523
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
2524
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2525
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* This function parse repetition operators like "*", "+", "{1,3}" etc.  */  | 
| 
2526
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2527
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static bin_tree_t *  | 
| 
2528
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 parse_dup_op (pTHX_ bin_tree_t *elem, re_string_t *regexp, re_dfa_t *dfa,  | 
| 
2529
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      re_token_t *token, reg_syntax_t syntax, reg_errcode_t *err)  | 
| 
2530
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
2531
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   bin_tree_t *tree = NULL, *old_tree = NULL;  | 
| 
2532
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Idx i, start, end, start_idx = re_string_cur_idx (aTHX_ regexp);  | 
| 
2533
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_token_t start_token = *token;  | 
| 
2534
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2535
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (token->type == OP_OPEN_DUP_NUM)  | 
| 
2536
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
2537
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       end = 0;  | 
| 
2538
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       start = fetch_number (aTHX_ regexp, token, syntax);  | 
| 
2539
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (start == REG_MISSING)  | 
| 
2540
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
2541
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (token->type == CHARACTER && token->opr.c == ',')  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2542
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    start = 0; /* We treat "{,m}" as "{0,m}".  */  | 
| 
2543
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  else  | 
| 
2544
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
2545
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      *err = REG_BADBR; /* {} is invalid.  */  | 
| 
2546
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      return NULL;  | 
| 
2547
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
2548
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2549
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (BE (start != REG_ERROR, 1))  | 
| 
2550
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
2551
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  /* We treat "{n}" as "{n,n}".  */  | 
| 
2552
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  end = ((token->type == OP_CLOSE_DUP_NUM) ? start  | 
| 
2553
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		 : ((token->type == CHARACTER && token->opr.c == ',')  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2554
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		    ? fetch_number (aTHX_ regexp, token, syntax) : REG_ERROR));  | 
| 
2555
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2556
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (BE (start == REG_ERROR || end == REG_ERROR, 0))  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2557
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
2558
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  /* Invalid sequence.  */  | 
| 
2559
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), 0))  | 
| 
2560
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
2561
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      if (token->type == END_OF_RE)  | 
| 
2562
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		*err = REG_EBRACE;  | 
| 
2563
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      else  | 
| 
2564
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		*err = REG_BADBR;  | 
| 
2565
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2566
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      return NULL;  | 
| 
2567
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
2568
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2569
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  /* If the syntax bit is set, rollback.  */  | 
| 
2570
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  re_string_set_index (regexp, start_idx);  | 
| 
2571
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  *token = start_token;  | 
| 
2572
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  token->type = CHARACTER;  | 
| 
2573
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  /* mb_partial and word_char bits should be already initialized by  | 
| 
2574
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	     peek_token.  */  | 
| 
2575
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  return elem;  | 
| 
2576
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2577
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2578
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (BE ((end != REG_MISSING && start > end)  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2579
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      || token->type != OP_CLOSE_DUP_NUM, 0))  | 
| 
2580
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
2581
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  /* First number greater than second.  */  | 
| 
2582
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  *err = REG_BADBR;  | 
| 
2583
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  return NULL;  | 
| 
2584
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2585
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2586
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (BE (RE_DUP_MAX < (end == REG_MISSING ? start : end), 0))  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2587
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
2588
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  *err = REG_ESIZE;  | 
| 
2589
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  return NULL;  | 
| 
2590
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2591
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2592
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   else  | 
| 
2593
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
2594
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       start = (token->type == OP_DUP_PLUS) ? 1 : 0;  | 
| 
2595
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       end = (token->type == OP_DUP_QUESTION) ? 1 : REG_MISSING;  | 
| 
2596
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2597
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2598
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   fetch_token (aTHX_ token, regexp, syntax);  | 
| 
2599
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2600
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (elem == NULL, 0))  | 
| 
2601
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return NULL;  | 
| 
2602
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (start == 0 && end == 0, 0))  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2603
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
2604
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       postorder (aTHX_ elem, free_tree, NULL);  | 
| 
2605
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       return NULL;  | 
| 
2606
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2607
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2608
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* Extract "{n,m}" to "...{0,}".  */  | 
| 
2609
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (start > 0, 0))  | 
| 
2610
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
2611
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       tree = elem;  | 
| 
2612
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       for (i = 2; i <= start; ++i)  | 
| 
2613
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
2614
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  elem = duplicate_tree (aTHX_ elem, dfa);  | 
| 
2615
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  tree = create_tree (aTHX_ dfa, tree, elem, CONCAT);  | 
| 
2616
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (BE (elem == NULL || tree == NULL, 0))  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2617
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    goto parse_dup_op_espace;  | 
| 
2618
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2619
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2620
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (start == end)  | 
| 
2621
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return tree;  | 
| 
2622
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2623
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* Duplicate ELEM before it is marked optional.  */  | 
| 
2624
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       elem = duplicate_tree (aTHX_ elem, dfa);  | 
| 
2625
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       old_tree = tree;  | 
| 
2626
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2627
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   else  | 
| 
2628
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     old_tree = NULL;  | 
| 
2629
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2630
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (elem->token.type == SUBEXP)  | 
| 
2631
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
2632
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       uintptr_t subidx = elem->token.opr.idx;  | 
| 
2633
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       postorder (aTHX_ elem, mark_opt_subexp, (void *) subidx);  | 
| 
2634
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2635
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2636
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   tree = create_tree (aTHX_ dfa, elem, NULL,  | 
| 
2637
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		      (end == REG_MISSING ? OP_DUP_ASTERISK : OP_ALT));  | 
| 
2638
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (tree == NULL, 0))  | 
| 
2639
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     goto parse_dup_op_espace;  | 
| 
2640
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2641
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* From gnulib's "intprops.h":  | 
| 
2642
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    True if the arithmetic type T is signed.  */  | 
| 
2643
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #define TYPE_SIGNED(t) (! ((t) 0 < (t) -1))  | 
| 
2644
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2645
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* This loop is actually executed only when end != REG_MISSING,  | 
| 
2646
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      to rewrite {0,n} as ((...?)?)?...  We have  | 
| 
2647
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      already created the start+1-th copy.  */  | 
| 
2648
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (TYPE_SIGNED (Idx) || end != REG_MISSING)  | 
| 
2649
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     for (i = start + 2; i <= end; ++i)  | 
| 
2650
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       {  | 
| 
2651
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	elem = duplicate_tree (aTHX_ elem, dfa);  | 
| 
2652
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	tree = create_tree (aTHX_ dfa, tree, elem, CONCAT);  | 
| 
2653
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	if (BE (elem == NULL || tree == NULL, 0))  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2654
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  goto parse_dup_op_espace;  | 
| 
2655
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2656
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	tree = create_tree (aTHX_ dfa, tree, NULL, OP_ALT);  | 
| 
2657
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	if (BE (tree == NULL, 0))  | 
| 
2658
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  goto parse_dup_op_espace;  | 
| 
2659
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
2660
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2661
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (old_tree)  | 
| 
2662
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     tree = create_tree (aTHX_ dfa, old_tree, tree, CONCAT);  | 
| 
2663
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2664
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return tree;  | 
| 
2665
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2666
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  parse_dup_op_espace:  | 
| 
2667
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   *err = REG_ESPACE;  | 
| 
2668
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return NULL;  | 
| 
2669
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
2670
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2671
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Size of the names for collating symbol/equivalence_class/character_class.  | 
| 
2672
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    I'm not sure, but maybe enough.  */  | 
| 
2673
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #define BRACKET_NAME_BUF_SIZE 32  | 
| 
2674
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2675
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifndef _LIBC  | 
| 
2676
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* Local function for parse_bracket_exp only used in case of NOT _LIBC.  | 
| 
2677
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      Build the range expression which starts from START_ELEM, and ends  | 
| 
2678
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      at END_ELEM.  The result are written to MBCSET and SBCSET.  | 
| 
2679
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      RANGE_ALLOC is the allocated size of mbcset->range_starts, and  | 
| 
2680
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      mbcset->range_ends, is a pointer argument since we may  | 
| 
2681
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      update it.  */  | 
| 
2682
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2683
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t  | 
| 
2684
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 internal_function  | 
| 
2685
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # ifdef RE_ENABLE_I18N  | 
| 
2686
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 build_range_exp (pTHX_ const reg_syntax_t syntax,  | 
| 
2687
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                  bitset_t sbcset,  | 
| 
2688
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                  re_charset_t *mbcset,  | 
| 
2689
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                  Idx *range_alloc,  | 
| 
2690
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                  const bracket_elem_t *start_elem,  | 
| 
2691
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                  const bracket_elem_t *end_elem)  | 
| 
2692
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # else /* not RE_ENABLE_I18N */  | 
| 
2693
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 build_range_exp (pTHX_ const reg_syntax_t syntax,  | 
| 
2694
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                  bitset_t sbcset,  | 
| 
2695
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                  const bracket_elem_t *start_elem,  | 
| 
2696
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
                  const bracket_elem_t *end_elem)  | 
| 
2697
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # endif /* not RE_ENABLE_I18N */  | 
| 
2698
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
2699
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   unsigned int start_ch, end_ch;  | 
| 
2700
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* Equivalence Classes and Character Classes can't be a range start/end.  */  | 
| 
2701
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2702
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,  | 
| 
2703
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  0))  | 
| 
2704
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return REG_ERANGE;  | 
| 
2705
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2706
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* We can handle no multi character collating elements without libc  | 
| 
2707
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      support.  */  | 
| 
2708
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE ((start_elem->type == COLL_SYM  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2709
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   && strlen ((char *) start_elem->opr.name) > 1)  | 
| 
2710
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  || (end_elem->type == COLL_SYM  | 
| 
2711
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      && strlen ((char *) end_elem->opr.name) > 1), 0))  | 
| 
2712
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return REG_ECOLLATE;  | 
| 
2713
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2714
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # ifdef RE_ENABLE_I18N  | 
| 
2715
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   {  | 
| 
2716
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     rpl__wchar_t wc;  | 
| 
2717
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     rpl__wint_t start_wc;  | 
| 
2718
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     rpl__wint_t end_wc;  | 
| 
2719
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2720
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2721
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		: ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]  | 
| 
2722
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		   : 0));  | 
| 
2723
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2724
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]  | 
| 
2725
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		 : 0));  | 
| 
2726
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     start_wc = ((start_elem->type == SB_CHAR || start_elem->type == COLL_SYM)  | 
| 
2727
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		? rpl__btowc (start_ch) : start_elem->opr.wch);  | 
| 
2728
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     end_wc = ((end_elem->type == SB_CHAR || end_elem->type == COLL_SYM)  | 
| 
2729
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      ? rpl__btowc (end_ch) : end_elem->opr.wch);  | 
| 
2730
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if (start_wc == rpl__WEOF || end_wc == rpl__WEOF)  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2731
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       return REG_ECOLLATE;  | 
| 
2732
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_wc > end_wc, 0))  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2733
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       return REG_ERANGE;  | 
| 
2734
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2735
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     /* Got valid collation sequence values, add them as a new entry.  | 
| 
2736
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        However, for !_LIBC we have no collation elements: if the  | 
| 
2737
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        character set is single byte, the single byte character set  | 
| 
2738
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        that we build below suffices.  parse_bracket_exp passes  | 
| 
2739
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        no MBCSET if dfa->mb_cur_max == 1.  */  | 
| 
2740
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if (mbcset)  | 
| 
2741
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       {  | 
| 
2742
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	/* Check the space of the arrays.  */  | 
| 
2743
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	if (BE (*range_alloc == mbcset->nranges, 0))  | 
| 
2744
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  {  | 
| 
2745
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    /* There is not enough space, need realloc.  */  | 
| 
2746
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    Idx new_nranges;  | 
| 
2747
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2748
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    /* +1 in case of mbcset->nranges is 0.  */  | 
| 
2749
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    new_nranges = 2 * mbcset->nranges + 1;  | 
| 
2750
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    /* Use realloc since mbcset->range_starts and mbcset->range_ends  | 
| 
2751
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	       are NULL if *range_alloc == 0.  */  | 
| 
2752
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    re_realloc (mbcset->range_starts, rpl__wchar_t, new_nranges);  | 
| 
2753
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    re_realloc (mbcset->range_ends, rpl__wchar_t, new_nranges);  | 
| 
2754
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    *range_alloc = new_nranges;  | 
| 
2755
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  }  | 
| 
2756
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2757
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	mbcset->range_starts[mbcset->nranges] = start_wc;  | 
| 
2758
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	mbcset->range_ends[mbcset->nranges++] = end_wc;  | 
| 
2759
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
2760
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2761
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     /* Build the table for single byte characters.  */  | 
| 
2762
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     for (wc = 0; wc < SBC_MAX; ++wc)  | 
| 
2763
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       {  | 
| 
2764
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	if (start_wc <= wc && wc <= end_wc)  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2765
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  bitset_set (aTHX_ sbcset, wc);  | 
| 
2766
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }  | 
| 
2767
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
2768
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # else /* not RE_ENABLE_I18N */  | 
| 
2769
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   {  | 
| 
2770
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     unsigned int ch;  | 
| 
2771
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch  | 
| 
2772
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		: ((start_elem->type == COLL_SYM) ? start_elem->opr.name[0]  | 
| 
2773
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		   : 0));  | 
| 
2774
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch  | 
| 
2775
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      : ((end_elem->type == COLL_SYM) ? end_elem->opr.name[0]  | 
| 
2776
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		 : 0));  | 
| 
2777
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if (start_ch > end_ch)  | 
| 
2778
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       return REG_ERANGE;  | 
| 
2779
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     /* Build the table for single byte characters.  */  | 
| 
2780
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     for (ch = 0; ch < SBC_MAX; ++ch)  | 
| 
2781
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (start_ch <= ch  && ch <= end_ch)  | 
| 
2782
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	bitset_set (aTHX_ sbcset, ch);  | 
| 
2783
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
2784
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # endif /* not RE_ENABLE_I18N */  | 
| 
2785
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return REG_NOERROR;  | 
| 
2786
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
2787
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif /* not _LIBC */  | 
| 
2788
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2789
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifndef _LIBC  | 
| 
2790
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Helper function for parse_bracket_exp only used in case of NOT _LIBC..  | 
| 
2791
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    Build the collating element which is represented by NAME.  | 
| 
2792
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    The result are written to MBCSET and SBCSET.  | 
| 
2793
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a  | 
| 
2794
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    pointer argument since we may update it.  */  | 
| 
2795
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2796
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t  | 
| 
2797
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 internal_function  | 
| 
2798
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # ifdef RE_ENABLE_I18N  | 
| 
2799
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 build_collating_symbol (pTHX_ bitset_t sbcset, re_charset_t *mbcset,  | 
| 
2800
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			Idx *coll_sym_alloc, const unsigned char *name)  | 
| 
2801
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # else /* not RE_ENABLE_I18N */  | 
| 
2802
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 build_collating_symbol (pTHX_ bitset_t sbcset, const unsigned char *name)  | 
| 
2803
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # endif /* not RE_ENABLE_I18N */  | 
| 
2804
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
2805
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   size_t name_len = strlen ((const char *) name);  | 
| 
2806
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (name_len != 1, 0))  | 
| 
2807
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return REG_ECOLLATE;  | 
| 
2808
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   else  | 
| 
2809
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
2810
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       bitset_set (aTHX_ sbcset, name[0]);  | 
| 
2811
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       return REG_NOERROR;  | 
| 
2812
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2813
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
2814
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif /* not _LIBC */  | 
| 
2815
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2816
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* This function parse bracket expression like "[abc]", "[a-c]",  | 
| 
2817
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    "[[.a-a.]]" etc.  */  | 
| 
2818
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2819
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static bin_tree_t *  | 
| 
2820
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 parse_bracket_exp (pTHX_ re_string_t *regexp, re_dfa_t *dfa, re_token_t *token,  | 
| 
2821
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		   reg_syntax_t syntax, reg_errcode_t *err)  | 
| 
2822
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
2823
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef _LIBC  | 
| 
2824
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   const unsigned char *collseqmb;  | 
| 
2825
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   const char *collseqwc;  | 
| 
2826
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   uint32_t nrules;  | 
| 
2827
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   int32_t table_size;  | 
| 
2828
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   const int32_t *symb_table;  | 
| 
2829
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   const unsigned char *extra;  | 
| 
2830
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2831
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* Local function for parse_bracket_exp used in _LIBC environment.  | 
| 
2832
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      Seek the collating symbol entry corresponding to NAME.  | 
| 
2833
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      Return the index of the symbol in the SYMB_TABLE,  | 
| 
2834
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      or -1 if not found.  */  | 
| 
2835
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2836
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   auto inline int32_t  | 
| 
2837
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   __attribute__ ((always_inline))  | 
| 
2838
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   seek_collating_symbol_entry (const unsigned char *name, size_t name_len)  | 
| 
2839
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
2840
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       int32_t elem;  | 
| 
2841
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2842
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       for (elem = 0; elem < table_size; elem++)  | 
| 
2843
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	if (symb_table[2 * elem] != 0)  | 
| 
2844
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  {  | 
| 
2845
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    int32_t idx = symb_table[2 * elem + 1];  | 
| 
2846
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    /* Skip the name of collating element name.  */  | 
| 
2847
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    idx += 1 + extra[idx];  | 
| 
2848
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    if (/* Compare the length of the name.  */  | 
| 
2849
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		name_len == extra[idx]  | 
| 
2850
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		/* Compare the name.  */  | 
| 
2851
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		&& memcmp (name, &extra[idx + 1], name_len) == 0)  | 
| 
2852
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      /* Yep, this is the entry.  */  | 
| 
2853
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      return elem;  | 
| 
2854
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  }  | 
| 
2855
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       return -1;  | 
| 
2856
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2857
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2858
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* Local function for parse_bracket_exp used in _LIBC environment.  | 
| 
2859
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      Look up the collation sequence value of BR_ELEM.  | 
| 
2860
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      Return the value if succeeded, UINT_MAX otherwise.  */  | 
| 
2861
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2862
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   auto inline unsigned int  | 
| 
2863
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   __attribute__ ((always_inline))  | 
| 
2864
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   lookup_collation_sequence_value (bracket_elem_t *br_elem)  | 
| 
2865
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
2866
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (br_elem->type == SB_CHAR)  | 
| 
2867
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
2868
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  /*  | 
| 
2869
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (rpl__MB_CUR_MAX == 1)  | 
| 
2870
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  */  | 
| 
2871
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (nrules == 0)  | 
| 
2872
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    return collseqmb[br_elem->opr.ch];  | 
| 
2873
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  else  | 
| 
2874
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
2875
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      rpl__wint_t wc = rpl__btowc (br_elem->opr.ch);  | 
| 
2876
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      return __collseq_table_lookup (collseqwc, wc);  | 
| 
2877
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
2878
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2879
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       else if (br_elem->type == MB_CHAR)  | 
| 
2880
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
2881
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (nrules != 0)  | 
| 
2882
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    return __collseq_table_lookup (collseqwc, br_elem->opr.wch);  | 
| 
2883
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2884
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       else if (br_elem->type == COLL_SYM)  | 
| 
2885
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
2886
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  size_t sym_name_len = strlen ((char *) br_elem->opr.name);  | 
| 
2887
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (nrules != 0)  | 
| 
2888
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
2889
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      int32_t elem, idx;  | 
| 
2890
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      elem = seek_collating_symbol_entry (br_elem->opr.name,  | 
| 
2891
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 						  sym_name_len);  | 
| 
2892
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      if (elem != -1)  | 
| 
2893
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
2894
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  /* We found the entry.  */  | 
| 
2895
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  idx = symb_table[2 * elem + 1];  | 
| 
2896
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  /* Skip the name of collating element name.  */  | 
| 
2897
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  idx += 1 + extra[idx];  | 
| 
2898
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  /* Skip the byte sequence of the collating element.  */  | 
| 
2899
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  idx += 1 + extra[idx];  | 
| 
2900
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  /* Adjust for the alignment.  */  | 
| 
2901
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  idx = (idx + 3) & ~3;  | 
| 
2902
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  /* Skip the multibyte collation sequence value.  */  | 
| 
2903
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  idx += sizeof (unsigned int);  | 
| 
2904
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  /* Skip the wide char sequence of the collating element.  */  | 
| 
2905
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  idx += sizeof (unsigned int) *  | 
| 
2906
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		    (1 + *(unsigned int *) (extra + idx));  | 
| 
2907
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  /* Return the collation sequence value.  */  | 
| 
2908
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  return *(unsigned int *) (extra + idx);  | 
| 
2909
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
2910
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      else if (sym_name_len == 1)  | 
| 
2911
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
2912
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  /* No valid character.  Match it as a single byte  | 
| 
2913
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		     character.  */  | 
| 
2914
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  return collseqmb[br_elem->opr.name[0]];  | 
| 
2915
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
2916
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
2917
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  else if (sym_name_len == 1)  | 
| 
2918
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    return collseqmb[br_elem->opr.name[0]];  | 
| 
2919
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2920
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       return UINT_MAX;  | 
| 
2921
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2922
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2923
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* Local function for parse_bracket_exp used in _LIBC environment.  | 
| 
2924
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      Build the range expression which starts from START_ELEM, and ends  | 
| 
2925
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      at END_ELEM.  The result are written to MBCSET and SBCSET.  | 
| 
2926
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      RANGE_ALLOC is the allocated size of mbcset->range_starts, and  | 
| 
2927
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      mbcset->range_ends, is a pointer argument since we may  | 
| 
2928
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      update it.  */  | 
| 
2929
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2930
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   auto inline reg_errcode_t  | 
| 
2931
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   __attribute__ ((always_inline))  | 
| 
2932
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   build_range_exp (pTHX_ bitset_t sbcset, re_charset_t *mbcset, int *range_alloc,  | 
| 
2933
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		   bracket_elem_t *start_elem, bracket_elem_t *end_elem)  | 
| 
2934
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
2935
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       unsigned int ch;  | 
| 
2936
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       uint32_t start_collseq;  | 
| 
2937
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       uint32_t end_collseq;  | 
| 
2938
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2939
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* Equivalence Classes and Character Classes can't be a range  | 
| 
2940
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	 start/end.  */  | 
| 
2941
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (BE (start_elem->type == EQUIV_CLASS || start_elem->type == CHAR_CLASS  | 
| 
2942
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      || end_elem->type == EQUIV_CLASS || end_elem->type == CHAR_CLASS,  | 
| 
2943
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      0))  | 
| 
2944
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return REG_ERANGE;  | 
| 
2945
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2946
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* FIXME: Implement rational ranges here, too.  */  | 
| 
2947
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       start_collseq = lookup_collation_sequence_value (start_elem);  | 
| 
2948
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       end_collseq = lookup_collation_sequence_value (end_elem);  | 
| 
2949
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* Check start/end collation sequence values.  */  | 
| 
2950
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (BE (start_collseq == UINT_MAX || end_collseq == UINT_MAX, 0))  | 
| 
2951
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return REG_ECOLLATE;  | 
| 
2952
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, 0))  | 
| 
2953
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return REG_ERANGE;  | 
| 
2954
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2955
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* Got valid collation sequence values, add them as a new entry.  | 
| 
2956
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	 However, if we have no collation elements, and the character set  | 
| 
2957
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	 is single byte, the single byte character set that we  | 
| 
2958
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	 build below suffices. */  | 
| 
2959
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (nrules > 0 || dfa->mb_cur_max > 1)  | 
| 
2960
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
2961
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  /* Check the space of the arrays.  */  | 
| 
2962
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (BE (*range_alloc == mbcset->nranges, 0))  | 
| 
2963
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
2964
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      /* There is not enough space, need realloc.  */  | 
| 
2965
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      Idx new_nranges;  | 
| 
2966
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2967
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      /* +1 in case of mbcset->nranges is 0.  */  | 
| 
2968
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      new_nranges = 2 * mbcset->nranges + 1;  | 
| 
2969
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      re_realloc (mbcset->range_starts, uint32_t, new_nranges);  | 
| 
2970
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      re_realloc (mbcset->range_ends, uint32_t, new_nranges);  | 
| 
2971
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      *range_alloc = new_nranges;  | 
| 
2972
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
2973
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2974
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  mbcset->range_starts[mbcset->nranges] = start_collseq;  | 
| 
2975
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  mbcset->range_ends[mbcset->nranges++] = end_collseq;  | 
| 
2976
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2977
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2978
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* Build the table for single byte characters.  */  | 
| 
2979
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       for (ch = 0; ch < SBC_MAX; ch++)  | 
| 
2980
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
2981
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  uint32_t ch_collseq;  | 
| 
2982
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  /*  | 
| 
2983
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (rpl__MB_CUR_MAX == 1)  | 
| 
2984
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  */  | 
| 
2985
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (nrules == 0)  | 
| 
2986
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    ch_collseq = collseqmb[ch];  | 
| 
2987
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  else  | 
| 
2988
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    ch_collseq = __collseq_table_lookup (collseqwc, rpl__btowc (ch));  | 
| 
2989
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)  | 
| 
2990
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    bitset_set (aTHX_ sbcset, ch);  | 
| 
2991
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2992
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       return REG_NOERROR;  | 
| 
2993
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
2994
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2995
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* Local function for parse_bracket_exp used in _LIBC environment.  | 
| 
2996
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      Build the collating element which is represented by NAME.  | 
| 
2997
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      The result are written to MBCSET and SBCSET.  | 
| 
2998
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a  | 
| 
2999
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      pointer argument since we may update it.  */  | 
| 
3000
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3001
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   auto inline reg_errcode_t  | 
| 
3002
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   __attribute__ ((always_inline))  | 
| 
3003
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   build_collating_symbol (pTHX_ bitset_t sbcset, re_charset_t *mbcset,  | 
| 
3004
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			  Idx *coll_sym_alloc, const unsigned char *name)  | 
| 
3005
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
3006
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       int32_t elem, idx;  | 
| 
3007
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       size_t name_len = strlen ((const char *) name);  | 
| 
3008
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (nrules != 0)  | 
| 
3009
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
3010
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  elem = seek_collating_symbol_entry (name, name_len);  | 
| 
3011
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (elem != -1)  | 
| 
3012
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
3013
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      /* We found the entry.  */  | 
| 
3014
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      idx = symb_table[2 * elem + 1];  | 
| 
3015
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      /* Skip the name of collating element name.  */  | 
| 
3016
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      idx += 1 + extra[idx];  | 
| 
3017
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
3018
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  else if (name_len == 1)  | 
| 
3019
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
3020
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      /* No valid character, treat it as a normal  | 
| 
3021
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		 character.  */  | 
| 
3022
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      bitset_set (aTHX_ sbcset, name[0]);  | 
| 
3023
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      return REG_NOERROR;  | 
| 
3024
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
3025
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  else  | 
| 
3026
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    return REG_ECOLLATE;  | 
| 
3027
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3028
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  /* Got valid collation sequence, add it as a new entry.  */  | 
| 
3029
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  /* Check the space of the arrays.  */  | 
| 
3030
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (BE (*coll_sym_alloc == mbcset->ncoll_syms, 0))  | 
| 
3031
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
3032
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      /* Not enough, realloc it.  */  | 
| 
3033
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      /* +1 in case of mbcset->ncoll_syms is 0.  */  | 
| 
3034
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      Idx new_coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;  | 
| 
3035
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      /* Use realloc since mbcset->coll_syms is NULL  | 
| 
3036
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		 if *alloc == 0.  */  | 
| 
3037
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      re_realloc (mbcset->coll_syms, int32_t, new_coll_sym_alloc);  | 
| 
3038
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      *coll_sym_alloc = new_coll_sym_alloc;  | 
| 
3039
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
3040
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  mbcset->coll_syms[mbcset->ncoll_syms++] = idx;  | 
| 
3041
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  return REG_NOERROR;  | 
| 
3042
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
3043
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       else  | 
| 
3044
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
3045
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (BE (name_len != 1, 0))  | 
| 
3046
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    return REG_ECOLLATE;  | 
| 
3047
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  else  | 
| 
3048
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
3049
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      bitset_set (aTHX_ sbcset, name[0]);  | 
| 
3050
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      return REG_NOERROR;  | 
| 
3051
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
3052
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
3053
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3054
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif  | 
| 
3055
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3056
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_token_t br_token;  | 
| 
3057
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_bitset_ptr_t sbcset;  | 
| 
3058
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
3059
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_charset_t *mbcset;  | 
| 
3060
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Idx coll_sym_alloc = 0, range_alloc = 0, mbchar_alloc = 0;  | 
| 
3061
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Idx equiv_class_alloc = 0, char_class_alloc = 0;  | 
| 
3062
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif /* not RE_ENABLE_I18N */  | 
| 
3063
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   bool non_match = false;  | 
| 
3064
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   bin_tree_t *work_tree;  | 
| 
3065
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   int token_len;  | 
| 
3066
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   bool first_round = true;  | 
| 
3067
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef _LIBC  | 
| 
3068
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   collseqmb = (const unsigned char *)  | 
| 
3069
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);  | 
| 
3070
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);  | 
| 
3071
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (nrules)  | 
| 
3072
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
3073
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /*  | 
| 
3074
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (rpl__MB_CUR_MAX > 1)  | 
| 
3075
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       */  | 
| 
3076
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);  | 
| 
3077
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);  | 
| 
3078
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE,  | 
| 
3079
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 						  _NL_COLLATE_SYMB_TABLEMB);  | 
| 
3080
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,  | 
| 
3081
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 						   _NL_COLLATE_SYMB_EXTRAMB);  | 
| 
3082
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3083
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif  | 
| 
3084
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_calloc(sbcset, bitset_word_t, BITSET_WORDS);  | 
| 
3085
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); */  | 
| 
3086
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
3087
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_calloc(mbcset, re_charset_t, 1);  | 
| 
3088
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); */  | 
| 
3089
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif /* RE_ENABLE_I18N */  | 
| 
3090
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
3091
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (sbcset == NULL || mbcset == NULL, 0))  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3092
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #else  | 
| 
3093
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (sbcset == NULL, 0))  | 
| 
3094
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif /* RE_ENABLE_I18N */  | 
| 
3095
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
3096
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       re_free (sbcset);  | 
| 
3097
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
3098
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       re_free (mbcset);  | 
| 
3099
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif  | 
| 
3100
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       *err = REG_ESPACE;  | 
| 
3101
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       return NULL;  | 
| 
3102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3104
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   token_len = peek_token_bracket (aTHX_ token, regexp, syntax);  | 
| 
3105
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (token->type == END_OF_RE, 0))  | 
| 
3106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
3107
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       *err = REG_BADPAT;  | 
| 
3108
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       goto parse_bracket_exp_free_return;  | 
| 
3109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3110
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (token->type == OP_NON_MATCH_LIST)  | 
| 
3111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
3112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
3113
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       mbcset->non_match = 1;  | 
| 
3114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif /* not RE_ENABLE_I18N */  | 
| 
3115
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       non_match = true;  | 
| 
3116
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (syntax & RE_HAT_LISTS_NOT_NEWLINE)  | 
| 
3117
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	bitset_set (aTHX_ sbcset, '\n');  | 
| 
3118
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       re_string_skip_bytes (aTHX_ regexp, token_len); /* Skip a token.  */  | 
| 
3119
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       token_len = peek_token_bracket (aTHX_ token, regexp, syntax);  | 
| 
3120
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (BE (token->type == END_OF_RE, 0))  | 
| 
3121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
3122
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  *err = REG_BADPAT;  | 
| 
3123
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  goto parse_bracket_exp_free_return;  | 
| 
3124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
3125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* We treat the first ']' as a normal character.  */  | 
| 
3128
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (token->type == OP_CLOSE_BRACKET)  | 
| 
3129
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     token->type = CHARACTER;  | 
| 
3130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   while (1)  | 
| 
3132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
3133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       bracket_elem_t start_elem, end_elem;  | 
| 
3134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE];  | 
| 
3135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE];  | 
| 
3136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       reg_errcode_t ret;  | 
| 
3137
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       int token_len2 = 0;  | 
| 
3138
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       bool is_range_exp = false;  | 
| 
3139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       re_token_t token2;  | 
| 
3140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3141
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       start_elem.opr.name = start_name_buf;  | 
| 
3142
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       ret = parse_bracket_element (aTHX_ &start_elem, regexp, token, token_len, dfa,  | 
| 
3143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				   syntax, first_round);  | 
| 
3144
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (BE (ret != REG_NOERROR, 0))  | 
| 
3145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
3146
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  *err = ret;  | 
| 
3147
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  goto parse_bracket_exp_free_return;  | 
| 
3148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
3149
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       first_round = false;  | 
| 
3150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* Get information about the next token.  We need it in any case.  */  | 
| 
3152
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       token_len = peek_token_bracket (aTHX_ token, regexp, syntax);  | 
| 
3153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* Do not check for ranges if we know they are not allowed.  */  | 
| 
3155
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS)  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3156
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
3157
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (BE (token->type == END_OF_RE, 0))  | 
| 
3158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
3159
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      *err = REG_EBRACK;  | 
| 
3160
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      goto parse_bracket_exp_free_return;  | 
| 
3161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
3162
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (token->type == OP_CHARSET_RANGE)  | 
| 
3163
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
3164
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      re_string_skip_bytes (aTHX_ regexp, token_len); /* Skip '-'.  */  | 
| 
3165
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      token_len2 = peek_token_bracket (aTHX_ &token2, regexp, syntax);  | 
| 
3166
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      if (BE (token2.type == END_OF_RE, 0))  | 
| 
3167
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
3168
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  *err = REG_EBRACK;  | 
| 
3169
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  goto parse_bracket_exp_free_return;  | 
| 
3170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
3171
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      if (token2.type == OP_CLOSE_BRACKET)  | 
| 
3172
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
3173
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  /* We treat the last '-' as a normal character.  */  | 
| 
3174
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  re_string_skip_bytes (aTHX_ regexp, -token_len);  | 
| 
3175
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  token->type = CHARACTER;  | 
| 
3176
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
3177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      else  | 
| 
3178
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		is_range_exp = true;  | 
| 
3179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
3180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
3181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3182
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (is_range_exp == true)  | 
| 
3183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
3184
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  end_elem.opr.name = end_name_buf;  | 
| 
3185
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  ret = parse_bracket_element (aTHX_ &end_elem, regexp, &token2, token_len2,  | 
| 
3186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				       dfa, syntax, true);  | 
| 
3187
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (BE (ret != REG_NOERROR, 0))  | 
| 
3188
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
3189
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      *err = ret;  | 
| 
3190
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      goto parse_bracket_exp_free_return;  | 
| 
3191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
3192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3193
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  token_len = peek_token_bracket (aTHX_ token, regexp, syntax);  | 
| 
3194
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3195
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef _LIBC  | 
| 
3196
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  *err = build_range_exp (aTHX_ sbcset, mbcset, &range_alloc,  | 
| 
3197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				  &start_elem, &end_elem);  | 
| 
3198
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #else  | 
| 
3199
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # ifdef RE_ENABLE_I18N  | 
| 
3200
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  *err = build_range_exp (aTHX_ syntax, sbcset,  | 
| 
3201
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				  dfa->mb_cur_max > 1 ? mbcset : NULL,  | 
| 
3202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				  &range_alloc, &start_elem, &end_elem);  | 
| 
3203
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # else  | 
| 
3204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  *err = build_range_exp (aTHX_ syntax, sbcset, &start_elem, &end_elem);  | 
| 
3205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # endif  | 
| 
3206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif /* RE_ENABLE_I18N */  | 
| 
3207
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (BE (*err != REG_NOERROR, 0))  | 
| 
3208
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    goto parse_bracket_exp_free_return;  | 
| 
3209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
3210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       else  | 
| 
3211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
3212
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  switch (start_elem.type)  | 
| 
3213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
3214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    case SB_CHAR:  | 
| 
3215
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      bitset_set (aTHX_ sbcset, start_elem.opr.ch);  | 
| 
3216
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      break;  | 
| 
3217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
3218
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    case MB_CHAR:  | 
| 
3219
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      /* Check whether the array has enough space.  */  | 
| 
3220
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      if (BE (mbchar_alloc == mbcset->nmbchars, 0))  | 
| 
3221
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		{  | 
| 
3222
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  /* Not enough, realloc it.  */  | 
| 
3223
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  /* +1 in case of mbcset->nmbchars is 0.  */  | 
| 
3224
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  mbchar_alloc = 2 * mbcset->nmbchars + 1;  | 
| 
3225
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  /* Use realloc since array is NULL if *alloc == 0.  */  | 
| 
3226
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  re_realloc (mbcset->mbchars, rpl__wchar_t, mbchar_alloc);  | 
| 
3227
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
3228
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;  | 
| 
3229
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      break;  | 
| 
3230
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif /* RE_ENABLE_I18N */  | 
| 
3231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    case EQUIV_CLASS:  | 
| 
3232
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      *err = build_equiv_class (aTHX_ sbcset,  | 
| 
3233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
3234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					mbcset, &equiv_class_alloc,  | 
| 
3235
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif /* RE_ENABLE_I18N */  | 
| 
3236
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					start_elem.opr.name);  | 
| 
3237
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      if (BE (*err != REG_NOERROR, 0))  | 
| 
3238
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		goto parse_bracket_exp_free_return;  | 
| 
3239
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      break;  | 
| 
3240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    case COLL_SYM:  | 
| 
3241
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      *err = build_collating_symbol (aTHX_ sbcset,  | 
| 
3242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
3243
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					     mbcset, &coll_sym_alloc,  | 
| 
3244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif /* RE_ENABLE_I18N */  | 
| 
3245
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					     start_elem.opr.name);  | 
| 
3246
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      if (BE (*err != REG_NOERROR, 0))  | 
| 
3247
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		goto parse_bracket_exp_free_return;  | 
| 
3248
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      break;  | 
| 
3249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    case CHAR_CLASS:  | 
| 
3250
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      *err = build_charclass (aTHX_ regexp->trans, sbcset,  | 
| 
3251
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
3252
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				      mbcset, &char_class_alloc,  | 
| 
3253
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif /* RE_ENABLE_I18N */  | 
| 
3254
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				      (const char *) start_elem.opr.name,  | 
| 
3255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				      syntax);  | 
| 
3256
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      if (BE (*err != REG_NOERROR, 0))  | 
| 
3257
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	       goto parse_bracket_exp_free_return;  | 
| 
3258
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      break;  | 
| 
3259
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    default:  | 
| 
3260
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      assert (0);  | 
| 
3261
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      break;  | 
| 
3262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
3263
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
3264
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (BE (token->type == END_OF_RE, 0))  | 
| 
3265
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
3266
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  *err = REG_EBRACK;  | 
| 
3267
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  goto parse_bracket_exp_free_return;  | 
| 
3268
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
3269
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (token->type == OP_CLOSE_BRACKET)  | 
| 
3270
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	break;  | 
| 
3271
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3272
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3273
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_string_skip_bytes (aTHX_ regexp, token_len); /* Skip a token.  */  | 
| 
3274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3275
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* If it is non-matching list.  */  | 
| 
3276
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (non_match)  | 
| 
3277
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     bitset_not (aTHX_ sbcset);  | 
| 
3278
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3279
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
3280
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* Ensure only single byte characters are set.  */  | 
| 
3281
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (dfa->mb_cur_max > 1)  | 
| 
3282
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     bitset_mask (aTHX_ sbcset, dfa->sb_char);  | 
| 
3283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3284
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3285
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       || mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3286
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 						     || mbcset->non_match)))  | 
| 
3287
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
3288
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       bin_tree_t *mbc_tree;  | 
| 
3289
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       int sbc_idx;  | 
| 
3290
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* Build a tree for complex bracket.  */  | 
| 
3291
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       dfa->has_mb_node = 1;  | 
| 
3292
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       br_token.type = COMPLEX_BRACKET;  | 
| 
3293
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       br_token.opr.mbcset = mbcset;  | 
| 
3294
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       mbc_tree = create_token_tree (aTHX_ dfa, NULL, NULL, &br_token);  | 
| 
3295
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (BE (mbc_tree == NULL, 0))  | 
| 
3296
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	goto parse_bracket_exp_espace;  | 
| 
3297
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       for (sbc_idx = 0; sbc_idx < BITSET_WORDS; ++sbc_idx)  | 
| 
3298
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	if (sbcset[sbc_idx])  | 
| 
3299
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  break;  | 
| 
3300
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* If there are no bits set in sbcset, there is no point  | 
| 
3301
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	 of having both SIMPLE_BRACKET and COMPLEX_BRACKET.  */  | 
| 
3302
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (sbc_idx < BITSET_WORDS)  | 
| 
3303
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
3304
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  /* Build a tree for simple bracket.  */  | 
| 
3305
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  br_token.type = SIMPLE_BRACKET;  | 
| 
3306
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  br_token.opr.sbcset = sbcset;  | 
| 
3307
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  work_tree = create_token_tree (aTHX_ dfa, NULL, NULL, &br_token);  | 
| 
3308
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (BE (work_tree == NULL, 0))  | 
| 
3309
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    goto parse_bracket_exp_espace;  | 
| 
3310
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3311
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  /* Then join them by ALT node.  */  | 
| 
3312
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  work_tree = create_tree (aTHX_ dfa, work_tree, mbc_tree, OP_ALT);  | 
| 
3313
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (BE (work_tree == NULL, 0))  | 
| 
3314
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    goto parse_bracket_exp_espace;  | 
| 
3315
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
3316
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       else  | 
| 
3317
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
3318
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  re_free (sbcset);  | 
| 
3319
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  work_tree = mbc_tree;  | 
| 
3320
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
3321
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3322
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   else  | 
| 
3323
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif /* not RE_ENABLE_I18N */  | 
| 
3324
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
3325
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
3326
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       free_charset (aTHX_ mbcset);  | 
| 
3327
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif  | 
| 
3328
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* Build a tree for simple bracket.  */  | 
| 
3329
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       br_token.type = SIMPLE_BRACKET;  | 
| 
3330
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       br_token.opr.sbcset = sbcset;  | 
| 
3331
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       work_tree = create_token_tree (aTHX_ dfa, NULL, NULL, &br_token);  | 
| 
3332
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (BE (work_tree == NULL, 0))  | 
| 
3333
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	goto parse_bracket_exp_espace;  | 
| 
3334
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3335
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return work_tree;  | 
| 
3336
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3337
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  parse_bracket_exp_espace:  | 
| 
3338
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   *err = REG_ESPACE;  | 
| 
3339
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  parse_bracket_exp_free_return:  | 
| 
3340
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_free (sbcset);  | 
| 
3341
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
3342
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   free_charset (aTHX_ mbcset);  | 
| 
3343
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif /* RE_ENABLE_I18N */  | 
| 
3344
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return NULL;  | 
| 
3345
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
3346
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3347
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Parse an element in the bracket expression.  */  | 
| 
3348
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3349
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t  | 
| 
3350
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 parse_bracket_element (pTHX_ bracket_elem_t *elem, re_string_t *regexp,  | 
| 
3351
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		       re_token_t *token, int token_len, re_dfa_t *dfa,  | 
| 
3352
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		       reg_syntax_t syntax, bool accept_hyphen)  | 
| 
3353
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
3354
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
3355
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   int cur_char_size;  | 
| 
3356
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   cur_char_size = re_string_char_size_at (aTHX_ regexp, re_string_cur_idx (regexp));  | 
| 
3357
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (cur_char_size > 1)  | 
| 
3358
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
3359
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       elem->type = MB_CHAR;  | 
| 
3360
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       elem->opr.wch = re_string_wchar_at (aTHX_ regexp, re_string_cur_idx (regexp));  | 
| 
3361
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       re_string_skip_bytes (aTHX_ regexp, cur_char_size);  | 
| 
3362
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       return REG_NOERROR;  | 
| 
3363
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3364
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif /* RE_ENABLE_I18N */  | 
| 
3365
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_string_skip_bytes (aTHX_ regexp, token_len); /* Skip a token.  */  | 
| 
3366
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (token->type == OP_OPEN_COLL_ELEM || token->type == OP_OPEN_CHAR_CLASS  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3367
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       || token->type == OP_OPEN_EQUIV_CLASS)  | 
| 
3368
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return parse_bracket_symbol (aTHX_ elem, regexp, token);  | 
| 
3369
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (token->type == OP_CHARSET_RANGE, 0) && !accept_hyphen)  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3370
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
3371
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* A '-' must only appear as anything but a range indicator before  | 
| 
3372
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	 the closing bracket.  Everything else is an error.  */  | 
| 
3373
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       re_token_t token2;  | 
| 
3374
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       (void) peek_token_bracket (aTHX_ &token2, regexp, syntax);  | 
| 
3375
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (token2.type != OP_CLOSE_BRACKET)  | 
| 
3376
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	/* The actual error value is not standardized since this whole  | 
| 
3377
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	   case is undefined.  But ERANGE makes good sense.  */  | 
| 
3378
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return REG_ERANGE;  | 
| 
3379
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3380
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   elem->type = SB_CHAR;  | 
| 
3381
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   elem->opr.ch = token->opr.c;  | 
| 
3382
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return REG_NOERROR;  | 
| 
3383
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
3384
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Parse a bracket symbol in the bracket expression.  Bracket symbols are  | 
| 
3386
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    such as [::], [..], and  | 
| 
3387
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    [==].  */  | 
| 
3388
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3389
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t  | 
| 
3390
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 parse_bracket_symbol (pTHX_ bracket_elem_t *elem, re_string_t *regexp,  | 
| 
3391
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		      re_token_t *token)  | 
| 
3392
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
3393
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   unsigned char ch, delim = token->opr.c;  | 
| 
3394
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   int i = 0;  | 
| 
3395
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (re_string_eoi(regexp))  | 
| 
3396
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return REG_EBRACK;  | 
| 
3397
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   for (;; ++i)  | 
| 
3398
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
3399
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (i >= BRACKET_NAME_BUF_SIZE)  | 
| 
3400
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return REG_EBRACK;  | 
| 
3401
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (token->type == OP_OPEN_CHAR_CLASS)  | 
| 
3402
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	ch = re_string_fetch_byte_case (aTHX_ regexp);  | 
| 
3403
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       else  | 
| 
3404
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	ch = re_string_fetch_byte (aTHX_ regexp);  | 
| 
3405
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (re_string_eoi(regexp))  | 
| 
3406
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return REG_EBRACK;  | 
| 
3407
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (ch == delim && re_string_peek_byte (aTHX_ regexp, 0) == ']')  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3408
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	break;  | 
| 
3409
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       elem->opr.name[i] = ch;  | 
| 
3410
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3411
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_string_skip_bytes (aTHX_ regexp, 1);  | 
| 
3412
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   elem->opr.name[i] = '\0';  | 
| 
3413
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   switch (token->type)  | 
| 
3414
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
3415
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case OP_OPEN_COLL_ELEM:  | 
| 
3416
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       elem->type = COLL_SYM;  | 
| 
3417
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       break;  | 
| 
3418
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case OP_OPEN_EQUIV_CLASS:  | 
| 
3419
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       elem->type = EQUIV_CLASS;  | 
| 
3420
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       break;  | 
| 
3421
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     case OP_OPEN_CHAR_CLASS:  | 
| 
3422
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       elem->type = CHAR_CLASS;  | 
| 
3423
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       break;  | 
| 
3424
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     default:  | 
| 
3425
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       break;  | 
| 
3426
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3427
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return REG_NOERROR;  | 
| 
3428
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
3429
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3430
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* Helper function for parse_bracket_exp.  | 
| 
3431
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      Build the equivalence class which is represented by NAME.  | 
| 
3432
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      The result are written to MBCSET and SBCSET.  | 
| 
3433
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes,  | 
| 
3434
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      is a pointer argument since we may update it.  */  | 
| 
3435
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3436
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t  | 
| 
3437
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
3438
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 build_equiv_class (pTHX_ bitset_t sbcset, re_charset_t *mbcset,  | 
| 
3439
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		   Idx *equiv_class_alloc, const unsigned char *name)  | 
| 
3440
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #else /* not RE_ENABLE_I18N */  | 
| 
3441
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 build_equiv_class (pTHX_ bitset_t sbcset, const unsigned char *name)  | 
| 
3442
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif /* not RE_ENABLE_I18N */  | 
| 
3443
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
3444
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef _LIBC  | 
| 
3445
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);  | 
| 
3446
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (nrules != 0)  | 
| 
3447
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
3448
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       const int32_t *table, *indirect;  | 
| 
3449
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       const unsigned char *weights, *extra, *cp;  | 
| 
3450
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       unsigned char char_buf[2];  | 
| 
3451
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       int32_t idx1, idx2;  | 
| 
3452
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       unsigned int ch;  | 
| 
3453
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       size_t len;  | 
| 
3454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* This #include defines a local function!  */  | 
| 
3455
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # include   | 
| 
3456
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* Calculate the index for equivalence class.  */  | 
| 
3457
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       cp = name;  | 
| 
3458
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);  | 
| 
3459
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE,  | 
| 
3460
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					       _NL_COLLATE_WEIGHTMB);  | 
| 
3461
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,  | 
| 
3462
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 						   _NL_COLLATE_EXTRAMB);  | 
| 
3463
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,  | 
| 
3464
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 						_NL_COLLATE_INDIRECTMB);  | 
| 
3465
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       idx1 = findidx (&cp, -1);  | 
| 
3466
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (BE (idx1 == 0 || *cp != '\0', 0))  | 
| 
3467
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	/* This isn't a valid character.  */  | 
| 
3468
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return REG_ECOLLATE;  | 
| 
3469
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3470
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* Build single byte matching table for this equivalence class.  */  | 
| 
3471
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       len = weights[idx1 & 0xffffff];  | 
| 
3472
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       for (ch = 0; ch < SBC_MAX; ++ch)  | 
| 
3473
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
3474
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  char_buf[0] = ch;  | 
| 
3475
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  cp = char_buf;  | 
| 
3476
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  idx2 = findidx (&cp, 1);  | 
| 
3477
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /*  | 
| 
3478
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  idx2 = table[ch];  | 
| 
3479
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 */  | 
| 
3480
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (idx2 == 0)  | 
| 
3481
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    /* This isn't a valid character.  */  | 
| 
3482
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    continue;  | 
| 
3483
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  /* Compare only if the length matches and the collation rule  | 
| 
3484
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	     index is the same.  */  | 
| 
3485
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (len == weights[idx2 & 0xffffff] && (idx1 >> 24) == (idx2 >> 24))  | 
| 
3486
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
3487
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      int cnt = 0;  | 
| 
3488
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3489
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      while (cnt <= len &&  | 
| 
3490
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		     weights[(idx1 & 0xffffff) + 1 + cnt]  | 
| 
3491
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		     == weights[(idx2 & 0xffffff) + 1 + cnt])  | 
| 
3492
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		++cnt;  | 
| 
3493
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3494
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      if (cnt > len)  | 
| 
3495
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		bitset_set (aTHX_ sbcset, ch);  | 
| 
3496
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
3497
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
3498
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* Check whether the array has enough space.  */  | 
| 
3499
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (BE (*equiv_class_alloc == mbcset->nequiv_classes, 0))  | 
| 
3500
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
3501
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  /* Not enough, realloc it.  */  | 
| 
3502
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  /* +1 in case of mbcset->nequiv_classes is 0.  */  | 
| 
3503
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  Idx new_equiv_class_alloc = 2 * mbcset->nequiv_classes + 1;  | 
| 
3504
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  /* Use realloc since the array is NULL if *alloc == 0.  */  | 
| 
3505
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  re_realloc (mbcset->equiv_classes, int32_t, new_equiv_class_alloc);  | 
| 
3506
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  *equiv_class_alloc = new_equiv_class_alloc;  | 
| 
3507
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
3508
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;  | 
| 
3509
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3510
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   else  | 
| 
3511
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif /* _LIBC */  | 
| 
3512
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
3513
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (BE (strlen ((const char *) name) != 1, 0))  | 
| 
3514
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return REG_ECOLLATE;  | 
| 
3515
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       bitset_set (aTHX_ sbcset, *name);  | 
| 
3516
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3517
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return REG_NOERROR;  | 
| 
3518
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
3519
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3520
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* Helper function for parse_bracket_exp.  | 
| 
3521
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      Build the character class which is represented by NAME.  | 
| 
3522
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      The result are written to MBCSET and SBCSET.  | 
| 
3523
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes,  | 
| 
3524
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      is a pointer argument since we may update it.  */  | 
| 
3525
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3526
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t  | 
| 
3527
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
3528
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 build_charclass (pTHX_ RE_TRANSLATE_TYPE trans, bitset_t sbcset,  | 
| 
3529
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		 re_charset_t *mbcset, Idx *char_class_alloc,  | 
| 
3530
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		 const char *class_name, reg_syntax_t syntax)  | 
| 
3531
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #else /* not RE_ENABLE_I18N */  | 
| 
3532
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 build_charclass (pTHX_ RE_TRANSLATE_TYPE trans, bitset_t sbcset,  | 
| 
3533
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		 const char *class_name, reg_syntax_t syntax)  | 
| 
3534
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif /* not RE_ENABLE_I18N */  | 
| 
3535
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
3536
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   int i;  | 
| 
3537
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   const char *name = class_name;  | 
| 
3538
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3539
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* In case of REG_ICASE "upper" and "lower" match the both of  | 
| 
3540
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      upper and lower cases.  */  | 
| 
3541
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if ((syntax & RE_ICASE)  | 
| 
3542
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       && (strcmp (name, "upper") == 0 || strcmp (name, "lower") == 0))  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3543
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     name = "alpha";  | 
| 
3544
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3545
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
3546
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* Check the space of the arrays.  */  | 
| 
3547
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (*char_class_alloc == mbcset->nchar_classes, 0))  | 
| 
3548
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
3549
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* Not enough, realloc it.  */  | 
| 
3550
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* +1 in case of mbcset->nchar_classes is 0.  */  | 
| 
3551
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       Idx new_char_class_alloc = 2 * mbcset->nchar_classes + 1;  | 
| 
3552
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* Use realloc since array is NULL if *alloc == 0.  */  | 
| 
3553
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       re_realloc (mbcset->char_classes, rpl__wctype_t, new_char_class_alloc);  | 
| 
3554
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       *char_class_alloc = new_char_class_alloc;  | 
| 
3555
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3556
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   mbcset->char_classes[mbcset->nchar_classes++] = rpl__wctype (name);  | 
| 
3557
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif /* RE_ENABLE_I18N */  | 
| 
3558
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3559
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #define BUILD_CHARCLASS_LOOP(ctype_func)	\  | 
| 
3560
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   do {						\  | 
| 
3561
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if (BE (trans != NULL, 0))			\  | 
| 
3562
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       {						\  | 
| 
3563
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	for (i = 0; i < SBC_MAX; ++i)		\  | 
| 
3564
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (ctype_func (i))                    \  | 
| 
3565
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    bitset_set (aTHX_ sbcset, trans[i]); \  | 
| 
3566
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }						\  | 
| 
3567
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     else					\  | 
| 
3568
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       {						\  | 
| 
3569
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	for (i = 0; i < SBC_MAX; ++i)		\  | 
| 
3570
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  if (ctype_func (i))                   \  | 
| 
3571
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    bitset_set (aTHX_ sbcset, i);       \  | 
| 
3572
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       }						\  | 
| 
3573
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } while (0)  | 
| 
3574
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3575
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (strcmp (name, "alnum") == 0)  | 
| 
3576
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     BUILD_CHARCLASS_LOOP (rpl__isalnum);  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3577
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   else if (strcmp (name, "cntrl") == 0)  | 
| 
3578
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     BUILD_CHARCLASS_LOOP (rpl__iscntrl);  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3579
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   else if (strcmp (name, "lower") == 0)  | 
| 
3580
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     BUILD_CHARCLASS_LOOP (rpl__islower);  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3581
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   else if (strcmp (name, "space") == 0)  | 
| 
3582
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     BUILD_CHARCLASS_LOOP (rpl__isspace);  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3583
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   else if (strcmp (name, "alpha") == 0)  | 
| 
3584
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     BUILD_CHARCLASS_LOOP (rpl__isalpha);  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3585
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   else if (strcmp (name, "digit") == 0)  | 
| 
3586
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     BUILD_CHARCLASS_LOOP (rpl__isdigit);  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3587
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   else if (strcmp (name, "print") == 0)  | 
| 
3588
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     BUILD_CHARCLASS_LOOP (rpl__isprint);  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3589
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   else if (strcmp (name, "upper") == 0)  | 
| 
3590
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     BUILD_CHARCLASS_LOOP (rpl__isupper);  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3591
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   else if (strcmp (name, "blank") == 0)  | 
| 
3592
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     BUILD_CHARCLASS_LOOP (rpl__isblank);  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3593
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   else if (strcmp (name, "graph") == 0)  | 
| 
3594
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     BUILD_CHARCLASS_LOOP (rpl__isgraph);  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3595
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   else if (strcmp (name, "punct") == 0)  | 
| 
3596
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     BUILD_CHARCLASS_LOOP (rpl__ispunct);  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3597
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   else if (strcmp (name, "xdigit") == 0)  | 
| 
3598
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     BUILD_CHARCLASS_LOOP (rpl__isxdigit);  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3599
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   else  | 
| 
3600
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     return REG_ECTYPE;  | 
| 
3601
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3602
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return REG_NOERROR;  | 
| 
3603
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
3604
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3605
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static bin_tree_t *  | 
| 
3606
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 build_charclass_op (pTHX_ re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,  | 
| 
3607
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		    const char *class_name,  | 
| 
3608
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		    const char *extra, bool non_match,  | 
| 
3609
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		    reg_errcode_t *err)  | 
| 
3610
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
3611
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_bitset_ptr_t sbcset;  | 
| 
3612
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
3613
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_charset_t *mbcset;  | 
| 
3614
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Idx alloc = 0;  | 
| 
3615
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif /* not RE_ENABLE_I18N */  | 
| 
3616
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   reg_errcode_t ret;  | 
| 
3617
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_token_t br_token;  | 
| 
3618
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   bin_tree_t *tree;  | 
| 
3619
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3620
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_calloc(sbcset, bitset_word_t, BITSET_WORDS);  | 
| 
3621
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), 1); */  | 
| 
3622
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
3623
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_calloc(mbcset, re_charset_t, 1);  | 
| 
3624
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* mbcset = (re_charset_t *) calloc (sizeof (re_charset_t), 1); */  | 
| 
3625
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif /* RE_ENABLE_I18N */  | 
| 
3626
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3627
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
3628
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (sbcset == NULL || mbcset == NULL, 0))  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3629
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #else /* not RE_ENABLE_I18N */  | 
| 
3630
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (sbcset == NULL, 0))  | 
| 
3631
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif /* not RE_ENABLE_I18N */  | 
| 
3632
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
3633
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       *err = REG_ESPACE;  | 
| 
3634
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       return NULL;  | 
| 
3635
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3636
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3637
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (non_match)  | 
| 
3638
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
3639
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
3640
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       mbcset->non_match = 1;  | 
| 
3641
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif /* not RE_ENABLE_I18N */  | 
| 
3642
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3643
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3644
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* We don't care the syntax in this case.  */  | 
| 
3645
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ret = build_charclass (aTHX_ trans, sbcset,  | 
| 
3646
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
3647
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			 mbcset, &alloc,  | 
| 
3648
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif /* RE_ENABLE_I18N */  | 
| 
3649
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			 class_name, 0);  | 
| 
3650
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3651
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (ret != REG_NOERROR, 0))  | 
| 
3652
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
3653
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       re_free (sbcset);  | 
| 
3654
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
3655
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       free_charset (aTHX_ mbcset);  | 
| 
3656
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif /* RE_ENABLE_I18N */  | 
| 
3657
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       *err = ret;  | 
| 
3658
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       return NULL;  | 
| 
3659
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3660
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* \w match '_' also.  */  | 
| 
3661
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   for (; *extra; extra++)  | 
| 
3662
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     bitset_set (aTHX_ sbcset, *extra);  | 
| 
3663
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3664
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* If it is non-matching list.  */  | 
| 
3665
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (non_match)  | 
| 
3666
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     bitset_not (aTHX_ sbcset);  | 
| 
3667
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3668
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
3669
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* Ensure only single byte characters are set.  */  | 
| 
3670
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (dfa->mb_cur_max > 1)  | 
| 
3671
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     bitset_mask (aTHX_ sbcset, dfa->sb_char);  | 
| 
3672
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif  | 
| 
3673
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3674
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   /* Build a tree for simple bracket.  */  | 
| 
3675
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   br_token.type = SIMPLE_BRACKET;  | 
| 
3676
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   br_token.opr.sbcset = sbcset;  | 
| 
3677
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   tree = create_token_tree (aTHX_ dfa, NULL, NULL, &br_token);  | 
| 
3678
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (tree == NULL, 0))  | 
| 
3679
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     goto build_word_op_espace;  | 
| 
3680
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3681
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
3682
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (dfa->mb_cur_max > 1)  | 
| 
3683
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
3684
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       bin_tree_t *mbc_tree;  | 
| 
3685
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* Build a tree for complex bracket.  */  | 
| 
3686
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       br_token.type = COMPLEX_BRACKET;  | 
| 
3687
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       br_token.opr.mbcset = mbcset;  | 
| 
3688
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       dfa->has_mb_node = 1;  | 
| 
3689
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       mbc_tree = create_token_tree (aTHX_ dfa, NULL, NULL, &br_token);  | 
| 
3690
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (BE (mbc_tree == NULL, 0))  | 
| 
3691
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	goto build_word_op_espace;  | 
| 
3692
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* Then join them by ALT node.  */  | 
| 
3693
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       tree = create_tree (aTHX_ dfa, tree, mbc_tree, OP_ALT);  | 
| 
3694
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (BE (mbc_tree != NULL, 1))  | 
| 
3695
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return tree;  | 
| 
3696
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3697
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   else  | 
| 
3698
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
3699
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       free_charset (aTHX_ mbcset);  | 
| 
3700
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       return tree;  | 
| 
3701
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3702
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #else /* not RE_ENABLE_I18N */  | 
| 
3703
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return tree;  | 
| 
3704
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif /* not RE_ENABLE_I18N */  | 
| 
3705
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3706
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  build_word_op_espace:  | 
| 
3707
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_free (sbcset);  | 
| 
3708
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
3709
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   free_charset (aTHX_ mbcset);  | 
| 
3710
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif /* RE_ENABLE_I18N */  | 
| 
3711
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   *err = REG_ESPACE;  | 
| 
3712
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return NULL;  | 
| 
3713
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
3714
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3715
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* This is intended for the expressions like "a{1,3}".  | 
| 
3716
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    Fetch a number from 'input', and return the number.  | 
| 
3717
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    Return REG_MISSING if the number field is empty like "{,1}".  | 
| 
3718
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    Return RE_DUP_MAX + 1 if the number field is too large.  | 
| 
3719
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    Return REG_ERROR if an error occurred.  */  | 
| 
3720
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3721
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static Idx  | 
| 
3722
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 fetch_number (pTHX_ re_string_t *input, re_token_t *token, reg_syntax_t syntax)  | 
| 
3723
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
3724
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Idx num = REG_MISSING;  | 
| 
3725
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   unsigned char c;  | 
| 
3726
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   while (1)  | 
| 
3727
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
3728
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       fetch_token (aTHX_ token, input, syntax);  | 
| 
3729
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       c = token->opr.c;  | 
| 
3730
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (BE (token->type == END_OF_RE, 0))  | 
| 
3731
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return REG_ERROR;  | 
| 
3732
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (token->type == OP_CLOSE_DUP_NUM || c == ',')  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3733
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	break;  | 
| 
3734
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       num = ((token->type != CHARACTER || c < '0' || '9' < c  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3735
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      || num == REG_ERROR)  | 
| 
3736
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	     ? REG_ERROR  | 
| 
3737
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	     : num == REG_MISSING  | 
| 
3738
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	     ? c - '0'  | 
| 
3739
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	     : MIN (RE_DUP_MAX + 1, num * 10 + c - '0'));  | 
| 
3740
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3741
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return num;  | 
| 
3742
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
3743
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   | 
| 
3744
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
3745
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static void  | 
| 
3746
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 free_charset (pTHX_ re_charset_t *cset)  | 
| 
3747
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
3748
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_free (cset->mbchars);  | 
| 
3749
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # ifdef _LIBC  | 
| 
3750
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_free (cset->coll_syms);  | 
| 
3751
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_free (cset->equiv_classes);  | 
| 
3752
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_free (cset->range_starts);  | 
| 
3753
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_free (cset->range_ends);  | 
| 
3754
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 # endif  | 
| 
3755
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_free (cset->char_classes);  | 
| 
3756
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_free (cset);  | 
| 
3757
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
3758
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif /* RE_ENABLE_I18N */  | 
| 
3759
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   | 
| 
3760
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Functions for binary tree operation.  */  | 
| 
3761
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3762
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Create a tree node.  */  | 
| 
3763
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3764
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static bin_tree_t *  | 
| 
3765
 | 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 create_tree (pTHX_ re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,  | 
| 
3766
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	     re_token_type_t type)  | 
| 
3767
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
3768
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   re_token_t t;  | 
| 
3769
 | 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   t.type = type;  | 
| 
3770
 | 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return create_token_tree (aTHX_ dfa, left, right, &t);  | 
| 
3771
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
3772
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3773
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static bin_tree_t *  | 
| 
3774
 | 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 create_token_tree (pTHX_ re_dfa_t *dfa, bin_tree_t *left, bin_tree_t *right,  | 
| 
3775
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		   const re_token_t *token)  | 
| 
3776
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
3777
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   bin_tree_t *tree;  | 
| 
3778
 | 
92
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0))  | 
| 
3779
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
3780
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       bin_tree_storage_t *storage;  | 
| 
3781
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3782
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       re_malloc (storage, bin_tree_storage_t, 1);  | 
| 
3783
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       storage->next = dfa->str_tree_storage;  | 
| 
3784
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       dfa->str_tree_storage = storage;  | 
| 
3785
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       dfa->str_tree_storage_idx = 0;  | 
| 
3786
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3787
 | 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   tree = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx++];  | 
| 
3788
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3789
 | 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   tree->parent = NULL;  | 
| 
3790
 | 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   tree->left = left;  | 
| 
3791
 | 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   tree->right = right;  | 
| 
3792
 | 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   tree->token = *token;  | 
| 
3793
 | 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   tree->token.duplicated = 0;  | 
| 
3794
 | 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   tree->token.opt_subexp = 0;  | 
| 
3795
 | 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   tree->first = NULL;  | 
| 
3796
 | 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   tree->next = NULL;  | 
| 
3797
 | 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   tree->node_idx = REG_MISSING;  | 
| 
3798
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3799
 | 
92
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (left != NULL)  | 
| 
3800
 | 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     left->parent = tree;  | 
| 
3801
 | 
92
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (right != NULL)  | 
| 
3802
 | 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     right->parent = tree;  | 
| 
3803
 | 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return tree;  | 
| 
3804
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
3805
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3806
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Mark the tree SRC as an optional subexpression.  | 
| 
3807
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    To be called from preorder or postorder.  */  | 
| 
3808
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3809
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t  | 
| 
3810
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 mark_opt_subexp (pTHX_ void *extra, bin_tree_t *node)  | 
| 
3811
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
3812
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   Idx idx = (uintptr_t) extra;  | 
| 
3813
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (node->token.type == SUBEXP && node->token.opr.idx == idx)  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3814
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     node->token.opt_subexp = 1;  | 
| 
3815
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3816
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return REG_NOERROR;  | 
| 
3817
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
3818
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3819
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Free the allocated memory inside NODE. */  | 
| 
3820
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3821
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static void  | 
| 
3822
 | 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 free_token (pTHX_ re_token_t *node)  | 
| 
3823
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
3824
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #ifdef RE_ENABLE_I18N  | 
| 
3825
 | 
48
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   if (node->type == COMPLEX_BRACKET && node->duplicated == 0)  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3826
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     free_charset (aTHX_ node->opr.mbcset);  | 
| 
3827
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   else  | 
| 
3828
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #endif /* RE_ENABLE_I18N */  | 
| 
3829
 | 
46
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     if (node->type == SIMPLE_BRACKET && node->duplicated == 0)  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3830
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       re_free (node->opr.sbcset);  | 
| 
3831
 | 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
3832
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3833
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Worker function for tree walking.  Free the allocated memory inside NODE  | 
| 
3834
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    and its children. */  | 
| 
3835
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3836
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static reg_errcode_t  | 
| 
3837
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 free_tree (pTHX_ void *extra, bin_tree_t *node)  | 
| 
3838
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
3839
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   free_token (aTHX_ &node->token);  | 
| 
3840
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   return REG_NOERROR;  | 
| 
3841
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
3842
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3843
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3844
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 /* Duplicate the node SRC, and return new node.  This is a preorder  | 
| 
3845
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    visit similar to the one implemented by the generic visitor, but  | 
| 
3846
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    we need more infrastructure to maintain two parallel trees --- so,  | 
| 
3847
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    it's easier to duplicate.  */  | 
| 
3848
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3849
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 static bin_tree_t *  | 
| 
3850
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 duplicate_tree (pTHX_ const bin_tree_t *root, re_dfa_t *dfa)  | 
| 
3851
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 {  | 
| 
3852
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   const bin_tree_t *node;  | 
| 
3853
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   bin_tree_t *dup_root;  | 
| 
3854
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   bin_tree_t **p_new = &dup_root, *dup_node = root->parent;  | 
| 
3855
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3856
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   for (node = root; ; )  | 
| 
3857
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     {  | 
| 
3858
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* Create a new tree and link it back to the current parent.  */  | 
| 
3859
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       *p_new = create_token_tree (aTHX_ dfa, NULL, NULL, &node->token);  | 
| 
3860
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (*p_new == NULL)  | 
| 
3861
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return NULL;  | 
| 
3862
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       (*p_new)->parent = dup_node;  | 
| 
3863
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       (*p_new)->token.duplicated = 1;  | 
| 
3864
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       dup_node = *p_new;  | 
| 
3865
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3866
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       /* Go to the left node, or up and to the right.  */  | 
| 
3867
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       if (node->left)  | 
| 
3868
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
3869
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  node = node->left;  | 
| 
3870
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  p_new = &dup_node->left;  | 
| 
3871
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
3872
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       else  | 
| 
3873
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	{  | 
| 
3874
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  const bin_tree_t *prev = NULL;  | 
| 
3875
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  while (node->right == prev || node->right == NULL)  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
3876
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    {  | 
| 
3877
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      prev = node;  | 
| 
3878
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      node = node->parent;  | 
| 
3879
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      dup_node = dup_node->parent;  | 
| 
3880
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	      if (!node)  | 
| 
3881
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		return dup_root;  | 
| 
3882
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	    }  | 
| 
3883
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  node = node->right;  | 
| 
3884
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  p_new = &dup_node->right;  | 
| 
3885
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
3886
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     }  | 
| 
3887
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  |