line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
/************************************************* |
2
|
|
|
|
|
|
|
* Perl-Compatible Regular Expressions * |
3
|
|
|
|
|
|
|
*************************************************/ |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
/* PCRE is a library of functions to support regular expressions whose syntax |
6
|
|
|
|
|
|
|
and semantics are as close as possible to those of the Perl 5 language. |
7
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
Written by Philip Hazel |
9
|
|
|
|
|
|
|
Copyright (c) 1997-2013 University of Cambridge |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
The machine code generator part (this module) was written by Zoltan Herczeg |
12
|
|
|
|
|
|
|
Copyright (c) 2010-2013 |
13
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
----------------------------------------------------------------------------- |
15
|
|
|
|
|
|
|
Redistribution and use in source and binary forms, with or without |
16
|
|
|
|
|
|
|
modification, are permitted provided that the following conditions are met: |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
* Redistributions of source code must retain the above copyright notice, |
19
|
|
|
|
|
|
|
this list of conditions and the following disclaimer. |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
* Redistributions in binary form must reproduce the above copyright |
22
|
|
|
|
|
|
|
notice, this list of conditions and the following disclaimer in the |
23
|
|
|
|
|
|
|
documentation and/or other materials provided with the distribution. |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
* Neither the name of the University of Cambridge nor the names of its |
26
|
|
|
|
|
|
|
contributors may be used to endorse or promote products derived from |
27
|
|
|
|
|
|
|
this software without specific prior written permission. |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
30
|
|
|
|
|
|
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
31
|
|
|
|
|
|
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
32
|
|
|
|
|
|
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
33
|
|
|
|
|
|
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
34
|
|
|
|
|
|
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
35
|
|
|
|
|
|
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
36
|
|
|
|
|
|
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
37
|
|
|
|
|
|
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
38
|
|
|
|
|
|
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
39
|
|
|
|
|
|
|
POSSIBILITY OF SUCH DAMAGE. |
40
|
|
|
|
|
|
|
----------------------------------------------------------------------------- |
41
|
|
|
|
|
|
|
*/ |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
#ifdef HAVE_CONFIG_H |
44
|
|
|
|
|
|
|
#include "config.h" |
45
|
|
|
|
|
|
|
#endif |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
#include "pcre_internal.h" |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
#if defined SUPPORT_JIT |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
/* All-in-one: Since we use the JIT compiler only from here, |
52
|
|
|
|
|
|
|
we just include it. This way we don't need to touch the build |
53
|
|
|
|
|
|
|
system files. */ |
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
#define SLJIT_MALLOC(size, allocator_data) (PUBL(malloc))(size) |
56
|
|
|
|
|
|
|
#define SLJIT_FREE(ptr, allocator_data) (PUBL(free))(ptr) |
57
|
|
|
|
|
|
|
#define SLJIT_CONFIG_AUTO 1 |
58
|
|
|
|
|
|
|
#define SLJIT_CONFIG_STATIC 1 |
59
|
|
|
|
|
|
|
#define SLJIT_VERBOSE 0 |
60
|
|
|
|
|
|
|
#define SLJIT_DEBUG 0 |
61
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
#include "sljit/sljitLir.c" |
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
#if defined SLJIT_CONFIG_UNSUPPORTED && SLJIT_CONFIG_UNSUPPORTED |
65
|
|
|
|
|
|
|
#error Unsupported architecture |
66
|
|
|
|
|
|
|
#endif |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
/* Defines for debugging purposes. */ |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
/* 1 - Use unoptimized capturing brackets. |
71
|
|
|
|
|
|
|
2 - Enable capture_last_ptr (includes option 1). */ |
72
|
|
|
|
|
|
|
/* #define DEBUG_FORCE_UNOPTIMIZED_CBRAS 2 */ |
73
|
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
/* 1 - Always have a control head. */ |
75
|
|
|
|
|
|
|
/* #define DEBUG_FORCE_CONTROL_HEAD 1 */ |
76
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
/* Allocate memory for the regex stack on the real machine stack. |
78
|
|
|
|
|
|
|
Fast, but limited size. */ |
79
|
|
|
|
|
|
|
#define MACHINE_STACK_SIZE 32768 |
80
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
/* Growth rate for stack allocated by the OS. Should be a multiple |
82
|
|
|
|
|
|
|
of page size. */ |
83
|
|
|
|
|
|
|
#define STACK_GROWTH_RATE 8192 |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
/* Enable to check that the allocation could destroy temporaries. */ |
86
|
|
|
|
|
|
|
#if defined SLJIT_DEBUG && SLJIT_DEBUG |
87
|
|
|
|
|
|
|
#define DESTROY_REGISTERS 1 |
88
|
|
|
|
|
|
|
#endif |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
/* |
91
|
|
|
|
|
|
|
Short summary about the backtracking mechanism employed by the jit code generator: |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
The code generator follows the recursive nature of the PERL compatible regular |
94
|
|
|
|
|
|
|
expressions. The basic blocks of regular expressions are condition checkers |
95
|
|
|
|
|
|
|
which execute different commands depending on the result of the condition check. |
96
|
|
|
|
|
|
|
The relationship between the operators can be horizontal (concatenation) and |
97
|
|
|
|
|
|
|
vertical (sub-expression) (See struct backtrack_common for more details). |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
'ab' - 'a' and 'b' regexps are concatenated |
100
|
|
|
|
|
|
|
'a+' - 'a' is the sub-expression of the '+' operator |
101
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
The condition checkers are boolean (true/false) checkers. Machine code is generated |
103
|
|
|
|
|
|
|
for the checker itself and for the actions depending on the result of the checker. |
104
|
|
|
|
|
|
|
The 'true' case is called as the matching path (expected path), and the other is called as |
105
|
|
|
|
|
|
|
the 'backtrack' path. Branch instructions are expensive for all CPUs, so we avoid taken |
106
|
|
|
|
|
|
|
branches on the matching path. |
107
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
Greedy star operator (*) : |
109
|
|
|
|
|
|
|
Matching path: match happens. |
110
|
|
|
|
|
|
|
Backtrack path: match failed. |
111
|
|
|
|
|
|
|
Non-greedy star operator (*?) : |
112
|
|
|
|
|
|
|
Matching path: no need to perform a match. |
113
|
|
|
|
|
|
|
Backtrack path: match is required. |
114
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
The following example shows how the code is generated for a capturing bracket |
116
|
|
|
|
|
|
|
with two alternatives. Let A, B, C, D be arbitrary regular expressions, and |
117
|
|
|
|
|
|
|
we have the following regular expression: |
118
|
|
|
|
|
|
|
|
119
|
|
|
|
|
|
|
A(B|C)D |
120
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
The generated code will be the following: |
122
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
A matching path |
124
|
|
|
|
|
|
|
'(' matching path (pushing arguments to the stack) |
125
|
|
|
|
|
|
|
B matching path |
126
|
|
|
|
|
|
|
')' matching path (pushing arguments to the stack) |
127
|
|
|
|
|
|
|
D matching path |
128
|
|
|
|
|
|
|
return with successful match |
129
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
D backtrack path |
131
|
|
|
|
|
|
|
')' backtrack path (If we arrived from "C" jump to the backtrack of "C") |
132
|
|
|
|
|
|
|
B backtrack path |
133
|
|
|
|
|
|
|
C expected path |
134
|
|
|
|
|
|
|
jump to D matching path |
135
|
|
|
|
|
|
|
C backtrack path |
136
|
|
|
|
|
|
|
A backtrack path |
137
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
Notice that the order of the backtrack code paths is the opposite of the fast |
139
|
|
|
|
|
|
|
code paths. In this way the topmost value on the stack always belongs |
140
|
|
|
|
|
|
|
to the current backtrack code path. The backtrack path must check |
141
|
|
|
|
|
|
|
whether there is a next alternative. If so, it needs to jump back to |
142
|
|
|
|
|
|
|
the matching path eventually. Otherwise it needs to clear out its own stack |
143
|
|
|
|
|
|
|
frame and continue the execution on the backtrack code paths. |
144
|
|
|
|
|
|
|
*/ |
145
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
/* |
147
|
|
|
|
|
|
|
Saved stack frames: |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
Atomic blocks and asserts require reloading the values of private data |
150
|
|
|
|
|
|
|
when the backtrack mechanism is performed. Because of OP_RECURSE, the data |
151
|
|
|
|
|
|
|
are not necessarily known at compile time, thus we need a dynamic restore |
152
|
|
|
|
|
|
|
mechanism. |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
The stack frames are stored in a chain list, and have the following format: |
155
|
|
|
|
|
|
|
([ capturing bracket offset ][ start value ][ end value ])+ ... [ 0 ] [ previous head ] |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
Thus we can restore the private data to a particular point in the stack. |
158
|
|
|
|
|
|
|
*/ |
159
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
/* Argument block handed by pointer to the compiled code (see the
jit_function typedef). NOTE(review): the role of the individual fields is not
established in this part of the file; the names suggest subject string
pointers, the output offsets vector and match option flags - confirm against
the code that fills this structure. */
typedef struct jit_arguments { |
161
|
|
|
|
|
|
|
/* Pointers first. */ |
162
|
|
|
|
|
|
|
struct sljit_stack *stack; |
163
|
|
|
|
|
|
|
const pcre_uchar *str; |
164
|
|
|
|
|
|
|
const pcre_uchar *begin; |
165
|
|
|
|
|
|
|
const pcre_uchar *end; |
166
|
|
|
|
|
|
|
int *offsets; |
167
|
|
|
|
|
|
|
pcre_uchar *mark_ptr; |
168
|
|
|
|
|
|
|
void *callout_data; |
169
|
|
|
|
|
|
|
/* Everything else after. */ |
170
|
|
|
|
|
|
|
sljit_u32 limit_match; |
171
|
|
|
|
|
|
|
int real_offset_count; |
172
|
|
|
|
|
|
|
int offset_count; |
173
|
|
|
|
|
|
|
sljit_u8 notbol; |
174
|
|
|
|
|
|
|
sljit_u8 noteol; |
175
|
|
|
|
|
|
|
sljit_u8 notempty; |
176
|
|
|
|
|
|
|
sljit_u8 notempty_atstart; |
177
|
|
|
|
|
|
|
} jit_arguments; |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
/* Record describing generated code. The three arrays each have
JIT_NUMBER_OF_COMPILE_MODES slots. NOTE(review): presumably slot i holds the
entry point, the read-only data chain and the code size produced for compile
mode i - confirm. */
typedef struct executable_functions { |
180
|
|
|
|
|
|
|
void *executable_funcs[JIT_NUMBER_OF_COMPILE_MODES]; |
181
|
|
|
|
|
|
|
void *read_only_data_heads[JIT_NUMBER_OF_COMPILE_MODES]; |
182
|
|
|
|
|
|
|
sljit_uw executable_sizes[JIT_NUMBER_OF_COMPILE_MODES]; |
183
|
|
|
|
|
|
|
PUBL(jit_callback) callback; |
184
|
|
|
|
|
|
|
void *userdata; |
185
|
|
|
|
|
|
|
sljit_u32 top_bracket; |
186
|
|
|
|
|
|
|
sljit_u32 limit_match; |
187
|
|
|
|
|
|
|
} executable_functions; |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
/* Node of a singly linked list of sljit jumps; `next` links to the
following node. */
typedef struct jump_list { |
190
|
|
|
|
|
|
|
struct sljit_jump *jump; |
191
|
|
|
|
|
|
|
struct jump_list *next; |
192
|
|
|
|
|
|
|
} jump_list; |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
/* Node of a singly linked list of stubs. Each node records one jump (`start`)
and one label (`quit`). NOTE(review): presumably `start` is the jump that
enters the stub and `quit` is where execution resumes - confirm. */
typedef struct stub_list { |
195
|
|
|
|
|
|
|
struct sljit_jump *start; |
196
|
|
|
|
|
|
|
struct sljit_label *quit; |
197
|
|
|
|
|
|
|
struct stub_list *next; |
198
|
|
|
|
|
|
|
} stub_list; |
199
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
/* Node of a singly linked list that pairs a label with the location of a
machine word (`update_addr`). NOTE(review): presumably that word receives the
address of the label once the code has been generated - confirm. */
typedef struct label_addr_list { |
201
|
|
|
|
|
|
|
struct sljit_label *label; |
202
|
|
|
|
|
|
|
sljit_uw *update_addr; |
203
|
|
|
|
|
|
|
struct label_addr_list *next; |
204
|
|
|
|
|
|
|
} label_addr_list; |
205
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
/* Negative sentinel values. NOTE(review): presumably stored in the
`framesize` members of the backtrack structures, whose comments say "less
than 0 if a frame is not needed" - confirm. */
enum frame_types { |
207
|
|
|
|
|
|
|
no_frame = -1, |
208
|
|
|
|
|
|
|
no_stack = -2 |
209
|
|
|
|
|
|
|
}; |
210
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
/* Two entry kinds, numbered 0 and 1. NOTE(review): presumably tags for
entries of the control verb chain - confirm where the values are used. */
enum control_types { |
212
|
|
|
|
|
|
|
type_mark = 0, |
213
|
|
|
|
|
|
|
type_then_trap = 1 |
214
|
|
|
|
|
|
|
}; |
215
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
/* Pointer to a function that takes a jit_arguments block and returns an int.
NOTE(review): the meaning of the returned value is not visible here. */
typedef int (SLJIT_FUNC *jit_function)(jit_arguments *args); |
217
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
/* The following structure is the key data type for the recursive |
219
|
|
|
|
|
|
|
code generator. It is allocated by compile_matchingpath, and contains |
220
|
|
|
|
|
|
|
the arguments for compile_backtrackingpath. Must be the first member |
221
|
|
|
|
|
|
|
of its descendants. */ |
222
|
|
|
|
|
|
|
/* Shared header of the *_backtrack structures defined after it, each of which
embeds it as a first member named `common`. */
typedef struct backtrack_common { |
223
|
|
|
|
|
|
|
/* Concatenation stack. */ |
224
|
|
|
|
|
|
|
struct backtrack_common *prev; |
225
|
|
|
|
|
|
|
jump_list *nextbacktracks; |
226
|
|
|
|
|
|
|
/* Internal stack (for component operators). */ |
227
|
|
|
|
|
|
|
struct backtrack_common *top; |
228
|
|
|
|
|
|
|
jump_list *topbacktracks; |
229
|
|
|
|
|
|
|
/* Opcode pointer. */ |
230
|
|
|
|
|
|
|
pcre_uchar *cc; |
231
|
|
|
|
|
|
|
} backtrack_common; |
232
|
|
|
|
|
|
|
|
233
|
|
|
|
|
|
|
/* Backtrack record with the common header first. NOTE(review): judging by
the name it belongs to assertion opcodes - confirm. */
typedef struct assert_backtrack { |
234
|
|
|
|
|
|
|
backtrack_common common; |
235
|
|
|
|
|
|
|
jump_list *condfailed; |
236
|
|
|
|
|
|
|
/* Less than 0 if a frame is not needed. */ |
237
|
|
|
|
|
|
|
int framesize; |
238
|
|
|
|
|
|
|
/* Points to our private memory word on the stack. */ |
239
|
|
|
|
|
|
|
int private_data_ptr; |
240
|
|
|
|
|
|
|
/* For iterators. */ |
241
|
|
|
|
|
|
|
struct sljit_label *matchingpath; |
242
|
|
|
|
|
|
|
} assert_backtrack; |
243
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
/* Backtrack record for brackets, with the common header first. The members
of the union `u` share storage, so only one of them is meaningful for a given
record; the member comments name the opcodes each one serves. */
typedef struct bracket_backtrack { |
245
|
|
|
|
|
|
|
backtrack_common common; |
246
|
|
|
|
|
|
|
/* Where to continue if an alternative is successfully matched. */ |
247
|
|
|
|
|
|
|
struct sljit_label *alternative_matchingpath; |
248
|
|
|
|
|
|
|
/* For rmin and rmax iterators. */ |
249
|
|
|
|
|
|
|
struct sljit_label *recursive_matchingpath; |
250
|
|
|
|
|
|
|
/* For greedy ? operator. */ |
251
|
|
|
|
|
|
|
struct sljit_label *zero_matchingpath; |
252
|
|
|
|
|
|
|
/* Contains the branches of a failed condition. */ |
253
|
|
|
|
|
|
|
union { |
254
|
|
|
|
|
|
|
/* Both for OP_COND, OP_SCOND. */ |
255
|
|
|
|
|
|
|
jump_list *condfailed; |
256
|
|
|
|
|
|
|
assert_backtrack *assert; |
257
|
|
|
|
|
|
|
/* For OP_ONCE. Less than 0 if not needed. */ |
258
|
|
|
|
|
|
|
int framesize; |
259
|
|
|
|
|
|
|
} u; |
260
|
|
|
|
|
|
|
/* Points to our private memory word on the stack. */ |
261
|
|
|
|
|
|
|
int private_data_ptr; |
262
|
|
|
|
|
|
|
} bracket_backtrack; |
263
|
|
|
|
|
|
|
|
264
|
|
|
|
|
|
|
/* Backtrack record with the common header first. NOTE(review): judging by
the name it serves the possessive (..POS) bracket opcodes - confirm. */
typedef struct bracketpos_backtrack { |
265
|
|
|
|
|
|
|
backtrack_common common; |
266
|
|
|
|
|
|
|
/* Points to our private memory word on the stack. */ |
267
|
|
|
|
|
|
|
int private_data_ptr; |
268
|
|
|
|
|
|
|
/* Reverting stack is needed. */ |
269
|
|
|
|
|
|
|
int framesize; |
270
|
|
|
|
|
|
|
/* Allocated stack size. */ |
271
|
|
|
|
|
|
|
int stacksize; |
272
|
|
|
|
|
|
|
} bracketpos_backtrack; |
273
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
/* Backtrack record consisting of the common header plus one label.
NOTE(review): judging by the name it serves OP_BRAMINZERO - confirm. */
typedef struct braminzero_backtrack { |
275
|
|
|
|
|
|
|
backtrack_common common; |
276
|
|
|
|
|
|
|
struct sljit_label *matchingpath; |
277
|
|
|
|
|
|
|
} braminzero_backtrack; |
278
|
|
|
|
|
|
|
|
279
|
|
|
|
|
|
|
/* Backtrack record for character iterators, with the common header first.
The union `u` holds either a jump list (`backtracks`) or the `charpos`
record; the two share storage. NOTE(review): presumably `charpos.enabled`
tells which view is in use - confirm. */
typedef struct char_iterator_backtrack { |
280
|
|
|
|
|
|
|
backtrack_common common; |
281
|
|
|
|
|
|
|
/* Next iteration. */ |
282
|
|
|
|
|
|
|
struct sljit_label *matchingpath; |
283
|
|
|
|
|
|
|
union { |
284
|
|
|
|
|
|
|
jump_list *backtracks; |
285
|
|
|
|
|
|
|
struct { |
286
|
|
|
|
|
|
|
unsigned int othercasebit; |
287
|
|
|
|
|
|
|
pcre_uchar chr; |
288
|
|
|
|
|
|
|
BOOL enabled; |
289
|
|
|
|
|
|
|
} charpos; |
290
|
|
|
|
|
|
|
} u; |
291
|
|
|
|
|
|
|
} char_iterator_backtrack; |
292
|
|
|
|
|
|
|
|
293
|
|
|
|
|
|
|
/* Backtrack record consisting of the common header plus the label of the
next iteration. NOTE(review): judging by the name it serves repeated back
references - confirm. */
typedef struct ref_iterator_backtrack { |
294
|
|
|
|
|
|
|
backtrack_common common; |
295
|
|
|
|
|
|
|
/* Next iteration. */ |
296
|
|
|
|
|
|
|
struct sljit_label *matchingpath; |
297
|
|
|
|
|
|
|
} ref_iterator_backtrack; |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
/* Node of a singly linked list (linked through `next`) describing one
recursion target function. */
typedef struct recurse_entry { |
300
|
|
|
|
|
|
|
struct recurse_entry *next; |
301
|
|
|
|
|
|
|
/* Contains the function entry. */ |
302
|
|
|
|
|
|
|
struct sljit_label *entry; |
303
|
|
|
|
|
|
|
/* Collects the calls until the function is created. */ |
304
|
|
|
|
|
|
|
jump_list *calls; |
305
|
|
|
|
|
|
|
/* Points to the starting opcode. */ |
306
|
|
|
|
|
|
|
sljit_sw start; |
307
|
|
|
|
|
|
|
} recurse_entry; |
308
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
/* Backtrack record consisting of the common header plus one flag.
NOTE(review): presumably `inlined_pattern` is set when the recursed pattern
was compiled in place rather than called - confirm. */
typedef struct recurse_backtrack { |
310
|
|
|
|
|
|
|
backtrack_common common; |
311
|
|
|
|
|
|
|
BOOL inlined_pattern; |
312
|
|
|
|
|
|
|
} recurse_backtrack; |
313
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
#define OP_THEN_TRAP OP_TABLE_LENGTH |
315
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
/* Backtrack record for a then-trap, with the common header first. A record
may forward to another one through its `then_trap` member (see the member
comment). */
typedef struct then_trap_backtrack { |
317
|
|
|
|
|
|
|
backtrack_common common; |
318
|
|
|
|
|
|
|
/* If then_trap is not NULL, this structure contains the real |
319
|
|
|
|
|
|
|
then_trap for the backtracking path. */ |
320
|
|
|
|
|
|
|
struct then_trap_backtrack *then_trap; |
321
|
|
|
|
|
|
|
/* Points to the starting opcode. */ |
322
|
|
|
|
|
|
|
sljit_sw start; |
323
|
|
|
|
|
|
|
/* Exit point for the then opcodes of this alternative. */ |
324
|
|
|
|
|
|
|
jump_list *quit; |
325
|
|
|
|
|
|
|
/* Frame size of the current alternative. */ |
326
|
|
|
|
|
|
|
int framesize; |
327
|
|
|
|
|
|
|
} then_trap_backtrack; |
328
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
#define MAX_RANGE_SIZE 4 |
330
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
/* State shared by the code generator routines: the sljit compiler handle,
byte code pointers, offsets of reserved local slots (the *_ptr members of
type sljit_s32), pattern flags, newline settings, and the labels and jump
lists collected during code generation. The UTF / UCP members exist only when
the corresponding SUPPORT_* macros are defined. */
typedef struct compiler_common { |
332
|
|
|
|
|
|
|
/* The sljit generic compiler. */ |
333
|
|
|
|
|
|
|
struct sljit_compiler *compiler; |
334
|
|
|
|
|
|
|
/* First byte code. */ |
335
|
|
|
|
|
|
|
pcre_uchar *start; |
336
|
|
|
|
|
|
|
/* Maps private data offset to each opcode. */ |
337
|
|
|
|
|
|
|
sljit_s32 *private_data_ptrs; |
338
|
|
|
|
|
|
|
/* Chain list of read-only data ptrs. */ |
339
|
|
|
|
|
|
|
void *read_only_data_head; |
340
|
|
|
|
|
|
|
/* Tells whether the capturing bracket is optimized. */ |
341
|
|
|
|
|
|
|
sljit_u8 *optimized_cbracket; |
342
|
|
|
|
|
|
|
/* Tells whether the starting offset is a target of then. */ |
343
|
|
|
|
|
|
|
sljit_u8 *then_offsets; |
344
|
|
|
|
|
|
|
/* Current position where a THEN must jump. */ |
345
|
|
|
|
|
|
|
then_trap_backtrack *then_trap; |
346
|
|
|
|
|
|
|
/* Starting offset of private data for capturing brackets. */ |
347
|
|
|
|
|
|
|
sljit_s32 cbra_ptr; |
348
|
|
|
|
|
|
|
/* Output vector starting point. Must be divisible by 2. */ |
349
|
|
|
|
|
|
|
sljit_s32 ovector_start; |
350
|
|
|
|
|
|
|
/* Points to the starting character of the current match. */ |
351
|
|
|
|
|
|
|
sljit_s32 start_ptr; |
352
|
|
|
|
|
|
|
/* Last known position of the requested byte. */ |
353
|
|
|
|
|
|
|
sljit_s32 req_char_ptr; |
354
|
|
|
|
|
|
|
/* Head of the last recursion. */ |
355
|
|
|
|
|
|
|
sljit_s32 recursive_head_ptr; |
356
|
|
|
|
|
|
|
/* First inspected character for partial matching. |
357
|
|
|
|
|
|
|
(Needed for avoiding zero length partial matches.) */ |
358
|
|
|
|
|
|
|
sljit_s32 start_used_ptr; |
359
|
|
|
|
|
|
|
/* Starting pointer for partial soft matches. */ |
360
|
|
|
|
|
|
|
sljit_s32 hit_start; |
361
|
|
|
|
|
|
|
/* Pointer of the match end position. */ |
362
|
|
|
|
|
|
|
sljit_s32 match_end_ptr; |
363
|
|
|
|
|
|
|
/* Points to the marked string. */ |
364
|
|
|
|
|
|
|
sljit_s32 mark_ptr; |
365
|
|
|
|
|
|
|
/* Recursive control verb management chain. */ |
366
|
|
|
|
|
|
|
sljit_s32 control_head_ptr; |
367
|
|
|
|
|
|
|
/* Points to the last matched capture block index. */ |
368
|
|
|
|
|
|
|
sljit_s32 capture_last_ptr; |
369
|
|
|
|
|
|
|
/* Fast forward skipping byte code pointer. */ |
370
|
|
|
|
|
|
|
pcre_uchar *fast_forward_bc_ptr; |
371
|
|
|
|
|
|
|
/* Locals used by fast fail optimization. */ |
372
|
|
|
|
|
|
|
sljit_s32 fast_fail_start_ptr; |
373
|
|
|
|
|
|
|
sljit_s32 fast_fail_end_ptr; |
374
|
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
/* Flipped and lower case tables. */ |
376
|
|
|
|
|
|
|
const sljit_u8 *fcc; |
377
|
|
|
|
|
|
|
sljit_sw lcc; |
378
|
|
|
|
|
|
|
/* Mode can be PCRE_STUDY_JIT_COMPILE and others. */ |
379
|
|
|
|
|
|
|
int mode; |
380
|
|
|
|
|
|
|
/* TRUE, when minlength is greater than 0. NOTE(review): the member name suggests the opposite condition (minlength == 0, i.e. an empty match is possible) - confirm against the code that sets it. */ |
381
|
|
|
|
|
|
|
BOOL might_be_empty; |
382
|
|
|
|
|
|
|
/* \K is found in the pattern. */ |
383
|
|
|
|
|
|
|
BOOL has_set_som; |
384
|
|
|
|
|
|
|
/* (*SKIP:arg) is found in the pattern. */ |
385
|
|
|
|
|
|
|
BOOL has_skip_arg; |
386
|
|
|
|
|
|
|
/* (*THEN) is found in the pattern. */ |
387
|
|
|
|
|
|
|
BOOL has_then; |
388
|
|
|
|
|
|
|
/* (*SKIP) or (*SKIP:arg) is found in lookbehind assertion. */ |
389
|
|
|
|
|
|
|
BOOL has_skip_in_assert_back; |
390
|
|
|
|
|
|
|
/* Currently in recurse or negative assert. */ |
391
|
|
|
|
|
|
|
BOOL local_exit; |
392
|
|
|
|
|
|
|
/* Currently in a positive assert. */ |
393
|
|
|
|
|
|
|
BOOL positive_assert; |
394
|
|
|
|
|
|
|
/* Newline control. */ |
395
|
|
|
|
|
|
|
int nltype; |
396
|
|
|
|
|
|
|
sljit_u32 nlmax; |
397
|
|
|
|
|
|
|
sljit_u32 nlmin; |
398
|
|
|
|
|
|
|
int newline; |
399
|
|
|
|
|
|
|
int bsr_nltype; |
400
|
|
|
|
|
|
|
sljit_u32 bsr_nlmax; |
401
|
|
|
|
|
|
|
sljit_u32 bsr_nlmin; |
402
|
|
|
|
|
|
|
/* Dollar endonly. */ |
403
|
|
|
|
|
|
|
int endonly; |
404
|
|
|
|
|
|
|
/* Tables. */ |
405
|
|
|
|
|
|
|
sljit_sw ctypes; |
406
|
|
|
|
|
|
|
/* Named capturing brackets. */ |
407
|
|
|
|
|
|
|
pcre_uchar *name_table; |
408
|
|
|
|
|
|
|
sljit_sw name_count; |
409
|
|
|
|
|
|
|
sljit_sw name_entry_size; |
410
|
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
/* Labels and jump lists. */ |
412
|
|
|
|
|
|
|
struct sljit_label *partialmatchlabel; |
413
|
|
|
|
|
|
|
struct sljit_label *quit_label; |
414
|
|
|
|
|
|
|
struct sljit_label *forced_quit_label; |
415
|
|
|
|
|
|
|
struct sljit_label *accept_label; |
416
|
|
|
|
|
|
|
struct sljit_label *ff_newline_shortcut; |
417
|
|
|
|
|
|
|
stub_list *stubs; |
418
|
|
|
|
|
|
|
label_addr_list *label_addrs; |
419
|
|
|
|
|
|
|
recurse_entry *entries; |
420
|
|
|
|
|
|
|
recurse_entry *currententry; |
421
|
|
|
|
|
|
|
jump_list *partialmatch; |
422
|
|
|
|
|
|
|
jump_list *quit; |
423
|
|
|
|
|
|
|
jump_list *positive_assert_quit; |
424
|
|
|
|
|
|
|
jump_list *forced_quit; |
425
|
|
|
|
|
|
|
jump_list *accept; |
426
|
|
|
|
|
|
|
jump_list *calllimit; |
427
|
|
|
|
|
|
|
jump_list *stackalloc; |
428
|
|
|
|
|
|
|
jump_list *revertframes; |
429
|
|
|
|
|
|
|
jump_list *wordboundary; |
430
|
|
|
|
|
|
|
jump_list *anynewline; |
431
|
|
|
|
|
|
|
jump_list *hspace; |
432
|
|
|
|
|
|
|
jump_list *vspace; |
433
|
|
|
|
|
|
|
jump_list *casefulcmp; |
434
|
|
|
|
|
|
|
jump_list *caselesscmp; |
435
|
|
|
|
|
|
|
jump_list *reset_match; |
436
|
|
|
|
|
|
|
BOOL jscript_compat; |
437
|
|
|
|
|
|
|
#ifdef SUPPORT_UTF |
438
|
|
|
|
|
|
|
BOOL utf; |
439
|
|
|
|
|
|
|
#ifdef SUPPORT_UCP |
440
|
|
|
|
|
|
|
BOOL use_ucp; |
441
|
|
|
|
|
|
|
jump_list *getucd; |
442
|
|
|
|
|
|
|
#endif |
443
|
|
|
|
|
|
|
#ifdef COMPILE_PCRE8 |
444
|
|
|
|
|
|
|
jump_list *utfreadchar; |
445
|
|
|
|
|
|
|
jump_list *utfreadchar16; |
446
|
|
|
|
|
|
|
jump_list *utfreadtype8; |
447
|
|
|
|
|
|
|
#endif |
448
|
|
|
|
|
|
|
#endif /* SUPPORT_UTF */ |
449
|
|
|
|
|
|
|
} compiler_common; |
450
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
/* For byte_sequence_compare. */ |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
/* Comparison state. `length` and `sourcereg` are always present; the
remaining members exist only when SLJIT_UNALIGNED is defined and non-zero.
Each of the unions `c` and `oc` lets the same storage be viewed as one 32 bit
value, one 16 bit value, or an array of code units sized for the compiled
character width (4 x 8 bit, 2 x 16 bit or 1 x 32 bit). NOTE(review):
presumably `c` holds the characters to compare and `oc` their other-case
bits - confirm. */
typedef struct compare_context { |
454
|
|
|
|
|
|
|
int length; |
455
|
|
|
|
|
|
|
int sourcereg; |
456
|
|
|
|
|
|
|
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED |
457
|
|
|
|
|
|
|
int ucharptr; |
458
|
|
|
|
|
|
|
union { |
459
|
|
|
|
|
|
|
sljit_s32 asint; |
460
|
|
|
|
|
|
|
sljit_u16 asushort; |
461
|
|
|
|
|
|
|
#if defined COMPILE_PCRE8 |
462
|
|
|
|
|
|
|
sljit_u8 asbyte; |
463
|
|
|
|
|
|
|
sljit_u8 asuchars[4]; |
464
|
|
|
|
|
|
|
#elif defined COMPILE_PCRE16 |
465
|
|
|
|
|
|
|
sljit_u16 asuchars[2]; |
466
|
|
|
|
|
|
|
#elif defined COMPILE_PCRE32 |
467
|
|
|
|
|
|
|
sljit_u32 asuchars[1]; |
468
|
|
|
|
|
|
|
#endif |
469
|
|
|
|
|
|
|
} c; |
470
|
|
|
|
|
|
|
union { |
471
|
|
|
|
|
|
|
sljit_s32 asint; |
472
|
|
|
|
|
|
|
sljit_u16 asushort; |
473
|
|
|
|
|
|
|
#if defined COMPILE_PCRE8 |
474
|
|
|
|
|
|
|
sljit_u8 asbyte; |
475
|
|
|
|
|
|
|
sljit_u8 asuchars[4]; |
476
|
|
|
|
|
|
|
#elif defined COMPILE_PCRE16 |
477
|
|
|
|
|
|
|
sljit_u16 asuchars[2]; |
478
|
|
|
|
|
|
|
#elif defined COMPILE_PCRE32 |
479
|
|
|
|
|
|
|
sljit_u32 asuchars[1]; |
480
|
|
|
|
|
|
|
#endif |
481
|
|
|
|
|
|
|
} oc; |
482
|
|
|
|
|
|
|
#endif |
483
|
|
|
|
|
|
|
} compare_context; |
484
|
|
|
|
|
|
|
|
485
|
|
|
|
|
|
|
/* Undefine sljit macros. */ |
486
|
|
|
|
|
|
|
#undef CMP |
487
|
|
|
|
|
|
|
|
488
|
|
|
|
|
|
|
/* Used for accessing the elements of the stack. */ |
489
|
|
|
|
|
|
|
#define STACK(i) ((i) * (int)sizeof(sljit_sw)) |
490
|
|
|
|
|
|
|
|
491
|
|
|
|
|
|
|
#ifdef SLJIT_PREF_SHIFT_REG |
492
|
|
|
|
|
|
|
#if SLJIT_PREF_SHIFT_REG == SLJIT_R2 |
493
|
|
|
|
|
|
|
/* Nothing. */ |
494
|
|
|
|
|
|
|
#elif SLJIT_PREF_SHIFT_REG == SLJIT_R3 |
495
|
|
|
|
|
|
|
#define SHIFT_REG_IS_R3 |
496
|
|
|
|
|
|
|
#else |
497
|
|
|
|
|
|
|
#error "Unsupported shift register" |
498
|
|
|
|
|
|
|
#endif |
499
|
|
|
|
|
|
|
#endif |
500
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
#define TMP1 SLJIT_R0 |
502
|
|
|
|
|
|
|
#ifdef SHIFT_REG_IS_R3 |
503
|
|
|
|
|
|
|
#define TMP2 SLJIT_R3 |
504
|
|
|
|
|
|
|
#define TMP3 SLJIT_R2 |
505
|
|
|
|
|
|
|
#else |
506
|
|
|
|
|
|
|
#define TMP2 SLJIT_R2 |
507
|
|
|
|
|
|
|
#define TMP3 SLJIT_R3 |
508
|
|
|
|
|
|
|
#endif |
509
|
|
|
|
|
|
|
#define STR_PTR SLJIT_S0 |
510
|
|
|
|
|
|
|
#define STR_END SLJIT_S1 |
511
|
|
|
|
|
|
|
#define STACK_TOP SLJIT_R1 |
512
|
|
|
|
|
|
|
#define STACK_LIMIT SLJIT_S2 |
513
|
|
|
|
|
|
|
#define COUNT_MATCH SLJIT_S3 |
514
|
|
|
|
|
|
|
#define ARGUMENTS SLJIT_S4 |
515
|
|
|
|
|
|
|
#define RETURN_ADDR SLJIT_R4 |
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
/* Local space layout. */ |
518
|
|
|
|
|
|
|
/* These two locals can be used by the current opcode. */ |
519
|
|
|
|
|
|
|
#define LOCALS0 (0 * sizeof(sljit_sw)) |
520
|
|
|
|
|
|
|
#define LOCALS1 (1 * sizeof(sljit_sw)) |
521
|
|
|
|
|
|
|
/* Two local variables for possessive quantifiers (char1 cannot use them). */ |
522
|
|
|
|
|
|
|
#define POSSESSIVE0 (2 * sizeof(sljit_sw)) |
523
|
|
|
|
|
|
|
#define POSSESSIVE1 (3 * sizeof(sljit_sw)) |
524
|
|
|
|
|
|
|
/* Max limit of recursions. */ |
525
|
|
|
|
|
|
|
#define LIMIT_MATCH (4 * sizeof(sljit_sw)) |
526
|
|
|
|
|
|
|
/* The output vector is stored on the stack, and contains pointers |
527
|
|
|
|
|
|
|
to characters. The vector data is divided into two groups: the first |
528
|
|
|
|
|
|
|
group contains the start / end character pointers, and the second is |
529
|
|
|
|
|
|
|
the start pointers when the end of the capturing group has not yet been reached. */ |
530
|
|
|
|
|
|
|
#define OVECTOR_START (common->ovector_start) |
531
|
|
|
|
|
|
|
#define OVECTOR(i) (OVECTOR_START + (i) * (sljit_sw)sizeof(sljit_sw)) |
532
|
|
|
|
|
|
|
#define OVECTOR_PRIV(i) (common->cbra_ptr + (i) * (sljit_sw)sizeof(sljit_sw)) |
533
|
|
|
|
|
|
|
#define PRIVATE_DATA(cc) (common->private_data_ptrs[(cc) - common->start]) |
534
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
#if defined COMPILE_PCRE8 |
536
|
|
|
|
|
|
|
#define MOV_UCHAR SLJIT_MOV_U8 |
537
|
|
|
|
|
|
|
#elif defined COMPILE_PCRE16 |
538
|
|
|
|
|
|
|
#define MOV_UCHAR SLJIT_MOV_U16 |
539
|
|
|
|
|
|
|
#elif defined COMPILE_PCRE32 |
540
|
|
|
|
|
|
|
#define MOV_UCHAR SLJIT_MOV_U32 |
541
|
|
|
|
|
|
|
#else |
542
|
|
|
|
|
|
|
#error Unsupported compiling mode |
543
|
|
|
|
|
|
|
#endif |
544
|
|
|
|
|
|
|
|
545
|
|
|
|
|
|
|
/* Shortcuts. */ |
546
|
|
|
|
|
|
|
#define DEFINE_COMPILER \ |
547
|
|
|
|
|
|
|
struct sljit_compiler *compiler = common->compiler |
548
|
|
|
|
|
|
|
#define OP1(op, dst, dstw, src, srcw) \ |
549
|
|
|
|
|
|
|
sljit_emit_op1(compiler, (op), (dst), (dstw), (src), (srcw)) |
550
|
|
|
|
|
|
|
#define OP2(op, dst, dstw, src1, src1w, src2, src2w) \ |
551
|
|
|
|
|
|
|
sljit_emit_op2(compiler, (op), (dst), (dstw), (src1), (src1w), (src2), (src2w)) |
552
|
|
|
|
|
|
|
#define LABEL() \ |
553
|
|
|
|
|
|
|
sljit_emit_label(compiler) |
554
|
|
|
|
|
|
|
#define JUMP(type) \ |
555
|
|
|
|
|
|
|
sljit_emit_jump(compiler, (type)) |
556
|
|
|
|
|
|
|
#define JUMPTO(type, label) \ |
557
|
|
|
|
|
|
|
sljit_set_label(sljit_emit_jump(compiler, (type)), (label)) |
558
|
|
|
|
|
|
|
#define JUMPHERE(jump) \ |
559
|
|
|
|
|
|
|
sljit_set_label((jump), sljit_emit_label(compiler)) |
560
|
|
|
|
|
|
|
#define SET_LABEL(jump, label) \ |
561
|
|
|
|
|
|
|
sljit_set_label((jump), (label)) |
562
|
|
|
|
|
|
|
#define CMP(type, src1, src1w, src2, src2w) \ |
563
|
|
|
|
|
|
|
sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)) |
564
|
|
|
|
|
|
|
#define CMPTO(type, src1, src1w, src2, src2w, label) \ |
565
|
|
|
|
|
|
|
sljit_set_label(sljit_emit_cmp(compiler, (type), (src1), (src1w), (src2), (src2w)), (label)) |
566
|
|
|
|
|
|
|
#define OP_FLAGS(op, dst, dstw, type) \ |
567
|
|
|
|
|
|
|
sljit_emit_op_flags(compiler, (op), (dst), (dstw), (type)) |
568
|
|
|
|
|
|
|
#define GET_LOCAL_BASE(dst, dstw, offset) \ |
569
|
|
|
|
|
|
|
sljit_get_local_base(compiler, (dst), (dstw), (offset)) |
570
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
#define READ_CHAR_MAX 0x7fffffff |
572
|
|
|
|
|
|
|
|
573
|
|
|
|
|
|
|
#define INVALID_UTF_CHAR 888 |
574
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
/* Returns a pointer just past the closing opcode of the bracket that starts
   at cc.

   cc must point at an opcode in the range OP_ASSERT..OP_ASSERTBACK_NOT or
   OP_ONCE..OP_SCOND (checked by assertion). The walk advances by the value
   GET(pos, 1) read at the current opcode, and repeats while it lands on
   OP_ALT; it must finish on an opcode in the range OP_KET..OP_KETRPOS. */
static pcre_uchar *bracketend(pcre_uchar *cc)
{
  pcre_uchar *pos = cc;

  SLJIT_ASSERT((*pos >= OP_ONCE && *pos <= OP_SCOND) || (*pos >= OP_ASSERT && *pos <= OP_ASSERTBACK_NOT));

  /* Always take the first link, then keep going across alternatives. */
  for (;;)
    {
    pos += GET(pos, 1);
    if (*pos != OP_ALT)
      break;
    }

  SLJIT_ASSERT(*pos >= OP_KET && *pos <= OP_KETRPOS);

  /* Step over the closing opcode and its link field. */
  return pos + 1 + LINK_SIZE;
}
583
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
/* Counts the alternatives of the bracket that starts at cc.

   cc must point at an opcode in the range OP_ASSERT..OP_ASSERTBACK_NOT or
   OP_ONCE..OP_SCOND (checked by assertion). The first branch always counts
   as one alternative; every OP_ALT reached by following GET(pos, 1) adds
   another. The walk must finish on an opcode in the range
   OP_KET..OP_KETRPOS. */
static int no_alternatives(pcre_uchar *cc)
{
  pcre_uchar *pos = cc;
  int alternatives = 1;

  SLJIT_ASSERT((*pos >= OP_ONCE && *pos <= OP_SCOND) || (*pos >= OP_ASSERT && *pos <= OP_ASSERTBACK_NOT));

  pos += GET(pos, 1);
  while (*pos == OP_ALT)
    {
    alternatives++;
    pos += GET(pos, 1);
    }

  SLJIT_ASSERT(*pos >= OP_KET && *pos <= OP_KETRPOS);
  return alternatives;
}
597
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
/* Functions which might need modification for all new supported opcodes: |
599
|
|
|
|
|
|
|
next_opcode |
600
|
|
|
|
|
|
|
check_opcode_types |
601
|
|
|
|
|
|
|
set_private_data_ptrs |
602
|
|
|
|
|
|
|
get_framesize |
603
|
|
|
|
|
|
|
init_frame |
604
|
|
|
|
|
|
|
get_private_data_copy_length |
605
|
|
|
|
|
|
|
copy_private_data |
606
|
|
|
|
|
|
|
compile_matchingpath |
607
|
|
|
|
|
|
|
compile_backtrackingpath |
608
|
|
|
|
|
|
|
*/ |
609
|
|
|
|
|
|
|
|
610
|
|
|
|
|
|
|
/* Returns the address of the opcode which follows the one at cc.
NULL is returned for OP_ANYBYTE in UTF mode, and (after triggering
SLJIT_UNREACHABLE) for any opcode not listed below. */
static pcre_uchar *next_opcode(compiler_common *common, pcre_uchar *cc)
{
SLJIT_UNUSED_ARG(common);

switch(*cc)
  {
  /* Opcodes whose full length is stored in the OP_lengths table. */
  case OP_SOD: case OP_SOM: case OP_SET_SOM:
  case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY:
  case OP_NOT_DIGIT: case OP_DIGIT:
  case OP_NOT_WHITESPACE: case OP_WHITESPACE:
  case OP_NOT_WORDCHAR: case OP_WORDCHAR:
  case OP_ANY: case OP_ALLANY:
  case OP_NOTPROP: case OP_PROP:
  case OP_ANYNL:
  case OP_NOT_HSPACE: case OP_HSPACE:
  case OP_NOT_VSPACE: case OP_VSPACE:
  case OP_EXTUNI:
  case OP_EODN: case OP_EOD:
  case OP_CIRC: case OP_CIRCM:
  case OP_DOLL: case OP_DOLLM:
  case OP_CRSTAR: case OP_CRMINSTAR:
  case OP_CRPLUS: case OP_CRMINPLUS:
  case OP_CRQUERY: case OP_CRMINQUERY:
  case OP_CRRANGE: case OP_CRMINRANGE:
  case OP_CRPOSSTAR: case OP_CRPOSPLUS:
  case OP_CRPOSQUERY: case OP_CRPOSRANGE:
  case OP_CLASS: case OP_NCLASS:
  case OP_REF: case OP_REFI:
  case OP_DNREF: case OP_DNREFI:
  case OP_RECURSE:
  case OP_CALLOUT:
  case OP_ALT:
  case OP_KET: case OP_KETRMAX: case OP_KETRMIN: case OP_KETRPOS:
  case OP_REVERSE:
  case OP_ASSERT: case OP_ASSERT_NOT:
  case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
  case OP_ONCE: case OP_ONCE_NC:
  case OP_BRA: case OP_BRAPOS:
  case OP_CBRA: case OP_CBRAPOS:
  case OP_COND:
  case OP_SBRA: case OP_SBRAPOS:
  case OP_SCBRA: case OP_SCBRAPOS:
  case OP_SCOND:
  case OP_CREF: case OP_DNCREF:
  case OP_RREF: case OP_DNRREF:
  case OP_DEF:
  case OP_BRAZERO: case OP_BRAMINZERO: case OP_BRAPOSZERO:
  case OP_PRUNE: case OP_SKIP: case OP_THEN: case OP_COMMIT:
  case OP_FAIL: case OP_ACCEPT: case OP_ASSERT_ACCEPT:
  case OP_CLOSE:
  case OP_SKIPZERO:
  return cc + PRIV(OP_lengths)[*cc];

  /* Opcodes which end with a character: in UTF mode the character may
  occupy additional code units after the table length. */
  case OP_CHAR: case OP_CHARI:
  case OP_NOT: case OP_NOTI:
  case OP_STAR: case OP_MINSTAR:
  case OP_PLUS: case OP_MINPLUS:
  case OP_QUERY: case OP_MINQUERY:
  case OP_UPTO: case OP_MINUPTO:
  case OP_EXACT:
  case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:
  case OP_STARI: case OP_MINSTARI:
  case OP_PLUSI: case OP_MINPLUSI:
  case OP_QUERYI: case OP_MINQUERYI:
  case OP_UPTOI: case OP_MINUPTOI:
  case OP_EXACTI:
  case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:
  case OP_NOTSTAR: case OP_NOTMINSTAR:
  case OP_NOTPLUS: case OP_NOTMINPLUS:
  case OP_NOTQUERY: case OP_NOTMINQUERY:
  case OP_NOTUPTO: case OP_NOTMINUPTO:
  case OP_NOTEXACT:
  case OP_NOTPOSSTAR: case OP_NOTPOSPLUS:
  case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
  case OP_NOTSTARI: case OP_NOTMINSTARI:
  case OP_NOTPLUSI: case OP_NOTMINPLUSI:
  case OP_NOTQUERYI: case OP_NOTMINQUERYI:
  case OP_NOTUPTOI: case OP_NOTMINUPTOI:
  case OP_NOTEXACTI:
  case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI:
  case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
  cc += PRIV(OP_lengths)[*cc];
#ifdef SUPPORT_UTF
  if (common->utf && HAS_EXTRALEN(cc[-1]))
    cc += GET_EXTRALEN(cc[-1]);
#endif
  return cc;

  /* Special cases. */

  /* Type repeats: stop one unit before the table length, so the result
  points to the type opcode which is the operand of the repeat. */
  case OP_TYPESTAR: case OP_TYPEMINSTAR:
  case OP_TYPEPLUS: case OP_TYPEMINPLUS:
  case OP_TYPEQUERY: case OP_TYPEMINQUERY:
  case OP_TYPEUPTO: case OP_TYPEMINUPTO:
  case OP_TYPEEXACT:
  case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS:
  case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
  return cc + PRIV(OP_lengths)[*cc] - 1;

  /* Rejected (NULL) in UTF mode, a single unit otherwise. */
  case OP_ANYBYTE:
#ifdef SUPPORT_UTF
  if (common->utf) return NULL;
#endif
  return cc + 1;

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
  /* The total length is stored right after the opcode. */
  case OP_XCLASS:
  return cc + GET(cc, 1);
#endif

  /* Opcode, a length unit, cc[1] units of argument and one more unit. */
  case OP_MARK:
  case OP_PRUNE_ARG:
  case OP_SKIP_ARG:
  case OP_THEN_ARG:
  return cc + 1 + 2 + cc[1];

  default:
  /* All opcodes are supported now! */
  SLJIT_UNREACHABLE();
  return NULL;
  }
}
804
|
|
|
|
|
|
|
|
805
|
|
|
|
|
|
|
/* Scans the byte code in [cc, ccend) once. While doing so it records the
features used by the pattern in "common" (set_som, then, skip flags),
clears the optimized_cbracket entry of every capture group which is
referenced or possessive, and reserves words at ovector_start for the
recursive head, last capture and mark pointers on their first use.

Returns FALSE if the pattern cannot be handled: a conditional bracket
which is directly followed by OP_CALLOUT, or an opcode rejected by
next_opcode(). Returns TRUE otherwise. */
static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
{
int entries;
pcre_uchar *ptr;
/* End of the furthest reaching OP_ASSERTBACK seen so far. */
pcre_uchar *assert_back_end = cc - 1;

/* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
while (cc < ccend)
  {
  switch(*cc)
    {
    case OP_SET_SOM:
    common->has_set_som = TRUE;
    common->might_be_empty = TRUE;
    cc += 1;
    break;

    /* Back references: the referenced group cannot be optimized. */
    case OP_REF:
    case OP_REFI:
    common->optimized_cbracket[GET2(cc, 1)] = 0;
    cc += 1 + IMM2_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] = 0;
    cc += 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    case OP_SCOND:
    /* Only AUTO_CALLOUT can insert this opcode. We do
       not intend to support this case. */
    if (cc[1 + LINK_SIZE] == OP_CALLOUT)
      return FALSE;
    cc += 1 + LINK_SIZE;
    break;

    case OP_CREF:
    common->optimized_cbracket[GET2(cc, 1)] = 0;
    cc += 1 + IMM2_SIZE;
    break;

    /* References by (duplicated) name: walk the listed name table
    entries and clear the group stored at the start of each entry. */
    case OP_DNREF:
    case OP_DNREFI:
    case OP_DNCREF:
    entries = GET2(cc, 1 + IMM2_SIZE);
    ptr = common->name_table + GET2(cc, 1) * common->name_entry_size;
    for (; entries > 0; entries--)
      {
      common->optimized_cbracket[GET2(ptr, 0)] = 0;
      ptr += common->name_entry_size;
      }
    cc += 1 + 2 * IMM2_SIZE;
    break;

    case OP_RECURSE:
    /* Set its value only once. */
    if (common->recursive_head_ptr == 0)
      {
      common->recursive_head_ptr = common->ovector_start;
      common->ovector_start += sizeof(sljit_sw);
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_CALLOUT:
    /* Reserved on the first callout only. */
    if (common->capture_last_ptr == 0)
      {
      common->capture_last_ptr = common->ovector_start;
      common->ovector_start += sizeof(sljit_sw);
      }
    cc += 2 + 2 * LINK_SIZE;
    break;

    case OP_ASSERTBACK:
    /* Remember how far the look-behind extends for the SKIP checks. */
    ptr = bracketend(cc);
    if (ptr > assert_back_end)
      assert_back_end = ptr;
    cc += 1 + LINK_SIZE;
    break;

    case OP_THEN_ARG:
    common->has_then = TRUE;
    common->control_head_ptr = 1;
    /* Fall through. */

    case OP_PRUNE_ARG:
    case OP_MARK:
    /* Reserved on the first named verb only. */
    if (common->mark_ptr == 0)
      {
      common->mark_ptr = common->ovector_start;
      common->ovector_start += sizeof(sljit_sw);
      }
    cc += 1 + 2 + cc[1];
    break;

    case OP_THEN:
    common->has_then = TRUE;
    common->control_head_ptr = 1;
    cc += 1;
    break;

    case OP_SKIP:
    if (cc < assert_back_end)
      common->has_skip_in_assert_back = TRUE;
    cc += 1;
    break;

    case OP_SKIP_ARG:
    common->control_head_ptr = 1;
    common->has_skip_arg = TRUE;
    if (cc < assert_back_end)
      common->has_skip_in_assert_back = TRUE;
    cc += 1 + 2 + cc[1];
    break;

    default:
    cc = next_opcode(common, cc);
    if (cc == NULL)
      return FALSE;
    break;
    }
  }

return TRUE;
}
931
|
|
|
|
|
|
|
|
932
|
|
|
|
|
|
|
/* Returns TRUE if cc points to a star or plus style repeat (greedy, lazy
or possessive) of a single character, a character type other than
OP_ANYNL / OP_EXTUNI, or a character class. Returns FALSE otherwise. */
static BOOL is_accelerated_repeat(pcre_uchar *cc)
{
switch(*cc)
  {
  /* Type repeats: the repeated type is stored right after the opcode. */
  case OP_TYPESTAR: case OP_TYPEMINSTAR:
  case OP_TYPEPLUS: case OP_TYPEMINPLUS:
  case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS:
  return !(cc[1] == OP_ANYNL || cc[1] == OP_EXTUNI);

  /* Single character repeats are always accepted. */
  case OP_STAR: case OP_MINSTAR:
  case OP_PLUS: case OP_MINPLUS:
  case OP_POSSTAR: case OP_POSPLUS:

  case OP_STARI: case OP_MINSTARI:
  case OP_PLUSI: case OP_MINPLUSI:
  case OP_POSSTARI: case OP_POSPLUSI:

  case OP_NOTSTAR: case OP_NOTMINSTAR:
  case OP_NOTPLUS: case OP_NOTMINPLUS:
  case OP_NOTPOSSTAR: case OP_NOTPOSPLUS:

  case OP_NOTSTARI: case OP_NOTMINSTARI:
  case OP_NOTPLUSI: case OP_NOTMINPLUSI:
  case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI:
  return TRUE;

  /* Classes: step over the class data and inspect the opcode after it. */
  case OP_CLASS:
  case OP_NCLASS:
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
  case OP_XCLASS:
  if (*cc == OP_XCLASS)
    cc += GET(cc, 1);
  else
    cc += (int)(1 + (32 / sizeof(pcre_uchar)));
#else
  cc += (1 + (32 / sizeof(pcre_uchar)));
#endif

  switch(*cc)
    {
    case OP_CRSTAR: case OP_CRMINSTAR:
    case OP_CRPLUS: case OP_CRMINPLUS:
    case OP_CRPOSSTAR: case OP_CRPOSPLUS:
    return TRUE;
    }
  break;
  }

return FALSE;
}
996
|
|
|
|
|
|
|
|
997
|
|
|
|
|
|
|
/* Checks whether the pattern starts - after any zero width assertions and
plain, non-repeated OP_BRA / OP_CBRA openings - with a repeat accepted by
is_accelerated_repeat(). If so, the repeat is stored in
common->fast_forward_bc_ptr, one sljit_sw of private data is reserved for
it at *private_data_start, and TRUE is returned. Otherwise returns FALSE
and nothing is modified. */
static SLJIT_INLINE BOOL detect_fast_forward_skip(compiler_common *common, int *private_data_start)
{
pcre_uchar *cc = common->start;
pcre_uchar *ket;

/* Skip not repeated brackets. */
for (;;)
  {
  switch(*cc)
    {
    case OP_SOD: case OP_SOM: case OP_SET_SOM:
    case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY:
    case OP_EODN: case OP_EOD:
    case OP_CIRC: case OP_CIRCM:
    case OP_DOLL: case OP_DOLLM:
    /* Zero width assertions. */
    cc++;
    continue;
    }

  if (*cc != OP_BRA && *cc != OP_CBRA)
    break;

  /* The first link must lead straight to a plain OP_KET (so there is
  no alternative) which has no private data assigned. */
  ket = cc + GET(cc, 1);
  if (*ket != OP_KET || PRIVATE_DATA(ket) != 0)
    return FALSE;

  if (*cc == OP_CBRA)
    {
    if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
      return FALSE;
    cc += IMM2_SIZE;
    }
  cc += 1 + LINK_SIZE;
  }

if (!is_accelerated_repeat(cc))
  return FALSE;

common->fast_forward_bc_ptr = cc;
common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;
*private_data_start += sizeof(sljit_sw);
return TRUE;
}
1047
|
|
|
|
|
|
|
|
1048
|
|
|
|
|
|
|
/* Examines each alternative of the OP_BRA / OP_CBRA bracket at cc. When an
alternative starts (after zero width assertions) with a repeat accepted by
is_accelerated_repeat(), one sljit_sw of private data is reserved for that
repeat and the [fast_fail_start_ptr, fast_fail_end_ptr) range in "common"
is extended. Brackets at the start of an alternative are examined
recursively while depth is greater than zero. Nothing is done for capture
brackets whose optimized_cbracket entry is zero, or for brackets which do
not end with a plain OP_KET free of private data. */
static SLJIT_INLINE void detect_fast_fail(compiler_common *common, pcre_uchar *cc, int *private_data_start, sljit_s32 depth)
{
pcre_uchar *ket;
pcre_uchar *branch_end;

SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA);

if (*cc == OP_CBRA && common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
  return;

ket = bracketend(cc) - (1 + LINK_SIZE);
if (*ket != OP_KET || PRIVATE_DATA(ket) != 0)
  return;

do
  {
  /* Remember where this alternative ends before cc is advanced. */
  branch_end = cc + GET(cc, 1);

  /* Step over OP_BRA / OP_ALT, or OP_CBRA with its group number. */
  cc += 1 + LINK_SIZE + ((*cc == OP_CBRA) ? IMM2_SIZE : 0);

  for (;;)
    {
    switch(*cc)
      {
      case OP_SOD: case OP_SOM: case OP_SET_SOM:
      case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY:
      case OP_EODN: case OP_EOD:
      case OP_CIRC: case OP_CIRCM:
      case OP_DOLL: case OP_DOLLM:
      /* Zero width assertions. */
      cc++;
      continue;
      }
    break;
    }

  if (depth > 0 && (*cc == OP_BRA || *cc == OP_CBRA))
    detect_fast_fail(common, cc, private_data_start, depth - 1);

  if (is_accelerated_repeat(cc))
    {
    common->private_data_ptrs[(cc + 1) - common->start] = *private_data_start;

    /* The start of the range is fixed by the first reserved word. */
    if (common->fast_fail_start_ptr == 0)
      common->fast_fail_start_ptr = *private_data_start;

    *private_data_start += sizeof(sljit_sw);
    common->fast_fail_end_ptr = *private_data_start;

    /* Stop as soon as the local area limit is exceeded. */
    if (*private_data_start > SLJIT_MAX_LOCAL_SIZE)
      return;
    }

  cc = branch_end;
  }
while (*cc == OP_ALT);
}
1110
|
|
|
|
|
|
|
|
1111
|
|
|
|
|
|
|
/* cc points to the opcode which follows a character class. Returns 0 if
it is not one of the class repeat opcodes handled below; otherwise returns
1 or 2 (for a bounded range the result is max - min capped at 2, which may
also be 0). */
static int get_class_iterator_size(pcre_uchar *cc)
{
sljit_u32 lower;
sljit_u32 upper;
sljit_u32 span;

switch(*cc)
  {
  case OP_CRSTAR:
  case OP_CRPLUS:
  return 2;

  case OP_CRMINSTAR:
  case OP_CRMINPLUS:
  case OP_CRQUERY:
  case OP_CRMINQUERY:
  return 1;

  case OP_CRRANGE:
  case OP_CRMINRANGE:
  lower = GET2(cc, 1);
  upper = GET2(cc, 1 + IMM2_SIZE);
  /* A zero maximum is treated like the star forms above. */
  if (upper == 0)
    return (*cc == OP_CRRANGE) ? 2 : 1;
  span = upper - lower;
  return (span > 2) ? 2 : span;

  default:
  return 0;
  }
}
1142
|
|
|
|
|
|
|
|
1143
|
|
|
|
|
|
|
/* Detects a bracket which is followed by identical copies of itself, and
optionally by nested OP_BRAZERO / OP_BRAMINZERO wrapped copies. Such a
sequence is recorded in common->private_data_ptrs as a counted repeat.

Three entries are written, starting at the index of the unit after the
closing OP_KET: the distance to the end of the sequence, the repeat kind
(OP_EXACT, OP_UPTO or OP_MINUPTO) and the repeat count.

Returns TRUE if the bracket at "begin" is (or has already been recorded
as) the head of such a repeat, FALSE otherwise. */
static BOOL detect_repeat(compiler_common *common, pcre_uchar *begin)
{
pcre_uchar *end = bracketend(begin);
pcre_uchar *next;
pcre_uchar *next_end;
pcre_uchar *max_end;
pcre_uchar type;
sljit_sw length = end - begin;
int min, max, i;

/* Detect fixed iterations first. */
if (end[-(1 + LINK_SIZE)] != OP_KET)
  return FALSE;

/* Already detected repeat. */
if (common->private_data_ptrs[end - common->start - LINK_SIZE] != 0)
  return TRUE;

/* Count the identical copies which directly follow the bracket. */
next = end;
min = 1;
while (*next == *begin)
  {
  next_end = bracketend(next);
  if (next_end - next != length || memcmp(begin, next, IN_UCHARS(length)) != 0)
    break;
  next = next_end;
  min++;
  }

/* Exactly one extra copy is not converted. */
if (min == 2)
  return FALSE;

max = 0;
max_end = next;
if (*next == OP_BRAZERO || *next == OP_BRAMINZERO)
  {
  type = *next;

  /* Each optional level is: type, OP_BRA, then a copy of the bracket. */
  while (next[0] == type && next[1] == OP_BRA && next[2 + LINK_SIZE] == *begin)
    {
    next_end = bracketend(next + 2 + LINK_SIZE);
    if (next_end - next != (length + 2 + LINK_SIZE) || memcmp(begin, next + 2 + LINK_SIZE, IN_UCHARS(length)) != 0)
      break;
    next = next_end;
    max++;
    }

  /* The innermost level has no OP_BRA wrapper: type, then the copy. */
  if (next[0] == type && next[1] == *begin && max >= 1)
    {
    next_end = bracketend(next + 1);
    if (next_end - next == (length + 1) && memcmp(begin, next + 1, IN_UCHARS(length)) == 0)
      {
      /* One plain OP_KET must close each of the wrapping OP_BRAs. */
      for (i = 0; i < max; i++, next_end += 1 + LINK_SIZE)
        if (*next_end != OP_KET)
          break;

      if (i == max)
        {
        common->private_data_ptrs[max_end - common->start - LINK_SIZE] = next_end - max_end;
        common->private_data_ptrs[max_end - common->start - LINK_SIZE + 1] = (type == OP_BRAZERO) ? OP_UPTO : OP_MINUPTO;
        /* +2 the original and the last. */
        common->private_data_ptrs[max_end - common->start - LINK_SIZE + 2] = max + 2;
        if (min == 1)
          return TRUE;
        /* The last mandatory copy now belongs to the bounded repeat. */
        min--;
        max_end -= (1 + LINK_SIZE) + GET(max_end, -LINK_SIZE);
        }
      }
    }
  }

if (min >= 3)
  {
  common->private_data_ptrs[end - common->start - LINK_SIZE] = max_end - end;
  common->private_data_ptrs[end - common->start - LINK_SIZE + 1] = OP_EXACT;
  common->private_data_ptrs[end - common->start - LINK_SIZE + 2] = min;
  return TRUE;
  }

return FALSE;
}
1227
|
|
|
|
|
|
|
|
1228
|
|
|
|
|
|
|
/* Groups of case labels for switch statements over iterator opcodes. In
set_private_data_ptrs the "_1" groups request one word of private data
and the "_2A" / "_2B" groups request two. */

/* Single character iterators: lazy star / plus and both query forms. */
#define CASE_ITERATOR_PRIVATE_DATA_1 \
    case OP_MINSTAR: case OP_MINPLUS: \
    case OP_QUERY: case OP_MINQUERY: \
    case OP_MINSTARI: case OP_MINPLUSI: \
    case OP_QUERYI: case OP_MINQUERYI: \
    case OP_NOTMINSTAR: case OP_NOTMINPLUS: \
    case OP_NOTQUERY: case OP_NOTMINQUERY: \
    case OP_NOTMINSTARI: case OP_NOTMINPLUSI: \
    case OP_NOTQUERYI: case OP_NOTMINQUERYI:

/* Single character iterators: greedy star / plus. */
#define CASE_ITERATOR_PRIVATE_DATA_2A \
    case OP_STAR: case OP_PLUS: \
    case OP_STARI: case OP_PLUSI: \
    case OP_NOTSTAR: case OP_NOTPLUS: \
    case OP_NOTSTARI: case OP_NOTPLUSI:

/* Single character iterators with an upper bound. */
#define CASE_ITERATOR_PRIVATE_DATA_2B \
    case OP_UPTO: case OP_MINUPTO: \
    case OP_UPTOI: case OP_MINUPTOI: \
    case OP_NOTUPTO: case OP_NOTMINUPTO: \
    case OP_NOTUPTOI: case OP_NOTMINUPTOI:

/* Character type iterators: lazy star / plus and both query forms. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_1 \
    case OP_TYPEMINSTAR: case OP_TYPEMINPLUS: \
    case OP_TYPEQUERY: case OP_TYPEMINQUERY:

/* Character type iterators: greedy star / plus. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2A \
    case OP_TYPESTAR: case OP_TYPEPLUS:

/* Character type iterators with an upper bound. */
#define CASE_ITERATOR_TYPE_PRIVATE_DATA_2B \
    case OP_TYPEUPTO: case OP_TYPEMINUPTO:
1279
|
|
|
|
|
|
|
|
1280
|
|
|
|
|
|
|
/* Walks the byte code from common->start to ccend and assigns private data
offsets (stored in common->private_data_ptrs, indexed by the position of
the opcode) to the opcodes which need them:

  - brackets and assertions listed below get one word;
  - an OP_KET which closes a bracket converted by detect_repeat() gets one
    word, and the repeated copies after it are skipped;
  - character, character type and class iterators which are not inside a
    repeated bracket get one or two words.

Offsets are handed out starting at *private_data_start, and the next free
offset is written back to *private_data_start on return. The walk stops
early when the offset exceeds SLJIT_MAX_LOCAL_SIZE. */
static void set_private_data_ptrs(compiler_common *common, int *private_data_start, pcre_uchar *ccend)
{
pcre_uchar *cc = common->start;
pcre_uchar *alternative;
/* End of the innermost enclosing bracket inside which iterators must not
get a private slot; NULL when there is no such bracket. */
pcre_uchar *end = NULL;
int private_data_ptr = *private_data_start;
int space, size, bracketlen;
BOOL repeat_check = TRUE;

while (cc < ccend)
  {
  /* space: words requested by an iterator opcode.
  size: units to advance; a negative value means the opcode ends with a
  character which may have extra UTF units.
  bracketlen: length of a bracket opening opcode. */
  space = 0;
  size = 0;
  bracketlen = 0;
  if (private_data_ptr > SLJIT_MAX_LOCAL_SIZE)
    break;

  if (repeat_check && (*cc == OP_ONCE || *cc == OP_ONCE_NC || *cc == OP_BRA || *cc == OP_CBRA || *cc == OP_COND))
    {
    if (detect_repeat(common, cc))
      {
      /* These brackets are converted to repeats, so no global
      based single character repeat is allowed. */
      if (cc >= end)
        end = bracketend(cc);
      }
    }
  /* The check is only suppressed for the bracket right after BRAZERO. */
  repeat_check = TRUE;

  switch(*cc)
    {
    case OP_KET:
    /* A non-zero entry after the KET was left there by detect_repeat():
    it is the distance to the end of the repeated copies. */
    if (common->private_data_ptrs[cc + 1 - common->start] != 0)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      cc += common->private_data_ptrs[cc + 1 - common->start];
      }
    cc += 1 + LINK_SIZE;
    break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw);
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_COND:
    /* Might be a hidden SCOND. */
    alternative = cc + GET(cc, 1);
    if (*alternative == OP_KETRMAX || *alternative == OP_KETRMIN)
      {
      common->private_data_ptrs[cc - common->start] = private_data_ptr;
      private_data_ptr += sizeof(sljit_sw);
      }
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_BRA:
    bracketlen = 1 + LINK_SIZE;
    break;

    case OP_CBRA:
    case OP_SCBRA:
    bracketlen = 1 + LINK_SIZE + IMM2_SIZE;
    break;

    case OP_BRAZERO:
    case OP_BRAMINZERO:
    case OP_BRAPOSZERO:
    repeat_check = FALSE;
    size = 1;
    break;

    CASE_ITERATOR_PRIVATE_DATA_1
    space = 1;
    size = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2A
    space = 2;
    size = -2;
    break;

    CASE_ITERATOR_PRIVATE_DATA_2B
    space = 2;
    size = -(2 + IMM2_SIZE);
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
    space = 1;
    size = 1;
    break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
    if (cc[1] != OP_ANYNL && cc[1] != OP_EXTUNI)
      space = 2;
    size = 1;
    break;

    case OP_TYPEUPTO:
    if (cc[1 + IMM2_SIZE] != OP_ANYNL && cc[1 + IMM2_SIZE] != OP_EXTUNI)
      space = 2;
    size = 1 + IMM2_SIZE;
    break;

    case OP_TYPEMINUPTO:
    space = 2;
    size = 1 + IMM2_SIZE;
    break;

    case OP_CLASS:
    case OP_NCLASS:
    /* The class length must be known first: the repeat opcode examined
    by get_class_iterator_size() is stored after the class data. */
    size = 1 + 32 / sizeof(pcre_uchar);
    space = get_class_iterator_size(cc + size);
    break;

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
    /* Same as above, with the length read from the opcode itself. */
    size = GET(cc, 1);
    space = get_class_iterator_size(cc + size);
    break;
#endif

    default:
    cc = next_opcode(common, cc);
    SLJIT_ASSERT(cc != NULL);
    break;
    }

  /* Character iterators, which are not inside a repeated bracket,
     gets a private slot instead of allocating it on the stack. */
  if (space > 0 && cc >= end)
    {
    common->private_data_ptrs[cc - common->start] = private_data_ptr;
    private_data_ptr += sizeof(sljit_sw) * space;
    }

  if (size != 0)
    {
    if (size < 0)
      {
      cc += -size;
#ifdef SUPPORT_UTF
      /* The opcode ends with a character: skip its extra UTF units. */
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      }
    else
      cc += size;
    }

  if (bracketlen > 0)
    {
    if (cc >= end)
      {
      /* A bracket closed by a plain OP_KET is not repeated, so iterators
      inside it may still get private slots. */
      end = bracketend(cc);
      if (end[-1 - LINK_SIZE] == OP_KET)
        end = NULL;
      }
    cc += bracketlen;
    }
  }
*private_data_start = private_data_ptr;
}
1460
|
|
|
|
|
|
|
|
1461
|
|
|
|
|
|
|
/* Returns with a frame_types (always < 0) if no need for frame. */ |
1462
|
|
|
|
|
|
|
static int get_framesize(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL recursive, BOOL *needs_control_head) |
1463
|
|
|
|
|
|
|
{ |
1464
|
|
|
|
|
|
|
int length = 0; |
1465
|
|
|
|
|
|
|
int possessive = 0; |
1466
|
|
|
|
|
|
|
BOOL stack_restore = FALSE; |
1467
|
|
|
|
|
|
|
BOOL setsom_found = recursive; |
1468
|
|
|
|
|
|
|
BOOL setmark_found = recursive; |
1469
|
|
|
|
|
|
|
/* The last capture is a local variable even for recursions. */ |
1470
|
|
|
|
|
|
|
BOOL capture_last_found = FALSE; |
1471
|
|
|
|
|
|
|
|
1472
|
|
|
|
|
|
|
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD |
1473
|
|
|
|
|
|
|
SLJIT_ASSERT(common->control_head_ptr != 0); |
1474
|
|
|
|
|
|
|
*needs_control_head = TRUE; |
1475
|
|
|
|
|
|
|
#else |
1476
|
|
|
|
|
|
|
*needs_control_head = FALSE; |
1477
|
|
|
|
|
|
|
#endif |
1478
|
|
|
|
|
|
|
|
1479
|
|
|
|
|
|
|
if (ccend == NULL) |
1480
|
|
|
|
|
|
|
{ |
1481
|
|
|
|
|
|
|
ccend = bracketend(cc) - (1 + LINK_SIZE); |
1482
|
|
|
|
|
|
|
if (!recursive && (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS)) |
1483
|
|
|
|
|
|
|
{ |
1484
|
|
|
|
|
|
|
possessive = length = (common->capture_last_ptr != 0) ? 5 : 3; |
1485
|
|
|
|
|
|
|
/* This is correct regardless of common->capture_last_ptr. */ |
1486
|
|
|
|
|
|
|
capture_last_found = TRUE; |
1487
|
|
|
|
|
|
|
} |
1488
|
|
|
|
|
|
|
cc = next_opcode(common, cc); |
1489
|
|
|
|
|
|
|
} |
1490
|
|
|
|
|
|
|
|
1491
|
|
|
|
|
|
|
SLJIT_ASSERT(cc != NULL); |
1492
|
|
|
|
|
|
|
while (cc < ccend) |
1493
|
|
|
|
|
|
|
switch(*cc) |
1494
|
|
|
|
|
|
|
{ |
1495
|
|
|
|
|
|
|
case OP_SET_SOM: |
1496
|
|
|
|
|
|
|
SLJIT_ASSERT(common->has_set_som); |
1497
|
|
|
|
|
|
|
stack_restore = TRUE; |
1498
|
|
|
|
|
|
|
if (!setsom_found) |
1499
|
|
|
|
|
|
|
{ |
1500
|
|
|
|
|
|
|
length += 2; |
1501
|
|
|
|
|
|
|
setsom_found = TRUE; |
1502
|
|
|
|
|
|
|
} |
1503
|
|
|
|
|
|
|
cc += 1; |
1504
|
|
|
|
|
|
|
break; |
1505
|
|
|
|
|
|
|
|
1506
|
|
|
|
|
|
|
case OP_MARK: |
1507
|
|
|
|
|
|
|
case OP_PRUNE_ARG: |
1508
|
|
|
|
|
|
|
case OP_THEN_ARG: |
1509
|
|
|
|
|
|
|
SLJIT_ASSERT(common->mark_ptr != 0); |
1510
|
|
|
|
|
|
|
stack_restore = TRUE; |
1511
|
|
|
|
|
|
|
if (!setmark_found) |
1512
|
|
|
|
|
|
|
{ |
1513
|
|
|
|
|
|
|
length += 2; |
1514
|
|
|
|
|
|
|
setmark_found = TRUE; |
1515
|
|
|
|
|
|
|
} |
1516
|
|
|
|
|
|
|
if (common->control_head_ptr != 0) |
1517
|
|
|
|
|
|
|
*needs_control_head = TRUE; |
1518
|
|
|
|
|
|
|
cc += 1 + 2 + cc[1]; |
1519
|
|
|
|
|
|
|
break; |
1520
|
|
|
|
|
|
|
|
1521
|
|
|
|
|
|
|
case OP_RECURSE: |
1522
|
|
|
|
|
|
|
stack_restore = TRUE; |
1523
|
|
|
|
|
|
|
if (common->has_set_som && !setsom_found) |
1524
|
|
|
|
|
|
|
{ |
1525
|
|
|
|
|
|
|
length += 2; |
1526
|
|
|
|
|
|
|
setsom_found = TRUE; |
1527
|
|
|
|
|
|
|
} |
1528
|
|
|
|
|
|
|
if (common->mark_ptr != 0 && !setmark_found) |
1529
|
|
|
|
|
|
|
{ |
1530
|
|
|
|
|
|
|
length += 2; |
1531
|
|
|
|
|
|
|
setmark_found = TRUE; |
1532
|
|
|
|
|
|
|
} |
1533
|
|
|
|
|
|
|
if (common->capture_last_ptr != 0 && !capture_last_found) |
1534
|
|
|
|
|
|
|
{ |
1535
|
|
|
|
|
|
|
length += 2; |
1536
|
|
|
|
|
|
|
capture_last_found = TRUE; |
1537
|
|
|
|
|
|
|
} |
1538
|
|
|
|
|
|
|
cc += 1 + LINK_SIZE; |
1539
|
|
|
|
|
|
|
break; |
1540
|
|
|
|
|
|
|
|
1541
|
|
|
|
|
|
|
case OP_CBRA: |
1542
|
|
|
|
|
|
|
case OP_CBRAPOS: |
1543
|
|
|
|
|
|
|
case OP_SCBRA: |
1544
|
|
|
|
|
|
|
case OP_SCBRAPOS: |
1545
|
|
|
|
|
|
|
stack_restore = TRUE; |
1546
|
|
|
|
|
|
|
if (common->capture_last_ptr != 0 && !capture_last_found) |
1547
|
|
|
|
|
|
|
{ |
1548
|
|
|
|
|
|
|
length += 2; |
1549
|
|
|
|
|
|
|
capture_last_found = TRUE; |
1550
|
|
|
|
|
|
|
} |
1551
|
|
|
|
|
|
|
length += 3; |
1552
|
|
|
|
|
|
|
cc += 1 + LINK_SIZE + IMM2_SIZE; |
1553
|
|
|
|
|
|
|
break; |
1554
|
|
|
|
|
|
|
|
1555
|
|
|
|
|
|
|
case OP_THEN: |
1556
|
|
|
|
|
|
|
stack_restore = TRUE; |
1557
|
|
|
|
|
|
|
if (common->control_head_ptr != 0) |
1558
|
|
|
|
|
|
|
*needs_control_head = TRUE; |
1559
|
|
|
|
|
|
|
cc ++; |
1560
|
|
|
|
|
|
|
break; |
1561
|
|
|
|
|
|
|
|
1562
|
|
|
|
|
|
|
default: |
1563
|
|
|
|
|
|
|
stack_restore = TRUE; |
1564
|
|
|
|
|
|
|
/* Fall through. */ |
1565
|
|
|
|
|
|
|
|
1566
|
|
|
|
|
|
|
case OP_NOT_WORD_BOUNDARY: |
1567
|
|
|
|
|
|
|
case OP_WORD_BOUNDARY: |
1568
|
|
|
|
|
|
|
case OP_NOT_DIGIT: |
1569
|
|
|
|
|
|
|
case OP_DIGIT: |
1570
|
|
|
|
|
|
|
case OP_NOT_WHITESPACE: |
1571
|
|
|
|
|
|
|
case OP_WHITESPACE: |
1572
|
|
|
|
|
|
|
case OP_NOT_WORDCHAR: |
1573
|
|
|
|
|
|
|
case OP_WORDCHAR: |
1574
|
|
|
|
|
|
|
case OP_ANY: |
1575
|
|
|
|
|
|
|
case OP_ALLANY: |
1576
|
|
|
|
|
|
|
case OP_ANYBYTE: |
1577
|
|
|
|
|
|
|
case OP_NOTPROP: |
1578
|
|
|
|
|
|
|
case OP_PROP: |
1579
|
|
|
|
|
|
|
case OP_ANYNL: |
1580
|
|
|
|
|
|
|
case OP_NOT_HSPACE: |
1581
|
|
|
|
|
|
|
case OP_HSPACE: |
1582
|
|
|
|
|
|
|
case OP_NOT_VSPACE: |
1583
|
|
|
|
|
|
|
case OP_VSPACE: |
1584
|
|
|
|
|
|
|
case OP_EXTUNI: |
1585
|
|
|
|
|
|
|
case OP_EODN: |
1586
|
|
|
|
|
|
|
case OP_EOD: |
1587
|
|
|
|
|
|
|
case OP_CIRC: |
1588
|
|
|
|
|
|
|
case OP_CIRCM: |
1589
|
|
|
|
|
|
|
case OP_DOLL: |
1590
|
|
|
|
|
|
|
case OP_DOLLM: |
1591
|
|
|
|
|
|
|
case OP_CHAR: |
1592
|
|
|
|
|
|
|
case OP_CHARI: |
1593
|
|
|
|
|
|
|
case OP_NOT: |
1594
|
|
|
|
|
|
|
case OP_NOTI: |
1595
|
|
|
|
|
|
|
|
1596
|
|
|
|
|
|
|
case OP_EXACT: |
1597
|
|
|
|
|
|
|
case OP_POSSTAR: |
1598
|
|
|
|
|
|
|
case OP_POSPLUS: |
1599
|
|
|
|
|
|
|
case OP_POSQUERY: |
1600
|
|
|
|
|
|
|
case OP_POSUPTO: |
1601
|
|
|
|
|
|
|
|
1602
|
|
|
|
|
|
|
case OP_EXACTI: |
1603
|
|
|
|
|
|
|
case OP_POSSTARI: |
1604
|
|
|
|
|
|
|
case OP_POSPLUSI: |
1605
|
|
|
|
|
|
|
case OP_POSQUERYI: |
1606
|
|
|
|
|
|
|
case OP_POSUPTOI: |
1607
|
|
|
|
|
|
|
|
1608
|
|
|
|
|
|
|
case OP_NOTEXACT: |
1609
|
|
|
|
|
|
|
case OP_NOTPOSSTAR: |
1610
|
|
|
|
|
|
|
case OP_NOTPOSPLUS: |
1611
|
|
|
|
|
|
|
case OP_NOTPOSQUERY: |
1612
|
|
|
|
|
|
|
case OP_NOTPOSUPTO: |
1613
|
|
|
|
|
|
|
|
1614
|
|
|
|
|
|
|
case OP_NOTEXACTI: |
1615
|
|
|
|
|
|
|
case OP_NOTPOSSTARI: |
1616
|
|
|
|
|
|
|
case OP_NOTPOSPLUSI: |
1617
|
|
|
|
|
|
|
case OP_NOTPOSQUERYI: |
1618
|
|
|
|
|
|
|
case OP_NOTPOSUPTOI: |
1619
|
|
|
|
|
|
|
|
1620
|
|
|
|
|
|
|
case OP_TYPEEXACT: |
1621
|
|
|
|
|
|
|
case OP_TYPEPOSSTAR: |
1622
|
|
|
|
|
|
|
case OP_TYPEPOSPLUS: |
1623
|
|
|
|
|
|
|
case OP_TYPEPOSQUERY: |
1624
|
|
|
|
|
|
|
case OP_TYPEPOSUPTO: |
1625
|
|
|
|
|
|
|
|
1626
|
|
|
|
|
|
|
case OP_CLASS: |
1627
|
|
|
|
|
|
|
case OP_NCLASS: |
1628
|
|
|
|
|
|
|
case OP_XCLASS: |
1629
|
|
|
|
|
|
|
case OP_CALLOUT: |
1630
|
|
|
|
|
|
|
|
1631
|
|
|
|
|
|
|
cc = next_opcode(common, cc); |
1632
|
|
|
|
|
|
|
SLJIT_ASSERT(cc != NULL); |
1633
|
|
|
|
|
|
|
break; |
1634
|
|
|
|
|
|
|
} |
1635
|
|
|
|
|
|
|
|
1636
|
|
|
|
|
|
|
/* Possessive quantifiers can use a special case. */ |
1637
|
|
|
|
|
|
|
if (SLJIT_UNLIKELY(possessive == length)) |
1638
|
|
|
|
|
|
|
return stack_restore ? no_frame : no_stack; |
1639
|
|
|
|
|
|
|
|
1640
|
|
|
|
|
|
|
if (length > 0) |
1641
|
|
|
|
|
|
|
return length + 1; |
1642
|
|
|
|
|
|
|
return stack_restore ? no_frame : no_stack; |
1643
|
|
|
|
|
|
|
} |
1644
|
|
|
|
|
|
|
|
1645
|
|
|
|
|
|
|
/* Emits one "saved slot" record at the current frame position: first the
   negated slot offset as an immediate, then the value currently stored in
   that slot (moved through TMP1). Relies on the `stackpos` variable and on
   the compiler handle declared by DEFINE_COMPILER in the enclosing function. */
#define INIT_FRAME_SAVE_SLOT(slot) \
  do \
  { \
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), (slot)); \
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, -(slot)); \
    stackpos -= (int)sizeof(sljit_sw); \
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0); \
    stackpos -= (int)sizeof(sljit_sw); \
  } \
  while (0)

/* Emits the code that fills a frame for the opcodes in [cc, ccend), walking
   downwards from stack position `stackpos`. When ccend is NULL, cc is
   treated as the start of a bracket and the bracket body is scanned; the
   bracket opcode itself is skipped unless it is a non-recursive
   CBRAPOS / SCBRAPOS, which is processed as the first item.

   For each start-of-match, mark and last-capture slot the first relevant
   item produces a two-word record; each capturing bracket produces a
   three-word record (ovector offset, then both ovector values). A zero word
   is written after the last record, at which point stackpos must equal
   STACK(stacktop). */
static void init_frame(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, int stackpos, int stacktop, BOOL recursive)
{
  DEFINE_COMPILER;
  /* For recursive calls the start-of-match and mark slots are treated as
     already saved. */
  BOOL som_saved = recursive;
  BOOL mark_saved = recursive;
  /* The last capture is a local variable even for recursions. */
  BOOL capture_last_saved = FALSE;
  int ovec_index;

  /* >= 1 + shortest item size (2) */
  SLJIT_UNUSED_ARG(stacktop);
  SLJIT_ASSERT(stackpos >= stacktop + 2);

  stackpos = STACK(stackpos);
  if (ccend == NULL)
  {
    ccend = bracketend(cc) - (1 + LINK_SIZE);
    if (recursive || (*cc != OP_CBRAPOS && *cc != OP_SCBRAPOS))
      cc = next_opcode(common, cc);
  }

  SLJIT_ASSERT(cc != NULL);
  while (cc < ccend)
  {
    switch (*cc)
    {
    case OP_SET_SOM:
      SLJIT_ASSERT(common->has_set_som);
      if (!som_saved)
      {
        INIT_FRAME_SAVE_SLOT(OVECTOR(0));
        som_saved = TRUE;
      }
      cc++;
      break;

    case OP_MARK:
    case OP_PRUNE_ARG:
    case OP_THEN_ARG:
      SLJIT_ASSERT(common->mark_ptr != 0);
      if (!mark_saved)
      {
        INIT_FRAME_SAVE_SLOT(common->mark_ptr);
        mark_saved = TRUE;
      }
      /* Skip the opcode, its two extra units and the cc[1] name units. */
      cc += 1 + 2 + cc[1];
      break;

    case OP_RECURSE:
      /* Save every slot kind that is in use and not saved yet. */
      if (common->has_set_som && !som_saved)
      {
        INIT_FRAME_SAVE_SLOT(OVECTOR(0));
        som_saved = TRUE;
      }
      if (common->mark_ptr != 0 && !mark_saved)
      {
        INIT_FRAME_SAVE_SLOT(common->mark_ptr);
        mark_saved = TRUE;
      }
      if (common->capture_last_ptr != 0 && !capture_last_saved)
      {
        INIT_FRAME_SAVE_SLOT(common->capture_last_ptr);
        capture_last_saved = TRUE;
      }
      cc += 1 + LINK_SIZE;
      break;

    case OP_CBRA:
    case OP_CBRAPOS:
    case OP_SCBRA:
    case OP_SCBRAPOS:
      if (common->capture_last_ptr != 0 && !capture_last_saved)
      {
        INIT_FRAME_SAVE_SLOT(common->capture_last_ptr);
        capture_last_saved = TRUE;
      }

      /* Capture record: the (positive) ovector offset, followed by the two
         ovector entries of this bracket. */
      ovec_index = (GET2(cc, 1 + LINK_SIZE)) << 1;
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, OVECTOR(ovec_index));
      stackpos -= (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovec_index));
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(ovec_index + 1));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP1, 0);
      stackpos -= (int)sizeof(sljit_sw);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, TMP2, 0);
      stackpos -= (int)sizeof(sljit_sw);

      cc += 1 + LINK_SIZE + IMM2_SIZE;
      break;

    default:
      cc = next_opcode(common, cc);
      SLJIT_ASSERT(cc != NULL);
      break;
    }
  }

  /* Zero word written after the last record. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackpos, SLJIT_IMM, 0);
  SLJIT_ASSERT(stackpos == STACK(stacktop));
}

#undef INIT_FRAME_SAVE_SLOT
1766
|
|
|
|
|
|
|
|
1767
|
|
|
|
|
|
|
/* Calculates the sum of the private machine words used by the opcodes in
   [cc, ccend). The total starts at 2, or at 3 when needs_control_head is
   set. The scan must finish exactly on ccend (asserted). */
static SLJIT_INLINE int get_private_data_copy_length(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, BOOL needs_control_head)
{
  int words;
  int class_len;
  pcre_uchar *alt;

  if (needs_control_head)
    words = 3;
  else
    words = 2;

  while (cc < ccend)
  {
    switch (*cc)
    {
    case OP_KET:
      if (PRIVATE_DATA(cc) != 0)
      {
        words++;
        SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
        /* The entry after the KET holds an extra distance to skip. */
        cc += PRIVATE_DATA(cc + 1);
      }
      cc += 1 + LINK_SIZE;
      break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
      /* These brackets always own exactly one private word. */
      words++;
      SLJIT_ASSERT(PRIVATE_DATA(cc) != 0);
      cc += 1 + LINK_SIZE;
      break;

    case OP_CBRA:
    case OP_SCBRA:
      /* Counted only when the capture is not marked as optimized. */
      if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
        words++;
      cc += 1 + LINK_SIZE + IMM2_SIZE;
      break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
      words += 2;
      cc += 1 + LINK_SIZE + IMM2_SIZE;
      break;

    case OP_COND:
      /* Might be a hidden SCOND. */
      alt = cc + GET(cc, 1);
      if (*alt == OP_KETRMAX || *alt == OP_KETRMIN)
        words++;
      cc += 1 + LINK_SIZE;
      break;

    CASE_ITERATOR_PRIVATE_DATA_1
      if (PRIVATE_DATA(cc))
        words++;
      cc += 2;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

    CASE_ITERATOR_PRIVATE_DATA_2A
      if (PRIVATE_DATA(cc))
        words += 2;
      cc += 2;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

    CASE_ITERATOR_PRIVATE_DATA_2B
      if (PRIVATE_DATA(cc))
        words += 2;
      cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
      if (PRIVATE_DATA(cc))
        words++;
      cc += 1;
      break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
      if (PRIVATE_DATA(cc))
        words += 2;
      cc += 1;
      break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
      if (PRIVATE_DATA(cc))
        words += 2;
      cc += 1 + IMM2_SIZE;
      break;

    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
      class_len = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
#else
      class_len = 1 + 32 / (int)sizeof(pcre_uchar);
#endif
      /* The word count depends on the iterator that follows the class. */
      if (PRIVATE_DATA(cc))
        words += get_class_iterator_size(cc + class_len);
      cc += class_len;
      break;

    default:
      cc = next_opcode(common, cc);
      SLJIT_ASSERT(cc != NULL);
      break;
    }
  }

  SLJIT_ASSERT(cc == ccend);
  return words;
}
1891
|
|
|
|
|
|
|
|
1892
|
|
|
|
|
|
|
/* Emits code that copies the private data slots used by the opcodes in
   [cc, ccend) between their SLJIT_SP-relative locations and the stack area
   [STACK(stackptr), STACK(stacktop - 1)).

   save == TRUE:  slot values are read and written to the stack. After the
                  last opcode the recursive head slot (preceded by the control
                  head slot when needs_control_head is set) is queued as well.
   save == FALSE: values are read from the stack and written back to the
                  slots. The stack end is lowered by one word (two with
                  needs_control_head) and the loop stops at ccend, so the
                  head slots are not written by this function.

   Values travel through TMP1 and TMP2 alternately; a register is flushed to
   (or refilled from) the stack only when it is needed again. */
static void copy_private_data(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend,
  BOOL save, int stackptr, int stacktop, BOOL needs_control_head)
{
  DEFINE_COMPILER;
  int slots[2];
  int pending, class_len;
  /* Which temporary register the next transfer uses. */
  BOOL use_tmp1 = TRUE;
  /* Whether each temporary currently holds no value in flight. */
  BOOL tmp1_free = TRUE;
  BOOL tmp2_free = TRUE;
  BOOL finished = FALSE;
  pcre_uchar *alt;

  stackptr = STACK(stackptr);
  stacktop = STACK(stacktop - 1);

  if (!save)
  {
    /* Pre-load up to two values so the registers are full when the first
       slots are written. */
    stacktop -= (needs_control_head ? 2 : 1) * sizeof(sljit_sw);
    if (stackptr < stacktop)
    {
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
      stackptr += sizeof(sljit_sw);
      tmp1_free = FALSE;
    }
    if (stackptr < stacktop)
    {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
      stackptr += sizeof(sljit_sw);
      tmp2_free = FALSE;
    }
    /* use_tmp1 must be TRUE in either way. */
  }

  SLJIT_ASSERT(common->recursive_head_ptr != 0);

  do
  {
    /* Step 1: collect the slot offsets (at most two) of the current item. */
    pending = 0;
    if (cc >= ccend)
    {
      if (!save)
        break;

      if (needs_control_head)
      {
        SLJIT_ASSERT(common->control_head_ptr != 0);
        pending = 2;
        slots[0] = common->control_head_ptr;
        slots[1] = common->recursive_head_ptr;
      }
      else
      {
        pending = 1;
        slots[0] = common->recursive_head_ptr;
      }
      finished = TRUE;
    }
    else switch (*cc)
    {
    case OP_KET:
      if (PRIVATE_DATA(cc) != 0)
      {
        pending = 1;
        slots[0] = PRIVATE_DATA(cc);
        SLJIT_ASSERT(PRIVATE_DATA(cc + 1) != 0);
        /* The entry after the KET holds an extra distance to skip. */
        cc += PRIVATE_DATA(cc + 1);
      }
      cc += 1 + LINK_SIZE;
      break;

    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRAPOS:
    case OP_SBRA:
    case OP_SBRAPOS:
    case OP_SCOND:
      pending = 1;
      slots[0] = PRIVATE_DATA(cc);
      SLJIT_ASSERT(slots[0] != 0);
      cc += 1 + LINK_SIZE;
      break;

    case OP_CBRA:
    case OP_SCBRA:
      /* Copied only when the capture is not marked as optimized. */
      if (common->optimized_cbracket[GET2(cc, 1 + LINK_SIZE)] == 0)
      {
        pending = 1;
        slots[0] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
      }
      cc += 1 + LINK_SIZE + IMM2_SIZE;
      break;

    case OP_CBRAPOS:
    case OP_SCBRAPOS:
      pending = 2;
      slots[0] = PRIVATE_DATA(cc);
      slots[1] = OVECTOR_PRIV(GET2(cc, 1 + LINK_SIZE));
      SLJIT_ASSERT(slots[0] != 0 && slots[1] != 0);
      cc += 1 + LINK_SIZE + IMM2_SIZE;
      break;

    case OP_COND:
      /* Might be a hidden SCOND. */
      alt = cc + GET(cc, 1);
      if (*alt == OP_KETRMAX || *alt == OP_KETRMIN)
      {
        pending = 1;
        slots[0] = PRIVATE_DATA(cc);
        SLJIT_ASSERT(slots[0] != 0);
      }
      cc += 1 + LINK_SIZE;
      break;

    CASE_ITERATOR_PRIVATE_DATA_1
      if (PRIVATE_DATA(cc))
      {
        pending = 1;
        slots[0] = PRIVATE_DATA(cc);
      }
      cc += 2;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

    CASE_ITERATOR_PRIVATE_DATA_2A
      if (PRIVATE_DATA(cc))
      {
        pending = 2;
        slots[0] = PRIVATE_DATA(cc);
        slots[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
      }
      cc += 2;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

    CASE_ITERATOR_PRIVATE_DATA_2B
      if (PRIVATE_DATA(cc))
      {
        pending = 2;
        slots[0] = PRIVATE_DATA(cc);
        slots[1] = PRIVATE_DATA(cc) + sizeof(sljit_sw);
      }
      cc += 2 + IMM2_SIZE;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
#endif
      break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_1
      if (PRIVATE_DATA(cc))
      {
        pending = 1;
        slots[0] = PRIVATE_DATA(cc);
      }
      cc += 1;
      break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2A
      if (PRIVATE_DATA(cc))
      {
        pending = 2;
        slots[0] = PRIVATE_DATA(cc);
        slots[1] = slots[0] + sizeof(sljit_sw);
      }
      cc += 1;
      break;

    CASE_ITERATOR_TYPE_PRIVATE_DATA_2B
      if (PRIVATE_DATA(cc))
      {
        pending = 2;
        slots[0] = PRIVATE_DATA(cc);
        slots[1] = slots[0] + sizeof(sljit_sw);
      }
      cc += 1 + IMM2_SIZE;
      break;

    case OP_CLASS:
    case OP_NCLASS:
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
      class_len = (*cc == OP_XCLASS) ? GET(cc, 1) : 1 + 32 / (int)sizeof(pcre_uchar);
#else
      class_len = 1 + 32 / (int)sizeof(pcre_uchar);
#endif
      /* The number of slots depends on the iterator that follows the class. */
      if (PRIVATE_DATA(cc))
        switch (get_class_iterator_size(cc + class_len))
        {
        case 1:
          pending = 1;
          slots[0] = PRIVATE_DATA(cc);
          break;

        case 2:
          pending = 2;
          slots[0] = PRIVATE_DATA(cc);
          slots[1] = slots[0] + sizeof(sljit_sw);
          break;

        default:
          SLJIT_UNREACHABLE();
          break;
        }
      cc += class_len;
      break;

    default:
      cc = next_opcode(common, cc);
      SLJIT_ASSERT(cc != NULL);
      break;
    }

    /* Step 2: transfer the collected slots, highest index first. */
    while (pending-- > 0)
    {
      if (save)
      {
        if (use_tmp1)
        {
          /* Flush the previous TMP1 value before reusing the register. */
          if (!tmp1_free)
          {
            OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
            stackptr += sizeof(sljit_sw);
          }
          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), slots[pending]);
          tmp1_free = FALSE;
          use_tmp1 = FALSE;
        }
        else
        {
          /* Flush the previous TMP2 value before reusing the register. */
          if (!tmp2_free)
          {
            OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
            stackptr += sizeof(sljit_sw);
          }
          OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), slots[pending]);
          tmp2_free = FALSE;
          use_tmp1 = TRUE;
        }
      }
      else if (use_tmp1)
      {
        SLJIT_ASSERT(!tmp1_free);
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), slots[pending], TMP1, 0);
        /* Refill TMP1 while stack words remain. */
        tmp1_free = stackptr >= stacktop;
        if (!tmp1_free)
        {
          OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), stackptr);
          stackptr += sizeof(sljit_sw);
        }
        use_tmp1 = FALSE;
      }
      else
      {
        SLJIT_ASSERT(!tmp2_free);
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), slots[pending], TMP2, 0);
        /* Refill TMP2 while stack words remain. */
        tmp2_free = stackptr >= stacktop;
        if (!tmp2_free)
        {
          OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), stackptr);
          stackptr += sizeof(sljit_sw);
        }
        use_tmp1 = TRUE;
      }
    }
  }
  while (!finished);

  if (save)
  {
    /* Flush whatever is still in the registers, older value first. */
    if (use_tmp1)
    {
      if (!tmp1_free)
      {
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
        stackptr += sizeof(sljit_sw);
      }
      if (!tmp2_free)
      {
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
        stackptr += sizeof(sljit_sw);
      }
    }
    else
    {
      if (!tmp2_free)
      {
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP2, 0);
        stackptr += sizeof(sljit_sw);
      }
      if (!tmp1_free)
      {
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), stackptr, TMP1, 0);
        stackptr += sizeof(sljit_sw);
      }
    }
  }

  SLJIT_ASSERT(cc == ccend && stackptr == stacktop && (save || (tmp1_free && tmp2_free)));
}
2202
|
|
|
|
|
|
|
|
2203
|
|
|
|
|
|
|
/* Walks the bracketed group that starts at cc and, for every OP_THEN or
   OP_THEN_ARG found inside it, writes 1 into the common->then_offsets slot that
   is currently in effect. current_offset is the slot inherited from the caller
   (NULL when there is none). Nested groups are handled recursively.
   Returns the end of the group as reported by bracketend(). */
static SLJIT_INLINE pcre_uchar *set_then_offsets(compiler_common *common, pcre_uchar *cc, sljit_u8 *current_offset)
{
  pcre_uchar *group_end = bracketend(cc);
  BOOL has_alternatives = cc[GET(cc, 1)] == OP_ALT;
  BOOL nested_group;

  /* An assertion drops the slot inherited from the caller. */
  if (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT)
    current_offset = NULL;
  /* The branches of a conditional block are not treated as alternatives. */
  if (*cc == OP_COND || *cc == OP_SCOND)
    has_alternatives = FALSE;

  cc = next_opcode(common, cc);
  /* With real alternatives, the slot belongs to the first opcode of the first one. */
  if (has_alternatives)
    current_offset = common->then_offsets + (cc - common->start);

  while (cc < group_end)
  {
    nested_group = (*cc >= OP_ASSERT && *cc <= OP_ASSERTBACK_NOT) || (*cc >= OP_ONCE && *cc <= OP_SCOND);
    if (nested_group)
    {
      /* The recursive call returns the position just past the nested group. */
      cc = set_then_offsets(common, cc, current_offset);
      continue;
    }

    /* A new alternative starts right after the OP_ALT link field. */
    if (has_alternatives && *cc == OP_ALT)
      current_offset = common->then_offsets + (cc + 1 + LINK_SIZE - common->start);

    if (current_offset != NULL && *cc >= OP_THEN && *cc <= OP_THEN_ARG)
      *current_offset = 1;

    cc = next_opcode(common, cc);
  }

  return group_end;
}
2235
|
|
|
|
|
|
|
|
2236
|
|
|
|
|
|
|
/* Remove the CASE_ITERATOR_* helper macros so they are no longer defined from this point on. */
#undef CASE_ITERATOR_PRIVATE_DATA_1 |
2237
|
|
|
|
|
|
|
#undef CASE_ITERATOR_PRIVATE_DATA_2A |
2238
|
|
|
|
|
|
|
#undef CASE_ITERATOR_PRIVATE_DATA_2B |
2239
|
|
|
|
|
|
|
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_1 |
2240
|
|
|
|
|
|
|
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2A |
2241
|
|
|
|
|
|
|
#undef CASE_ITERATOR_TYPE_PRIVATE_DATA_2B |
2242
|
|
|
|
|
|
|
|
2243
|
|
|
|
|
|
|
/* Returns TRUE when value has at most one bit set. Note that zero also
   satisfies this test, so callers that need a strict power of two must
   exclude zero themselves. */
static SLJIT_INLINE BOOL is_powerof2(unsigned int value)
{
  /* value - 1 flips the lowest set bit and everything below it, so the
     AND clears exactly that lowest set bit. */
  unsigned int without_lowest_bit = value & (value - 1);

  return without_lowest_bit == 0;
}
2247
|
|
|
|
|
|
|
|
2248
|
|
|
|
|
|
|
/* Binds every jump stored in the list to the given label. */
static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
{
  jump_list *item;

  for (item = list; item != NULL; item = item->next)
  {
    /* sljit_set_label is clever enough to do nothing
       if either the jump or the label is NULL. */
    SET_LABEL(item->jump, label);
  }
}
2258
|
|
|
|
|
|
|
|
2259
|
|
|
|
|
|
|
/* Pushes jump onto the front of *list. The list node comes from
   sljit_alloc_memory(); if that returns NULL the list is left untouched. */
static SLJIT_INLINE void add_jump(struct sljit_compiler *compiler, jump_list **list, struct sljit_jump *jump)
{
  jump_list *node = sljit_alloc_memory(compiler, sizeof(jump_list));

  if (node == NULL)
    return;

  node->jump = jump;
  node->next = *list;
  *list = node;
}
2269
|
|
|
|
|
|
|
|
2270
|
|
|
|
|
|
|
/* Records a stub on common->stubs: start is the jump that enters the stub and
   the label emitted here is where the stub continues afterwards (see
   flush_stubs). Nothing is recorded, and no label is emitted, if the node
   cannot be allocated. */
static void add_stub(compiler_common *common, struct sljit_jump *start)
{
  DEFINE_COMPILER;
  stub_list *stub = sljit_alloc_memory(compiler, sizeof(stub_list));

  if (stub == NULL)
    return;

  stub->start = start;
  stub->quit = LABEL();
  stub->next = common->stubs;
  common->stubs = stub;
}
2283
|
|
|
|
|
|
|
|
2284
|
|
|
|
|
|
|
/* Emits the body of every pending stub and then empties the stub list.
   Each stub lands its entry jump here, performs a fast call that is queued on
   common->stackalloc, and jumps back to the label saved by add_stub(). */
static void flush_stubs(compiler_common *common)
{
  DEFINE_COMPILER;
  stub_list *stub;

  for (stub = common->stubs; stub != NULL; stub = stub->next)
  {
    JUMPHERE(stub->start);
    add_jump(compiler, &common->stackalloc, JUMP(SLJIT_FAST_CALL));
    JUMPTO(SLJIT_JUMP, stub->quit);
  }

  common->stubs = NULL;
}
2298
|
|
|
|
|
|
|
|
2299
|
|
|
|
|
|
|
/* Emits a label at the current position and records it, together with
   update_addr, at the front of common->label_addrs. Does nothing (and emits no
   label) when the list node cannot be allocated. */
static void add_label_addr(compiler_common *common, sljit_uw *update_addr)
{
  DEFINE_COMPILER;
  label_addr_list *entry = sljit_alloc_memory(compiler, sizeof(label_addr_list));

  if (entry != NULL)
  {
    entry->label = LABEL();
    entry->update_addr = update_addr;
    entry->next = common->label_addrs;
    common->label_addrs = entry;
  }
}
2312
|
|
|
|
|
|
|
|
2313
|
|
|
|
|
|
|
/* Emits code that decrements COUNT_MATCH by one and, when the result is zero,
   takes a jump that is queued on common->calllimit. */
static SLJIT_INLINE void count_match(compiler_common *common)
{
  DEFINE_COMPILER;
  struct sljit_jump *limit_hit;

  OP2(SLJIT_SUB | SLJIT_SET_Z, COUNT_MATCH, 0, COUNT_MATCH, 0, SLJIT_IMM, 1);
  limit_hit = JUMP(SLJIT_ZERO);
  add_jump(compiler, &common->calllimit, limit_hit);
}
2320
|
|
|
|
|
|
|
|
2321
|
|
|
|
|
|
|
/* Emits code that lowers STACK_TOP by size machine words and registers a stub
   that is entered when STACK_TOP drops below STACK_LIMIT.
   May destroy all locals and registers except TMP2. */
static SLJIT_INLINE void allocate_stack(compiler_common *common, int size)
{
  DEFINE_COMPILER;
  struct sljit_jump *below_limit;

  SLJIT_ASSERT(size > 0);
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
#ifdef DESTROY_REGISTERS
  /* Debug aid: overwrite everything this helper is allowed to destroy. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 12345);
  OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
  OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, TMP1, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
#endif
  below_limit = CMP(SLJIT_LESS, STACK_TOP, 0, STACK_LIMIT, 0);
  add_stub(common, below_limit);
}
2337
|
|
|
|
|
|
|
|
2338
|
|
|
|
|
|
|
/* Emits code that raises STACK_TOP by size machine words, undoing a matching
   allocate_stack() adjustment. */
static SLJIT_INLINE void free_stack(compiler_common *common, int size)
{
  DEFINE_COMPILER;

  SLJIT_ASSERT(size > 0);

  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, size * sizeof(sljit_sw));
}
2345
|
|
|
|
|
|
|
|
2346
|
|
|
|
|
|
|
/* Allocates a block of size bytes preceded by one link word and pushes it onto
   the singly linked list headed by common->read_only_data_head.

   Returns a pointer to the usable area (just past the link word), or NULL when
   the compiler is already in an error state or the allocation fails. On
   allocation failure the compiler is put into the memory-error state. */
static sljit_uw * allocate_read_only_data(compiler_common *common, sljit_uw size)
{
  DEFINE_COMPILER;
  sljit_uw *result;

  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;

  /* size + sizeof(sljit_uw) must not wrap around: a wrapped sum would request
     a block that is too small for the link word plus the caller's data. Treat
     such a request like any other failed allocation. */
  if (SLJIT_UNLIKELY(size > ~(sljit_uw)0 - sizeof(sljit_uw)))
  {
    sljit_set_compiler_memory_error(compiler);
    return NULL;
  }

  result = (sljit_uw *)SLJIT_MALLOC(size + sizeof(sljit_uw), compiler->allocator_data);
  if (SLJIT_UNLIKELY(result == NULL))
  {
    sljit_set_compiler_memory_error(compiler);
    return NULL;
  }

  /* The first word of every block stores the previous list head. */
  *(void**)result = common->read_only_data_head;
  common->read_only_data_head = (void *)result;
  return result + 1;
}
2365
|
|
|
|
|
|
|
|
2366
|
|
|
|
|
|
|
static void free_read_only_data(void *current, void *allocator_data) |
2367
|
|
|
|
|
|
|
{ |
2368
|
|
|
|
|
|
|
void *next; |
2369
|
|
|
|
|
|
|
|
2370
|
|
|
|
|
|
|
SLJIT_UNUSED_ARG(allocator_data); |
2371
|
|
|
|
|
|
|
|
2372
|
|
|
|
|
|
|
while (current != NULL) |
2373
|
|
|
|
|
|
|
{ |
2374
|
|
|
|
|
|
|
next = *(void**)current; |
2375
|
|
|
|
|
|
|
SLJIT_FREE(current, allocator_data); |
2376
|
|
|
|
|
|
|
current = next; |
2377
|
|
|
|
|
|
|
} |
2378
|
|
|
|
|
|
|
} |
2379
|
|
|
|
|
|
|
|
2380
|
|
|
|
|
|
|
/* Emits code that stores "begin - 1" into OVECTOR(1) .. OVECTOR(length - 1).
   Short vectors are written with straight-line stores; longer ones use a
   counted loop, with a pre-update store when sljit_emit_mem() reports that
   form as supported. */
static SLJIT_INLINE void reset_ovector(compiler_common *common, int length)
{
  DEFINE_COMPILER;
  struct sljit_label *fill_loop;
  BOOL has_pre;
  int slot;

  /* At this point we can freely use all temporary registers. */
  SLJIT_ASSERT(length > 1);
  /* TMP1 returns with begin - 1. */
  OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_S0), SLJIT_OFFSETOF(jit_arguments, begin), SLJIT_IMM, IN_UCHARS(1));

  if (length < 8)
  {
    slot = 1;
    while (slot < length)
    {
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(slot), SLJIT_R0, 0);
      slot++;
    }
    return;
  }

  /* SLJIT_MEM_SUPP only queries whether the pre-update store is available. */
  has_pre = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw)) == SLJIT_SUCCESS;

  /* The pre-update form advances the pointer before storing, so it starts one
     word earlier than the plain store-then-add form. */
  if (has_pre)
  {
    GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START);
  }
  else
  {
    GET_LOCAL_BASE(SLJIT_R1, 0, OVECTOR_START + sizeof(sljit_sw));
  }
  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, length - 1);

  fill_loop = LABEL();
  if (has_pre)
  {
    sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, SLJIT_R0, SLJIT_MEM1(SLJIT_R1), sizeof(sljit_sw));
  }
  else
  {
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R0, 0);
    OP2(SLJIT_ADD, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_sw));
  }
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, 1);
  JUMPTO(SLJIT_NOT_ZERO, fill_loop);
}
2418
|
|
|
|
|
|
|
|
2419
|
|
|
|
|
|
|
/* Emits code that writes STR_PTR minus one character unit into every local
   slot from common->fast_fail_start_ptr up to (not including)
   common->fast_fail_end_ptr. Uses TMP1. */
static SLJIT_INLINE void reset_fast_fail(compiler_common *common)
{
  DEFINE_COMPILER;
  sljit_s32 slot;

  SLJIT_ASSERT(common->fast_fail_start_ptr < common->fast_fail_end_ptr);

  OP2(SLJIT_SUB, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  slot = common->fast_fail_start_ptr;
  while (slot < common->fast_fail_end_ptr)
  {
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), slot, TMP1, 0);
    slot += sizeof(sljit_sw);
  }
}
2430
|
|
|
|
|
|
|
|
2431
|
|
|
|
|
|
|
/* Emits code that copies the value held in OVECTOR(1) into
   OVECTOR(2) .. OVECTOR(length - 1), clears the mark and control-head slots
   when they are in use, reloads TMP1 from the start_ptr slot and reloads
   STACK_TOP from the `end` field of the stack referenced by the JIT
   arguments. */
static SLJIT_INLINE void do_reset_match(compiler_common *common, int length)
{
  DEFINE_COMPILER;
  struct sljit_label *fill_loop;
  BOOL has_pre;
  int slot;

  SLJIT_ASSERT(length > 1);
  /* OVECTOR(1) contains the "string begin - 1" constant. */
  if (length > 2)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

  if (length < 8)
  {
    /* Short vector: one store per slot. */
    slot = 2;
    while (slot < length)
    {
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(slot), TMP1, 0);
      slot++;
    }
  }
  else
  {
    /* SLJIT_MEM_SUPP only queries whether the pre-update store is available. */
    has_pre = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw)) == SLJIT_SUCCESS;

    /* The pre-update form starts one word before the first slot to fill. */
    if (has_pre)
    {
      GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + sizeof(sljit_sw));
    }
    else
    {
      GET_LOCAL_BASE(TMP2, 0, OVECTOR_START + 2 * sizeof(sljit_sw));
    }
    /* STACK_TOP serves as the loop counter; it is reloaded below. */
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, length - 2);

    fill_loop = LABEL();
    if (has_pre)
    {
      sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_STORE | SLJIT_MEM_PRE, TMP1, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
    else
    {
      OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, TMP1, 0);
      OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, sizeof(sljit_sw));
    }
    OP2(SLJIT_SUB | SLJIT_SET_Z, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, fill_loop);
  }

  OP1(SLJIT_MOV, STACK_TOP, 0, ARGUMENTS, 0);
  if (common->mark_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0);
  if (common->control_head_ptr != 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
  /* arguments -> stack -> end, with the TMP1 reload in between. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(jit_arguments, stack));
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), SLJIT_OFFSETOF(struct sljit_stack, end));
}
2478
|
|
|
|
|
|
|
|
2479
|
|
|
|
|
|
|
/* Follows the chain of records starting at current (word 0 of each record
   links to the next one, word 1 is the record type) looking for a type_mark
   record whose name, stored in word 2, equals skip_arg. Returns word 3 of the
   first such record, or 0 when the chain ends without a match. */
static sljit_sw SLJIT_FUNC do_search_mark(sljit_sw *current, const pcre_uchar *skip_arg)
{
  for (; current != NULL; current = (sljit_sw*)current[0])
  {
    if (current[1] == type_mark)
    {
      if (STRCMP_UC_UC(skip_arg, (pcre_uchar *)current[2]) == 0)
        return current[3];
    }
    else if (current[1] != type_then_trap)
    {
      /* Only mark and then-trap records are expected on this chain. */
      SLJIT_UNREACHABLE();
    }

    /* The chain must either end or move to a higher address. */
    SLJIT_ASSERT(current[0] == 0 || current < (sljit_sw*)current[0]);
  }

  return 0;
}
2502
|
|
|
|
|
|
|
|
2503
|
|
|
|
|
|
|
/* Emits the code that finishes a successful match: stores STR_PTR into
   OVECTOR(1), publishes the mark pointer when marks are in use, converts
   offset_count internal ovector entries into int offsets relative to the
   subject start and writes them to arguments->offsets, and finally computes
   the return value from the highest ovector pair that was set. */
static SLJIT_INLINE void copy_ovector(compiler_common *common, int topbracket)
{
  DEFINE_COMPILER;
  struct sljit_label *loop;
  struct sljit_jump *early_quit;
  BOOL has_pre;
  BOOL has_mark = common->mark_ptr != 0;

  /* At this point we can freely use all registers. */
  /* S2 keeps the old OVECTOR(1) value; the search at the end compares against it. */
  OP1(SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(1), STR_PTR, 0);

  OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0);
  if (has_mark)
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
  OP1(SLJIT_MOV_S32, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offset_count));
  if (has_mark)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, mark_ptr), SLJIT_R2, 0);
  /* R2 starts one int before the output array because the loop advances it first. */
  OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, offsets), SLJIT_IMM, sizeof(int));
  OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, begin));

  /* SLJIT_MEM_SUPP only queries whether the pre-update load is available. */
  has_pre = sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw)) == SLJIT_SUCCESS;
  GET_LOCAL_BASE(SLJIT_S0, 0, OVECTOR_START - (has_pre ? sizeof(sljit_sw) : 0));

  /* Unlikely, but possible */
  early_quit = CMP(SLJIT_EQUAL, SLJIT_R1, 0, SLJIT_IMM, 0);
  loop = LABEL();

  if (has_pre)
  {
    sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_S1, SLJIT_MEM1(SLJIT_S0), sizeof(sljit_sw));
  }
  else
  {
    OP1(SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 0);
    OP2(SLJIT_ADD, SLJIT_S0, 0, SLJIT_S0, 0, SLJIT_IMM, sizeof(sljit_sw));
  }

  OP2(SLJIT_ADD, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, sizeof(int));
  OP2(SLJIT_SUB, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_R0, 0);
  /* Copy the integer value to the output buffer */
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif

  OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R2), 0, SLJIT_S1, 0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
  JUMPTO(SLJIT_NOT_ZERO, loop);
  JUMPHERE(early_quit);

  /* Calculate the return value, which is the maximum ovector value. */
  if (topbracket <= 1)
  {
    OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);
    return;
  }

  /* Scan the pairs from the top downwards, decrementing R1 for each pair,
     while the loaded entry still equals the value saved in S2. */
  if (sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw))) == SLJIT_SUCCESS)
  {
    GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + topbracket * 2 * sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

    /* OVECTOR(0) is never equal to SLJIT_S2. */
    loop = LABEL();
    sljit_emit_mem(compiler, SLJIT_MOV | SLJIT_MEM_PRE, SLJIT_R2, SLJIT_MEM1(SLJIT_R0), -(2 * (sljit_sw)sizeof(sljit_sw)));
    OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
    CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
  }
  else
  {
    GET_LOCAL_BASE(SLJIT_R0, 0, OVECTOR_START + (topbracket - 1) * 2 * sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, topbracket + 1);

    /* OVECTOR(0) is never equal to SLJIT_S2. */
    loop = LABEL();
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R0), 0);
    OP2(SLJIT_SUB, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_IMM, 2 * (sljit_sw)sizeof(sljit_sw));
    OP2(SLJIT_SUB, SLJIT_R1, 0, SLJIT_R1, 0, SLJIT_IMM, 1);
    CMPTO(SLJIT_EQUAL, SLJIT_R2, 0, SLJIT_S2, 0, loop);
  }
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_R1, 0);
}
2581
|
|
|
|
|
|
|
|
2582
|
|
|
|
|
|
|
/* Emits the exit sequence for a partial match: the return register is set to
   PCRE_ERROR_PARTIAL and, when real_offset_count permits, up to three int
   offsets (relative to the subject start) are stored into arguments->offsets
   before jumping to quit. */
static SLJIT_INLINE void return_with_partial_match(compiler_common *common, struct sljit_label *quit)
{
  DEFINE_COMPILER;
  struct sljit_jump *skip_third_slot;
  BOOL hard_partial = common->mode == JIT_PARTIAL_HARD_COMPILE;

  SLJIT_COMPILE_ASSERT(STR_END == SLJIT_S1, str_end_must_be_saved_reg2);
  SLJIT_ASSERT(common->start_used_ptr != 0 && common->start_ptr != 0
    && (common->mode == JIT_PARTIAL_SOFT_COMPILE ? common->hit_start != 0 : common->hit_start == 0));

  OP1(SLJIT_MOV, SLJIT_R1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_PARTIAL);
  OP1(SLJIT_MOV_S32, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, real_offset_count));
  /* With fewer than two output slots nothing is stored at all. */
  CMPTO(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 2, quit);

  /* Store match begin and end. */
  OP1(SLJIT_MOV, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, begin));
  OP1(SLJIT_MOV, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_R1), SLJIT_OFFSETOF(jit_arguments, offsets));

  /* offsets[2] is written only when at least three slots are available. */
  skip_third_slot = CMP(SLJIT_SIG_LESS, SLJIT_R2, 0, SLJIT_IMM, 3);
  OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), hard_partial ? common->start_ptr : (common->hit_start + (int)sizeof(sljit_sw)), SLJIT_S0, 0);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
  OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), 2 * sizeof(int), SLJIT_R2, 0);
  JUMPHERE(skip_third_slot);

  /* offsets[1] = STR_END - begin. STR_END lives in S1, which is overwritten here. */
  OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), hard_partial ? common->start_used_ptr : common->hit_start);
  OP2(SLJIT_SUB, SLJIT_S1, 0, STR_END, 0, SLJIT_S0, 0);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  OP2(SLJIT_ASHR, SLJIT_S1, 0, SLJIT_S1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
  OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), sizeof(int), SLJIT_S1, 0);

  /* offsets[0] = the value loaded into R2 above, minus begin. */
  OP2(SLJIT_SUB, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_S0, 0);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  OP2(SLJIT_ASHR, SLJIT_R2, 0, SLJIT_R2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
  OP1(SLJIT_MOV_S32, SLJIT_MEM1(SLJIT_R1), 0, SLJIT_R2, 0);

  JUMPTO(SLJIT_JUMP, quit);
}
2623
|
|
|
|
|
|
|
|
2624
|
|
|
|
|
|
|
/* In the partial matching modes, emits code that overwrites the
   start_used_ptr slot with STR_PTR unless the comparison below skips the
   store. Emits nothing in any other mode. May destroy TMP1. */
static SLJIT_INLINE void check_start_used_ptr(compiler_common *common)
{
  DEFINE_COMPILER;
  struct sljit_jump *keep_current;

  if (common->mode != JIT_PARTIAL_SOFT_COMPILE && common->mode != JIT_PARTIAL_HARD_COMPILE)
    return;

  if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
  {
    /* The value of -1 must be kept for start_used_ptr! */
    OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, 1);
    /* Jumps if start_used_ptr < STR_PTR, or start_used_ptr == -1. Although overwriting
       is not necessary if start_used_ptr == STR_PTR, it does not hurt as well. */
    keep_current = CMP(SLJIT_LESS_EQUAL, TMP1, 0, STR_PTR, 0);
  }
  else
  {
    keep_current = CMP(SLJIT_LESS_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  }

  /* Both modes share the store and the landing point of the skip jump. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  JUMPHERE(keep_current);
}
2647
|
|
|
|
|
|
|
|
2648
|
|
|
|
|
|
|
/* Detects if the character at cc has an othercase, i.e. whether its case
   counterpart differs from the character itself. */
static SLJIT_INLINE BOOL char_has_othercase(compiler_common *common, pcre_uchar *cc)
{
  unsigned int c;

#ifdef SUPPORT_UTF
  if (!common->utf)
    c = *cc;
  else
  {
    GETCHAR(c, cc);
    if (c > 127)
    {
      /* Characters above 127 are only known to the Unicode tables. */
#ifdef SUPPORT_UCP
      return c != UCD_OTHERCASE(c);
#else
      return FALSE;
#endif
    }
#ifndef COMPILE_PCRE8
    return common->fcc[c] != c;
#endif
  }
#else
  c = *cc;
#endif

  /* The fcc table is only consulted for characters accepted by MAX_255(). */
  if (!MAX_255(c))
    return FALSE;
  return common->fcc[c] != c;
}
2674
|
|
|
|
|
|
|
|
2675
|
|
|
|
|
|
|
/* Returns with the othercase of c. In UTF mode, characters above 127 are
   mapped with UCD_OTHERCASE() when SUPPORT_UCP is defined and returned
   unchanged otherwise; every other case goes through the fcc table lookup. */
static SLJIT_INLINE unsigned int char_othercase(compiler_common *common, unsigned int c)
{
#ifdef SUPPORT_UTF
  if (c > 127 && common->utf)
  {
#ifdef SUPPORT_UCP
    return UCD_OTHERCASE(c);
#else
    return c;
#endif
  }
#endif

  return TABLE_GET(c, common->fcc, c);
}
2690
|
|
|
|
|
|
|
|
2691
|
|
|
|
|
|
|
/* Detects if the character and its othercase has only 1 bit difference.
   Returns 0 when they differ in more than one bit. Otherwise the result packs
   the differing bit, reduced to fit the low 8 bits, together with an index in
   bits 8 and up that identifies which unit of the encoded character holds
   it. The caller must pass a character that does have an othercase. */
static unsigned int char_get_othercase_bit(compiler_common *common, pcre_uchar *cc)
{
  unsigned int c, oc, bit;
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
  int n;
#endif

#ifdef SUPPORT_UTF
  if (!common->utf)
  {
    c = *cc;
    oc = TABLE_GET(c, common->fcc, c);
  }
  else
  {
    GETCHAR(c, cc);
    if (c > 127)
    {
#ifdef SUPPORT_UCP
      oc = UCD_OTHERCASE(c);
#else
      oc = c;
#endif
    }
    else
      oc = common->fcc[c];
  }
#else
  c = *cc;
  oc = TABLE_GET(c, common->fcc, c);
#endif

  SLJIT_ASSERT(c != oc);

  bit = c ^ oc;
  /* Optimized for English alphabet. */
  if (c <= 127 && bit == 0x20)
    return 0x20;

  /* Since c != oc, they must have at least 1 bit difference. */
  if (!is_powerof2(bit))
    return 0;

#if defined COMPILE_PCRE8

#ifdef SUPPORT_UTF
  if (common->utf && c > 127)
  {
    /* Step back one unit for every 6 low bits that do not contain the
       difference, starting from the extra-length count of the lead unit. */
    n = GET_EXTRALEN(*cc);
    while ((bit & 0x3f) == 0)
    {
      n--;
      bit >>= 6;
    }
    return (n << 8) | bit;
  }
#endif /* SUPPORT_UTF */
  return bit;

#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32

#ifdef SUPPORT_UTF
  if (common->utf && c > 65535)
  {
    /* A difference below bit 10 is reported with index 2 or 3; otherwise
       the bit is shifted down by 10 and handled by the common tail. */
    if (bit < (1 << 10))
    {
      if (bit < 256)
        return (2 << 8) | bit;
      return (3 << 8) | (bit >> 8);
    }
    bit >>= 10;
  }
#endif /* SUPPORT_UTF */
  if (bit < 256)
    return bit;
  return (1 << 8) | (bit >> 8);

#endif /* COMPILE_PCRE[8|16|32] */
}
2766
|
|
|
|
|
|
|
|
2767
|
|
|
|
|
|
|
/* Checks whether a partial match has occurred. Does not modify registers.
   Nothing is emitted in JIT_COMPILE mode. Otherwise, in soft partial mode the
   hit_start slot is cleared, and in the remaining mode control is transferred
   to the partial match handler. Without force, the action is skipped when
   start_used_ptr >= STR_PTR; with force, it is skipped only in soft partial
   mode when start_used_ptr is -1. */
static void check_partial(compiler_common *common, BOOL force)
{
  DEFINE_COMPILER;
  struct sljit_jump *skip = NULL;
  BOOL soft_partial;

  SLJIT_ASSERT(!force || common->mode != JIT_COMPILE);

  if (common->mode == JIT_COMPILE)
    return;

  soft_partial = common->mode == JIT_PARTIAL_SOFT_COMPILE;

  if (force)
  {
    if (soft_partial)
      skip = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1);
  }
  else
  {
    skip = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  }

  if (soft_partial)
  {
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
  }
  else if (common->partialmatchlabel != NULL)
  {
    /* The handler already exists: jump straight to it. */
    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  }
  else
  {
    /* Otherwise queue the jump on the partialmatch list. */
    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  }

  if (skip != NULL)
    JUMPHERE(skip);
}
2796
|
|
|
|
|
|
|
|
2797
|
|
|
|
|
|
|
/* Emits the end-of-subject test and queues the resulting jumps on
   end_reached. In the partial matching modes the partial match bookkeeping is
   performed as well when the end has been reached.
   Does not affect registers. Usually used in a tight spot. */
static void check_str_end(compiler_common *common, jump_list **end_reached)
{
  DEFINE_COMPILER;
  struct sljit_jump *not_at_end;

  if (common->mode == JIT_COMPILE)
  {
    add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
    return;
  }

  not_at_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);

  /* Both partial modes start with the same start_used_ptr comparison. */
  add_jump(compiler, end_reached, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0));

  if (common->mode == JIT_PARTIAL_SOFT_COMPILE)
  {
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
    add_jump(compiler, end_reached, JUMP(SLJIT_JUMP));
  }
  else if (common->partialmatchlabel != NULL)
  {
    JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  }
  else
  {
    add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
  }

  JUMPHERE(not_at_end);
}
2826
|
|
|
|
|
|
|
|
2827
|
|
|
|
|
|
|
/* Emits the end-of-subject test and queues the resulting jumps on backtracks.
   In JIT_COMPILE mode this is a single STR_PTR >= STR_END comparison; in the
   partial matching modes reaching the end also clears hit_start (soft mode)
   or transfers control to the partial match handler (the other mode), unless
   start_used_ptr >= STR_PTR. */
static void detect_partial_match(compiler_common *common, jump_list **backtracks)
{
  DEFINE_COMPILER;
  struct sljit_jump *more_input;
  struct sljit_jump *start_not_passed;

  if (common->mode == JIT_COMPILE)
  {
    add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
    return;
  }

  /* Partial matching mode. */
  more_input = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
  start_not_passed = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0);
  add_jump(compiler, backtracks, start_not_passed);

  if (common->mode != JIT_PARTIAL_SOFT_COMPILE)
  {
    if (common->partialmatchlabel == NULL)
      add_jump(compiler, &common->partialmatch, JUMP(SLJIT_JUMP));
    else
      JUMPTO(SLJIT_JUMP, common->partialmatchlabel);
  }
  else
  {
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
  }

  JUMPHERE(more_input);
}
2855
|
|
|
|
|
|
|
|
2856
|
|
|
|
|
|
|
static void peek_char(compiler_common *common, sljit_u32 max)
{
/* Reads the character into TMP1, keeps STR_PTR.
Does not check STR_END. TMP2 Destroyed.

max is the largest character value the caller is interested in. When every
interesting value fits into a single code unit (max < 128 in UTF-8 mode,
max < 0xd800 in UTF-16 mode) only the raw code unit is loaded and no
decoding code is emitted. */
DEFINE_COMPILER;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
struct sljit_jump *jump;
#endif

SLJIT_UNUSED_ARG(max);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (common->utf)
  {
  if (max < 128) return;

  /* Code units below 0xc0 are not decoded further. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  /* The utfreadchar helper reads the continuation bytes starting at
  STR_PTR, so step over the first byte before calling it. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  add_jump(compiler, &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  /* The helper leaves the length of the sequence in TMP2 and STR_PTR after
  the character; subtracting the length restores STR_PTR. */
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  JUMPHERE(jump);
  }
#endif /* SUPPORT_UTF && COMPILE_PCRE8 */

#if defined SUPPORT_UTF && defined COMPILE_PCRE16
if (common->utf)
  {
  if (max < 0xd800) return;

  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  /* TMP2 contains the high surrogate. */
  /* STR_PTR still points at the high surrogate (it is never advanced in
  this function), so the low surrogate is the next code unit. */
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
  /* ((high - 0xd800) + 0x40) << 10 == ((high - 0xd800) << 10) + 0x10000 */
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
  OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
  JUMPHERE(jump);
  }
#endif
}
2898
|
|
|
|
|
|
|
|
2899
|
|
|
|
|
|
|
#if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
2900
|
|
|
|
|
|
|
|
2901
|
|
|
|
|
|
|
static BOOL is_char7_bitset(const sljit_u8 *bitset, BOOL nclass)
{
/* Tells whether the character codes below 128 are enough
to determine a match. That is the case when bytes 16..31 of the 32 byte
bitset are uniform: all 0xff when nclass is set, all zero otherwise. */
const sljit_u8 expected = nclass ? 0xff : 0;
int idx;

for (idx = 16; idx < 32; idx++)
  if (bitset[idx] != expected)
    return FALSE;

return TRUE;
}
2917
|
|
|
|
|
|
|
|
2918
|
|
|
|
|
|
|
static void read_char7_type(compiler_common *common, BOOL full_read)
{
/* Reads the precise character type of a character into TMP1, if the character
is less than 128. Otherwise it returns with zero. Does not check STR_END.
When full_read is set, code is also emitted which steps over the remaining
bytes of a multi-byte sequence; without it STR_PTR only moves past the first
byte. TMP2 is used as scratch. UTF mode only. */
DEFINE_COMPILER;
struct sljit_jump *single_byte;

SLJIT_ASSERT(common->utf);

/* First byte into TMP2, then look up its type in the ctypes table. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);

if (!full_read)
  return;

/* For lead bytes >= 0xc0 add the utf8_table4 entry of the byte to STR_PTR. */
single_byte = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
JUMPHERE(single_byte);
}
2941
|
|
|
|
|
|
|
|
2942
|
|
|
|
|
|
|
#endif /* SUPPORT_UTF && COMPILE_PCRE8 */ |
2943
|
|
|
|
|
|
|
|
2944
|
|
|
|
|
|
|
static void read_char_range(compiler_common *common, sljit_u32 min, sljit_u32 max, BOOL update_str_ptr)
{
/* Reads the precise value of a character into TMP1, if the character is
between min and max (c >= min && c <= max). Otherwise it returns with a value
outside the range. Does not check STR_END.

The first code unit is always loaded and STR_PTR is always moved past it.
In the UTF modes min and max select the cheapest decoder which is still exact
inside [min, max]. When update_str_ptr is set, the emitted code also steps
over the remaining code units of sequences it does not decode. TMP2 is used
as scratch, and RETURN_ADDR as well in the UTF-8 paths. */
DEFINE_COMPILER;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(update_str_ptr);
SLJIT_UNUSED_ARG(min);
SLJIT_UNUSED_ARG(max);
SLJIT_ASSERT(min <= max);

OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (common->utf)
  {
  /* Only single byte values matter and STR_PTR need not be exact. */
  if (max < 128 && !update_str_ptr) return;

  /* Bytes below 0xc0 are not decoded further. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  if (min >= 0x10000)
    {
    /* Only four byte sequences can be in range. TMP2 is the lead byte minus
    0xf0; when it is greater than 7 the decoding is skipped. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xf0);
    if (update_str_ptr)
      /* Remember the utf8_table4 entry of the lead byte; it is added to
      STR_PTR at the end on both paths. */
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x7);
    /* Append the low six bits of each of the three following bytes. */
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
    if (!update_str_ptr)
      /* Without update_str_ptr, STR_PTR is advanced only on this path. */
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump2);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (min >= 0x800 && max <= 0xffff)
    {
    /* Only three byte sequences can be in range. Same scheme as above with
    the lead byte minus 0xe0 and two following bytes. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xe0);
    if (update_str_ptr)
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xf);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump2);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  else if (max >= 0x800)
    /* General case: call a shared decoder; the utfreadchar16 variant is
    chosen when nothing at or above 0x10000 is of interest. */
    add_jump(compiler, (max < 0x10000) ? &common->utfreadchar16 : &common->utfreadchar, JUMP(SLJIT_FAST_CALL));
  else if (max < 128)
    {
    /* Reached only with update_str_ptr set (see the early return above):
    no decoding, just add the utf8_table4 entry of the lead byte to STR_PTR. */
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
    }
  else
    {
    /* 128 <= max < 0x800: the value is decoded as a two byte sequence. */
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    if (!update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    else
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    if (update_str_ptr)
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, RETURN_ADDR, 0);
    }
  JUMPHERE(jump);
  }
#endif

#if defined SUPPORT_UTF && defined COMPILE_PCRE16
if (common->utf)
  {
  if (max >= 0x10000)
    {
    /* Surrogate pairs must be decoded. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
    /* TMP2 contains the high surrogate. */
    /* STR_PTR was already moved past the high surrogate, so offset 0 is the
    low surrogate. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x40);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 10);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3ff);
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
    JUMPHERE(jump);
    return;
    }

  /* Nothing from the surrogate range is of interest and STR_PTR need not be
  exact. */
  if (max < 0xd800 && !update_str_ptr) return;

  /* Skip low surrogate if necessary. */
  OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  if (update_str_ptr)
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  if (max >= 0xd800)
    /* Replace the high surrogate by 0x10000, which is above max here. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0x10000);
  JUMPHERE(jump);
  }
#endif
}
3071
|
|
|
|
|
|
|
|
3072
|
|
|
|
|
|
|
static SLJIT_INLINE void read_char(compiler_common *common)
{
/* Reads a character with no range restriction: read_char_range() is called
with the range 0..READ_CHAR_MAX and with update_str_ptr set. */
read_char_range(common, 0, READ_CHAR_MAX, TRUE);
}
3076
|
|
|
|
|
|
|
|
3077
|
|
|
|
|
|
|
static void read_char8_type(compiler_common *common, BOOL update_str_ptr)
{
/* Reads the character type into TMP1, updates STR_PTR. Does not check STR_END.

The type comes from the ctypes table, which only covers values up to 255;
for larger values TMP1 is set to zero. TMP2 is used as scratch. */
DEFINE_COMPILER;
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
struct sljit_jump *jump;
#endif
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
struct sljit_jump *jump2;
#endif

SLJIT_UNUSED_ARG(update_str_ptr);

/* First code unit into TMP2; STR_PTR is moved past it. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

#if defined SUPPORT_UTF && defined COMPILE_PCRE8
if (common->utf)
  {
  /* This can be an extra read in some situations, but hopefully
  it is needed in most cases. */
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
  /* Bytes below 0xc0 are done: TMP1 already holds the type. */
  jump = CMP(SLJIT_LESS, TMP2, 0, SLJIT_IMM, 0xc0);
  if (!update_str_ptr)
    {
    /* Inline path: combine the lead byte with the next byte as a two byte
    sequence and step over that byte. If the combined value is above 255
    the type is zero, otherwise it is looked up in ctypes. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
    OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
    jump2 = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
    JUMPHERE(jump2);
    }
  else
    /* Call the shared utfreadtype8 helper, which also advances STR_PTR. */
    add_jump(compiler, &common->utfreadtype8, JUMP(SLJIT_FAST_CALL));
  JUMPHERE(jump);
  return;
  }
#endif /* SUPPORT_UTF && COMPILE_PCRE8 */

#if !defined COMPILE_PCRE8
/* The ctypes array contains only 256 values. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 255);
#endif
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
#if !defined COMPILE_PCRE8
JUMPHERE(jump);
#endif

#if defined SUPPORT_UTF && defined COMPILE_PCRE16
if (common->utf && update_str_ptr)
  {
  /* Skip low surrogate if necessary. */
  OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, SLJIT_IMM, 0xd800);
  jump = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0xdc00 - 0xd800 - 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPHERE(jump);
  }
#endif /* SUPPORT_UTF && COMPILE_PCRE16 */
}
3141
|
|
|
|
|
|
|
|
3142
|
|
|
|
|
|
|
static void skip_char_back(compiler_common *common)
{
/* Goes one character back. Affects STR_PTR and TMP1. Does not check begin.

Outside the UTF-8 and UTF-16 modes this is a single code unit step. */
DEFINE_COMPILER;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
#if defined COMPILE_PCRE8
struct sljit_label *label;

if (common->utf)
  {
  /* Step back one byte at a time for as long as the byte just stepped over
  has the form 10xxxxxx, i.e. (byte & 0xc0) == 0x80. */
  label = LABEL();
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, label);
  return;
  }
#elif defined COMPILE_PCRE16
if (common->utf)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -IN_UCHARS(1));
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  /* Skip low surrogate if necessary. */
  /* Branch free: TMP1 becomes 1 when the unit stepped over was a low
  surrogate ((unit & 0xfc00) == 0xdc00) and 0 otherwise; it is doubled and
  subtracted from STR_PTR, i.e. one more 16 bit unit is stepped over. */
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xdc00);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  return;
  }
#endif /* COMPILE_PCRE[8|16] */
#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
}
3176
|
|
|
|
|
|
|
|
3177
|
|
|
|
|
|
|
static void check_newlinechar(compiler_common *common, int nltype, jump_list **backtracks, BOOL jumpifmatch)
{
/* Character comes in TMP1. Checks if it is a newline. TMP2 may be destroyed.
A jump is added to backtracks which is taken when the character is a newline
(jumpifmatch set) or when it is not a newline (jumpifmatch clear). */
DEFINE_COMPILER;
struct sljit_jump *is_cr;

if (nltype == NLTYPE_ANY)
  {
  /* The shared anynewline helper reports its result through the zero flag. */
  add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
  sljit_set_current_flags(compiler, SLJIT_SET_Z);
  add_jump(compiler, backtracks, JUMP(jumpifmatch ? SLJIT_NOT_ZERO : SLJIT_ZERO));
  return;
  }

if (nltype != NLTYPE_ANYCRLF)
  {
  /* Fixed, single code unit newline. */
  SLJIT_ASSERT(nltype == NLTYPE_FIXED && common->newline < 256);
  add_jump(compiler, backtracks, CMP(jumpifmatch ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
  return;
  }

/* NLTYPE_ANYCRLF: the newline is either CR or NL. */
if (!jumpifmatch)
  {
  /* CR falls through; anything else which is not NL takes the jump. */
  is_cr = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
  JUMPHERE(is_cr);
  return;
  }

add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR));
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
}
3209
|
|
|
|
|
|
|
|
3210
|
|
|
|
|
|
|
#ifdef SUPPORT_UTF |
3211
|
|
|
|
|
|
|
|
3212
|
|
|
|
|
|
|
#if defined COMPILE_PCRE8 |
3213
|
|
|
|
|
|
|
static void do_utfreadchar(compiler_common *common)
{
/* Fast decoding a UTF-8 character. TMP1 contains the first byte
of the character (>= 0xc0). Return char value in TMP1, length in TMP2.

Emits a shared helper (fast enter / fast return, return address kept in
RETURN_ADDR). On entry STR_PTR points at the second byte of the sequence; on
return it points after the last byte. */
DEFINE_COMPILER;
struct sljit_jump *jump;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP1 = ((first & 0x3f) << 6) | (second & 0x3f) */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Searching for the first zero. */
/* Bit 5 of the first byte is now at 0x800; it is clear for a two byte
sequence. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(2));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(jump);
/* Clear the length marker bit and append the six payload bits of the
third byte. */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Bit 4 of the first byte is now at 0x10000; it is clear for a three byte
sequence. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x10000);
jump = JUMP(SLJIT_NOT_ZERO);
/* Three byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(3));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Four byte sequence. */
JUMPHERE(jump);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(2));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x10000);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(3));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, IN_UCHARS(4));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3260
|
|
|
|
|
|
|
|
3261
|
|
|
|
|
|
|
static void do_utfreadchar16(compiler_common *common)
{
/* Fast decoding a UTF-8 character. TMP1 contains the first byte
of the character (>= 0xc0). Return value in TMP1.

Shared helper variant which read_char_range() selects when max < 0x10000.
On entry STR_PTR points at the second byte of the sequence; TMP2 is
destroyed. Unlike do_utfreadchar(), no length is returned.
NOTE(review): for a four byte lead the extra byte is stepped over before the
three byte decode, so TMP1 is not the real code point in that case -
presumably acceptable because callers only care about values below 0x10000;
confirm against the callers. */
DEFINE_COMPILER;
struct sljit_jump *jump;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);
/* TMP1 = ((first & 0x3f) << 6) | (second & 0x3f) */
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);

/* Searching for the first zero. */
/* Bit 5 of the first byte is now at 0x800; it is clear for a two byte
sequence. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x800);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

JUMPHERE(jump);
/* Bit 4 of the first byte is now at 0x400. TMP2 becomes 1 when it is set
and 0 otherwise, and is added to STR_PTR. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x400);
OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_NOT_ZERO);
/* This code runs only in 8 bit mode. No need to shift the value. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
OP2(SLJIT_XOR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x800);
OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0);
/* Three byte sequence. */
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3296
|
|
|
|
|
|
|
|
3297
|
|
|
|
|
|
|
static void do_utfreadtype8(compiler_common *common)
{
/* Fast decoding a UTF-8 character type. TMP2 contains the first byte
of the character (>= 0xc0). Return value in TMP1.

Shared helper (fast enter / fast return through RETURN_ADDR). On entry
STR_PTR points at the second byte of the sequence. The returned type is the
ctypes entry for characters below 256 and zero for everything else; STR_PTR
is advanced on every path. */
DEFINE_COMPILER;
struct sljit_jump *jump;
struct sljit_jump *compare;

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

/* Bit 0x20 of the first byte is clear for a two byte sequence. */
OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, 0x20);
jump = JUMP(SLJIT_NOT_ZERO);
/* Two byte sequence. */
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 0x1f);
/* The upper 5 bits are known at this point. */
/* If they are above 3, the character is 256 or greater. */
compare = CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, 0x3);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 6);
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x3f);
OP2(SLJIT_OR, TMP2, 0, TMP2, 0, TMP1, 0);
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP2), common->ctypes);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* Two byte sequence with a value of 256 or more: the type is zero. */
JUMPHERE(compare);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

/* We only have types for characters less than 256. */
/* Longer sequence: add the utf8_table4 entry of the first byte to STR_PTR
and return zero. */
JUMPHERE(jump);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(utf8_table4) - 0xc0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3332
|
|
|
|
|
|
|
|
3333
|
|
|
|
|
|
|
#endif /* COMPILE_PCRE8 */ |
3334
|
|
|
|
|
|
|
|
3335
|
|
|
|
|
|
|
#endif /* SUPPORT_UTF */ |
3336
|
|
|
|
|
|
|
|
3337
|
|
|
|
|
|
|
#ifdef SUPPORT_UCP |
3338
|
|
|
|
|
|
|
|
3339
|
|
|
|
|
|
|
/* UCD_BLOCK_SIZE must be 128 (see the assert below). */ |
3340
|
|
|
|
|
|
|
#define UCD_BLOCK_MASK 127 |
3341
|
|
|
|
|
|
|
#define UCD_BLOCK_SHIFT 7 |
3342
|
|
|
|
|
|
|
|
3343
|
|
|
|
|
|
|
static void do_getucd(compiler_common *common)
{
/* Search the UCD record for the character comes in TMP1.
Returns chartype in TMP1 and UCD offset in TMP2.

Shared helper (fast enter / fast return through RETURN_ADDR) which performs
the two stage table lookup: ucd_stage1 is indexed by the block number of the
character, ucd_stage2 by block and offset inside the block, and the result
selects an entry of ucd_records. */
DEFINE_COMPILER;
#ifdef COMPILE_PCRE32
struct sljit_jump *jump;
#endif

#if defined SLJIT_DEBUG && SLJIT_DEBUG
/* dummy_ucd_record */
/* Debug builds check that the record of INVALID_UTF_CHAR has the expected
neutral properties. */
const ucd_record *record = GET_UCD(INVALID_UTF_CHAR);
SLJIT_ASSERT(record->script == ucp_Common && record->chartype == ucp_Cn && record->gbprop == ucp_gbOther);
SLJIT_ASSERT(record->caseset == 0 && record->other_case == 0);
#endif

/* The shift amounts used below depend on these sizes. */
SLJIT_ASSERT(UCD_BLOCK_SIZE == 128 && sizeof(ucd_record) == 8);

sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

#ifdef COMPILE_PCRE32
if (!common->utf)
  {
  /* Values above 0x10ffff are replaced by INVALID_UTF_CHAR before the
  lookup. */
  jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10ffff + 1);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
  JUMPHERE(jump);
  }
#endif

/* TMP2 = ucd_stage1[c >> 7] */
OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
/* TMP1 = (TMP2 << 7) + (c & 127) */
OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
/* TMP2 = ucd_stage2[TMP1]; the index is shifted by 1 for the 16 bit load. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);
/* TMP1 = ucd_records[TMP2].chartype; the index is shifted by 3 because a
record is 8 bytes (asserted above). */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
3383
|
|
|
|
|
|
|
#endif |
3384
|
|
|
|
|
|
|
|
3385
|
|
|
|
|
|
|
/* Emits the prologue of the main matching loop and returns the label placed
in front of the "advance to the next start position" code.

  common     compiler state; supplies the sljit compiler, the newline
             settings (nltype, newline, nlmin, nlmax), the utf flag and
             match_end_ptr
  hascrorlf  when TRUE the two-unit newline skipping code is not generated

Emitted code, in order:
  1. If common->match_end_ptr is non-zero: scan forward for the end of the
     first line, store that position in the stack slot at match_end_ptr and
     restore STR_PTR afterwards.
  2. An unconditional jump over the advance code, so the first entry does not
     move STR_PTR.
  3. Optionally (newline skipping enabled) a helper that steps over the second
     unit of a two-unit newline.
  4. The advance code itself: STR_PTR moves forward by one character (a whole
     UTF-8 / UTF-16 sequence in UTF mode).

The returned label is the start of step 4. */
static SLJIT_INLINE struct sljit_label *mainloop_entry(compiler_common *common, BOOL hascrorlf)
{
DEFINE_COMPILER;
struct sljit_label *loop_top;
struct sljit_label *skip_newline = NULL;
struct sljit_jump *enter;
struct sljit_jump *at_end = NULL;
struct sljit_jump *after_skip = NULL;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
struct sljit_jump *one_unit;
#endif
jump_list *found_newline = NULL;
BOOL newlinecheck;
BOOL readuchar;

/* Newline skipping is only generated when the pattern has no explicit CR/LF,
no first-line limit is tracked, and the newline convention can be two units
long (ANY, ANYCRLF or a fixed value above 255). */
newlinecheck = !hascrorlf && common->match_end_ptr == 0 &&
  (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF || common->newline > 255);

/* The current unit has to be loaded into TMP1 when either the newline test
or the UTF length computation below needs it. */
readuchar = newlinecheck;
#ifdef SUPPORT_UTF
if (common->utf)
  readuchar = TRUE;
#endif

if (common->match_end_ptr != 0)
  {
  /* Locate the end of the first line; STR_PTR is parked in TMP3 meanwhile. */
  OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);

  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Fixed two-unit newline: compare the previous and the current unit
    against the high and low byte of common->newline. */
    loop_top = LABEL();
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    at_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, loop_top);
    CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, loop_top);
    JUMPHERE(at_end);
    OP2(SLJIT_SUB, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
  else
    {
    at_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    loop_top = LABEL();
    /* Repeated stores do not create a data dependency. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
    read_char_range(common, common->nlmin, common->nlmax, TRUE);
    check_newlinechar(common, common->nltype, &found_newline, TRUE);
    CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, loop_top);
    JUMPHERE(at_end);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, STR_PTR, 0);
    set_jumps(found_newline, LABEL());
    }

  OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
  }

/* The first entry must not advance STR_PTR: jump over the code below. */
enter = JUMP(SLJIT_JUMP);

if (newlinecheck)
  {
  /* Reached when the current unit equals the high byte of common->newline.
  Step over it, then add one more unit if the next unit equals the low byte
  (OP_FLAGS yields 0 or 1, scaled to the unit size for 16/32 bit builds). */
  skip_newline = LABEL();
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  at_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, common->newline & 0xff);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  after_skip = JUMP(SLJIT_JUMP);
  }

/* From here on loop_top is the label handed back to the caller. */
loop_top = LABEL();

/* Increasing the STR_PTR here requires one less jump in the most common case. */
if (readuchar)
  {
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
  }

if (newlinecheck)
  {
  CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, skip_newline);
  }

OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
#if defined COMPILE_PCRE8
if (common->utf)
  {
  /* Lead bytes >= 0xc0: add the number of trailing bytes from utf8_table4. */
  one_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  JUMPHERE(one_unit);
  }
#elif defined COMPILE_PCRE16
if (common->utf)
  {
  /* Units whose top six bits equal 0xd800 are followed by one more unit:
  the 0/1 comparison result is doubled into a byte offset. */
  one_unit = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
  OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
  OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  JUMPHERE(one_unit);
  }
#endif /* COMPILE_PCRE[8|16] */
#endif /* SUPPORT_UTF && !COMPILE_PCRE32 */
JUMPHERE(enter);

if (newlinecheck)
  {
  /* Both exits of the newline skipping helper rejoin here. */
  JUMPHERE(at_end);
  JUMPHERE(after_skip);
  }

return loop_top;
}
3502
|
|
|
|
|
|
|
|
3503
|
|
|
|
|
|
|
#define MAX_N_CHARS 16 |
3504
|
|
|
|
|
|
|
#define MAX_DIFF_CHARS 6 |
3505
|
|
|
|
|
|
|
|
3506
|
|
|
|
|
|
|
static SLJIT_INLINE void add_prefix_char(pcre_uchar chr, pcre_uchar *chars) |
3507
|
|
|
|
|
|
|
{ |
3508
|
|
|
|
|
|
|
pcre_uchar i, len; |
3509
|
|
|
|
|
|
|
|
3510
|
|
|
|
|
|
|
len = chars[0]; |
3511
|
|
|
|
|
|
|
if (len == 255) |
3512
|
|
|
|
|
|
|
return; |
3513
|
|
|
|
|
|
|
|
3514
|
|
|
|
|
|
|
if (len == 0) |
3515
|
|
|
|
|
|
|
{ |
3516
|
|
|
|
|
|
|
chars[0] = 1; |
3517
|
|
|
|
|
|
|
chars[1] = chr; |
3518
|
|
|
|
|
|
|
return; |
3519
|
|
|
|
|
|
|
} |
3520
|
|
|
|
|
|
|
|
3521
|
|
|
|
|
|
|
for (i = len; i > 0; i--) |
3522
|
|
|
|
|
|
|
if (chars[i] == chr) |
3523
|
|
|
|
|
|
|
return; |
3524
|
|
|
|
|
|
|
|
3525
|
|
|
|
|
|
|
if (len >= MAX_DIFF_CHARS - 1) |
3526
|
|
|
|
|
|
|
{ |
3527
|
|
|
|
|
|
|
chars[0] = 255; |
3528
|
|
|
|
|
|
|
return; |
3529
|
|
|
|
|
|
|
} |
3530
|
|
|
|
|
|
|
|
3531
|
|
|
|
|
|
|
len++; |
3532
|
|
|
|
|
|
|
chars[len] = chr; |
3533
|
|
|
|
|
|
|
chars[0] = len; |
3534
|
|
|
|
|
|
|
} |
3535
|
|
|
|
|
|
|
|
3536
|
|
|
|
|
|
|
/* Recursive function, which scans prefix literals.

Walks the compiled pattern starting at cc and records, for each leading
subject position, which code units may appear there.

  common     compiler state (utf flag, ctypes and fcc tables)
  cc         current position in the compiled pattern
  chars      array of per-position records, MAX_DIFF_CHARS units each, in the
             format maintained by add_prefix_char()
  max_chars  number of positions that may still be filled in
  rec_count  shared work budget; decremented once per processed item, and the
             scan gives up (returns 0) when it reaches zero

Returns the number of positions filled in by this invocation. For optional
items and alternatives the function calls itself on the other path and uses
the returned count as the new max_chars limit of the current path. */
static int scan_prefix(compiler_common *common, pcre_uchar *cc, pcre_uchar *chars, int max_chars, sljit_u32 *rec_count)
{
BOOL final_item, wildcard, bitmap, icase;
int unit_len, idx, count = 1, used = 0;
sljit_u32 c; /* Any unicode character. */
sljit_u8 *bits, cur;
pcre_uchar *alt;
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
pcre_uchar other[8];
#elif defined SUPPORT_UTF && defined COMPILE_PCRE16
pcre_uchar other[2];
#else
pcre_uchar other[1];
#endif

for (;;)
  {
  if (*rec_count == 0)
    return 0;
  (*rec_count)--;

  final_item = TRUE;
  wildcard = FALSE;
  bitmap = FALSE;
  icase = FALSE;

  switch (*cc)
    {
    case OP_CHARI:
    icase = TRUE;
    /* Fall through. */
    case OP_CHAR:
    final_item = FALSE;
    cc++;
    break;

    /* Zero width assertions. */
    case OP_SOD:
    case OP_SOM:
    case OP_SET_SOM:
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_EODN:
    case OP_EOD:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    cc++;
    continue;

    /* Assertion groups are skipped as a whole. */
    case OP_ASSERT:
    case OP_ASSERT_NOT:
    case OP_ASSERTBACK:
    case OP_ASSERTBACK_NOT:
    cc = bracketend(cc);
    continue;

    /* One or more: the literal is recorded once and, because final_item
    stays TRUE, the scan stops after it. */
    case OP_PLUSI:
    case OP_MINPLUSI:
    case OP_POSPLUSI:
    icase = TRUE;
    /* Fall through. */
    case OP_PLUS:
    case OP_MINPLUS:
    case OP_POSPLUS:
    cc++;
    break;

    case OP_EXACTI:
    icase = TRUE;
    /* Fall through. */
    case OP_EXACT:
    count = GET2(cc, 1);
    final_item = FALSE;
    cc += 1 + IMM2_SIZE;
    break;

    /* Optional literal: first scan the path on which it is absent, then
    merge the literal itself into the same positions. */
    case OP_QUERYI:
    case OP_MINQUERYI:
    case OP_POSQUERYI:
    icase = TRUE;
    /* Fall through. */
    case OP_QUERY:
    case OP_MINQUERY:
    case OP_POSQUERY:
    unit_len = 1;
    cc++;
#ifdef SUPPORT_UTF
    if (common->utf && HAS_EXTRALEN(*cc)) unit_len += GET_EXTRALEN(*cc);
#endif
    max_chars = scan_prefix(common, cc + unit_len, chars, max_chars, rec_count);
    if (max_chars == 0)
      return used;
    final_item = FALSE;
    break;

    case OP_KET:
    cc += 1 + LINK_SIZE;
    continue;

    case OP_ALT:
    cc += GET(cc, 1);
    continue;

    /* Groups: every alternative after the first one is scanned recursively,
    then the scan continues inside the first alternative. */
    case OP_ONCE:
    case OP_ONCE_NC:
    case OP_BRA:
    case OP_BRAPOS:
    case OP_CBRA:
    case OP_CBRAPOS:
    for (alt = cc + GET(cc, 1); *alt == OP_ALT; alt += GET(alt, 1))
      {
      max_chars = scan_prefix(common, alt + 1 + LINK_SIZE, chars, max_chars, rec_count);
      if (max_chars == 0)
        return used;
      }

    if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
      cc += IMM2_SIZE;
    cc += 1 + LINK_SIZE;
    continue;

    case OP_CLASS:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
      return used;
#endif
    bitmap = TRUE;
    break;

    case OP_NCLASS:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return used;
#endif
    bitmap = TRUE;
    break;

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return used;
#endif
    wildcard = TRUE;
    cc += GET(cc, 1);
    break;
#endif

    case OP_DIGIT:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
      return used;
#endif
    wildcard = TRUE;
    cc++;
    break;

    case OP_WHITESPACE:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
      return used;
#endif
    wildcard = TRUE;
    cc++;
    break;

    case OP_WORDCHAR:
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
    if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
      return used;
#endif
    wildcard = TRUE;
    cc++;
    break;

    case OP_NOT:
    case OP_NOTI:
    cc++;
    /* Fall through. */
    case OP_NOT_DIGIT:
    case OP_NOT_WHITESPACE:
    case OP_NOT_WORDCHAR:
    case OP_ANY:
    case OP_ALLANY:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return used;
#endif
    wildcard = TRUE;
    cc++;
    break;

#ifdef SUPPORT_UTF
    case OP_NOTPROP:
    case OP_PROP:
#ifndef COMPILE_PCRE32
    if (common->utf) return used;
#endif
    wildcard = TRUE;
    cc += 1 + 2;
    break;
#endif

    /* Only the repeat count is taken here; the repeated item is handled by
    the next iteration. */
    case OP_TYPEEXACT:
    count = GET2(cc, 1);
    cc += 1 + IMM2_SIZE;
    continue;

    case OP_NOTEXACT:
    case OP_NOTEXACTI:
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
    if (common->utf) return used;
#endif
    wildcard = TRUE;
    count = GET2(cc, 1);
    cc += 1 + IMM2_SIZE + 1;
    break;

    default:
    return used;
    }

  if (wildcard)
    {
    /* The item is not tracked character by character: mark "count"
    consecutive positions with 255. */
    do
      {
      chars[0] = 255;

      used++;
      if (--max_chars == 0)
        return used;
      chars += MAX_DIFF_CHARS;
      }
    while (--count > 0);

    count = 1;
    continue;
    }

  if (bitmap)
    {
    /* A 32 byte bitmap follows the opcode, then an optional repeat opcode. */
    bits = (sljit_u8*) (cc + 1);
    cc += 1 + 32 / sizeof(pcre_uchar);

    switch (*cc)
      {
      /* The class may be absent: scan the path that skips it first. */
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
      if (max_chars == 0)
        return used;
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      count = GET2(cc, 1);
      if (count <= 0)
        return used;
      break;

      /* OP_CRPLUS, OP_CRMINPLUS, OP_CRPOSPLUS and a class without a repeat
      opcode: exactly one position is recorded. */
      default:
      break;
      }

    do
      {
      if (bits[31] & 0x80)
        {
        /* The class contains character 255: mark the position with 255. */
        chars[0] = 255;
        }
      else
        {
        /* Add every character whose bit is set; stop once the record has
        been marked 255. */
        for (idx = 0; idx < 32 && chars[0] != 255; idx++)
          {
          c = (sljit_u32)idx << 3;
          for (cur = bits[idx]; cur != 0; cur >>= 1, c++)
            if ((cur & 0x1) != 0)
              add_prefix_char(c, chars);
          }
        }

      used++;
      if (--max_chars == 0)
        return used;
      chars += MAX_DIFF_CHARS;
      }
    while (--count > 0);

    switch (*cc)
      {
      case OP_CRSTAR:
      case OP_CRMINSTAR:
      case OP_CRPOSSTAR:
      return used;

      case OP_CRQUERY:
      case OP_CRMINQUERY:
      case OP_CRPOSQUERY:
      cc++;
      break;

      case OP_CRRANGE:
      case OP_CRMINRANGE:
      case OP_CRPOSRANGE:
      /* Only a fixed repeat (minimum equals maximum) can be followed. */
      if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
        return used;
      cc += 1 + 2 * IMM2_SIZE;
      break;

      default:
      break;
      }

    count = 1;
    continue;
    }

  /* A literal character: cc points at its first code unit. */
  unit_len = 1;
#ifdef SUPPORT_UTF
  if (common->utf && HAS_EXTRALEN(*cc)) unit_len += GET_EXTRALEN(*cc);
#endif

  if (icase && char_has_othercase(common, cc))
    {
#ifdef SUPPORT_UTF
    if (common->utf)
      {
      /* The other case must encode to the same number of code units. */
      GETCHAR(c, cc);
      if ((int)PRIV(ord2utf)(char_othercase(common, c), other) != unit_len)
        return used;
      }
    else
#endif
      {
      c = *cc;
      other[0] = TABLE_GET(c, common->fcc, c);
      }
    }
  else
    {
    icase = FALSE;
    other[0] = 0; /* Stops compiler warning - PH */
    }

  /* Record the code units of the literal, once for every repetition. */
  for (;;)
    {
    for (idx = 0; idx < unit_len; idx++)
      {
      add_prefix_char(cc[idx], chars);

      if (icase)
        add_prefix_char(other[idx], chars);

      used++;
      if (--max_chars == 0)
        return used;
      chars += MAX_DIFF_CHARS;
      }

    if (--count == 0)
      break;
    }

  cc += unit_len;
  count = 1;
  if (final_item)
    return used;
  }
}
3933
|
|
|
|
|
|
|
|
3934
|
|
|
|
|
|
|
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND) |
3935
|
|
|
|
|
|
|
|
3936
|
|
|
|
|
|
|
static sljit_s32 character_to_int32(pcre_uchar chr) |
3937
|
|
|
|
|
|
|
{ |
3938
|
|
|
|
|
|
|
sljit_s32 value = (sljit_s32)chr; |
3939
|
|
|
|
|
|
|
#if defined COMPILE_PCRE8 |
3940
|
|
|
|
|
|
|
#define SSE2_COMPARE_TYPE_INDEX 0 |
3941
|
|
|
|
|
|
|
return (value << 24) | (value << 16) | (value << 8) | value; |
3942
|
|
|
|
|
|
|
#elif defined COMPILE_PCRE16 |
3943
|
|
|
|
|
|
|
#define SSE2_COMPARE_TYPE_INDEX 1 |
3944
|
|
|
|
|
|
|
return (value << 16) | value; |
3945
|
|
|
|
|
|
|
#elif defined COMPILE_PCRE32 |
3946
|
|
|
|
|
|
|
#define SSE2_COMPARE_TYPE_INDEX 2 |
3947
|
|
|
|
|
|
|
return value; |
3948
|
|
|
|
|
|
|
#else |
3949
|
|
|
|
|
|
|
#error "Unsupported unit width" |
3950
|
|
|
|
|
|
|
#endif |
3951
|
|
|
|
|
|
|
} |
3952
|
|
|
|
|
|
|
|
3953
|
|
|
|
|
|
|
static SLJIT_INLINE void fast_forward_first_char2_sse2(compiler_common *common, pcre_uchar char1, pcre_uchar char2) |
3954
|
|
|
|
|
|
|
{ |
3955
|
|
|
|
|
|
|
DEFINE_COMPILER; |
3956
|
|
|
|
|
|
|
struct sljit_label *start; |
3957
|
|
|
|
|
|
|
struct sljit_jump *quit[3]; |
3958
|
|
|
|
|
|
|
struct sljit_jump *nomatch; |
3959
|
|
|
|
|
|
|
sljit_u8 instruction[8]; |
3960
|
|
|
|
|
|
|
sljit_s32 tmp1_ind = sljit_get_register_index(TMP1); |
3961
|
|
|
|
|
|
|
sljit_s32 tmp2_ind = sljit_get_register_index(TMP2); |
3962
|
|
|
|
|
|
|
sljit_s32 str_ptr_ind = sljit_get_register_index(STR_PTR); |
3963
|
|
|
|
|
|
|
BOOL load_twice = FALSE; |
3964
|
|
|
|
|
|
|
pcre_uchar bit; |
3965
|
|
|
|
|
|
|
|
3966
|
|
|
|
|
|
|
bit = char1 ^ char2; |
3967
|
|
|
|
|
|
|
if (!is_powerof2(bit)) |
3968
|
|
|
|
|
|
|
bit = 0; |
3969
|
|
|
|
|
|
|
|
3970
|
|
|
|
|
|
|
if ((char1 != char2) && bit == 0) |
3971
|
|
|
|
|
|
|
load_twice = TRUE; |
3972
|
|
|
|
|
|
|
|
3973
|
|
|
|
|
|
|
quit[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); |
3974
|
|
|
|
|
|
|
|
3975
|
|
|
|
|
|
|
/* First part (unaligned start) */ |
3976
|
|
|
|
|
|
|
|
3977
|
|
|
|
|
|
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(char1 | bit)); |
3978
|
|
|
|
|
|
|
|
3979
|
|
|
|
|
|
|
SLJIT_ASSERT(tmp1_ind < 8 && tmp2_ind == 1); |
3980
|
|
|
|
|
|
|
|
3981
|
|
|
|
|
|
|
/* MOVD xmm, r/m32 */ |
3982
|
|
|
|
|
|
|
instruction[0] = 0x66; |
3983
|
|
|
|
|
|
|
instruction[1] = 0x0f; |
3984
|
|
|
|
|
|
|
instruction[2] = 0x6e; |
3985
|
|
|
|
|
|
|
instruction[3] = 0xc0 | (2 << 3) | tmp1_ind; |
3986
|
|
|
|
|
|
|
sljit_emit_op_custom(compiler, instruction, 4); |
3987
|
|
|
|
|
|
|
|
3988
|
|
|
|
|
|
|
if (char1 != char2) |
3989
|
|
|
|
|
|
|
{ |
3990
|
|
|
|
|
|
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, character_to_int32(bit != 0 ? bit : char2)); |
3991
|
|
|
|
|
|
|
|
3992
|
|
|
|
|
|
|
/* MOVD xmm, r/m32 */ |
3993
|
|
|
|
|
|
|
instruction[3] = 0xc0 | (3 << 3) | tmp1_ind; |
3994
|
|
|
|
|
|
|
sljit_emit_op_custom(compiler, instruction, 4); |
3995
|
|
|
|
|
|
|
} |
3996
|
|
|
|
|
|
|
|
3997
|
|
|
|
|
|
|
/* PSHUFD xmm1, xmm2/m128, imm8 */ |
3998
|
|
|
|
|
|
|
instruction[2] = 0x70; |
3999
|
|
|
|
|
|
|
instruction[3] = 0xc0 | (2 << 3) | 2; |
4000
|
|
|
|
|
|
|
instruction[4] = 0; |
4001
|
|
|
|
|
|
|
sljit_emit_op_custom(compiler, instruction, 5); |
4002
|
|
|
|
|
|
|
|
4003
|
|
|
|
|
|
|
if (char1 != char2) |
4004
|
|
|
|
|
|
|
{ |
4005
|
|
|
|
|
|
|
/* PSHUFD xmm1, xmm2/m128, imm8 */ |
4006
|
|
|
|
|
|
|
instruction[3] = 0xc0 | (3 << 3) | 3; |
4007
|
|
|
|
|
|
|
instruction[4] = 0; |
4008
|
|
|
|
|
|
|
sljit_emit_op_custom(compiler, instruction, 5); |
4009
|
|
|
|
|
|
|
} |
4010
|
|
|
|
|
|
|
|
4011
|
|
|
|
|
|
|
OP2(SLJIT_AND, TMP2, 0, STR_PTR, 0, SLJIT_IMM, 0xf); |
4012
|
|
|
|
|
|
|
OP2(SLJIT_AND, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, ~0xf); |
4013
|
|
|
|
|
|
|
|
4014
|
|
|
|
|
|
|
/* MOVDQA xmm1, xmm2/m128 */ |
4015
|
|
|
|
|
|
|
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
4016
|
|
|
|
|
|
|
|
4017
|
|
|
|
|
|
|
if (str_ptr_ind < 8) |
4018
|
|
|
|
|
|
|
{ |
4019
|
|
|
|
|
|
|
instruction[2] = 0x6f; |
4020
|
|
|
|
|
|
|
instruction[3] = (0 << 3) | str_ptr_ind; |
4021
|
|
|
|
|
|
|
sljit_emit_op_custom(compiler, instruction, 4); |
4022
|
|
|
|
|
|
|
|
4023
|
|
|
|
|
|
|
if (load_twice) |
4024
|
|
|
|
|
|
|
{ |
4025
|
|
|
|
|
|
|
instruction[3] = (1 << 3) | str_ptr_ind; |
4026
|
|
|
|
|
|
|
sljit_emit_op_custom(compiler, instruction, 4); |
4027
|
|
|
|
|
|
|
} |
4028
|
|
|
|
|
|
|
} |
4029
|
|
|
|
|
|
|
else |
4030
|
|
|
|
|
|
|
{ |
4031
|
|
|
|
|
|
|
instruction[1] = 0x41; |
4032
|
|
|
|
|
|
|
instruction[2] = 0x0f; |
4033
|
|
|
|
|
|
|
instruction[3] = 0x6f; |
4034
|
|
|
|
|
|
|
instruction[4] = (0 << 3) | (str_ptr_ind & 0x7); |
4035
|
|
|
|
|
|
|
sljit_emit_op_custom(compiler, instruction, 5); |
4036
|
|
|
|
|
|
|
|
4037
|
|
|
|
|
|
|
if (load_twice) |
4038
|
|
|
|
|
|
|
{ |
4039
|
|
|
|
|
|
|
instruction[4] = (1 << 3) | str_ptr_ind; |
4040
|
|
|
|
|
|
|
sljit_emit_op_custom(compiler, instruction, 5); |
4041
|
|
|
|
|
|
|
} |
4042
|
|
|
|
|
|
|
instruction[1] = 0x0f; |
4043
|
|
|
|
|
|
|
} |
4044
|
|
|
|
|
|
|
|
4045
|
|
|
|
|
|
|
#else |
4046
|
|
|
|
|
|
|
|
4047
|
|
|
|
|
|
|
instruction[2] = 0x6f; |
4048
|
|
|
|
|
|
|
instruction[3] = (0 << 3) | str_ptr_ind; |
4049
|
|
|
|
|
|
|
sljit_emit_op_custom(compiler, instruction, 4); |
4050
|
|
|
|
|
|
|
|
4051
|
|
|
|
|
|
|
if (load_twice) |
4052
|
|
|
|
|
|
|
{ |
4053
|
|
|
|
|
|
|
instruction[3] = (1 << 3) | str_ptr_ind; |
4054
|
|
|
|
|
|
|
sljit_emit_op_custom(compiler, instruction, 4); |
4055
|
|
|
|
|
|
|
} |
4056
|
|
|
|
|
|
|
|
4057
|
|
|
|
|
|
|
#endif |
4058
|
|
|
|
|
|
|
|
4059
|
|
|
|
|
|
|
if (bit != 0) |
4060
|
|
|
|
|
|
|
{ |
4061
|
|
|
|
|
|
|
/* POR xmm1, xmm2/m128 */ |
4062
|
|
|
|
|
|
|
instruction[2] = 0xeb; |
4063
|
|
|
|
|
|
|
instruction[3] = 0xc0 | (0 << 3) | 3; |
4064
|
|
|
|
|
|
|
sljit_emit_op_custom(compiler, instruction, 4); |
4065
|
|
|
|
|
|
|
} |
4066
|
|
|
|
|
|
|
|
4067
|
|
|
|
|
|
|
/* PCMPEQB/W/D xmm1, xmm2/m128 */ |
4068
|
|
|
|
|
|
|
instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX; |
4069
|
|
|
|
|
|
|
instruction[3] = 0xc0 | (0 << 3) | 2; |
4070
|
|
|
|
|
|
|
sljit_emit_op_custom(compiler, instruction, 4); |
4071
|
|
|
|
|
|
|
|
4072
|
|
|
|
|
|
|
if (load_twice) |
4073
|
|
|
|
|
|
|
{ |
4074
|
|
|
|
|
|
|
instruction[3] = 0xc0 | (1 << 3) | 3; |
4075
|
|
|
|
|
|
|
sljit_emit_op_custom(compiler, instruction, 4); |
4076
|
|
|
|
|
|
|
} |
4077
|
|
|
|
|
|
|
|
4078
|
|
|
|
|
|
|
/* PMOVMSKB reg, xmm */ |
4079
|
|
|
|
|
|
|
instruction[2] = 0xd7; |
4080
|
|
|
|
|
|
|
instruction[3] = 0xc0 | (tmp1_ind << 3) | 0; |
4081
|
|
|
|
|
|
|
sljit_emit_op_custom(compiler, instruction, 4); |
4082
|
|
|
|
|
|
|
|
4083
|
|
|
|
|
|
|
if (load_twice) |
4084
|
|
|
|
|
|
|
{ |
4085
|
|
|
|
|
|
|
OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP2, 0); |
4086
|
|
|
|
|
|
|
instruction[3] = 0xc0 | (tmp2_ind << 3) | 1; |
4087
|
|
|
|
|
|
|
sljit_emit_op_custom(compiler, instruction, 4); |
4088
|
|
|
|
|
|
|
|
4089
|
|
|
|
|
|
|
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); |
4090
|
|
|
|
|
|
|
OP1(SLJIT_MOV, TMP2, 0, RETURN_ADDR, 0); |
4091
|
|
|
|
|
|
|
} |
4092
|
|
|
|
|
|
|
|
4093
|
|
|
|
|
|
|
OP2(SLJIT_ASHR, TMP1, 0, TMP1, 0, TMP2, 0); |
4094
|
|
|
|
|
|
|
|
4095
|
|
|
|
|
|
|
/* BSF r32, r/m32 */ |
4096
|
|
|
|
|
|
|
instruction[0] = 0x0f; |
4097
|
|
|
|
|
|
|
instruction[1] = 0xbc; |
4098
|
|
|
|
|
|
|
instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind; |
4099
|
|
|
|
|
|
|
sljit_emit_op_custom(compiler, instruction, 3); |
4100
|
|
|
|
|
|
|
sljit_set_current_flags(compiler, SLJIT_SET_Z); |
4101
|
|
|
|
|
|
|
|
4102
|
|
|
|
|
|
|
nomatch = JUMP(SLJIT_ZERO); |
4103
|
|
|
|
|
|
|
|
4104
|
|
|
|
|
|
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0); |
4105
|
|
|
|
|
|
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); |
4106
|
|
|
|
|
|
|
quit[1] = JUMP(SLJIT_JUMP); |
4107
|
|
|
|
|
|
|
|
4108
|
|
|
|
|
|
|
JUMPHERE(nomatch); |
4109
|
|
|
|
|
|
|
|
4110
|
|
|
|
|
|
|
start = LABEL(); |
4111
|
|
|
|
|
|
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, 16); |
4112
|
|
|
|
|
|
|
quit[2] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0); |
4113
|
|
|
|
|
|
|
|
4114
|
|
|
|
|
|
|
/* Second part (aligned) */ |
4115
|
|
|
|
|
|
|
|
4116
|
|
|
|
|
|
|
instruction[0] = 0x66; |
4117
|
|
|
|
|
|
|
instruction[1] = 0x0f; |
4118
|
|
|
|
|
|
|
|
4119
|
|
|
|
|
|
|
/* MOVDQA xmm1, xmm2/m128 */ |
4120
|
|
|
|
|
|
|
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) |
4121
|
|
|
|
|
|
|
|
4122
|
|
|
|
|
|
|
if (str_ptr_ind < 8) |
4123
|
|
|
|
|
|
|
{ |
4124
|
|
|
|
|
|
|
instruction[2] = 0x6f; |
4125
|
|
|
|
|
|
|
instruction[3] = (0 << 3) | str_ptr_ind; |
4126
|
|
|
|
|
|
|
sljit_emit_op_custom(compiler, instruction, 4); |
4127
|
|
|
|
|
|
|
|
4128
|
|
|
|
|
|
|
if (load_twice) |
4129
|
|
|
|
|
|
|
{ |
4130
|
|
|
|
|
|
|
instruction[3] = (1 << 3) | str_ptr_ind; |
4131
|
|
|
|
|
|
|
sljit_emit_op_custom(compiler, instruction, 4); |
4132
|
|
|
|
|
|
|
} |
4133
|
|
|
|
|
|
|
} |
4134
|
|
|
|
|
|
|
else |
4135
|
|
|
|
|
|
|
{ |
4136
|
|
|
|
|
|
|
instruction[1] = 0x41; |
4137
|
|
|
|
|
|
|
instruction[2] = 0x0f; |
4138
|
|
|
|
|
|
|
instruction[3] = 0x6f; |
4139
|
|
|
|
|
|
|
instruction[4] = (0 << 3) | (str_ptr_ind & 0x7); |
4140
|
|
|
|
|
|
|
sljit_emit_op_custom(compiler, instruction, 5); |
4141
|
|
|
|
|
|
|
|
4142
|
|
|
|
|
|
|
if (load_twice) |
4143
|
|
|
|
|
|
|
{ |
4144
|
|
|
|
|
|
|
instruction[4] = (1 << 3) | str_ptr_ind; |
4145
|
|
|
|
|
|
|
sljit_emit_op_custom(compiler, instruction, 5); |
4146
|
|
|
|
|
|
|
} |
4147
|
|
|
|
|
|
|
instruction[1] = 0x0f; |
4148
|
|
|
|
|
|
|
} |
4149
|
|
|
|
|
|
|
|
4150
|
|
|
|
|
|
|
#else |
4151
|
|
|
|
|
|
|
|
4152
|
|
|
|
|
|
|
instruction[2] = 0x6f; |
4153
|
|
|
|
|
|
|
instruction[3] = (0 << 3) | str_ptr_ind; |
4154
|
|
|
|
|
|
|
sljit_emit_op_custom(compiler, instruction, 4); |
4155
|
|
|
|
|
|
|
|
4156
|
|
|
|
|
|
|
if (load_twice) |
4157
|
|
|
|
|
|
|
{ |
4158
|
|
|
|
|
|
|
instruction[3] = (1 << 3) | str_ptr_ind; |
4159
|
|
|
|
|
|
|
sljit_emit_op_custom(compiler, instruction, 4); |
4160
|
|
|
|
|
|
|
} |
4161
|
|
|
|
|
|
|
|
4162
|
|
|
|
|
|
|
#endif |
4163
|
|
|
|
|
|
|
|
4164
|
|
|
|
|
|
|
if (bit != 0) |
4165
|
|
|
|
|
|
|
{ |
4166
|
|
|
|
|
|
|
/* POR xmm1, xmm2/m128 */ |
4167
|
|
|
|
|
|
|
instruction[2] = 0xeb; |
4168
|
|
|
|
|
|
|
instruction[3] = 0xc0 | (0 << 3) | 3; |
4169
|
|
|
|
|
|
|
sljit_emit_op_custom(compiler, instruction, 4); |
4170
|
|
|
|
|
|
|
} |
4171
|
|
|
|
|
|
|
|
4172
|
|
|
|
|
|
|
/* PCMPEQB/W/D xmm1, xmm2/m128 */ |
4173
|
|
|
|
|
|
|
instruction[2] = 0x74 + SSE2_COMPARE_TYPE_INDEX; |
4174
|
|
|
|
|
|
|
instruction[3] = 0xc0 | (0 << 3) | 2; |
4175
|
|
|
|
|
|
|
sljit_emit_op_custom(compiler, instruction, 4); |
4176
|
|
|
|
|
|
|
|
4177
|
|
|
|
|
|
|
if (load_twice) |
4178
|
|
|
|
|
|
|
{ |
4179
|
|
|
|
|
|
|
instruction[3] = 0xc0 | (1 << 3) | 3; |
4180
|
|
|
|
|
|
|
sljit_emit_op_custom(compiler, instruction, 4); |
4181
|
|
|
|
|
|
|
} |
4182
|
|
|
|
|
|
|
|
4183
|
|
|
|
|
|
|
/* PMOVMSKB reg, xmm */ |
4184
|
|
|
|
|
|
|
instruction[2] = 0xd7; |
4185
|
|
|
|
|
|
|
instruction[3] = 0xc0 | (tmp1_ind << 3) | 0; |
4186
|
|
|
|
|
|
|
sljit_emit_op_custom(compiler, instruction, 4); |
4187
|
|
|
|
|
|
|
|
4188
|
|
|
|
|
|
|
if (load_twice) |
4189
|
|
|
|
|
|
|
{ |
4190
|
|
|
|
|
|
|
instruction[3] = 0xc0 | (tmp2_ind << 3) | 1; |
4191
|
|
|
|
|
|
|
sljit_emit_op_custom(compiler, instruction, 4); |
4192
|
|
|
|
|
|
|
|
4193
|
|
|
|
|
|
|
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, TMP2, 0); |
4194
|
|
|
|
|
|
|
} |
4195
|
|
|
|
|
|
|
|
4196
|
|
|
|
|
|
|
/* BSF r32, r/m32 */ |
4197
|
|
|
|
|
|
|
instruction[0] = 0x0f; |
4198
|
|
|
|
|
|
|
instruction[1] = 0xbc; |
4199
|
|
|
|
|
|
|
instruction[2] = 0xc0 | (tmp1_ind << 3) | tmp1_ind; |
4200
|
|
|
|
|
|
|
sljit_emit_op_custom(compiler, instruction, 3); |
4201
|
|
|
|
|
|
|
sljit_set_current_flags(compiler, SLJIT_SET_Z); |
4202
|
|
|
|
|
|
|
|
4203
|
|
|
|
|
|
|
JUMPTO(SLJIT_ZERO, start); |
4204
|
|
|
|
|
|
|
|
4205
|
|
|
|
|
|
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0); |
4206
|
|
|
|
|
|
|
|
4207
|
|
|
|
|
|
|
start = LABEL(); |
4208
|
|
|
|
|
|
|
SET_LABEL(quit[0], start); |
4209
|
|
|
|
|
|
|
SET_LABEL(quit[1], start); |
4210
|
|
|
|
|
|
|
SET_LABEL(quit[2], start); |
4211
|
|
|
|
|
|
|
} |
4212
|
|
|
|
|
|
|
|
4213
|
|
|
|
|
|
|
#undef SSE2_COMPARE_TYPE_INDEX |
4214
|
|
|
|
|
|
|
|
4215
|
|
|
|
|
|
|
#endif |
4216
|
|
|
|
|
|
|
|
4217
|
|
|
|
|
|
|
/* Emits code that advances STR_PTR until the character found "offset"
   characters after the candidate start position equals char1 or char2
   (the two may be identical), or until the end of the searchable area.

   common  JIT compiler state
   char1   first accepted character
   char2   second accepted character
   offset  distance, in characters, between the candidate start position
           and the character that is tested

   When common->match_end_ptr is set, STR_END is temporarily lowered (the
   original value is parked in TMP3) and restored before returning. */
static void fast_forward_first_char2(compiler_common *common, pcre_uchar char1, pcre_uchar char2, sljit_s32 offset)
{
  DEFINE_COMPILER;
  struct sljit_label *scan_loop;
  struct sljit_jump *subject_end;
  struct sljit_jump *hit;
  pcre_uchar diff;
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
  struct sljit_label *utf_start = NULL;
  struct sljit_jump *utf_quit = NULL;
#endif
  BOOL has_match_end = (common->match_end_ptr != 0);

  /* Scan at the tested character rather than at the candidate start. */
  if (offset > 0)
    {
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
    }

  if (has_match_end)
    {
    /* STR_END = min(STR_END, match_end + offset + 1); old value kept in TMP3. */
    OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);

    OP2(SLJIT_ADD, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr, SLJIT_IMM, IN_UCHARS(offset + 1));
    OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, STR_END, 0, TMP3, 0);
    sljit_emit_cmov(compiler, SLJIT_GREATER, STR_END, TMP3, 0);
    }

#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
  if (common->utf && offset > 0)
    {
    utf_start = LABEL();
    }
#endif

#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86) && !(defined SUPPORT_VALGRIND)

  /* SSE2 accelerated first character search. */

  if (sljit_has_cpu_feature(SLJIT_HAS_SSE2))
    {
    fast_forward_first_char2_sse2(common, char1, char2);

    SLJIT_ASSERT(common->mode == JIT_COMPILE || offset == 0);
    if (common->mode != JIT_COMPILE)
      {
      /* If STR_PTR >= STR_END, replace STR_PTR with the match end (when
         one is tracked) or with STR_END itself. */
      OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, STR_END, 0);
      if (has_match_end)
        {
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
        }
      sljit_emit_cmov(compiler, SLJIT_GREATER_EQUAL, STR_PTR, has_match_end ? TMP1 : STR_END, 0);
      }
    else
      {
      /* In complete mode, we don't need to run a match when STR_PTR == STR_END. */
      SLJIT_ASSERT(common->forced_quit_label == NULL);
      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
      add_jump(compiler, &common->forced_quit, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));

#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
      if (common->utf && offset > 0)
        {
        SLJIT_ASSERT(common->mode == JIT_COMPILE);

        /* Inspect the unit at the candidate start. If it is a UTF-8
           continuation byte / UTF-16 low surrogate, resume the search one
           unit further on; otherwise undo the increment. */
        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined COMPILE_PCRE8
        OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
        CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
#elif defined COMPILE_PCRE16
        OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
        CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
#else
#error "Unknown code width"
#endif
        OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
        }
#endif

      if (offset > 0)
        {
        OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
        }
      }

    if (has_match_end)
      {
      OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
      }
    return;
    }

#endif

  /* Generic scalar search loop. */
  subject_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

  scan_loop = LABEL();
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);

  diff = char1 ^ char2;
  if (diff == 0)
    {
    /* Both characters are the same: a single comparison is enough. */
    hit = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1);
    }
  else if (is_powerof2(diff))
    {
    /* The characters differ in one bit only: force that bit on and
       compare once. */
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, diff);
    hit = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, char1 | diff);
    }
  else
    {
    /* Unrelated characters: OR the results of two equality tests. */
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char1);
    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, char2);
    OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
    hit = JUMP(SLJIT_NOT_ZERO);
    }

  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
  CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, scan_loop);

#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
  if (common->utf && offset > 0)
    {
    /* Search exhausted: jump over the start-position check below. */
    utf_quit = JUMP(SLJIT_JUMP);
    }
#endif

  JUMPHERE(hit);

#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
  if (common->utf && offset > 0)
    {
    /* Same start-position check as in the SSE2 path above. */
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-offset));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined COMPILE_PCRE8
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
    CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, utf_start);
#elif defined COMPILE_PCRE16
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
    CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, utf_start);
#else
#error "Unknown code width"
#endif
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    JUMPHERE(utf_quit);
    }
#endif

  JUMPHERE(subject_end);

  if (has_match_end)
    {
    /* Nothing found below the lowered limit: continue from the match end
       (shifted by offset, which is subtracted again below). */
    subject_end = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
    if (offset > 0)
      {
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
      }
    JUMPHERE(subject_end);
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
    }

  /* Return to the candidate start position. */
  if (offset > 0)
    {
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(offset));
    }
}
4374
|
|
|
|
|
|
|
|
4375
|
|
|
|
|
|
|
/* Tries to emit an accelerated start-of-match search based on the first
   few characters of the pattern, as collected by scan_prefix().

   Returns FALSE when no usable prefix information exists (nothing is
   emitted); returns TRUE otherwise, including the case where the skip
   table could not be allocated.

   Layout of chars[]: position i owns the slot starting at
   chars[i * MAX_DIFF_CHARS]. The first element of a slot is a count and
   the candidate characters follow it; a count of 255 is treated as "any
   character" (per the original comment). */
static SLJIT_INLINE BOOL fast_forward_first_n_chars(compiler_common *common)
{
  DEFINE_COMPILER;
  struct sljit_label *restart;
  struct sljit_jump *stop;
  struct sljit_jump *first_alt;
  pcre_uchar chars[MAX_N_CHARS * MAX_DIFF_CHARS];
  sljit_s32 offset;
  pcre_uchar mask;
  const pcre_uchar *slot;
  const pcre_uchar *best;
  int pos, k, count, max, from;
  int range_right = -1, range_len;
  sljit_u8 *update_table = NULL;
  BOOL in_range;
  sljit_u32 rec_count;

  /* Reset the count of every slot before scanning. */
  for (pos = 0; pos < MAX_N_CHARS; pos++)
    {
    chars[pos * MAX_DIFF_CHARS] = 0;
    }

  rec_count = 10000;
  max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);

  if (max < 1)
    {
    return FALSE;
    }

  /* Find the longest run (longer than 3) of consecutive positions whose
     count is below 255. range_right is the last position of that run. */
  in_range = FALSE;
  /* Prevent compiler "uninitialized" warning */
  from = 0;
  range_len = 4 /* minimum length */ - 1;
  for (pos = 0; pos <= max; pos++)
    {
    if (in_range && (pos - from) > range_len && (chars[(pos - 1) * MAX_DIFF_CHARS] < 255))
      {
      range_len = pos - from;
      range_right = pos - 1;
      }

    if (pos < max && chars[pos * MAX_DIFF_CHARS] < 255)
      {
      SLJIT_ASSERT(chars[pos * MAX_DIFF_CHARS] > 0);
      if (!in_range)
        {
        in_range = TRUE;
        from = pos;
        }
      }
    else
      {
      in_range = FALSE;
      }
    }

  if (range_right >= 0)
    {
    /* Build a 256-entry skip table indexed by the low 8 bits of the unit
       read at range_right. Each entry is the smallest distance (scaled by
       IN_UCHARS) from range_right at which that value can occur in the
       run, or the full run length when it cannot occur at all. */
    update_table = (sljit_u8 *)allocate_read_only_data(common, 256);
    if (update_table == NULL)
      {
      return TRUE;
      }
    memset(update_table, IN_UCHARS(range_len), 256);

    for (pos = 0; pos < range_len; pos++)
      {
      slot = chars + ((range_right - pos) * MAX_DIFF_CHARS);
      SLJIT_ASSERT(slot[0] > 0 && slot[0] < 255);
      count = slot[0];
      for (k = 1; k <= count; k++)
        {
        if (update_table[slot[k] & 0xff] > IN_UCHARS(pos))
          {
          update_table[slot[k] & 0xff] = IN_UCHARS(pos);
          }
        }
      }
    }

  /* Scan forward for a position with at most two candidates. A position
     with a single candidate is preferred; failing that, one whose two
     candidates differ in exactly one bit. */
  offset = -1;
  for (pos = 0; pos < max; pos++)
    {
    slot = chars + pos * MAX_DIFF_CHARS;
    if (slot[0] > 2)
      {
      continue;
      }

    if (offset == -1)
      {
      offset = pos;
      continue;
      }

    best = chars + offset * MAX_DIFF_CHARS;
    if (best[0] != 2)
      {
      continue;
      }

    if (slot[0] == 1)
      {
      offset = pos;
      continue;
      }

    mask = best[1] ^ best[2];
    if (is_powerof2(mask))
      {
      continue;
      }

    mask = slot[1] ^ slot[2];
    if (is_powerof2(mask))
      {
      offset = pos;
      }
    }

  if (range_right < 0)
    {
    /* No skip table: fall back to a one- or two-character search. */
    if (offset < 0)
      {
      return FALSE;
      }
    best = chars + offset * MAX_DIFF_CHARS;
    SLJIT_ASSERT(best[0] >= 1 && best[0] <= 2);
    /* Works regardless the value is 1 or 2. */
    mask = best[best[0]];
    fast_forward_first_char2(common, best[1], mask, offset);
    return TRUE;
    }

  /* The table lookup already tests position range_right, so a separate
     character check at the same position would be redundant. */
  if (range_right == offset)
    {
    offset = -1;
    }

  SLJIT_ASSERT(offset == -1 || (chars[offset * MAX_DIFF_CHARS] >= 1 && chars[offset * MAX_DIFF_CHARS] <= 2));

  max -= 1;
  SLJIT_ASSERT(max > 0);
  if (common->match_end_ptr == 0)
    {
    OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
    }
  else
    {
    /* Park STR_END in TMP3, then set STR_END to the smaller of
       (STR_END - max) and the stored match end. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
    OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
    OP2(SLJIT_SUB, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
    stop = CMP(SLJIT_LESS_EQUAL, STR_END, 0, TMP1, 0);
    OP1(SLJIT_MOV, STR_END, 0, TMP1, 0);
    JUMPHERE(stop);
    }

  SLJIT_ASSERT(range_right >= 0);

#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  OP1(SLJIT_MOV, RETURN_ADDR, 0, SLJIT_IMM, (sljit_sw)update_table);
#endif

  restart = LABEL();
  stop = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);

  /* Load one byte of the unit at range_right (the second variant adjusts
     the byte offset on non-little-endian builds with wider units). */
#if defined COMPILE_PCRE8 || (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right));
#else
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(range_right + 1) - 1);
#endif

  /* Look up the skip distance, apply it, and retry while it is non-zero. */
#if !(defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(RETURN_ADDR, TMP1), 0);
#else
  OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)update_table);
#endif
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
  CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, restart);

  if (offset >= 0)
    {
    /* Extra check of the selected position. STR_PTR is advanced first so
       that a mismatch restarts the search one unit further on. */
    best = chars + offset * MAX_DIFF_CHARS;

    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(offset));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

    if (best[0] == 1)
      {
      CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, best[1], restart);
      }
    else
      {
      mask = best[1] ^ best[2];
      if (is_powerof2(mask))
        {
        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, mask);
        CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, best[1] | mask, restart);
        }
      else
        {
        first_alt = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, best[1]);
        CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, best[2], restart);
        JUMPHERE(first_alt);
        }
      }
    }

#if defined SUPPORT_UTF && !defined COMPILE_PCRE32
  if (common->utf && offset != 0)
    {
    /* Restart when the unit at the candidate start is a UTF-8
       continuation byte / UTF-16 low surrogate. When offset >= 0 STR_PTR
       has already been advanced by one, hence the -1 displacement. */
    if (offset < 0)
      {
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      }
    else
      {
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
      }
#if defined COMPILE_PCRE8
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xc0);
    CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0x80, restart);
#elif defined COMPILE_PCRE16
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
    CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0xdc00, restart);
#else
#error "Unknown code width"
#endif
    if (offset < 0)
      {
      OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      }
    }
#endif

  /* Undo the increment made for the extra character check. */
  if (offset >= 0)
    {
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }

  JUMPHERE(stop);

  if (common->match_end_ptr == 0)
    {
    OP2(SLJIT_ADD, STR_END, 0, STR_END, 0, SLJIT_IMM, IN_UCHARS(max));
    }
  else
    {
    /* range_right >= 0 always holds here (see the assertion above).
       Restore STR_END and cap STR_PTR at the stored match end. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
    stop = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
    JUMPHERE(stop);
    }
  return TRUE;
}
4597
|
|
|
|
|
|
|
|
4598
|
|
|
|
|
|
|
#undef MAX_N_CHARS |
4599
|
|
|
|
|
|
|
#undef MAX_DIFF_CHARS |
4600
|
|
|
|
|
|
|
|
4601
|
|
|
|
|
|
|
/* Emits a search for a known first character. For a caseless search the
   other-case form of first_char is looked up and both forms are handed
   to fast_forward_first_char2(); otherwise first_char is passed twice.

   common      JIT compiler state
   first_char  character the match must start with
   caseless    non-zero to accept the other case of first_char as well */
static SLJIT_INLINE void fast_forward_first_char(compiler_common *common, pcre_uchar first_char, BOOL caseless)
{
  pcre_uchar other_case = first_char;

  if (caseless)
    {
    other_case = TABLE_GET(first_char, common->fcc, first_char);
#if defined SUPPORT_UCP && !defined COMPILE_PCRE8
    /* Above 127 in UTF mode the Unicode tables give the other case. */
    if (first_char > 127 && common->utf)
      {
      other_case = UCD_OTHERCASE(first_char);
      }
#endif
    }

  fast_forward_first_char2(common, first_char, other_case, 0);
}
4617
|
|
|
|
|
|
|
|
4618
|
|
|
|
|
|
|
/* Emits code that moves STR_PTR forward to a position that follows a
   newline, leaving it untouched when it is at or before the "str" field
   of the JIT arguments (or, for the fixed two-unit newline, when it is
   already at STR_END).

   When common->match_end_ptr is set, STR_END is replaced by the stored
   match end for the duration of the search and restored from TMP3
   afterwards. */
static SLJIT_INLINE void fast_forward_newline(compiler_common *common)
{
  DEFINE_COMPILER;
  struct sljit_label *scan;
  struct sljit_jump *at_end;
  struct sljit_jump *at_start;
  struct sljit_jump *done;
  struct sljit_jump *cr_seen = NULL;
  struct sljit_jump *cr_at_end;
  jump_list *retry = NULL;
  BOOL has_match_end = (common->match_end_ptr != 0);
  BOOL cr_is_newline = (common->nltype == NLTYPE_ANY || common->nltype == NLTYPE_ANYCRLF);

  if (has_match_end)
    {
    OP1(SLJIT_MOV, TMP3, 0, STR_END, 0);
    OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
    }

  if (common->nltype == NLTYPE_FIXED && common->newline > 255)
    {
    /* Fixed newline made of two units. */
    at_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
    at_start = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);

    /* Step back one unit when at least two units lie between "begin" and
       STR_PTR; the loop below then starts by stepping forward again. */
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(2));
    OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, STR_PTR, 0, TMP1, 0);
    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER_EQUAL);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
    OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

    /* Advance until the two units before STR_PTR spell the newline. */
    scan = LABEL();
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    done = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
    OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff, scan);
    CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, common->newline & 0xff, scan);

    JUMPHERE(done);
    JUMPHERE(at_start);
    JUMPHERE(at_end);
    }
  else
    {
    /* Every other newline convention. */
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
    at_start = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
    skip_char_back(common);

    scan = LABEL();
    /* The loop head is published through common for use elsewhere. */
    common->ff_newline_shortcut = scan;

    read_char_range(common, common->nlmin, common->nlmax, TRUE);
    at_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
    if (cr_is_newline)
      {
      cr_seen = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
      }
    /* Jumps collected by check_newlinechar() go back to the loop head. */
    check_newlinechar(common, common->nltype, &retry, FALSE);
    set_jumps(retry, scan);

    if (cr_is_newline)
      {
      done = JUMP(SLJIT_JUMP);
      JUMPHERE(cr_seen);
      /* After a CR, also step over an immediately following NL. */
      cr_at_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, CHAR_NL);
      OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
#if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
      OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, UCHAR_SHIFT);
#endif
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
      JUMPHERE(cr_at_end);
      JUMPHERE(done);
      }
    JUMPHERE(at_end);
    JUMPHERE(at_start);
    }

  if (has_match_end)
    {
    OP1(SLJIT_MOV, STR_END, 0, TMP3, 0);
    }
}
4704
|
|
|
|
|
|
|
|
4705
|
|
|
|
|
|
|
static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks); |
4706
|
|
|
|
|
|
|
|
4707
|
|
|
|
|
|
|
/* Emits a loop that advances STR_PTR until the current character is
   accepted by the start-bit map, or STR_PTR reaches STR_END.

   common      JIT compiler state
   start_bits  bitmap indexed by character value (bit c & 7 of byte c >> 3)

   When common->match_end_ptr is set, STR_END is replaced by the stored
   match end during the loop; the original value is parked in RETURN_ADDR
   and restored at the end. */
static SLJIT_INLINE void fast_forward_start_bits(compiler_common *common, const sljit_u8 *start_bits)
{
  DEFINE_COMPILER;
  struct sljit_label *scan;
  struct sljit_jump *at_end;
  struct sljit_jump *bit_hit = NULL;
  jump_list *range_hits = NULL;
#ifndef COMPILE_PCRE8
  struct sljit_jump *in_table;
#endif
  BOOL has_match_end = (common->match_end_ptr != 0);

  if (has_match_end)
    {
    OP1(SLJIT_MOV, RETURN_ADDR, 0, STR_END, 0);
    OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr);
    }

  scan = LABEL();
  at_end = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
  OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
#ifdef SUPPORT_UTF
  /* Keep a copy of the unit: TMP1 is overwritten by the tests below. */
  if (common->utf)
    {
    OP1(SLJIT_MOV, TMP3, 0, TMP1, 0);
    }
#endif

  if (!check_class_ranges(common, start_bits, (start_bits[31] & 0x80) != 0, TRUE, &range_hits))
    {
    /* No range-based test was emitted: test the bit in the map instead. */
#ifndef COMPILE_PCRE8
    /* Values of 255 and above all use entry 255 of the map. */
    in_table = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 255);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 255);
    JUMPHERE(in_table);
#endif
    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)start_bits);
    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
    OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
    bit_hit = JUMP(SLJIT_NOT_ZERO);
    }

  /* Not accepted: step over this character and try the next one. */
#ifdef SUPPORT_UTF
  if (common->utf)
    {
    OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
    }
#endif
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#ifdef SUPPORT_UTF
#if defined COMPILE_PCRE8
  if (common->utf)
    {
    /* For lead bytes (>= 0xc0) add the extra length from utf8_table4. */
    CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0, scan);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
    }
#elif defined COMPILE_PCRE16
  if (common->utf)
    {
    /* For a high surrogate (0xd800..0xdbff) add one more unit. */
    CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800, scan);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
    OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
    OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
    }
#endif /* COMPILE_PCRE[8|16] */
#endif /* SUPPORT_UTF */
  JUMPTO(SLJIT_JUMP, scan);

  /* All exits from the loop land here. */
  if (bit_hit != NULL)
    {
    JUMPHERE(bit_hit);
    }
  if (range_hits != NULL)
    {
    set_jumps(range_hits, LABEL());
    }
  JUMPHERE(at_end);

  if (has_match_end)
    {
    OP1(SLJIT_MOV, STR_END, 0, RETURN_ADDR, 0);
    }
}
4782
|
|
|
|
|
|
|
|
4783
|
|
|
|
|
|
|
/* Emits a forward search for a character that must appear somewhere in
   the match. The position where it was last found is cached in the stack
   slot common->req_char_ptr.

   common         JIT compiler state
   req_char       the required character
   caseless       non-zero to accept the other case of req_char as well
   has_firstchar  non-zero to begin the search one character after STR_PTR

   Returns the jump that is taken when the scan reaches STR_END without
   finding the character; the caller must give it a target. */
static SLJIT_INLINE struct sljit_jump *search_requested_char(compiler_common *common, pcre_uchar req_char, BOOL caseless, BOOL has_firstchar)
{
  DEFINE_COMPILER;
  struct sljit_label *scan;
  struct sljit_jump *skip_far;
  struct sljit_jump *skip_cached;
  struct sljit_jump *hit;
  struct sljit_jump *hit_other = NULL;
  struct sljit_jump *miss;
  sljit_u32 other = req_char;
  sljit_u32 diff;

  /* Work out the other-case form (host side only, nothing is emitted). */
  if (caseless)
    {
    other = TABLE_GET(req_char, common->fcc, req_char);
#if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
    if (req_char > 127 && common->utf)
      {
      other = UCD_OTHERCASE(req_char);
      }
#endif
    }
  diff = req_char ^ other;

  SLJIT_ASSERT(common->req_char_ptr != 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr);
  /* Skip the search when STR_PTR + REQ_BYTE_MAX is still below STR_END... */
  OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, REQ_BYTE_MAX);
  skip_far = CMP(SLJIT_LESS, TMP1, 0, STR_END, 0);
  /* ...or when the cached position lies beyond STR_PTR. */
  skip_cached = CMP(SLJIT_LESS, STR_PTR, 0, TMP2, 0);

  /* TMP1 is the scan pointer. */
  if (has_firstchar)
    {
    OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }
  else
    {
    OP1(SLJIT_MOV, TMP1, 0, STR_PTR, 0);
    }

  scan = LABEL();
  miss = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, STR_END, 0);

  OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(TMP1), 0);
  if (diff == 0)
    {
    /* Only one form to look for. */
    hit = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
    }
  else if (is_powerof2(diff))
    {
    /* The forms differ in one bit: force it on and compare once. */
    OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, diff);
    hit = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char | diff);
    }
  else
    {
    hit = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, req_char);
    hit_other = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, other);
    }
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPTO(SLJIT_JUMP, scan);

  JUMPHERE(hit);
  if (hit_other != NULL)
    {
    JUMPHERE(hit_other);
    }
  /* Remember where the character was found. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, TMP1, 0);
  JUMPHERE(skip_cached);
  JUMPHERE(skip_far);
  return miss;
}
4845
|
|
|
|
|
|
|
|
4846
|
|
|
|
|
|
|
static void do_revertframes(compiler_common *common)
{
  /* Emits a fast-call routine that walks the entries below STACK_TOP and
  writes the saved values back to locations relative to the base obtained
  by GET_LOCAL_BASE. The word at STACK_TOP[-1] selects the entry kind:
    > 0  offset of a two-word value (entry is three words long),
    < 0  negated offset of a one-word value (entry is two words long),
    = 0  end marker.
  STACK_TOP is restored to its entry value before returning. */
  DEFINE_COMPILER;
  struct sljit_jump *branch;
  struct sljit_label *next_entry;

  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

  /* STACK_TOP is used as a cursor below; TMP3 keeps its original value. */
  OP1(SLJIT_MOV, TMP3, 0, STACK_TOP, 0);
  GET_LOCAL_BASE(TMP1, 0, 0);

  /* Process entries until the end marker is found. */
  next_entry = LABEL();
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), -sizeof(sljit_sw));
  branch = CMP(SLJIT_SIG_LESS_EQUAL, TMP2, 0, SLJIT_IMM, 0);

  /* Positive selector: copy two words to base + selector. */
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -2 * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), sizeof(sljit_sw), SLJIT_MEM1(STACK_TOP), -3 * sizeof(sljit_sw));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 3 * sizeof(sljit_sw));
  JUMPTO(SLJIT_JUMP, next_entry);

  /* Selector is zero or negative. */
  JUMPHERE(branch);
  branch = CMP(SLJIT_NOT_ZERO /* SIG_LESS */, TMP2, 0, SLJIT_IMM, 0);

  /* Zero selector: nothing more to revert. */
  OP1(SLJIT_MOV, STACK_TOP, 0, TMP3, 0);
  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);

  /* Negative selector: copy one word to base + (-selector). */
  JUMPHERE(branch);
  OP1(SLJIT_NEG, TMP2, 0, TMP2, 0);
  OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, TMP1, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), 0, SLJIT_MEM1(STACK_TOP), -2 * sizeof(sljit_sw));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2 * sizeof(sljit_sw));
  JUMPTO(SLJIT_JUMP, next_entry);
}
4880
|
|
|
|
|
|
|
|
4881
|
|
|
|
|
|
|
static void check_wordboundary(compiler_common *common)
{
  /* Emits a fast-call routine that classifies the character before STR_PTR
  and the character at STR_PTR as word / non-word (each as 0 or 1) and XORs
  the two results. The zero flag produced by that XOR is therefore set when
  both sides have the same classification. The return address is kept in
  LOCALS0 and the classification of the previous character in LOCALS1. */
  DEFINE_COMPILER;
  struct sljit_jump *no_prev_char;
  jump_list *no_next_char = NULL;
#if !(defined COMPILE_PCRE8) || defined SUPPORT_UTF
  struct sljit_jump *bypass;
#endif

  SLJIT_COMPILE_ASSERT(ctype_word == 0x10, ctype_word_must_be_16);

  sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);

  /* Previous character: LOCALS1 defaults to 0 and stays 0 when STR_PTR is
  not beyond the "begin" field of the JIT arguments. */
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, SLJIT_IMM, 0);
  no_prev_char = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP1, 0);
  skip_char_back(common);
  check_start_used_ptr(common);
  read_char(common);

  /* Classify the previous character (in TMP1). */
#ifdef SUPPORT_UCP
  if (common->use_ucp)
    {
    /* Underscore counts as a word character without a property lookup;
    otherwise the type must fall in ucp_Ll..ucp_Lu or ucp_Nd..ucp_No. */
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
    bypass = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
    add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
    OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
    OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
    OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
    JUMPHERE(bypass);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP2, 0);
    }
  else
#endif
    {
    /* Table lookup; characters above 255 skip it and keep the 0 already
    stored in LOCALS1. */
#ifndef COMPILE_PCRE8
    bypass = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UTF
    /* Here LOCALS1 has already been zeroed. */
    bypass = NULL;
    if (common->utf)
      bypass = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif /* COMPILE_PCRE8 */
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), common->ctypes);
    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 4 /* ctype_word */);
    OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, TMP1, 0);
#ifndef COMPILE_PCRE8
    JUMPHERE(bypass);
#elif defined SUPPORT_UTF
    if (bypass != NULL)
      JUMPHERE(bypass);
#endif /* COMPILE_PCRE8 */
    }
  JUMPHERE(no_prev_char);

  /* Next character: TMP2 defaults to 0 and stays 0 at the end of the subject. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
  check_str_end(common, &no_next_char);
  peek_char(common, READ_CHAR_MAX);

  /* Classify the next character (in TMP1); same tests as above, result in TMP2. */
#ifdef SUPPORT_UCP
  if (common->use_ucp)
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 1);
    bypass = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_UNDERSCORE);
    add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Ll);
    OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
    OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ucp_Nd - ucp_Ll);
    OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ucp_No - ucp_Nd);
    OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
    JUMPHERE(bypass);
    }
  else
#endif
    {
#ifndef COMPILE_PCRE8
    /* TMP2 may be destroyed by peek_char. */
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
    bypass = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#elif defined SUPPORT_UTF
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);
    bypass = NULL;
    if (common->utf)
      bypass = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
#endif
    OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), common->ctypes);
    OP2(SLJIT_LSHR, TMP2, 0, TMP2, 0, SLJIT_IMM, 4 /* ctype_word */);
    OP2(SLJIT_AND, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
#ifndef COMPILE_PCRE8
    JUMPHERE(bypass);
#elif defined SUPPORT_UTF
    if (bypass != NULL)
      JUMPHERE(bypass);
#endif /* COMPILE_PCRE8 */
    }
  set_jumps(no_next_char, LABEL());

  /* Compare both classifications; only the flags are kept. */
  OP2(SLJIT_XOR | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
  sljit_emit_fast_return(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
}
4989
|
|
|
|
|
|
|
|
4990
|
|
|
|
|
|
|
static BOOL check_class_ranges(compiler_common *common, const sljit_u8 *bits, BOOL nclass, BOOL invert, jump_list **backtracks)
{
  /* Tries to test the character in TMP1 against a 256-bit class bitmap using
  a few range comparisons instead of a bitmap lookup. The bitmap is scanned
  for the positions where the bit value changes; when there are at most four
  such positions, compare-and-jump code is emitted and every jump taken for a
  rejected character is appended to *backtracks. Returns TRUE when code was
  emitted (or none was needed) and FALSE, with nothing emitted, when the class
  has too many boundaries. May destroy TMP1. */
  DEFINE_COMPILER;
  int ranges[MAX_RANGE_SIZE];
  sljit_u8 state, sample, uniform;
  int pos, index, count = 0;

  state = bits[0] & 0x1;
  /* A bitmap byte whose bits all equal the current state. */
  uniform = state ? 0xff : 0x00;

  /* Collect the positions where the bit value flips. */
  pos = 0;
  while (pos < 256)
    {
    index = pos >> 3;
    if ((pos & 0x7) == 0 && bits[index] == uniform)
      {
      /* The whole byte continues the current run. */
      pos += 8;
      continue;
      }

    sample = (bits[index] >> (pos & 0x7)) & 0x1;
    if (sample != state)
      {
      if (count >= MAX_RANGE_SIZE)
        return FALSE;
      ranges[count] = pos;
      count++;
      state = sample;
      uniform = sample ? 0xff : 0x00;
      }
    pos++;
    }

  /* Depending on nclass, the value of the last run adds a boundary at 256. */
  if ((state == 0 && nclass) || (state == 1 && !nclass))
    {
    if (count >= MAX_RANGE_SIZE)
      return FALSE;
    ranges[count] = 256;
    count++;
    }

  if (count < 0 || count > 4)
    return FALSE;

  /* From here on "state" is the value of the run starting at 0, as seen
  after the optional inversion. */
  state = bits[0] & 0x1;
  if (invert) state ^= 0x1;

  if (count == 0)
    {
    /* No boundary at all: either nothing is accepted or everything is. */
    if (state == 0)
      add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    return TRUE;
    }

  switch(count)
    {
    case 1:
    add_jump(compiler, backtracks, CMP(state == 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
    return TRUE;

    case 2:
    if (ranges[0] + 1 == ranges[1])
      {
      /* A single character. */
      add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
      }
    else
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
      add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
      }
    return TRUE;

    case 3:
    if (state != 0)
      {
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
      if (ranges[0] + 1 == ranges[1])
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
      else
        {
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
        add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
        }
      }
    else
      {
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[0]));
      if (ranges[1] + 1 == ranges[2])
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1]));
      else
        {
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1]);
        add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
        }
      }
    return TRUE;

    case 4:
    /* Two runs of equal length whose start positions differ in one bit:
    OR that bit in and test a single run. */
    if ((ranges[1] - ranges[0]) == (ranges[3] - ranges[2])
        && (ranges[0] | (ranges[2] - ranges[0])) == ranges[2]
        && (ranges[1] & (ranges[2] - ranges[0])) == 0
        && is_powerof2(ranges[2] - ranges[0]))
      {
      SLJIT_ASSERT((ranges[0] & (ranges[2] - ranges[0])) == 0 && (ranges[2] & ranges[3] & (ranges[2] - ranges[0])) != 0);
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[0]);
      if (ranges[2] + 1 == ranges[3])
        add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_EQUAL : SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2]));
      else
        {
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2]);
        add_jump(compiler, backtracks, CMP(state != 0 ? SLJIT_LESS : SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
        }
      return TRUE;
      }

    if (state != 0)
      {
      /* "pos" tracks how much has already been subtracted from TMP1. */
      pos = 0;
      if (ranges[0] + 1 == ranges[1])
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[0]));
      else
        {
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
        add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
        pos = ranges[0];
        }

      if (ranges[2] + 1 == ranges[3])
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[2] - pos));
      else
        {
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[2] - pos);
        add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[2]));
        }
      }
    else
      {
      OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[0]);
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, ranges[3] - ranges[0]));
      if (ranges[1] + 1 == ranges[2])
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]));
      else
        {
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, ranges[1] - ranges[0]);
        add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, ranges[2] - ranges[1]));
        }
      }
    return TRUE;

    default:
    SLJIT_UNREACHABLE();
    return FALSE;
    }
}
5141
|
|
|
|
|
|
|
|
5142
|
|
|
|
|
|
|
static void check_anynewline(compiler_common *common)
{
  /* Emits a fast-call routine that tests the character in TMP1 against
  0x0a..0x0d and 0x85, plus 0x2028/0x2029 when wide characters are possible.
  TMP2 collects the result (non-zero on a hit) and the final OR also sets the
  zero flag accordingly. TMP1 and TMP2 are destroyed. */
  DEFINE_COMPILER;

  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

  /* Rebase to 0x0a so that 0x0a..0x0d becomes one unsigned range test. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
  if (common->utf)
    {
#endif
    OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
    /* Setting bit 0 folds 0x2028 onto 0x2029, so one compare covers both. */
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#ifdef COMPILE_PCRE8
    }
#endif
#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
  /* Merge the last pending comparison and expose the result in the flags. */
  OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
5168
|
|
|
|
|
|
|
|
5169
|
|
|
|
|
|
|
static void check_hspace(compiler_common *common)
{
  /* Emits a fast-call routine that tests whether the character in TMP1 is a
  horizontal space: 0x09, 0x20 or 0xa0, plus 0x1680, 0x180e, 0x2000..0x200a,
  0x202f, 0x205f and 0x3000 when wide characters are possible. TMP2 collects
  the result (non-zero on a hit) and the final OR also sets the zero flag
  accordingly. TMP2 is destroyed, and so is TMP1 in the wide-character case. */
  DEFINE_COMPILER;

  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x09);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x20);
  OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xa0);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
  if (common->utf)
    {
#endif
    OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x1680);
    OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e);
    OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
    /* Rebase to 0x2000: one range test covers 0x2000..0x200a, and the
    remaining constants are compared relative to the same base. */
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x2000);
    OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x200A - 0x2000);
    OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x202f - 0x2000);
    OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x205f - 0x2000);
    OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x3000 - 0x2000);
#ifdef COMPILE_PCRE8
    }
#endif
#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
  /* Merge the last pending comparison and expose the result in the flags. */
  OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
5207
|
|
|
|
|
|
|
|
5208
|
|
|
|
|
|
|
static void check_vspace(compiler_common *common)
{
  /* Emits a fast-call routine that tests whether the character in TMP1 is a
  vertical space: 0x0a..0x0d or 0x85, plus 0x2028/0x2029 when wide characters
  are possible. TMP2 collects the result (non-zero on a hit) and the final OR
  also sets the zero flag accordingly. TMP1 and TMP2 are destroyed. */
  DEFINE_COMPILER;

  sljit_emit_fast_enter(compiler, RETURN_ADDR, 0);

  /* Rebase to 0x0a so that 0x0a..0x0d becomes one unsigned range test. */
  OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x0a);
  OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x0d - 0x0a);
  OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x0a);
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
#ifdef COMPILE_PCRE8
  if (common->utf)
    {
#endif
    OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
    /* Setting bit 0 folds 0x2028 onto 0x2029, so one compare covers both. */
    OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, 0x1);
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2029 - 0x0a);
#ifdef COMPILE_PCRE8
    }
#endif
#endif /* SUPPORT_UTF || COMPILE_PCRE16 || COMPILE_PCRE32 */
  /* Merge the last pending comparison and expose the result in the flags. */
  OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);

  sljit_emit_fast_return(compiler, RETURN_ADDR, 0);
}
5235
|
|
|
|
|
|
|
|
5236
|
|
|
|
|
|
|
static void do_casefulcmp(compiler_common *common)
{
  /* Emits a fast-call routine that compares two sequences unit by unit: one
  addressed by TMP1, the other by STR_PTR after it has been moved back by
  TMP2. TMP2 is the remaining length and is decreased by IN_UCHARS(1) per
  unit. The loop is left on the first differing unit or when TMP2 reaches
  zero. The return address is parked in LOCALS0 and reloaded into TMP1 for
  the return. */
  DEFINE_COMPILER;
  struct sljit_jump *mismatch;
  struct sljit_label *next_unit;
  int char1_reg;
  int char2_reg;
  /* 1: post-update loads, 2: pre-update loads, 0: plain loads + adds. */
  int addressing;

  /* Pick two scratch registers. When TMP3 has no register index, STR_END
  and STACK_TOP are used instead and their contents are saved/restored. */
  if (sljit_get_register_index(TMP3) < 0)
    {
    char1_reg = STR_END;
    char2_reg = STACK_TOP;
    }
  else
    {
    char1_reg = TMP3;
    char2_reg = RETURN_ADDR;
    }

  sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

  if (char1_reg == STR_END)
    {
    OP1(SLJIT_MOV, TMP3, 0, char1_reg, 0);
    OP1(SLJIT_MOV, RETURN_ADDR, 0, char2_reg, 0);
    }

  /* Probe (SLJIT_MEM_SUPP) which update-addressing form can be used. */
  if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
    addressing = 1;
  else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
    addressing = 2;
  else
    addressing = 0;

  if (addressing == 2)
    {
    /* Pre-update loads advance first, so start one unit early. */
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }

  next_unit = LABEL();
  if (addressing == 1)
    {
    sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
    sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    }
  else if (addressing == 2)
    {
    sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
    sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    }
  else
    {
    OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
    OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }

  mismatch = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPTO(SLJIT_NOT_ZERO, next_unit);

  JUMPHERE(mismatch);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

  if (addressing == 2)
    {
    /* Undo the one-unit bias applied before the loop. */
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }

  if (char1_reg == STR_END)
    {
    OP1(SLJIT_MOV, char1_reg, 0, TMP3, 0);
    OP1(SLJIT_MOV, char2_reg, 0, RETURN_ADDR, 0);
    }

  sljit_emit_fast_return(compiler, TMP1, 0);
}
5315
|
|
|
|
|
|
|
|
5316
|
|
|
|
|
|
|
static void do_caselesscmp(compiler_common *common)
{
  /* Emits a fast-call routine that compares two sequences unit by unit after
  mapping each unit through the common->lcc table (units above 255 are left
  unmapped in the non-8-bit builds). One sequence is addressed by TMP1, the
  other by STR_PTR after it has been moved back by TMP2; TMP2 is the
  remaining length. The loop is left on the first differing unit or when
  TMP2 reaches zero. The return address is parked in LOCALS0 and the original
  STR_END in LOCALS1. */
  enum { PLAIN_LOADS = 0, POST_UPDATE = 1, PRE_UPDATE = 2 };
  DEFINE_COMPILER;
  struct sljit_jump *branch;
  struct sljit_label *next_unit;
  int char1_reg = STR_END;
  int char2_reg;
  int lcc_table;
  int addressing = PLAIN_LOADS;

  /* When TMP3 has no register index, STACK_TOP and STACK_LIMIT are used
  instead and their contents are saved/restored. */
  if (sljit_get_register_index(TMP3) < 0)
    {
    char2_reg = STACK_TOP;
    lcc_table = STACK_LIMIT;
    }
  else
    {
    char2_reg = RETURN_ADDR;
    lcc_table = TMP3;
    }

  /* Probe (SLJIT_MEM_SUPP) which update-addressing form can be used. */
  if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
    addressing = POST_UPDATE;
  else if (sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_SUPP | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1)) == SLJIT_SUCCESS)
    addressing = PRE_UPDATE;

  sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, TMP2, 0);

  /* char1_reg is STR_END: keep its value in LOCALS1 for the duration. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, char1_reg, 0);

  if (char2_reg == STACK_TOP)
    {
    OP1(SLJIT_MOV, TMP3, 0, char2_reg, 0);
    OP1(SLJIT_MOV, RETURN_ADDR, 0, lcc_table, 0);
    }

  OP1(SLJIT_MOV, lcc_table, 0, SLJIT_IMM, common->lcc);

  if (addressing == PRE_UPDATE)
    {
    /* Pre-update loads advance first, so start one unit early. */
    OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }

  next_unit = LABEL();
  if (addressing == POST_UPDATE)
    {
    sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
    sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_POST, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    }
  else if (addressing == PRE_UPDATE)
    {
    sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char1_reg, SLJIT_MEM1(TMP1), IN_UCHARS(1));
    sljit_emit_mem(compiler, MOV_UCHAR | SLJIT_MEM_PRE, char2_reg, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
    }
  else
    {
    /* STR_PTR is advanced later, after the table lookups. */
    OP1(MOV_UCHAR, char1_reg, 0, SLJIT_MEM1(TMP1), 0);
    OP1(MOV_UCHAR, char2_reg, 0, SLJIT_MEM1(STR_PTR), 0);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, IN_UCHARS(1));
    }

  /* Map both units through the table; values above 255 bypass the lookup. */
#ifndef COMPILE_PCRE8
  branch = CMP(SLJIT_GREATER, char1_reg, 0, SLJIT_IMM, 255);
#endif
  OP1(SLJIT_MOV_U8, char1_reg, 0, SLJIT_MEM2(lcc_table, char1_reg), 0);
#ifndef COMPILE_PCRE8
  JUMPHERE(branch);
  branch = CMP(SLJIT_GREATER, char2_reg, 0, SLJIT_IMM, 255);
#endif
  OP1(SLJIT_MOV_U8, char2_reg, 0, SLJIT_MEM2(lcc_table, char2_reg), 0);
#ifndef COMPILE_PCRE8
  JUMPHERE(branch);
#endif

  if (addressing == PLAIN_LOADS)
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

  branch = CMP(SLJIT_NOT_EQUAL, char1_reg, 0, char2_reg, 0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, IN_UCHARS(1));
  JUMPTO(SLJIT_NOT_ZERO, next_unit);

  JUMPHERE(branch);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

  if (addressing == PRE_UPDATE)
    {
    /* Undo the one-unit bias applied before the loop. */
    OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
    }

  if (char2_reg == STACK_TOP)
    {
    OP1(SLJIT_MOV, char2_reg, 0, TMP3, 0);
    OP1(SLJIT_MOV, lcc_table, 0, RETURN_ADDR, 0);
    }

  OP1(SLJIT_MOV, char1_reg, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
  sljit_emit_fast_return(compiler, TMP1, 0);
}
5413
|
|
|
|
|
|
|
|
5414
|
|
|
|
|
|
|
#if defined SUPPORT_UTF && defined SUPPORT_UCP |
5415
|
|
|
|
|
|
|
|
5416
|
|
|
|
|
|
|
static const pcre_uchar * SLJIT_FUNC do_utf_caselesscmp(pcre_uchar *src1, pcre_uchar *src2, pcre_uchar *end1, pcre_uchar *end2)
{
  /* Caseless comparison of [src1, end1) against the text starting at src2,
  one decoded character at a time. Done in C because it would be ineffective
  at JIT level.

  Returns:
    src2 advanced past the compared characters when all of [src1, end1) matched,
    NULL when a character has no caseless match,
    (pcre_uchar*)1 when src2 reaches end2 before src1 reaches end1. */
  sljit_u32 lhs, rhs;
  const ucd_record *record;
  const sljit_u32 *caseset;

  for (; src1 < end1; )
    {
    if (src2 >= end2)
      return (pcre_uchar*)1;

    GETCHARINC(lhs, src1);
    GETCHARINC(rhs, src2);
    record = GET_UCD(rhs);

    /* Identical, or lhs is the simple other case of rhs. */
    if (lhs == rhs || lhs == rhs + record->other_case)
      continue;

    /* Otherwise lhs must occur in the caseless set of rhs. The search stops
    with a mismatch at the first set entry greater than lhs. */
    caseset = PRIV(ucd_caseless_sets) + record->caseset;
    while (lhs != *caseset)
      {
      if (lhs < *caseset) return NULL;
      caseset++;
      }
    }
  return src2;
}
5442
|
|
|
|
|
|
|
|
5443
|
|
|
|
|
|
|
#endif /* SUPPORT_UTF && SUPPORT_UCP */ |
5444
|
|
|
|
|
|
|
|
5445
|
|
|
|
|
|
|
static pcre_uchar *byte_sequence_compare(compiler_common *common, BOOL caseless, pcre_uchar *cc, |
5446
|
|
|
|
|
|
|
compare_context *context, jump_list **backtracks) |
5447
|
|
|
|
|
|
|
{ |
5448
|
|
|
|
|
|
|
DEFINE_COMPILER; |
5449
|
|
|
|
|
|
|
unsigned int othercasebit = 0; |
5450
|
|
|
|
|
|
|
pcre_uchar *othercasechar = NULL; |
5451
|
|
|
|
|
|
|
#ifdef SUPPORT_UTF |
5452
|
|
|
|
|
|
|
int utflength; |
5453
|
|
|
|
|
|
|
#endif |
5454
|
|
|
|
|
|
|
|
5455
|
|
|
|
|
|
|
if (caseless && char_has_othercase(common, cc)) |
5456
|
|
|
|
|
|
|
{ |
5457
|
|
|
|
|
|
|
othercasebit = char_get_othercase_bit(common, cc); |
5458
|
|
|
|
|
|
|
SLJIT_ASSERT(othercasebit); |
5459
|
|
|
|
|
|
|
/* Extracting bit difference info. */ |
5460
|
|
|
|
|
|
|
#if defined COMPILE_PCRE8 |
5461
|
|
|
|
|
|
|
othercasechar = cc + (othercasebit >> 8); |
5462
|
|
|
|
|
|
|
othercasebit &= 0xff; |
5463
|
|
|
|
|
|
|
#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
5464
|
|
|
|
|
|
|
/* Note that this code only handles characters in the BMP. If there |
5465
|
|
|
|
|
|
|
ever are characters outside the BMP whose othercase differs in only one |
5466
|
|
|
|
|
|
|
bit from itself (there currently are none), this code will need to be |
5467
|
|
|
|
|
|
|
revised for COMPILE_PCRE32. */ |
5468
|
|
|
|
|
|
|
othercasechar = cc + (othercasebit >> 9); |
5469
|
|
|
|
|
|
|
if ((othercasebit & 0x100) != 0) |
5470
|
|
|
|
|
|
|
othercasebit = (othercasebit & 0xff) << 8; |
5471
|
|
|
|
|
|
|
else |
5472
|
|
|
|
|
|
|
othercasebit &= 0xff; |
5473
|
|
|
|
|
|
|
#endif /* COMPILE_PCRE[8|16|32] */ |
5474
|
|
|
|
|
|
|
} |
5475
|
|
|
|
|
|
|
|
5476
|
|
|
|
|
|
|
if (context->sourcereg == -1) |
5477
|
|
|
|
|
|
|
{ |
5478
|
|
|
|
|
|
|
#if defined COMPILE_PCRE8 |
5479
|
|
|
|
|
|
|
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED |
5480
|
|
|
|
|
|
|
if (context->length >= 4) |
5481
|
|
|
|
|
|
|
OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); |
5482
|
|
|
|
|
|
|
else if (context->length >= 2) |
5483
|
|
|
|
|
|
|
OP1(SLJIT_MOV_U16, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); |
5484
|
|
|
|
|
|
|
else |
5485
|
|
|
|
|
|
|
#endif |
5486
|
|
|
|
|
|
|
OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); |
5487
|
|
|
|
|
|
|
#elif defined COMPILE_PCRE16 |
5488
|
|
|
|
|
|
|
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED |
5489
|
|
|
|
|
|
|
if (context->length >= 4) |
5490
|
|
|
|
|
|
|
OP1(SLJIT_MOV_S32, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); |
5491
|
|
|
|
|
|
|
else |
5492
|
|
|
|
|
|
|
#endif |
5493
|
|
|
|
|
|
|
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); |
5494
|
|
|
|
|
|
|
#elif defined COMPILE_PCRE32 |
5495
|
|
|
|
|
|
|
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), -context->length); |
5496
|
|
|
|
|
|
|
#endif /* COMPILE_PCRE[8|16|32] */ |
5497
|
|
|
|
|
|
|
context->sourcereg = TMP2; |
5498
|
|
|
|
|
|
|
} |
5499
|
|
|
|
|
|
|
|
5500
|
|
|
|
|
|
|
#ifdef SUPPORT_UTF |
5501
|
|
|
|
|
|
|
utflength = 1; |
5502
|
|
|
|
|
|
|
if (common->utf && HAS_EXTRALEN(*cc)) |
5503
|
|
|
|
|
|
|
utflength += GET_EXTRALEN(*cc); |
5504
|
|
|
|
|
|
|
|
5505
|
|
|
|
|
|
|
do |
5506
|
|
|
|
|
|
|
{ |
5507
|
|
|
|
|
|
|
#endif |
5508
|
|
|
|
|
|
|
|
5509
|
|
|
|
|
|
|
context->length -= IN_UCHARS(1); |
5510
|
|
|
|
|
|
|
#if (defined SLJIT_UNALIGNED && SLJIT_UNALIGNED) && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16) |
5511
|
|
|
|
|
|
|
|
5512
|
|
|
|
|
|
|
/* Unaligned read is supported. */ |
5513
|
|
|
|
|
|
|
if (othercasebit != 0 && othercasechar == cc) |
5514
|
|
|
|
|
|
|
{ |
5515
|
|
|
|
|
|
|
context->c.asuchars[context->ucharptr] = *cc | othercasebit; |
5516
|
|
|
|
|
|
|
context->oc.asuchars[context->ucharptr] = othercasebit; |
5517
|
|
|
|
|
|
|
} |
5518
|
|
|
|
|
|
|
else |
5519
|
|
|
|
|
|
|
{ |
5520
|
|
|
|
|
|
|
context->c.asuchars[context->ucharptr] = *cc; |
5521
|
|
|
|
|
|
|
context->oc.asuchars[context->ucharptr] = 0; |
5522
|
|
|
|
|
|
|
} |
5523
|
|
|
|
|
|
|
context->ucharptr++; |
5524
|
|
|
|
|
|
|
|
5525
|
|
|
|
|
|
|
#if defined COMPILE_PCRE8 |
5526
|
|
|
|
|
|
|
if (context->ucharptr >= 4 || context->length == 0 || (context->ucharptr == 2 && context->length == 1)) |
5527
|
|
|
|
|
|
|
#else |
5528
|
|
|
|
|
|
|
if (context->ucharptr >= 2 || context->length == 0) |
5529
|
|
|
|
|
|
|
#endif |
5530
|
|
|
|
|
|
|
{ |
5531
|
|
|
|
|
|
|
if (context->length >= 4) |
5532
|
|
|
|
|
|
|
OP1(SLJIT_MOV_S32, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); |
5533
|
|
|
|
|
|
|
else if (context->length >= 2) |
5534
|
|
|
|
|
|
|
OP1(SLJIT_MOV_U16, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); |
5535
|
|
|
|
|
|
|
#if defined COMPILE_PCRE8 |
5536
|
|
|
|
|
|
|
else if (context->length >= 1) |
5537
|
|
|
|
|
|
|
OP1(SLJIT_MOV_U8, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); |
5538
|
|
|
|
|
|
|
#endif /* COMPILE_PCRE8 */ |
5539
|
|
|
|
|
|
|
context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1; |
5540
|
|
|
|
|
|
|
|
5541
|
|
|
|
|
|
|
switch(context->ucharptr) |
5542
|
|
|
|
|
|
|
{ |
5543
|
|
|
|
|
|
|
case 4 / sizeof(pcre_uchar): |
5544
|
|
|
|
|
|
|
if (context->oc.asint != 0) |
5545
|
|
|
|
|
|
|
OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asint); |
5546
|
|
|
|
|
|
|
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asint | context->oc.asint)); |
5547
|
|
|
|
|
|
|
break; |
5548
|
|
|
|
|
|
|
|
5549
|
|
|
|
|
|
|
case 2 / sizeof(pcre_uchar): |
5550
|
|
|
|
|
|
|
if (context->oc.asushort != 0) |
5551
|
|
|
|
|
|
|
OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asushort); |
5552
|
|
|
|
|
|
|
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asushort | context->oc.asushort)); |
5553
|
|
|
|
|
|
|
break; |
5554
|
|
|
|
|
|
|
|
5555
|
|
|
|
|
|
|
#ifdef COMPILE_PCRE8 |
5556
|
|
|
|
|
|
|
case 1: |
5557
|
|
|
|
|
|
|
if (context->oc.asbyte != 0) |
5558
|
|
|
|
|
|
|
OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, context->oc.asbyte); |
5559
|
|
|
|
|
|
|
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, context->c.asbyte | context->oc.asbyte)); |
5560
|
|
|
|
|
|
|
break; |
5561
|
|
|
|
|
|
|
#endif |
5562
|
|
|
|
|
|
|
|
5563
|
|
|
|
|
|
|
default: |
5564
|
|
|
|
|
|
|
SLJIT_UNREACHABLE(); |
5565
|
|
|
|
|
|
|
break; |
5566
|
|
|
|
|
|
|
} |
5567
|
|
|
|
|
|
|
context->ucharptr = 0; |
5568
|
|
|
|
|
|
|
} |
5569
|
|
|
|
|
|
|
|
5570
|
|
|
|
|
|
|
#else |
5571
|
|
|
|
|
|
|
|
5572
|
|
|
|
|
|
|
/* Unaligned read is unsupported or in 32 bit mode. */ |
5573
|
|
|
|
|
|
|
if (context->length >= 1) |
5574
|
|
|
|
|
|
|
OP1(MOV_UCHAR, context->sourcereg, 0, SLJIT_MEM1(STR_PTR), -context->length); |
5575
|
|
|
|
|
|
|
|
5576
|
|
|
|
|
|
|
context->sourcereg = context->sourcereg == TMP1 ? TMP2 : TMP1; |
5577
|
|
|
|
|
|
|
|
5578
|
|
|
|
|
|
|
if (othercasebit != 0 && othercasechar == cc) |
5579
|
|
|
|
|
|
|
{ |
5580
|
|
|
|
|
|
|
OP2(SLJIT_OR, context->sourcereg, 0, context->sourcereg, 0, SLJIT_IMM, othercasebit); |
5581
|
|
|
|
|
|
|
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc | othercasebit)); |
5582
|
|
|
|
|
|
|
} |
5583
|
|
|
|
|
|
|
else |
5584
|
|
|
|
|
|
|
add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, context->sourcereg, 0, SLJIT_IMM, *cc)); |
5585
|
|
|
|
|
|
|
|
5586
|
|
|
|
|
|
|
#endif |
5587
|
|
|
|
|
|
|
|
5588
|
|
|
|
|
|
|
cc++; |
5589
|
|
|
|
|
|
|
#ifdef SUPPORT_UTF |
5590
|
|
|
|
|
|
|
utflength--; |
5591
|
|
|
|
|
|
|
} |
5592
|
|
|
|
|
|
|
while (utflength > 0); |
5593
|
|
|
|
|
|
|
#endif |
5594
|
|
|
|
|
|
|
|
5595
|
|
|
|
|
|
|
return cc; |
5596
|
|
|
|
|
|
|
} |
5597
|
|
|
|
|
|
|
|
5598
|
|
|
|
|
|
|
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 |
5599
|
|
|
|
|
|
|
|
5600
|
|
|
|
|
|
|
/* Changes the bias of the type register: typereg holds a value reduced by
typeoffset, and after this macro it is reduced by (value) instead. An ADD or
SUB is emitted only when the bias really changes. The macro relies on the
locals "typereg" and "typeoffset" of the expansion site. It is wrapped in
do { } while (0) so that it behaves as a single statement (e.g. as the body
of an if without braces); it must be followed by a semicolon. */
#define SET_TYPE_OFFSET(value) \
  do \
    { \
    if ((value) != typeoffset) \
      { \
      if ((value) < typeoffset) \
        OP2(SLJIT_ADD, typereg, 0, typereg, 0, SLJIT_IMM, typeoffset - (value)); \
      else \
        OP2(SLJIT_SUB, typereg, 0, typereg, 0, SLJIT_IMM, (value) - typeoffset); \
      } \
    typeoffset = (value); \
    } \
  while (0)
5609
|
|
|
|
|
|
|
|
5610
|
|
|
|
|
|
|
/* Changes the bias of the character in TMP1: TMP1 holds the character
reduced by charoffset, and after this macro it is reduced by (value) instead.
An ADD or SUB is emitted only when the bias really changes. The macro relies
on the local "charoffset" of the expansion site. It is wrapped in
do { } while (0) so that it behaves as a single statement (e.g. as the body
of an if without braces); it must be followed by a semicolon. */
#define SET_CHAR_OFFSET(value) \
  do \
    { \
    if ((value) != charoffset) \
      { \
      if ((value) < charoffset) \
        OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(charoffset - (value))); \
      else \
        OP2(SLJIT_SUB, TMP1, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)((value) - charoffset)); \
      } \
    charoffset = (value); \
    } \
  while (0)
5619
|
|
|
|
|
|
|
|
5620
|
|
|
|
|
|
|
static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr);

/* Emits the code which matches one character against an extended class.

cc points to the class data: cc[0] holds the flag bits (XCL_NOT, XCL_MAP,
XCL_HASPROP). When XCL_MAP is set, a 32 byte bitmap follows. After that comes
a list of XCL_SINGLE, XCL_RANGE and XCL_PROP / XCL_NOTPROP items terminated
by XCL_END.

Jumps taken when the character is not accepted are added to backtracks. For a
class without XCL_NOT, the "accepted" jumps are collected in a local list and
bound to the end of the emitted code. For a class with XCL_NOT, a matching
item jumps to backtracks directly. */
static void compile_xclass_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
{
DEFINE_COMPILER;
jump_list *found = NULL;
jump_list **list = (cc[0] & XCL_NOT) == 0 ? &found : backtracks;
sljit_uw c, charoffset, max = 256, min = READ_CHAR_MAX;
struct sljit_jump *jump = NULL;
pcre_uchar *ccbegin;
int compares, invertcmp, numberofcmps;
#if defined SUPPORT_UTF && (defined COMPILE_PCRE8 || defined COMPILE_PCRE16)
BOOL utf = common->utf;
#endif

#ifdef SUPPORT_UCP
BOOL needstype = FALSE, needsscript = FALSE, needschar = FALSE;
BOOL charsaved = FALSE;
int typereg = TMP1;
const sljit_u32 *other_cases;
sljit_uw typeoffset;
#endif

/* Scanning the necessary info: the number of items (compares), the range
[min, max] of the characters which need to be read, and (with UCP) which
pieces of per-character data the items require. */
cc++;
ccbegin = cc;
compares = 0;
if (cc[-1] & XCL_MAP)
  {
  min = 0;
  cc += 32 / sizeof(pcre_uchar);
  }

while (*cc != XCL_END)
  {
  compares++;
  if (*cc == XCL_SINGLE)
    {
    cc ++;
    GETCHARINCTEST(c, cc);
    if (c > max) max = c;
    if (c < min) min = c;
#ifdef SUPPORT_UCP
    needschar = TRUE;
#endif
    }
  else if (*cc == XCL_RANGE)
    {
    cc ++;
    GETCHARINCTEST(c, cc);
    if (c < min) min = c;
    GETCHARINCTEST(c, cc);
    if (c > max) max = c;
#ifdef SUPPORT_UCP
    needschar = TRUE;
#endif
    }
#ifdef SUPPORT_UCP
  else
    {
    SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
    cc++;
    if (*cc == PT_CLIST)
      {
      /* Only the members of the caseless set can match. */
      other_cases = PRIV(ucd_caseless_sets) + cc[1];
      while (*other_cases != NOTACHAR)
        {
        if (*other_cases > max) max = *other_cases;
        if (*other_cases < min) min = *other_cases;
        other_cases++;
        }
      }
    else
      {
      /* Any other property: the whole character range is required. */
      max = READ_CHAR_MAX;
      min = 0;
      }

    switch(*cc)
      {
      case PT_ANY:
      /* PT_ANY either accepts everything (XCL_PROP) or is ignored
      (XCL_NOTPROP). */
      if (cc[-1] == XCL_PROP)
        {
        compile_char1_matchingpath(common, OP_ALLANY, cc, backtracks, FALSE);
        if (list == backtracks)
          add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
        return;
        }
      break;

      case PT_LAMP:
      case PT_GC:
      case PT_PC:
      case PT_ALNUM:
      needstype = TRUE;
      break;

      case PT_SC:
      needsscript = TRUE;
      break;

      case PT_SPACE:
      case PT_PXSPACE:
      case PT_WORD:
      case PT_PXGRAPH:
      case PT_PXPRINT:
      case PT_PXPUNCT:
      needstype = TRUE;
      needschar = TRUE;
      break;

      case PT_CLIST:
      case PT_UCNC:
      needschar = TRUE;
      break;

      default:
      SLJIT_UNREACHABLE();
      break;
      }
    cc += 2;
    }
#endif
  }
SLJIT_ASSERT(compares > 0);

/* We are not necessarily in UTF mode, even in 8 bit mode. */
cc = ccbegin;
read_char_range(common, min, max, (cc[-1] & XCL_NOT) != 0);

if ((cc[-1] & XCL_HASPROP) == 0)
  {
  if ((cc[-1] & XCL_MAP) != 0)
    {
    /* Characters above 255 skip the bitmap test. */
    jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
    if (!check_class_ranges(common, (const sljit_u8 *)cc, (((const sljit_u8 *)cc)[31] & 0x80) != 0, TRUE, &found))
      {
      /* Generic bitmap test: byte (ch >> 3) of the map, bit (ch & 0x7). */
      OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
      OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
      OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
      OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
      add_jump(compiler, &found, JUMP(SLJIT_NOT_ZERO));
      }

    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(jump);

    cc += 32 / sizeof(pcre_uchar);
    }
  else
    {
    /* No bitmap and no properties: a character outside [min, max] decides
    the result immediately. */
    OP2(SLJIT_SUB, TMP2, 0, TMP1, 0, SLJIT_IMM, min);
    add_jump(compiler, (cc[-1] & XCL_NOT) == 0 ? backtracks : &found, CMP(SLJIT_GREATER, TMP2, 0, SLJIT_IMM, max - min));
    }
  }
else if ((cc[-1] & XCL_MAP) != 0)
  {
  /* The bitmap test destroys TMP1, so the character is saved in
  RETURN_ADDR and restored afterwards. */
  OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);
#ifdef SUPPORT_UCP
  charsaved = TRUE;
#endif
  if (!check_class_ranges(common, (const sljit_u8 *)cc, FALSE, TRUE, list))
    {
#ifdef COMPILE_PCRE8
    jump = NULL;
    if (common->utf)
#endif
      jump = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);

    OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
    OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
    OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
    OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
    add_jump(compiler, list, JUMP(SLJIT_NOT_ZERO));

#ifdef COMPILE_PCRE8
    if (common->utf)
#endif
      JUMPHERE(jump);
    }

  OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
  cc += 32 / sizeof(pcre_uchar);
  }

#ifdef SUPPORT_UCP
if (needstype || needsscript)
  {
  if (needschar && !charsaved)
    OP1(SLJIT_MOV, RETURN_ADDR, 0, TMP1, 0);

#ifdef COMPILE_PCRE32
  if (!common->utf)
    {
    /* Characters above 0x10ffff are replaced by INVALID_UTF_CHAR before
    the table lookup. */
    jump = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0x10ffff + 1);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR);
    JUMPHERE(jump);
    }
#endif

  /* Two stage table lookup (ucd_stage1, then ucd_stage2). The 16 bit value
  read from ucd_stage2 is left in TMP2 and is used below as the index into
  ucd_records. */
  OP2(SLJIT_LSHR, TMP2, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_stage1));
  OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, UCD_BLOCK_MASK);
  OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, UCD_BLOCK_SHIFT);
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, TMP2, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_stage2));
  OP1(SLJIT_MOV_U16, TMP2, 0, SLJIT_MEM2(TMP2, TMP1), 1);

  /* Before anything else, we deal with scripts. */
  if (needsscript)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, script));
    OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);

    ccbegin = cc;

    /* Walk the item list, emitting code only for the PT_SC items. */
    while (*cc != XCL_END)
      {
      if (*cc == XCL_SINGLE)
        {
        cc ++;
        GETCHARINCTEST(c, cc);
        }
      else if (*cc == XCL_RANGE)
        {
        cc ++;
        GETCHARINCTEST(c, cc);
        GETCHARINCTEST(c, cc);
        }
      else
        {
        SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
        cc++;
        if (*cc == PT_SC)
          {
          compares--;
          invertcmp = (compares == 0 && list != backtracks);
          if (cc[-1] == XCL_NOTPROP)
            invertcmp ^= 0x1;
          jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (int)cc[1]);
          add_jump(compiler, compares > 0 ? list : backtracks, jump);
          }
        cc += 2;
        }
      }

    cc = ccbegin;
    }

  if (needschar)
    {
    /* Restore the character saved in RETURN_ADDR. */
    OP1(SLJIT_MOV, TMP1, 0, RETURN_ADDR, 0);
    }

  if (needstype)
    {
    if (!needschar)
      {
      /* The character itself is not needed anymore: TMP1 holds the type. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM2(TMP1, TMP2), 3);
      }
    else
      {
      /* TMP1 keeps the character, so the type goes into RETURN_ADDR. */
      OP2(SLJIT_SHL, TMP2, 0, TMP2, 0, SLJIT_IMM, 3);
      OP1(SLJIT_MOV_U8, RETURN_ADDR, 0, SLJIT_MEM1(TMP2), (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, chartype));
      typereg = RETURN_ADDR;
      }
    }
  }
#endif

/* Generating code. Up to four consecutive XCL_SINGLE / XCL_RANGE tests are
merged: their results are accumulated in TMP2 and a single conditional jump
is emitted for the whole group. */
charoffset = 0;
numberofcmps = 0;
#ifdef SUPPORT_UCP
typeoffset = 0;
#endif

while (*cc != XCL_END)
  {
  compares--;
  /* The last item of a non-negated class jumps to backtracks on failure
  instead of jumping to "found" on success. */
  invertcmp = (compares == 0 && list != backtracks);
  jump = NULL;

  if (*cc == XCL_SINGLE)
    {
    cc ++;
    GETCHARINCTEST(c, cc);

    if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
      {
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
      numberofcmps++;
      }
    else if (numberofcmps > 0)
      {
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      numberofcmps = 0;
      }
    else
      {
      jump = CMP(SLJIT_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      numberofcmps = 0;
      }
    }
  else if (*cc == XCL_RANGE)
    {
    cc ++;
    GETCHARINCTEST(c, cc);
    /* Bias TMP1 by the start of the range, so the range test becomes a
    single unsigned comparison against (end - start). */
    SET_CHAR_OFFSET(c);
    GETCHARINCTEST(c, cc);

    if (numberofcmps < 3 && (*cc == XCL_SINGLE || *cc == XCL_RANGE))
      {
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(numberofcmps == 0 ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
      numberofcmps++;
      }
    else if (numberofcmps > 0)
      {
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      numberofcmps = 0;
      }
    else
      {
      jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, TMP1, 0, SLJIT_IMM, (sljit_sw)(c - charoffset));
      numberofcmps = 0;
      }
    }
#ifdef SUPPORT_UCP
  else
    {
    SLJIT_ASSERT(*cc == XCL_PROP || *cc == XCL_NOTPROP);
    if (*cc == XCL_NOTPROP)
      invertcmp ^= 0x1;
    cc++;
    switch(*cc)
      {
      case PT_ANY:
      if (!invertcmp)
        jump = JUMP(SLJIT_JUMP);
      break;

      case PT_LAMP:
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - typeoffset);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ll - typeoffset);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lt - typeoffset);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_GC:
      c = PRIV(ucp_typerange)[(int)cc[1] * 2];
      SET_TYPE_OFFSET(c);
      jump = CMP(SLJIT_LESS_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, PRIV(ucp_typerange)[(int)cc[1] * 2 + 1] - c);
      break;

      case PT_PC:
      jump = CMP(SLJIT_EQUAL ^ invertcmp, typereg, 0, SLJIT_IMM, (int)cc[1] - typeoffset);
      break;

      case PT_SC:
      /* Scripts were handled (and counted) before this loop, so the
      decrement at the top of the loop is undone. */
      compares++;
      /* Do nothing. */
      break;

      case PT_SPACE:
      case PT_PXSPACE:
      SET_CHAR_OFFSET(9);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd - 0x9);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x85 - 0x9);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x9);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      SET_TYPE_OFFSET(ucp_Zl);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_WORD:
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_UNDERSCORE - charoffset));
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
      /* Fall through. */

      case PT_ALNUM:
      SET_TYPE_OFFSET(ucp_Ll);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Lu - ucp_Ll);
      /* For PT_WORD, TMP2 already holds the result of the underscore test. */
      OP_FLAGS((*cc == PT_ALNUM) ? SLJIT_MOV : SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
      SET_TYPE_OFFSET(ucp_Nd);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_No - ucp_Nd);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_CLIST:
      other_cases = PRIV(ucd_caseless_sets) + cc[1];

      /* At least three characters are required.
         Otherwise this case would be handled by the normal code path. */
      SLJIT_ASSERT(other_cases[0] != NOTACHAR && other_cases[1] != NOTACHAR && other_cases[2] != NOTACHAR);
      SLJIT_ASSERT(other_cases[0] < other_cases[1] && other_cases[1] < other_cases[2]);

      /* Optimizing character pairs, if their difference is power of 2:
      both members of such a pair are matched by setting the differing bit
      and comparing with the greater member. TMP1 is biased by charoffset,
      so the unbiased character has to be rebuilt in TMP2 first when
      charoffset is not zero. */
      if (is_powerof2(other_cases[1] ^ other_cases[0]))
        {
        if (charoffset == 0)
          OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
        else
          {
          OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[1] ^ other_cases[0]);
          }
        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[1]);
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
        other_cases += 2;
        }
      else if (is_powerof2(other_cases[2] ^ other_cases[1]))
        {
        if (charoffset == 0)
          OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
        else
          {
          OP2(SLJIT_ADD, TMP2, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)charoffset);
          /* The mask must be the bit which differs between the second and
          the third member, the same as in the charoffset == 0 case. */
          OP2(SLJIT_OR, TMP2, 0, TMP2, 0, SLJIT_IMM, other_cases[2] ^ other_cases[1]);
          }
        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP2, 0, SLJIT_IMM, other_cases[2]);
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);

        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(other_cases[0] - charoffset));
        OP_FLAGS(SLJIT_OR | ((other_cases[3] == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);

        other_cases += 3;
        }
      else
        {
        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
        }

      /* The remaining members are tested one by one. Only the last test
      sets the zero flag which is consumed by the jump below. */
      while (*other_cases != NOTACHAR)
        {
        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(*other_cases++ - charoffset));
        OP_FLAGS(SLJIT_OR | ((*other_cases == NOTACHAR) ? SLJIT_SET_Z : 0), TMP2, 0, SLJIT_EQUAL);
        }
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_UCNC:
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_DOLLAR_SIGN - charoffset));
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_COMMERCIAL_AT - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);
      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(CHAR_GRAVE_ACCENT - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      SET_CHAR_OFFSET(0xa0);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, (sljit_sw)(0xd7ff - charoffset));
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_LESS_EQUAL);
      SET_CHAR_OFFSET(0);
      OP2(SLJIT_SUB | SLJIT_SET_GREATER_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xe000 - 0);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_GREATER_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      case PT_PXGRAPH:
      /* C and Z groups are the farthest two groups. */
      SET_TYPE_OFFSET(ucp_Ll);
      OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);

      jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);

      /* In case of ucp_Cf, we overwrite the result. */
      SET_CHAR_OFFSET(0x2066);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x180e - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      JUMPHERE(jump);
      jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
      break;

      case PT_PXPRINT:
      /* C and Z groups are the farthest two groups. */
      SET_TYPE_OFFSET(ucp_Ll);
      OP2(SLJIT_SUB | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Ll);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_GREATER);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Ll);
      OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_NOT_EQUAL);

      jump = CMP(SLJIT_NOT_EQUAL, typereg, 0, SLJIT_IMM, ucp_Cf - ucp_Ll);

      /* In case of ucp_Cf, we overwrite the result. */
      SET_CHAR_OFFSET(0x2066);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x2069 - 0x2066);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x061c - 0x2066);
      OP_FLAGS(SLJIT_OR, TMP2, 0, SLJIT_EQUAL);

      JUMPHERE(jump);
      jump = CMP(SLJIT_ZERO ^ invertcmp, TMP2, 0, SLJIT_IMM, 0);
      break;

      case PT_PXPUNCT:
      SET_TYPE_OFFSET(ucp_Sc);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_So - ucp_Sc);
      OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS_EQUAL);

      SET_CHAR_OFFSET(0);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0x7f);
      OP_FLAGS(SLJIT_AND, TMP2, 0, SLJIT_LESS_EQUAL);

      SET_TYPE_OFFSET(ucp_Pc);
      OP2(SLJIT_SUB | SLJIT_SET_LESS_EQUAL, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Ps - ucp_Pc);
      OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_LESS_EQUAL);
      jump = JUMP(SLJIT_NOT_ZERO ^ invertcmp);
      break;

      default:
      SLJIT_UNREACHABLE();
      break;
      }
    cc += 2;
    }
#endif

  if (jump != NULL)
    add_jump(compiler, compares > 0 ? list : backtracks, jump);
  }

if (found != NULL)
  set_jumps(found, LABEL());
}
6175
|
|
|
|
|
|
|
|
6176
|
|
|
|
|
|
|
#undef SET_TYPE_OFFSET |
6177
|
|
|
|
|
|
|
#undef SET_CHAR_OFFSET |
6178
|
|
|
|
|
|
|
|
6179
|
|
|
|
|
|
|
#endif |
6180
|
|
|
|
|
|
|
|
6181
|
|
|
|
|
|
|
/* Emits the matching-path machine code for one "simple assertion" opcode.

Handled opcodes: OP_SOD, OP_SOM, OP_NOT_WORD_BOUNDARY, OP_WORD_BOUNDARY,
OP_EODN, OP_EOD, OP_DOLL, OP_DOLLM, OP_CIRC, OP_CIRCM and OP_REVERSE.

Arguments:
  common      compiler state (newline settings, mode, utf flag, shared jump lists)
  type        the opcode to generate code for
  cc          position of the opcode's operand; only OP_REVERSE reads it (GET(cc, 0))
  backtracks  list that receives every jump taken when the assertion fails

Returns: cc for every opcode except OP_REVERSE, which returns cc + LINK_SIZE.

The order of the emitted instructions is significant; host-side conditions only
select which instruction sequence is generated. */
static pcre_uchar *compile_simple_assertion_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks)
{
  DEFINE_COMPILER;
  /* The two code units of a fixed newline that is stored as a value > 255. */
  const int nl_first = (common->newline >> 8) & 0xff;
  const int nl_second = common->newline & 0xff;
  int back_count;
  struct sljit_jump *jmp[4];
#ifdef SUPPORT_UTF
  struct sljit_label *loop;
#endif /* SUPPORT_UTF */

  switch (type)
  {
    case OP_SOD:
    case OP_SOM:
      /* Fail unless STR_PTR equals a pointer stored in jit_arguments:
         the 'begin' field for OP_SOD, the 'str' field for OP_SOM. */
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
      if (type == OP_SOD)
      {
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
      }
      else
      {
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
      }
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, TMP1, 0));
      return cc;

    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
      /* Call the shared word-boundary routine, then branch on the zero flag it leaves. */
      add_jump(compiler, &common->wordboundary, JUMP(SLJIT_FAST_CALL));
      sljit_set_current_flags(compiler, SLJIT_SET_Z);
      if (type == OP_NOT_WORD_BOUNDARY)
      {
        add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
      }
      else
      {
        add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
      }
      return cc;

    case OP_EODN:
      /* Requires rather complex checks. jmp[0] skips all newline tests when
         STR_PTR has already reached STR_END. */
      jmp[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
      if (common->nltype == NLTYPE_FIXED)
      {
        if (common->newline > 255)
        {
          /* Two-unit fixed newline: TMP2 = STR_PTR + 2 units, TMP1 = first unit. */
          OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
          OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
          if (common->mode == JIT_COMPILE)
          {
            add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
          }
          else
          {
            /* Partial-matching modes: exactly two units left goes on to the
               full comparison below. */
            jmp[1] = CMP(SLJIT_EQUAL, TMP2, 0, STR_END, 0);
            /* TMP2 = (TMP2 < STR_END) | (TMP1 != first newline unit); non-zero fails. */
            OP2(SLJIT_SUB | SLJIT_SET_LESS, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
            OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_LESS);
            OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, nl_first);
            OP_FLAGS(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, SLJIT_NOT_EQUAL);
            add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL));
            check_partial(common, TRUE);
            add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
            JUMPHERE(jmp[1]);
          }
          /* Both units must match the configured newline. */
          OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
          add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, nl_first));
          add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, nl_second));
        }
        else
        {
          /* Single-unit fixed newline: it must be the last unit of the subject. */
          OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
          OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
          add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_END, 0));
          add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline));
        }
      }
      else
      {
        /* Non-fixed newline types. First handle a leading CR. */
        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
        jmp[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
        OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
        OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_GREATER, SLJIT_UNUSED, 0, TMP2, 0, STR_END, 0);
        /* STR_PTR + 2 units is beyond STR_END: accept via jmp[2]. */
        jmp[2] = JUMP(SLJIT_GREATER);
        add_jump(compiler, backtracks, JUMP(SLJIT_NOT_EQUAL) /* LESS */);
        /* Equal: exactly two units remain, so the second one must be NL. */
        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
        jmp[3] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
        add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));

        /* First unit was not CR. */
        JUMPHERE(jmp[1]);
        if (common->nltype == NLTYPE_ANYCRLF)
        {
          OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
          add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, STR_END, 0));
          add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL));
        }
        else
        {
          /* STR_PTR is saved in LOCALS1 around the character read and restored
             once the shared 'anynewline' routine has reported a non-zero result. */
          OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STR_PTR, 0);
          read_char_range(common, common->nlmin, common->nlmax, TRUE);
          add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
          add_jump(compiler, &common->anynewline, JUMP(SLJIT_FAST_CALL));
          sljit_set_current_flags(compiler, SLJIT_SET_Z);
          add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
          OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1);
        }
        JUMPHERE(jmp[2]);
        JUMPHERE(jmp[3]);
      }
      JUMPHERE(jmp[0]);
      check_partial(common, FALSE);
      return cc;

    case OP_EOD:
      /* Fail while any input remains before STR_END. */
      add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
      check_partial(common, FALSE);
      return cc;

    case OP_DOLL:
      /* A non-zero 'noteol' argument makes the assertion fail outright. */
      OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
      OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));

      if (common->endonly)
      {
        /* Same code as OP_EOD. */
        add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0));
        check_partial(common, FALSE);
      }
      else
      {
        /* Reuse the OP_EODN generator. */
        compile_simple_assertion_matchingpath(common, OP_EODN, cc, backtracks);
      }
      return cc;

    case OP_DOLLM:
      /* At STR_END: succeed only when 'noteol' is zero. Otherwise jmp[1]
         continues with the newline test at the current position. */
      jmp[1] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
      OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
      OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, noteol));
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
      check_partial(common, FALSE);
      jmp[0] = JUMP(SLJIT_JUMP);
      JUMPHERE(jmp[1]);

      if (common->nltype == NLTYPE_FIXED && common->newline > 255)
      {
        OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0));
        if (common->mode == JIT_COMPILE)
        {
          add_jump(compiler, backtracks, CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0));
        }
        else
        {
          jmp[1] = CMP(SLJIT_LESS_EQUAL, TMP2, 0, STR_END, 0);
          /* STR_PTR = STR_END - IN_UCHARS(1) */
          add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, nl_first));
          check_partial(common, TRUE);
          add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
          JUMPHERE(jmp[1]);
        }

        /* Both units must match the configured newline. */
        OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(1));
        add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, nl_first));
        add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, nl_second));
      }
      else
      {
        peek_char(common, common->nlmax);
        check_newlinechar(common, common->nltype, backtracks, FALSE);
      }
      JUMPHERE(jmp[0]);
      return cc;

    case OP_CIRC:
      /* Fail when STR_PTR is past 'begin' or when 'notbol' is non-zero. */
      OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0));
      OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
      return cc;

    case OP_CIRCM:
      /* At 'begin': succeed only when 'notbol' is zero. Otherwise jmp[1]
         continues with the test for a newline just before STR_PTR. */
      OP1(SLJIT_MOV, TMP2, 0, ARGUMENTS, 0);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, begin));
      jmp[1] = CMP(SLJIT_GREATER, STR_PTR, 0, TMP1, 0);
      OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(jit_arguments, notbol));
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
      jmp[0] = JUMP(SLJIT_JUMP);
      JUMPHERE(jmp[1]);

      /* Fail when STR_PTR has reached STR_END. */
      add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
      if (common->nltype == NLTYPE_FIXED && common->newline > 255)
      {
        /* The two units before STR_PTR must exist (not before 'begin') and
           match the configured newline. */
        OP2(SLJIT_SUB, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(2));
        add_jump(compiler, backtracks, CMP(SLJIT_LESS, TMP2, 0, TMP1, 0));
        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-2));
        OP1(MOV_UCHAR, TMP2, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
        add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, nl_first));
        add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, nl_second));
      }
      else
      {
        skip_char_back(common);
        read_char_range(common, common->nlmin, common->nlmax, TRUE);
        check_newlinechar(common, common->nltype, backtracks, FALSE);
      }
      JUMPHERE(jmp[0]);
      return cc;

    case OP_REVERSE:
      /* Move STR_PTR backwards by the count stored in the operand. */
      back_count = GET(cc, 0);
      if (back_count == 0)
        return cc + LINK_SIZE;
      OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
#ifdef SUPPORT_UTF
      if (common->utf)
      {
        /* Step back one character per iteration; fail if 'begin' is reached first. */
        OP1(SLJIT_MOV, TMP3, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, back_count);
        loop = LABEL();
        add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP3, 0));
        skip_char_back(common);
        OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, TMP2, 0, SLJIT_IMM, 1);
        JUMPTO(SLJIT_NOT_ZERO, loop);
      }
      else
#endif
      {
        /* Fixed-width units: one subtraction, then fail if we went before 'begin'. */
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, begin));
        OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(back_count));
        add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, TMP1, 0));
      }
      check_start_used_ptr(common);
      return cc + LINK_SIZE;
  }

  /* Every supported opcode returns from inside the switch. */
  SLJIT_UNREACHABLE();
  return cc;
}
6401
|
|
|
|
|
|
|
|
6402
|
|
|
|
|
|
|
/* Emits the matching-path machine code for one single-character opcode.

Handled opcodes: the digit / whitespace / word-character classes and their
negations, OP_ANY, OP_ALLANY, OP_ANYBYTE, OP_PROP / OP_NOTPROP, OP_ANYNL,
OP_HSPACE / OP_VSPACE and their negations, OP_EXTUNI, OP_CHAR(I), OP_NOT(I),
OP_CLASS / OP_NCLASS and OP_XCLASS (several only under the build guards below).

Arguments:
  common         compiler state
  type           the opcode to generate code for
  cc             position of the opcode's operand data
  backtracks     list that receives every jump taken when the match fails
  check_str_ptr  when TRUE, detect_partial_match() is emitted first; the
                 OP_CHAR(I) fast path emits its own STR_END comparison instead

Returns: the position just past the operand data consumed for this opcode.

The order of the emitted instructions is significant; host-side conditions only
select which instruction sequence is generated. */
static pcre_uchar *compile_char1_matchingpath(compiler_common *common, pcre_uchar type, pcre_uchar *cc, jump_list **backtracks, BOOL check_str_ptr)
{
  DEFINE_COMPILER;
  int length;
  unsigned int c, oc, bit;
  compare_context ctx;
  struct sljit_jump *jmp[3];
  jump_list *end_jumps;
#ifdef SUPPORT_UTF
  struct sljit_label *loop;
#ifdef SUPPORT_UCP
  pcre_uchar prop[5];
#endif
#endif /* SUPPORT_UTF */

  switch (type)
  {
    case OP_NOT_DIGIT:
    case OP_DIGIT:
      /* Digits are usually 0-9, so they are worth optimizing. */
      if (check_str_ptr)
        detect_partial_match(common, backtracks);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
      if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
      {
        read_char7_type(common, type == OP_NOT_DIGIT);
      }
      else
#endif
      {
        read_char8_type(common, type == OP_NOT_DIGIT);
      }
      /* Test the ctype_digit bit of the value left in TMP1. */
      OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_digit);
      if (type == OP_DIGIT)
      {
        add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
      }
      else
      {
        add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
      }
      return cc;

    case OP_NOT_WHITESPACE:
    case OP_WHITESPACE:
      if (check_str_ptr)
        detect_partial_match(common, backtracks);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
      if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
      {
        read_char7_type(common, type == OP_NOT_WHITESPACE);
      }
      else
#endif
      {
        read_char8_type(common, type == OP_NOT_WHITESPACE);
      }
      /* Test the ctype_space bit of the value left in TMP1. */
      OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_space);
      if (type == OP_WHITESPACE)
      {
        add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
      }
      else
      {
        add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
      }
      return cc;

    case OP_NOT_WORDCHAR:
    case OP_WORDCHAR:
      if (check_str_ptr)
        detect_partial_match(common, backtracks);
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
      if (common->utf && is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
      {
        read_char7_type(common, type == OP_NOT_WORDCHAR);
      }
      else
#endif
      {
        read_char8_type(common, type == OP_NOT_WORDCHAR);
      }
      /* Test the ctype_word bit of the value left in TMP1. */
      OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, ctype_word);
      if (type == OP_WORDCHAR)
      {
        add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
      }
      else
      {
        add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
      }
      return cc;

    case OP_ANY:
      if (check_str_ptr)
        detect_partial_match(common, backtracks);
      read_char_range(common, common->nlmin, common->nlmax, TRUE);
      if (common->nltype == NLTYPE_FIXED && common->newline > 255)
      {
        /* Two-unit fixed newline: only a character equal to its first unit
           needs the look at the following unit. */
        jmp[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, (common->newline >> 8) & 0xff);
        end_jumps = NULL;
        if (common->mode == JIT_PARTIAL_HARD_COMPILE)
        {
          check_str_end(common, &end_jumps);
        }
        else
        {
          add_jump(compiler, &end_jumps, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
        }

        /* Fail when the next unit completes the newline sequence. */
        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, common->newline & 0xff));
        set_jumps(end_jumps, LABEL());
        JUMPHERE(jmp[0]);
      }
      else
      {
        check_newlinechar(common, common->nltype, backtracks, TRUE);
      }
      return cc;

    case OP_ALLANY:
      if (check_str_ptr)
        detect_partial_match(common, backtracks);
#ifdef SUPPORT_UTF
      if (common->utf)
      {
        /* Consume one code unit, then skip the rest of a multi-unit character. */
        OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
#if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
#if defined COMPILE_PCRE8
        /* Units below 0xc0 need no extra skip; otherwise add the value looked
           up in utf8_table4. */
        jmp[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
        OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#elif defined COMPILE_PCRE16
        /* Units below 0xd800 need no extra skip; a unit whose top six bits are
           0xd800 adds (1 << 1) to STR_PTR. */
        jmp[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xd800);
        OP2(SLJIT_AND, TMP1, 0, TMP1, 0, SLJIT_IMM, 0xfc00);
        OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 0xd800);
        OP_FLAGS(SLJIT_MOV, TMP1, 0, SLJIT_EQUAL);
        OP2(SLJIT_SHL, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
#endif
        JUMPHERE(jmp[0]);
#endif /* COMPILE_PCRE[8|16] */
        return cc;
      }
#endif
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      return cc;

    case OP_ANYBYTE:
      /* Always advances by exactly one code unit. */
      if (check_str_ptr)
        detect_partial_match(common, backtracks);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      return cc;

#ifdef SUPPORT_UTF
#ifdef SUPPORT_UCP
    case OP_NOTPROP:
    case OP_PROP:
      /* Wrap the two operand units in a small XCLASS-style item list and let
         the xclass generator emit the test. */
      prop[0] = XCL_HASPROP;
      if (type == OP_NOTPROP)
        prop[1] = XCL_NOTPROP;
      else
        prop[1] = XCL_PROP;
      prop[2] = cc[0];
      prop[3] = cc[1];
      prop[4] = XCL_END;
      if (check_str_ptr)
        detect_partial_match(common, backtracks);
      compile_xclass_matchingpath(common, prop, backtracks);
      return cc + 2;
#endif
#endif

    case OP_ANYNL:
      if (check_str_ptr)
        detect_partial_match(common, backtracks);
      read_char_range(common, common->bsr_nlmin, common->bsr_nlmax, FALSE);
      jmp[0] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_CR);
      /* We don't need to handle soft partial matching case. */
      end_jumps = NULL;
      if (common->mode == JIT_PARTIAL_HARD_COMPILE)
      {
        check_str_end(common, &end_jumps);
      }
      else
      {
        add_jump(compiler, &end_jumps, CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0));
      }
      /* After CR: also consume a directly following NL. */
      OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
      jmp[1] = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, CHAR_NL);
      OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
      jmp[2] = JUMP(SLJIT_JUMP);
      /* Not CR: test the character against the bsr newline type. */
      JUMPHERE(jmp[0]);
      check_newlinechar(common, common->bsr_nltype, backtracks, FALSE);
      set_jumps(end_jumps, LABEL());
      JUMPHERE(jmp[1]);
      JUMPHERE(jmp[2]);
      return cc;

    case OP_NOT_HSPACE:
    case OP_HSPACE:
      /* Call the shared 'hspace' routine and branch on the zero flag it leaves. */
      if (check_str_ptr)
        detect_partial_match(common, backtracks);
      read_char_range(common, 0x9, 0x3000, type == OP_NOT_HSPACE);
      add_jump(compiler, &common->hspace, JUMP(SLJIT_FAST_CALL));
      sljit_set_current_flags(compiler, SLJIT_SET_Z);
      if (type == OP_NOT_HSPACE)
      {
        add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
      }
      else
      {
        add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
      }
      return cc;

    case OP_NOT_VSPACE:
    case OP_VSPACE:
      /* Call the shared 'vspace' routine and branch on the zero flag it leaves. */
      if (check_str_ptr)
        detect_partial_match(common, backtracks);
      read_char_range(common, 0xa, 0x2029, type == OP_NOT_VSPACE);
      add_jump(compiler, &common->vspace, JUMP(SLJIT_FAST_CALL));
      sljit_set_current_flags(compiler, SLJIT_SET_Z);
      if (type == OP_NOT_VSPACE)
      {
        add_jump(compiler, backtracks, JUMP(SLJIT_NOT_ZERO));
      }
      else
      {
        add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));
      }
      return cc;

#ifdef SUPPORT_UCP
    case OP_EXTUNI:
      if (check_str_ptr)
        detect_partial_match(common, backtracks);
      /* Read the first character and load its 'gbprop' byte from ucd_records. */
      read_char(common);
      add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
      /* Optimize register allocation: use a real register. STACK_TOP is saved
         in LOCALS0 and then holds the previous character's gbprop. */
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
      OP1(SLJIT_MOV_U8, STACK_TOP, 0, SLJIT_MEM2(TMP1, TMP2), 3);

      /* Loop: keep consuming characters while the ucp_gbtable entry of the
         previous gbprop has the bit of the next character's gbprop set. */
      loop = LABEL();
      jmp[0] = CMP(SLJIT_GREATER_EQUAL, STR_PTR, 0, STR_END, 0);
      /* TMP3 remembers the position before the tentative read. */
      OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
      read_char(common);
      add_jump(compiler, &common->getucd, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, (sljit_sw)PRIV(ucd_records) + SLJIT_OFFSETOF(ucd_record, gbprop));
      OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM2(TMP1, TMP2), 3);

      OP2(SLJIT_SHL, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, 2);
      OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(STACK_TOP), (sljit_sw)PRIV(ucp_gbtable));
      OP1(SLJIT_MOV, STACK_TOP, 0, TMP2, 0);
      OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
      OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
      JUMPTO(SLJIT_NOT_ZERO, loop);

      /* Bit not set: undo the last read, then restore STACK_TOP. */
      OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
      JUMPHERE(jmp[0]);
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

      if (common->mode == JIT_PARTIAL_HARD_COMPILE)
      {
        jmp[0] = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0);
        /* Since we successfully read a char above, partial matching must occur. */
        check_partial(common, TRUE);
        JUMPHERE(jmp[0]);
      }
      return cc;
#endif

    case OP_CHAR:
    case OP_CHARI:
      /* length = number of code units the pattern character occupies. */
      length = 1;
#ifdef SUPPORT_UTF
      if (common->utf && HAS_EXTRALEN(*cc)) length += GET_EXTRALEN(*cc);
#endif
      if (common->mode == JIT_COMPILE && check_str_ptr
          && (type == OP_CHAR || !char_has_othercase(common, cc) || char_get_othercase_bit(common, cc) != 0))
      {
        /* Fast path: advance first, do a single STR_END check, then compare
           the units through byte_sequence_compare(). */
        OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(length));
        add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));

        ctx.length = IN_UCHARS(length);
        ctx.sourcereg = -1;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
        ctx.ucharptr = 0;
#endif
        return byte_sequence_compare(common, type == OP_CHARI, cc, &ctx, backtracks);
      }

      if (check_str_ptr)
        detect_partial_match(common, backtracks);
#ifdef SUPPORT_UTF
      if (common->utf)
      {
        GETCHAR(c, cc);
      }
      else
#endif
        c = *cc;

      if (type == OP_CHAR || !char_has_othercase(common, cc))
      {
        /* Only one acceptable value. */
        read_char_range(common, c, c, FALSE);
        add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c));
        return cc + length;
      }

      /* Caseless with two acceptable values: c and its other case. */
      oc = char_othercase(common, c);
      read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, FALSE);
      bit = c ^ oc;
      if (is_powerof2(bit))
      {
        /* The two values differ in a single bit: force it on and compare once. */
        OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
        add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
        return cc + length;
      }
      jmp[0] = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c);
      add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
      JUMPHERE(jmp[0]);
      return cc + length;

    case OP_NOT:
    case OP_NOTI:
      if (check_str_ptr)
        detect_partial_match(common, backtracks);
      length = 1;
#ifdef SUPPORT_UTF
      if (common->utf)
      {
#ifdef COMPILE_PCRE8
        c = *cc;
        if (c < 128)
        {
          /* Pattern character below 128: test the first subject unit
             directly, then skip the whole subject character. */
          OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(STR_PTR), 0);
          if (type == OP_NOT || !char_has_othercase(common, cc))
          {
            add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
          }
          else
          {
            /* Since UTF8 code page is fixed, we know that c is in [a-z] or [A-Z] range. */
            OP2(SLJIT_OR, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x20);
            add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, c | 0x20));
          }
          /* Skip the variable-length character. */
          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));
          jmp[0] = CMP(SLJIT_LESS, TMP1, 0, SLJIT_IMM, 0xc0);
          OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)PRIV(utf8_table4) - 0xc0);
          OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP1, 0);
          JUMPHERE(jmp[0]);
          return cc + 1;
        }
        else
#endif /* COMPILE_PCRE8 */
        {
          GETCHARLEN(c, cc, length);
        }
      }
      else
#endif /* SUPPORT_UTF */
        c = *cc;

      if (type == OP_NOT || !char_has_othercase(common, cc))
      {
        /* Only one rejected value. */
        read_char_range(common, c, c, TRUE);
        add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
      }
      else
      {
        /* Caseless: reject both c and its other case. */
        oc = char_othercase(common, c);
        read_char_range(common, c < oc ? c : oc, c > oc ? c : oc, TRUE);
        bit = c ^ oc;
        if (is_powerof2(bit))
        {
          OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, bit);
          add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c | bit));
        }
        else
        {
          add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, c));
          add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, oc));
        }
      }
      return cc + length;

    case OP_CLASS:
    case OP_NCLASS:
      if (check_str_ptr)
        detect_partial_match(common, backtracks);

#if defined SUPPORT_UTF && defined COMPILE_PCRE8
      /* 'bit' is the largest value that read_char_range() is asked to read
         exactly for this class. */
      bit = 255;
      if (common->utf && is_char7_bitset((const sljit_u8 *)cc, type == OP_NCLASS))
        bit = 127;
      read_char_range(common, 0, bit, type == OP_NCLASS);
#else
      read_char_range(common, 0, 255, type == OP_NCLASS);
#endif

      /* Nothing more to emit when check_class_ranges() handled the class. */
      if (check_class_ranges(common, (const sljit_u8 *)cc, type == OP_NCLASS, FALSE, backtracks))
        return cc + 32 / sizeof(pcre_uchar);

      /* Characters above the bitmap range: OP_CLASS fails on them, OP_NCLASS
         accepts them by jumping over the bitmap test (jmp[0]). */
#if defined SUPPORT_UTF && defined COMPILE_PCRE8
      jmp[0] = NULL;
      if (common->utf)
      {
        jmp[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, bit);
        if (type == OP_CLASS)
        {
          add_jump(compiler, backtracks, jmp[0]);
          jmp[0] = NULL;
        }
      }
#elif !defined COMPILE_PCRE8
      jmp[0] = CMP(SLJIT_GREATER, TMP1, 0, SLJIT_IMM, 255);
      if (type == OP_CLASS)
      {
        add_jump(compiler, backtracks, jmp[0]);
        jmp[0] = NULL;
      }
#endif /* SUPPORT_UTF && COMPILE_PCRE8 */

      /* Bitmap test: byte (TMP1 >> 3) of the map at cc, bit (TMP1 & 7). */
      OP2(SLJIT_AND, TMP2, 0, TMP1, 0, SLJIT_IMM, 0x7);
      OP2(SLJIT_LSHR, TMP1, 0, TMP1, 0, SLJIT_IMM, 3);
      OP1(SLJIT_MOV_U8, TMP1, 0, SLJIT_MEM1(TMP1), (sljit_sw)cc);
      OP2(SLJIT_SHL, TMP2, 0, SLJIT_IMM, 1, TMP2, 0);
      OP2(SLJIT_AND | SLJIT_SET_Z, SLJIT_UNUSED, 0, TMP1, 0, TMP2, 0);
      add_jump(compiler, backtracks, JUMP(SLJIT_ZERO));

#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
      if (jmp[0] != NULL)
        JUMPHERE(jmp[0]);
#endif
      return cc + 32 / sizeof(pcre_uchar);

#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
    case OP_XCLASS:
      /* The item list starts LINK_SIZE units after cc; GET(cc, 0) is used to
         compute the position returned to the caller. */
      if (check_str_ptr)
        detect_partial_match(common, backtracks);
      compile_xclass_matchingpath(common, cc + LINK_SIZE, backtracks);
      return cc + GET(cc, 0) - 1;
#endif
  }

  /* Every supported opcode returns from inside the switch. */
  SLJIT_UNREACHABLE();
  return cc;
}
6790
|
|
|
|
|
|
|
|
6791
|
|
|
|
|
|
|
/* Compiles the item at cc. Consecutive OP_CHAR / OP_CHARI items whose length
is known at compile time are concatenated, so that a single subject length
check covers the whole run; everything else is handed over to
compile_char1_matchingpath(). This function consumes at least one input
character. The return value is the bytecode pointer handed back by
byte_sequence_compare() or compile_char1_matchingpath(). */
static SLJIT_INLINE pcre_uchar *compile_charn_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, jump_list **backtracks)
{
DEFINE_COMPILER;
pcre_uchar *scan = cc;
compare_context context;
int size;

/* First pass: measure the fixed-length run that starts at cc. Only the
local cursor moves, cc itself still points to the first item afterwards. */
context.length = 0;
while (scan < ccend)
  {
  size = 0;

  if (*scan == OP_CHAR)
    {
    size = 1;
#ifdef SUPPORT_UTF
    if (common->utf && HAS_EXTRALEN(scan[1]))
      size += GET_EXTRALEN(scan[1]);
#endif
    }
  else if (*scan == OP_CHARI)
    {
    size = 1;
    /* A caseless character which has an other case but no "othercase bit"
    cannot be part of the run (presumably because the two cases cannot be
    folded together with a single bit mask - confirm against
    char_get_othercase_bit). */
    if (char_has_othercase(common, scan + 1) && char_get_othercase_bit(common, scan + 1) == 0)
      size = 0;
#ifdef SUPPORT_UTF
    else if (common->utf && HAS_EXTRALEN(scan[1]))
      size += GET_EXTRALEN(scan[1]);
#endif
    }

  /* Any other opcode, or a character rejected above, terminates the run. */
  if (size == 0)
    break;

  scan += 1 + size;
  context.length += IN_UCHARS(size);

  /* Stop collecting once the run has grown beyond 128 bytes. */
  if (context.length > 128)
    break;
  }

if (context.length > 0)
  {
  /* We have a fixed-length byte sequence: advance STR_PTR over all of it
  and check the subject end only once. */
  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, context.length);
  add_jump(compiler, backtracks, CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0));

  context.sourcereg = -1;
#if defined SLJIT_UNALIGNED && SLJIT_UNALIGNED
  context.ucharptr = 0;
#endif

  /* byte_sequence_compare() is called once per item until the remaining
  length stored in the context drops to zero. */
  do
    {
    cc = byte_sequence_compare(common, *cc == OP_CHARI, cc + 1, &context, backtracks);
    }
  while (context.length > 0);
  return cc;
  }

/* A non-fixed length character will be checked if length == 0. */
return compile_char1_matchingpath(common, *cc, cc + 1, backtracks, TRUE);
}
6856
|
|
|
|
|
|
|
|
6857
|
|
|
|
|
|
|
/* Forward definitions. */ |
6858
|
|
|
|
|
|
|
static void compile_matchingpath(compiler_common *, pcre_uchar *, pcre_uchar *, backtrack_common *); |
6859
|
|
|
|
|
|
|
static void compile_backtrackingpath(compiler_common *, struct backtrack_common *); |
6860
|
|
|
|
|
|
|
|
6861
|
|
|
|
|
|
|
#define PUSH_BACKTRACK(size, ccstart, error) \ |
6862
|
|
|
|
|
|
|
do \ |
6863
|
|
|
|
|
|
|
{ \ |
6864
|
|
|
|
|
|
|
backtrack = sljit_alloc_memory(compiler, (size)); \ |
6865
|
|
|
|
|
|
|
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \ |
6866
|
|
|
|
|
|
|
return error; \ |
6867
|
|
|
|
|
|
|
memset(backtrack, 0, size); \ |
6868
|
|
|
|
|
|
|
backtrack->prev = parent->top; \ |
6869
|
|
|
|
|
|
|
backtrack->cc = (ccstart); \ |
6870
|
|
|
|
|
|
|
parent->top = backtrack; \ |
6871
|
|
|
|
|
|
|
} \ |
6872
|
|
|
|
|
|
|
while (0) |
6873
|
|
|
|
|
|
|
|
6874
|
|
|
|
|
|
|
#define PUSH_BACKTRACK_NOVALUE(size, ccstart) \ |
6875
|
|
|
|
|
|
|
do \ |
6876
|
|
|
|
|
|
|
{ \ |
6877
|
|
|
|
|
|
|
backtrack = sljit_alloc_memory(compiler, (size)); \ |
6878
|
|
|
|
|
|
|
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \ |
6879
|
|
|
|
|
|
|
return; \ |
6880
|
|
|
|
|
|
|
memset(backtrack, 0, size); \ |
6881
|
|
|
|
|
|
|
backtrack->prev = parent->top; \ |
6882
|
|
|
|
|
|
|
backtrack->cc = (ccstart); \ |
6883
|
|
|
|
|
|
|
parent->top = backtrack; \ |
6884
|
|
|
|
|
|
|
} \ |
6885
|
|
|
|
|
|
|
while (0) |
6886
|
|
|
|
|
|
|
|
6887
|
|
|
|
|
|
|
#define BACKTRACK_AS(type) ((type *)backtrack) |
6888
|
|
|
|
|
|
|
|
6889
|
|
|
|
|
|
|
/* Emits code which walks the name table entries belonging to a duplicated
name reference (OP_DNREF / OP_DNREFI) and leaves the address of the selected
OVECTOR slot in TMP2.

The entries are tested in order; the first one whose OVECTOR start value
differs from OVECTOR(1) is selected (OVECTOR(1) is used as the reference
value of a group that has no start position yet - presumably "unset"). When
none of the earlier entries qualifies, the last entry is selected. In that
case, if 'backtracks' is not NULL and jscript_compat is off, a jump to
'backtracks' is emitted when the last entry equals OVECTOR(1) as well.

TMP1 is overwritten with the value of OVECTOR(1). */
static void compile_dnref_search(compiler_common *common, pcre_uchar *cc, jump_list **backtracks)
{
DEFINE_COMPILER;
pcre_uchar *entry = common->name_table + GET2(cc, 1) * common->name_entry_size;
/* Number of entries tested inside the loop: all of them except the last. */
int remaining = GET2(cc, 1 + IMM2_SIZE) - 1;
jump_list *selected = NULL;
unsigned int slot;

SLJIT_ASSERT(*cc == OP_DNREF || *cc == OP_DNREFI);

OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));

for (; remaining > 0; remaining--)
  {
  slot = GET2(entry, 0) << 1;
  /* The OVECTOR offset goes to TMP2. */
  GET_LOCAL_BASE(TMP2, 0, OVECTOR(slot));
  add_jump(compiler, &selected, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(slot), TMP1, 0));
  entry += common->name_entry_size;
  }

/* Last entry: it is selected unconditionally, only the optional failure
check is emitted for it. */
slot = GET2(entry, 0) << 1;
GET_LOCAL_BASE(TMP2, 0, OVECTOR(slot));
if (backtracks != NULL && !common->jscript_compat)
  add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(slot), TMP1, 0));

set_jumps(selected, LABEL());
}
6918
|
|
|
|
|
|
|
|
6919
|
|
|
|
|
|
|
/* Emits the code which matches one occurrence of a back reference.

cc          points to OP_REF, OP_REFI, OP_DNREF or OP_DNREFI. For the two
            DNREF opcodes TMP2 must already hold the address of the selected
            OVECTOR slot (see compile_dnref_search).
backtracks  receives the jumps taken when the reference does not match.
withchecks  when TRUE, a check for an empty captured string is emitted, and
            for numbered references (unless jscript_compat is set) also a
            check which fails when the group start equals OVECTOR(1).
emptyfail   decides what the empty-string check does: TRUE sends it to
            'backtracks', FALSE lets it fall through to the end of the
            emitted code.

Case sensitivity is derived from the opcode: OP_REFI and OP_DNREFI compare
caselessly, OP_REF and OP_DNREF compare casefully. Earlier the decision was
made with "*cc == OP_REF" / "*cc == OP_REFI" alone, which routed OP_DNREF to
the caseless comparer and kept OP_DNREFI away from the UTF aware comparer,
although that branch already contained the code for the !ref case. */
static void compile_ref_matchingpath(compiler_common *common, pcre_uchar *cc, jump_list **backtracks, BOOL withchecks, BOOL emptyfail)
{
DEFINE_COMPILER;
BOOL ref = (*cc == OP_REF || *cc == OP_REFI);
BOOL caseless = (*cc == OP_REFI || *cc == OP_DNREFI);
int offset = 0;
struct sljit_jump *jump = NULL;
struct sljit_jump *partial;
struct sljit_jump *nopartial;

/* TMP1 = start of the captured string. */
if (ref)
  {
  offset = GET2(cc, 1) << 1;
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
  /* OVECTOR(1) contains the "string begin - 1" constant. */
  if (withchecks && !common->jscript_compat)
    add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
  }
else
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);

#if defined SUPPORT_UTF && defined SUPPORT_UCP
if (common->utf && caseless)
  {
  /* The comparison is done by the do_utf_caselesscmp() C helper. Its four
  arguments are passed in R0 (captured start, already in TMP1), R1 (current
  subject position), R2 (captured end) and R3 (subject end). */
  SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1);
  if (ref)
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    OP1(SLJIT_MOV, SLJIT_R2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));

  /* Empty captured string: start == end. */
  if (withchecks)
    jump = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_R2, 0);

  /* No free saved registers so save data on stack. */
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
  OP1(SLJIT_MOV, SLJIT_R1, 0, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_R3, 0, STR_END, 0);
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW) | SLJIT_ARG4(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_utf_caselesscmp));
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);
  /* The helper's return value becomes the new subject position. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_RETURN_REG, 0);

  if (common->mode == JIT_COMPILE)
    add_jump(compiler, backtracks, CMP(SLJIT_LESS_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1));
  else
    {
    /* Partial matching modes: a return value below 1 is a mismatch, a
    return value of exactly 1 is handled as "the subject ended first". */
    OP2(SLJIT_SUB | SLJIT_SET_Z | SLJIT_SET_LESS, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 1);

    add_jump(compiler, backtracks, JUMP(SLJIT_LESS));

    nopartial = JUMP(SLJIT_NOT_EQUAL);
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(nopartial);
    }
  }
else
#endif /* SUPPORT_UTF && SUPPORT_UCP */
  {
  /* TMP2 = length of the captured string (end - start). */
  if (ref)
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0);
  else
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw), TMP1, 0);

  /* Empty captured string. */
  if (withchecks)
    jump = JUMP(SLJIT_ZERO);

  OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, TMP2, 0);
  partial = CMP(SLJIT_GREATER, STR_PTR, 0, STR_END, 0);
  if (common->mode == JIT_COMPILE)
    add_jump(compiler, backtracks, partial);

  /* The comparer is a shared routine reached with a fast call; a non-zero
  TMP2 afterwards is treated as a mismatch. */
  add_jump(compiler, caseless ? &common->caselesscmp : &common->casefulcmp, JUMP(SLJIT_FAST_CALL));
  add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));

  if (common->mode != JIT_COMPILE)
    {
    nopartial = JUMP(SLJIT_JUMP);
    JUMPHERE(partial);
    /* The reference is longer than the rest of the subject: compare only
    the part which is available. TMP2 -= STR_END - STR_PTR */
    OP2(SLJIT_SUB, TMP2, 0, TMP2, 0, STR_PTR, 0);
    OP2(SLJIT_ADD, TMP2, 0, TMP2, 0, STR_END, 0);
    partial = CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, STR_PTR, 0, STR_END, 0);
    add_jump(compiler, caseless ? &common->caselesscmp : &common->casefulcmp, JUMP(SLJIT_FAST_CALL));
    add_jump(compiler, backtracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    JUMPHERE(partial);
    check_partial(common, FALSE);
    add_jump(compiler, backtracks, JUMP(SLJIT_JUMP));
    JUMPHERE(nopartial);
    }
  }

/* Destination of the empty-string check. */
if (jump != NULL)
  {
  if (emptyfail)
    add_jump(compiler, backtracks, jump);
  else
    JUMPHERE(jump);
  }
}
7019
|
|
|
|
|
|
|
|
7020
|
|
|
|
|
|
|
/* Emits the matching path of a repeated back reference: OP_REF, OP_REFI,
OP_DNREF or OP_DNREFI followed by one of the OP_CR* repeat opcodes.

A ref_iterator_backtrack record is pushed onto 'parent'; its 'matchingpath'
label and 'topbacktracks' list are filled in here. Returns the bytecode
position following the repeat opcode (and its limits), or NULL when the
backtrack record cannot be allocated. */
static SLJIT_INLINE pcre_uchar *compile_ref_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
{
DEFINE_COMPILER;
BOOL by_number = (*cc == OP_REF || *cc == OP_REFI);
backtrack_common *backtrack;
ref_iterator_backtrack *iterator;
jump_list **fail;
pcre_uchar *ccbegin = cc;
pcre_uchar type;
BOOL minimize;
int offset = 0;
/* A 'max' of zero is handled below as "no upper limit". */
int min = 0;
int max = 0;
struct sljit_label *loop;
struct sljit_jump *empty;
struct sljit_jump *bypass = NULL;

PUSH_BACKTRACK(sizeof(ref_iterator_backtrack), cc, NULL);
iterator = BACKTRACK_AS(ref_iterator_backtrack);
fail = &backtrack->topbacktracks;

/* The DNREF opcodes carry one more 2-byte argument than the REF opcodes.
Skipping it here allows the same offsets to be used for both layouts. */
if (by_number)
  offset = GET2(cc, 1) << 1;
else
  cc += IMM2_SIZE;
type = cc[1 + IMM2_SIZE];

/* The lazy variant of every repeat opcode is the odd numbered one. */
SLJIT_COMPILE_ASSERT((OP_CRSTAR & 0x1) == 0, crstar_opcode_must_be_even);
minimize = (type & 0x1) != 0;

switch(type)
  {
  case OP_CRSTAR:
  case OP_CRMINSTAR:
  /* Both limits keep their initial zero value. */
  cc += 1 + IMM2_SIZE + 1;
  break;

  case OP_CRPLUS:
  case OP_CRMINPLUS:
  min = 1;
  cc += 1 + IMM2_SIZE + 1;
  break;

  case OP_CRQUERY:
  case OP_CRMINQUERY:
  max = 1;
  cc += 1 + IMM2_SIZE + 1;
  break;

  case OP_CRRANGE:
  case OP_CRMINRANGE:
  min = GET2(cc, 1 + IMM2_SIZE + 1);
  max = GET2(cc, 1 + IMM2_SIZE + 1 + IMM2_SIZE);
  cc += 1 + IMM2_SIZE + 1 + 2 * IMM2_SIZE;
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }

if (!minimize)
  {
  /* ---- Greedy repeat ---- */
  if (min == 0)
    {
    allocate_stack(common, 2);
    if (by_number)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);
    /* Temporary release of STR_PTR. */
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    /* Handles both invalid and empty cases. Since the minimum repeat,
    is zero the invalid case is basically the same as an empty case. */
    if (by_number)
      empty = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    else
      {
      compile_dnref_search(common, ccbegin, NULL);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      /* The slot address is kept in POSSESSIVE1 for the loop below. */
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
      empty = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
      }
    /* Restore if not zero length. */
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
    }
  else
    {
    allocate_stack(common, 1);
    if (by_number)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    if (by_number)
      {
      /* At least one repeat is required, so a group whose start equals
      OVECTOR(1) is a failure here. */
      add_jump(compiler, fail, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
      empty = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
      }
    else
      {
      compile_dnref_search(common, ccbegin, fail);
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, TMP2, 0);
      empty = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
      }
    }

  /* POSSESSIVE0 is the repeat counter; it is only needed when one of the
  limits is greater than one. */
  if (min > 1 || max > 1)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, 0);

  loop = LABEL();
  if (!by_number)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1);
  compile_ref_matchingpath(common, ccbegin, fail, FALSE, FALSE);

  if (min > 1 || max > 1)
    {
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0);
    OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0);
    /* Below the minimum: repeat without saving a backtrack position. */
    if (min > 1)
      CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, loop);
    if (max > 1)
      {
      /* Below the maximum: save the current position and try again. */
      bypass = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      JUMPTO(SLJIT_JUMP, loop);
      JUMPHERE(bypass);
      }
    }

  if (max == 0)
    {
    /* Includes min > 1 case as well. */
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, loop);
    }

  JUMPHERE(empty);
  iterator->matchingpath = LABEL();

  count_match(common);
  return cc;
  }

/* ---- Lazy repeat ----
Stack layout: STACK(0) is the subject position, STACK(1) the repeat counter
and, for the DNREF opcodes only, STACK(2) the selected OVECTOR slot. */
allocate_stack(common, by_number ? 2 : 3);
if (by_number)
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
if (type != OP_CRMINSTAR)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0);

if (min == 0)
  {
  /* Handles both invalid and empty cases. Since the minimum repeat,
  is zero the invalid case is basically the same as an empty case. */
  if (by_number)
    empty = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
  else
    {
    compile_dnref_search(common, ccbegin, NULL);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
    empty = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
  /* Length is non-zero, we can match real repeats. */
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  /* Zero repeats are acceptable: skip the first match attempt. */
  bypass = JUMP(SLJIT_JUMP);
  }
else
  {
  if (by_number)
    {
    add_jump(compiler, fail, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
    empty = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    }
  else
    {
    compile_dnref_search(common, ccbegin, fail);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0);
    empty = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(TMP2), sizeof(sljit_sw));
    }
  }

iterator->matchingpath = LABEL();
if (max > 0)
  add_jump(compiler, fail, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, max));

if (!by_number)
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
compile_ref_matchingpath(common, ccbegin, fail, TRUE, TRUE);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

if (min > 1)
  {
  /* Keep repeating until the minimum is reached. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
  OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
  CMPTO(SLJIT_LESS, TMP1, 0, SLJIT_IMM, min, iterator->matchingpath);
  }
else if (max > 0)
  OP2(SLJIT_ADD, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 1);

if (bypass != NULL)
  JUMPHERE(bypass);
JUMPHERE(empty);

count_match(common);
return cc;
}
7226
|
|
|
|
|
|
|
|
7227
|
|
|
|
|
|
|
/* Emits the matching path of a recursion item. The target bracket is found
at common->start + GET(cc, 1).

When get_framesize() reports 'no_stack' for the target, its body is compiled
in place (inlined). Otherwise a fast call to a shared routine is emitted; the
routines are tracked in the common->entries list, one recurse_entry per
target, and calls emitted before the routine has a label are collected in
entry->calls.

Returns the bytecode position after the recursion item, or NULL when a
memory allocation fails. */
static SLJIT_INLINE pcre_uchar *compile_recurse_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
recurse_entry *entry;
recurse_entry *last = NULL;
sljit_sw start = GET(cc, 1);
pcre_uchar *start_cc;
BOOL needs_control_head;

PUSH_BACKTRACK(sizeof(recurse_backtrack), cc, NULL);

/* Inlining simple patterns. */
start_cc = common->start + start;
if (get_framesize(common, start_cc, NULL, TRUE, &needs_control_head) == no_stack)
  {
  compile_matchingpath(common, next_opcode(common, start_cc), bracketend(start_cc) - (1 + LINK_SIZE), backtrack);
  BACKTRACK_AS(recurse_backtrack)->inlined_pattern = TRUE;
  return cc + 1 + LINK_SIZE;
  }

/* Look for an existing entry of this target; 'last' follows the tail of
the list so that a new entry can be appended. */
for (entry = common->entries; entry != NULL; entry = entry->next)
  {
  if (entry->start == start)
    break;
  last = entry;
  }

if (entry == NULL)
  {
  entry = sljit_alloc_memory(compiler, sizeof(recurse_entry));
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;
  entry->next = NULL;
  entry->entry = NULL;
  entry->calls = NULL;
  entry->start = start;

  if (last == NULL)
    common->entries = entry;
  else
    last->next = entry;
  }

/* OVECTOR(0) (when has_set_som is set) and the mark pointer (when mark_ptr
is non-zero) are pushed onto the stack before the call. */
if (common->has_set_som && common->mark_ptr != 0)
  {
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
  allocate_stack(common, 2);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
  }
else if (common->has_set_som || common->mark_ptr != 0)
  {
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->has_set_som ? (int)(OVECTOR(0)) : common->mark_ptr);
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  }

/* Call the routine directly when its label is known, otherwise queue the
call on the entry. */
if (entry->entry != NULL)
  JUMPTO(SLJIT_FAST_CALL, entry->entry);
else
  add_jump(compiler, &entry->calls, JUMP(SLJIT_FAST_CALL));

/* Leave if the match is failed. */
add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0));
return cc + 1 + LINK_SIZE;
}
7295
|
|
|
|
|
|
|
|
7296
|
|
|
|
|
|
|
/* Runtime helper called from JIT compiled code to run the user's callout
function.

arguments      the JIT argument block of the current match
callout_block  partially filled callout block, see below
jit_ovector    the JIT's offset vector, which stores pointers

The emitted code (compile_callout_matchingpath) parks two raw pointers in
pointer sized fields of the block: 'subject' holds the saved OVECTOR(0)
value and 'offset_vector' holds the current subject pointer. They are
converted to offsets here BEFORE those fields receive their final values, so
the order of the assignments below matters.

Returns 0 when no callout function is installed, otherwise the value
returned by the callout function. */
static sljit_s32 SLJIT_FUNC do_callout(struct jit_arguments *arguments, PUBL(callout_block) *callout_block, pcre_uchar **jit_ovector)
{
const pcre_uchar *subject_start = arguments->begin;
int *ovector = arguments->offsets;
int ovector_size = arguments->offset_count;
int top = 0;
int i;

if (PUBL(callout) == NULL)
  return 0;

callout_block->version = 2;
callout_block->callout_data = arguments->callout_data;

/* Offsets in subject. */
callout_block->subject_length = arguments->end - arguments->begin;
callout_block->start_match = (pcre_uchar*)callout_block->subject - arguments->begin;
callout_block->current_position = (pcre_uchar*)callout_block->offset_vector - arguments->begin;
#if defined COMPILE_PCRE8
callout_block->subject = (PCRE_SPTR)subject_start;
#elif defined COMPILE_PCRE16
callout_block->subject = (PCRE_SPTR16)subject_start;
#elif defined COMPILE_PCRE32
callout_block->subject = (PCRE_SPTR32)subject_start;
#endif

/* Convert and copy the JIT offset vector to the offset_vector array.
'top' remembers the highest pair whose start pointer is not below the
subject start. */
callout_block->capture_top = 0;
callout_block->offset_vector = ovector;
for (i = 2; i < ovector_size; i += 2)
  {
  ovector[i] = jit_ovector[i] - subject_start;
  ovector[i + 1] = jit_ovector[i + 1] - subject_start;
  if (jit_ovector[i] >= subject_start)
    top = i;
  }
callout_block->capture_top = (top >> 1) + 1;

/* The first pair is always reported as -1, -1. */
if (ovector_size > 0)
  {
  ovector[0] = -1;
  if (ovector_size > 1)
    ovector[1] = -1;
  }

return (*PUBL(callout))(callout_block);
}
7339
|
|
|
|
|
|
|
|
7340
|
|
|
|
|
|
|
/* Aligning to 8 byte. */ |
7341
|
|
|
|
|
|
|
#define CALLOUT_ARG_SIZE \ |
7342
|
|
|
|
|
|
|
(((int)sizeof(PUBL(callout_block)) + 7) & ~7) |
7343
|
|
|
|
|
|
|
|
7344
|
|
|
|
|
|
|
#define CALLOUT_ARG_OFFSET(arg) \ |
7345
|
|
|
|
|
|
|
SLJIT_OFFSETOF(PUBL(callout_block), arg) |
7346
|
|
|
|
|
|
|
|
7347
|
|
|
|
|
|
|
static SLJIT_INLINE pcre_uchar *compile_callout_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) |
7348
|
|
|
|
|
|
|
{ |
7349
|
|
|
|
|
|
|
DEFINE_COMPILER; |
7350
|
|
|
|
|
|
|
backtrack_common *backtrack; |
7351
|
|
|
|
|
|
|
|
7352
|
|
|
|
|
|
|
PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL); |
7353
|
|
|
|
|
|
|
|
7354
|
|
|
|
|
|
|
allocate_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw)); |
7355
|
|
|
|
|
|
|
|
7356
|
|
|
|
|
|
|
SLJIT_ASSERT(common->capture_last_ptr != 0); |
7357
|
|
|
|
|
|
|
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr); |
7358
|
|
|
|
|
|
|
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); |
7359
|
|
|
|
|
|
|
OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(callout_number), SLJIT_IMM, cc[1]); |
7360
|
|
|
|
|
|
|
OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(capture_last), TMP2, 0); |
7361
|
|
|
|
|
|
|
|
7362
|
|
|
|
|
|
|
/* These pointer sized fields temporarly stores internal variables. */ |
7363
|
|
|
|
|
|
|
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)); |
7364
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(offset_vector), STR_PTR, 0); |
7365
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(subject), TMP2, 0); |
7366
|
|
|
|
|
|
|
|
7367
|
|
|
|
|
|
|
if (common->mark_ptr != 0) |
7368
|
|
|
|
|
|
|
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr)); |
7369
|
|
|
|
|
|
|
OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(pattern_position), SLJIT_IMM, GET(cc, 2)); |
7370
|
|
|
|
|
|
|
OP1(SLJIT_MOV_S32, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(next_item_length), SLJIT_IMM, GET(cc, 2 + LINK_SIZE)); |
7371
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), CALLOUT_ARG_OFFSET(mark), (common->mark_ptr != 0) ? TMP2 : SLJIT_IMM, 0); |
7372
|
|
|
|
|
|
|
|
7373
|
|
|
|
|
|
|
/* Needed to save important temporary registers. */ |
7374
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0); |
7375
|
|
|
|
|
|
|
/* SLJIT_R0 = arguments */ |
7376
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_R1, 0, STACK_TOP, 0); |
7377
|
|
|
|
|
|
|
GET_LOCAL_BASE(SLJIT_R2, 0, OVECTOR_START); |
7378
|
|
|
|
|
|
|
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(S32) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW) | SLJIT_ARG3(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_callout)); |
7379
|
|
|
|
|
|
|
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); |
7380
|
|
|
|
|
|
|
free_stack(common, CALLOUT_ARG_SIZE / sizeof(sljit_sw)); |
7381
|
|
|
|
|
|
|
|
7382
|
|
|
|
|
|
|
/* Check return value. */ |
7383
|
|
|
|
|
|
|
OP2(SLJIT_SUB32 | SLJIT_SET_Z | SLJIT_SET_SIG_GREATER, SLJIT_UNUSED, 0, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); |
7384
|
|
|
|
|
|
|
add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_SIG_GREATER32)); |
7385
|
|
|
|
|
|
|
if (common->forced_quit_label == NULL) |
7386
|
|
|
|
|
|
|
add_jump(compiler, &common->forced_quit, JUMP(SLJIT_NOT_EQUAL32) /* SIG_LESS */); |
7387
|
|
|
|
|
|
|
else |
7388
|
|
|
|
|
|
|
JUMPTO(SLJIT_NOT_EQUAL32 /* SIG_LESS */, common->forced_quit_label); |
7389
|
|
|
|
|
|
|
return cc + 2 + 2 * LINK_SIZE; |
7390
|
|
|
|
|
|
|
} |
7391
|
|
|
|
|
|
|
|
7392
|
|
|
|
|
|
|
#undef CALLOUT_ARG_SIZE |
7393
|
|
|
|
|
|
|
#undef CALLOUT_ARG_OFFSET |
7394
|
|
|
|
|
|
|
|
7395
|
|
|
|
|
|
|
/* Scans the byte code starting at cc and reports whether anything other than
the opcodes listed below occurs before the first OP_KET.

Returns FALSE when OP_KET is reached after seeing only word boundary checks,
OP_CIRC / OP_CIRCM, OP_DOLL / OP_DOLLM, OP_CALLOUT and OP_ALT items; returns
TRUE as soon as any other opcode is found. compile_assert_matchingpath uses a
FALSE result to avoid reserving a stack slot for STR_PTR. */
static SLJIT_INLINE BOOL assert_needs_str_ptr_saving(pcre_uchar *cc)
{
for (;;)
  {
  /* Reached the closing ket having seen only skippable items. */
  if (*cc == OP_KET)
    return FALSE;

  switch (*cc)
    {
    case OP_NOT_WORD_BOUNDARY:
    case OP_WORD_BOUNDARY:
    case OP_CIRC:
    case OP_CIRCM:
    case OP_DOLL:
    case OP_DOLLM:
    case OP_CALLOUT:
    case OP_ALT:
    /* Skippable item: fall out of the switch and step over it. */
    break;

    default:
    return TRUE;
    }

  /* Advance by the length recorded for this opcode in the OP_lengths table. */
  cc += PRIV(OP_lengths)[*cc];
  }
}
7420
|
|
|
|
|
|
|
|
7421
|
|
|
|
|
|
|
/* Emits the machine code for the matching path of an assertion group.

Arguments:
  common       shared compiler state; several of its fields (local_exit,
               positive_assert, then_trap, quit_label, accept_label, quit,
               positive_assert_quit, accept) are saved on entry, changed while
               the assertion is compiled, and restored on every return path
  cc           points at the assertion opcode (checked to be in the range
               OP_ASSERT .. OP_ASSERTBACK_NOT), or at an OP_BRAZERO /
               OP_BRAMINZERO that immediately precedes it
  backtrack    assert backtrack record; its framesize and private_data_ptr
               members are filled in here, and matchingpath is set for
               OP_BRAZERO
  conditional  when TRUE the failure jumps are collected in
               backtrack->condfailed instead of backtrack->common.topbacktracks
               (must be FALSE when a BRAZERO / BRAMINZERO prefix is present)

Returns:       cc + 1 + LINK_SIZE, where cc is the first item after the last
               alternative that is not OP_ALT (presumably the closing ket),
               or NULL when the sljit compiler reports an error */
static pcre_uchar *compile_assert_matchingpath(compiler_common *common, pcre_uchar *cc, assert_backtrack *backtrack, BOOL conditional)
{
DEFINE_COMPILER;
int framesize;
int extrasize;
BOOL needs_control_head;
int private_data_ptr;
backtrack_common altbacktrack;
pcre_uchar *ccbegin;
pcre_uchar opcode;
pcre_uchar bra = OP_BRA;
jump_list *tmp = NULL;
/* Jump list that receives the "assertion failed" jumps. */
jump_list **target = (conditional) ? &backtrack->condfailed : &backtrack->common.topbacktracks;
/* Jump list that receives the jump taken when an alternative matched. */
jump_list **found;
/* Saving previous accept variables. */
BOOL save_local_exit = common->local_exit;
BOOL save_positive_assert = common->positive_assert;
then_trap_backtrack *save_then_trap = common->then_trap;
struct sljit_label *save_quit_label = common->quit_label;
struct sljit_label *save_accept_label = common->accept_label;
jump_list *save_quit = common->quit;
jump_list *save_positive_assert_quit = common->positive_assert_quit;
jump_list *save_accept = common->accept;
struct sljit_jump *jump;
struct sljit_jump *brajump = NULL;

/* Assert captures then. */
common->then_trap = NULL;

/* Remember and step over an optional BRAZERO / BRAMINZERO prefix. */
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  SLJIT_ASSERT(!conditional);
  bra = *cc;
  cc++;
  }
private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
backtrack->framesize = framesize;
backtrack->private_data_ptr = private_data_ptr;
opcode = *cc;
SLJIT_ASSERT(opcode >= OP_ASSERT && opcode <= OP_ASSERTBACK_NOT);
/* For positive asserts a successful alternative jumps to the local list tmp
(resolved at "Assert is successful" below); for negative asserts a matching
alternative means the assertion failed, so the jump goes to target. */
found = (opcode == OP_ASSERT || opcode == OP_ASSERTBACK) ? &tmp : target;
/* ccbegin .. cc delimit the first alternative. */
ccbegin = cc;
cc += GET(cc, 1);

if (bra == OP_BRAMINZERO)
  {
  /* This is a braminzero backtrack path. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  /* A popped value of 0 skips the assertion; brajump is resolved at the end. */
  brajump = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if (framesize < 0)
  {
  /* No frame is built. extrasize counts the slots pushed here: one for
  STR_PTR (omitted when a plain OP_BRA assert does not need it) plus one for
  the previous control head when needs_control_head is set. */
  extrasize = 1;
  if (bra == OP_BRA && !assert_needs_str_ptr_saving(ccbegin + 1 + LINK_SIZE))
    extrasize = 0;

  if (needs_control_head)
    extrasize++;

  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  if (extrasize > 0)
    allocate_stack(common, extrasize);

  if (needs_control_head)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);

  if (extrasize > 0)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    SLJIT_ASSERT(extrasize == 2);
    /* Clear the control head for the assertion body and keep the old value
    in slot 1. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);
    }
  }
else
  {
  /* A frame of framesize slots is built. The extra slots hold STR_PTR
  (slot 0), optionally the previous control head (slot 1), and the previous
  content of the private data slot (last extra slot). */
  extrasize = needs_control_head ? 3 : 2;
  allocate_stack(common, framesize + extrasize);

  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
    }
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0);

  init_frame(common, ccbegin, NULL, framesize + extrasize - 1, extrasize, FALSE);
  }

memset(&altbacktrack, 0, sizeof(backtrack_common));
if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
  {
  /* Negative assert is stronger than positive assert. */
  common->local_exit = TRUE;
  common->quit_label = NULL;
  common->quit = NULL;
  common->positive_assert = FALSE;
  }
else
  common->positive_assert = TRUE;
common->positive_assert_quit = NULL;

/* One iteration per alternative of the assertion. */
while (1)
  {
  common->accept_label = NULL;
  common->accept = NULL;
  altbacktrack.top = NULL;
  altbacktrack.topbacktracks = NULL;

  /* Second and later alternatives restart from the saved STR_PTR. */
  if (*ccbegin == OP_ALT && extrasize > 0)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  altbacktrack.cc = ccbegin;
  compile_matchingpath(common, ccbegin + 1 + LINK_SIZE, cc, &altbacktrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Compiler error: restore the saved state of common and give up. */
    if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
      {
      common->local_exit = save_local_exit;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->positive_assert = save_positive_assert;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assert_quit = save_positive_assert_quit;
    common->accept = save_accept;
    return NULL;
    }
  /* Jumps collected in common->accept while compiling this alternative land
  here, where the alternative has matched. */
  common->accept_label = LABEL();
  if (common->accept != NULL)
    set_jumps(common->accept, common->accept_label);

  /* Reset stack. */
  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    else if (extrasize > 0)
      free_stack(common, extrasize);

    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
    }
  else
    {
    if ((opcode != OP_ASSERT_NOT && opcode != OP_ASSERTBACK_NOT) || conditional)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
      }
    else
      {
      /* Unconditional negative assert: reload STACK_TOP from the private data
      slot and call the shared revertframes routine (presumably undoes the
      frame contents - confirm against its definition). */
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      if (needs_control_head)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 2));
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      }
    }

  if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (conditional)
      {
      if (extrasize > 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), needs_control_head ? STACK(-2) : STACK(-1));
      }
    else if (bra == OP_BRAZERO)
      {
      if (framesize < 0)
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));
      else
        {
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-framesize - extrasize));
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
        }
      /* Leave a single 0 entry on the stack for the BRAZERO backtrack path. */
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (framesize >= 0)
      {
      /* For OP_BRA and OP_BRAMINZERO. */
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
      }
    }
  /* The alternative matched: leave through the list selected above. */
  add_jump(compiler, found, JUMP(SLJIT_JUMP));

  compile_backtrackingpath(common, altbacktrack.top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    {
    /* Compiler error: restore the saved state of common and give up. */
    if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
      {
      common->local_exit = save_local_exit;
      common->quit_label = save_quit_label;
      common->quit = save_quit;
      }
    common->positive_assert = save_positive_assert;
    common->then_trap = save_then_trap;
    common->accept_label = save_accept_label;
    common->positive_assert_quit = save_positive_assert_quit;
    common->accept = save_accept;
    return NULL;
    }
  /* Failures of this alternative continue here, i.e. at the next alternative
  or at the "none matched" code after the loop. */
  set_jumps(altbacktrack.topbacktracks, LABEL());

  if (*cc != OP_ALT)
    break;

  /* Advance to the next alternative. */
  ccbegin = cc;
  cc += GET(cc, 1);
  }

if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
  {
  SLJIT_ASSERT(common->positive_assert_quit == NULL);
  /* Makes the check less complicated below. */
  common->positive_assert_quit = common->quit;
  }

/* None of them matched. */
if (common->positive_assert_quit != NULL)
  {
  /* The normal fall-through path jumps over this block; only the collected
  quit jumps land inside it and recompute STACK_TOP from the private data
  slot. */
  jump = JUMP(SLJIT_JUMP);
  set_jumps(common->positive_assert_quit, LABEL());
  SLJIT_ASSERT(framesize != no_stack);
  if (framesize < 0)
    OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, extrasize * sizeof(sljit_sw));
  else
    {
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, (framesize + extrasize) * sizeof(sljit_sw));
    }
  JUMPHERE(jump);
  }

/* Put back the control head value that was saved in slot 1. */
if (needs_control_head)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(1));

if (opcode == OP_ASSERT || opcode == OP_ASSERTBACK)
  {
  /* Assert is failed. */
  if ((conditional && extrasize > 0) || bra == OP_BRAZERO)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  if (framesize < 0)
    {
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      if (extrasize == 2)
        free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    /* TMP1 = previous private data value, stored in the last extra slot. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra == OP_BRAZERO)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }
  /* With BRAZERO a failed assert still continues on the matching path (the
  jump is bound to backtrack->matchingpath below); otherwise it is a failure
  and goes to target. */
  jump = JUMP(SLJIT_JUMP);
  if (bra != OP_BRAZERO)
    add_jump(compiler, target, jump);

  /* Assert is successful. */
  set_jumps(tmp, LABEL());
  if (framesize < 0)
    {
    /* We know that STR_PTR was stored on the top of the stack. */
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize));

    /* Keep the STR_PTR on the top of the stack. */
    if (bra == OP_BRAZERO)
      {
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      if (extrasize == 2)
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    else if (bra == OP_BRAMINZERO)
      {
      OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    }
  else
    {
    if (bra == OP_BRA)
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 1) * sizeof(sljit_sw));
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(-extrasize + 1));
      }
    else
      {
      /* We don't need to keep the STR_PTR, only the previous private_data_ptr. */
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + 2) * sizeof(sljit_sw));
      if (extrasize == 2)
        {
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
        if (bra == OP_BRAMINZERO)
          OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
        }
      else
        {
        /* NOTE(review): this load uses a raw offset of 0 rather than STACK(n),
        unlike the neighbouring accesses - intent not visible from here. */
        OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), 0);
        OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), bra == OP_BRAZERO ? STR_PTR : SLJIT_IMM, 0);
        }
      }
    }

  if (bra == OP_BRAZERO)
    {
    /* Both the success path and the pending failure jump continue here. */
    backtrack->matchingpath = LABEL();
    SET_LABEL(jump, backtrack->matchingpath);
    }
  else if (bra == OP_BRAMINZERO)
    {
    /* NOTE(review): backtrack->matchingpath is not assigned in this function
    for BRAMINZERO; presumably the caller sets it beforehand - confirm. */
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    if (framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-framesize - 1));
      }
    set_jumps(backtrack->common.topbacktracks, LABEL());
    }
  }
else
  {
  /* AssertNot is successful. */
  if (framesize < 0)
    {
    if (extrasize > 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

    if (bra != OP_BRA)
      {
      /* BRAZERO / BRAMINZERO: keep one stack entry and set it to 0. */
      if (extrasize == 2)
        free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else if (extrasize > 0)
      free_stack(common, extrasize);
    }
  else
    {
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    /* TMP1 = previous private data value, stored in the last extra slot. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(extrasize - 1));
    /* The topmost item should be 0. */
    if (bra != OP_BRA)
      {
      free_stack(common, framesize + extrasize - 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      }
    else
      free_stack(common, framesize + extrasize);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0);
    }

  if (bra == OP_BRAZERO)
    backtrack->matchingpath = LABEL();
  else if (bra == OP_BRAMINZERO)
    {
    JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
    JUMPHERE(brajump);
    }

  if (bra != OP_BRA)
    {
    /* With a zero-repeat prefix a failed negative assert is not a failure of
    the whole item: bind the collected failure jumps here and clear the list. */
    SLJIT_ASSERT(found == &backtrack->common.topbacktracks);
    set_jumps(backtrack->common.topbacktracks, LABEL());
    backtrack->common.topbacktracks = NULL;
    }
  }

/* Restore the state of common that was saved on entry. */
if (opcode == OP_ASSERT_NOT || opcode == OP_ASSERTBACK_NOT)
  {
  common->local_exit = save_local_exit;
  common->quit_label = save_quit_label;
  common->quit = save_quit;
  }
common->positive_assert = save_positive_assert;
common->then_trap = save_then_trap;
common->accept_label = save_accept_label;
common->positive_assert_quit = save_positive_assert_quit;
common->accept = save_accept;
return cc + 1 + LINK_SIZE;
}
7842
|
|
|
|
|
|
|
|
7843
|
|
|
|
|
|
|
/* Emits the stack adjustment code shared by once-only style groups.

Arguments:
  common              shared compiler state
  ket                 closing opcode of the group (OP_KET, OP_KETRMAX or
                      OP_KETRMIN are the values tested here)
  framesize           negative when no frame was built (no_frame is handled
                      separately), otherwise the frame size in stack slots
  private_data_ptr    offset of the group's private data slot
  has_alternatives    TRUE when the group has alternatives
  needs_control_head  TRUE when a saved control head has to be written back
                      to common->control_head_ptr

The emitted code leaves the reloaded control head in TMP1 (when requested) and,
for OP_KETRMAX, a stack value in TMP2 for later use by the caller. */
static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
{
DEFINE_COMPILER;
int slots;
/* TRUE when the ket is a repeating one or the group has alternatives; in both
cases one more stack slot is accounted for below. */
BOOL extra_slot = (ket != OP_KET || has_alternatives);

if (framesize >= 0)
  {
  /* A frame exists: compute STACK_TOP from the private data slot. */
  slots = extra_slot ? 2 : 1;
  OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, (framesize + slots) * sizeof(sljit_sw));
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));

  /* TMP2 loaded here is consumed later by the OP_KETRMAX handling. */
  if (ket == OP_KETRMAX)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  }
else
  {
  if (framesize != no_frame)
    {
    /* Release the control head slot and/or the extra slot, if any. */
    slots = extra_slot ? 1 : 0;
    if (needs_control_head)
      slots++;

    if (slots > 0)
      free_stack(common, slots);
    }
  else
    OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

  if (needs_control_head)
    {
    if (extra_slot)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
    else
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
    }

  switch (ket)
    {
    case OP_KETRMAX:
    /* TMP2 loaded here is consumed later by the OP_KETRMAX handling. */
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-1));
    break;

    case OP_KETRMIN:
    /* Move the STR_PTR to the private_data_ptr. */
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-1));
    break;

    default:
    break;
    }
  }

/* Write the reloaded control head back to its slot. */
if (needs_control_head)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
}
7890
|
|
|
|
|
|
|
|
7891
|
|
|
|
|
|
|
/* Emits the code that saves the previous capture state on the stack and
records the new capture.

Arguments:
  common            shared compiler state
  stacksize         index of the first stack slot that may be written
  offset            ovector offset of the capture; offset >> 1 is used as the
                    bracket index
  private_data_ptr  offset of the slot whose value becomes OVECTOR(offset)

Returns:            index of the next unused stack slot, i.e. stacksize plus
                    one when common->capture_last_ptr is in use, plus two when
                    the bracket is not marked in common->optimized_cbracket */
static SLJIT_INLINE int match_capture_common(compiler_common *common, int stacksize, int offset, int private_data_ptr)
{
DEFINE_COMPILER;
const int bracket = offset >> 1;
int slot = stacksize;

if (common->capture_last_ptr != 0)
  {
  /* Push the old "last capture" value and store this bracket's index. */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, bracket);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(slot), TMP1, 0);
  slot += 1;
  }

/* Nothing more to emit for brackets flagged in optimized_cbracket. */
if (common->optimized_cbracket[bracket] != 0)
  return slot;

/* Push the old OVECTOR pair, then set the new pair: the start comes from the
private data slot and the end is the current STR_PTR. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(slot), TMP1, 0);
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(slot + 1), TMP2, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
return slot + 2;
}
7915
|
|
|
|
|
|
|
|
7916
|
|
|
|
|
|
|
/* |
7917
|
|
|
|
|
|
|
Handling bracketed expressions is probably the most complex part. |
7918
|
|
|
|
|
|
|
|
7919
|
|
|
|
|
|
|
Stack layout naming characters: |
7920
|
|
|
|
|
|
|
S - Push the current STR_PTR |
7921
|
|
|
|
|
|
|
0 - Push a 0 (NULL) |
7922
|
|
|
|
|
|
|
A - Push the current STR_PTR. Needed for restoring the STR_PTR |
7923
|
|
|
|
|
|
|
before the next alternative. Not pushed if there are no alternatives. |
7924
|
|
|
|
|
|
|
M - Any values pushed by the current alternative. Can be empty, or anything. |
7925
|
|
|
|
|
|
|
C - Push the previous OVECTOR(i), OVECTOR(i+1) and OVECTOR_PRIV(i) to the stack. |
7926
|
|
|
|
|
|
|
L - Push the previous local (pointed by localptr) to the stack |
7927
|
|
|
|
|
|
|
() - optional values stored on the stack |
7928
|
|
|
|
|
|
|
()* - optional, can be stored multiple times |
7929
|
|
|
|
|
|
|
|
7930
|
|
|
|
|
|
|
The following list shows the regular expression templates, their PCRE byte codes |
7931
|
|
|
|
|
|
|
and stack layout supported by pcre-sljit. |
7932
|
|
|
|
|
|
|
|
7933
|
|
|
|
|
|
|
(?:) OP_BRA | OP_KET A M |
7934
|
|
|
|
|
|
|
() OP_CBRA | OP_KET C M |
7935
|
|
|
|
|
|
|
(?:)+ OP_BRA | OP_KETRMAX 0 A M S ( A M S )* |
7936
|
|
|
|
|
|
|
OP_SBRA | OP_KETRMAX 0 L M S ( L M S )* |
7937
|
|
|
|
|
|
|
(?:)+? OP_BRA | OP_KETRMIN 0 A M S ( A M S )* |
7938
|
|
|
|
|
|
|
OP_SBRA | OP_KETRMIN 0 L M S ( L M S )* |
7939
|
|
|
|
|
|
|
()+ OP_CBRA | OP_KETRMAX 0 C M S ( C M S )* |
7940
|
|
|
|
|
|
|
OP_SCBRA | OP_KETRMAX 0 C M S ( C M S )* |
7941
|
|
|
|
|
|
|
()+? OP_CBRA | OP_KETRMIN 0 C M S ( C M S )* |
7942
|
|
|
|
|
|
|
OP_SCBRA | OP_KETRMIN 0 C M S ( C M S )* |
7943
|
|
|
|
|
|
|
(?:)? OP_BRAZERO | OP_BRA | OP_KET S ( A M 0 ) |
7944
|
|
|
|
|
|
|
(?:)?? OP_BRAMINZERO | OP_BRA | OP_KET S ( A M 0 ) |
7945
|
|
|
|
|
|
|
()? OP_BRAZERO | OP_CBRA | OP_KET S ( C M 0 ) |
7946
|
|
|
|
|
|
|
()?? OP_BRAMINZERO | OP_CBRA | OP_KET S ( C M 0 ) |
7947
|
|
|
|
|
|
|
(?:)* OP_BRAZERO | OP_BRA | OP_KETRMAX S 0 ( A M S )* |
7948
|
|
|
|
|
|
|
OP_BRAZERO | OP_SBRA | OP_KETRMAX S 0 ( L M S )* |
7949
|
|
|
|
|
|
|
(?:)*? OP_BRAMINZERO | OP_BRA | OP_KETRMIN S 0 ( A M S )* |
7950
|
|
|
|
|
|
|
OP_BRAMINZERO | OP_SBRA | OP_KETRMIN S 0 ( L M S )* |
7951
|
|
|
|
|
|
|
()* OP_BRAZERO | OP_CBRA | OP_KETRMAX S 0 ( C M S )* |
7952
|
|
|
|
|
|
|
OP_BRAZERO | OP_SCBRA | OP_KETRMAX S 0 ( C M S )* |
7953
|
|
|
|
|
|
|
()*? OP_BRAMINZERO | OP_CBRA | OP_KETRMIN S 0 ( C M S )* |
7954
|
|
|
|
|
|
|
OP_BRAMINZERO | OP_SCBRA | OP_KETRMIN S 0 ( C M S )* |
7955
|
|
|
|
|
|
|
|
7956
|
|
|
|
|
|
|
|
7957
|
|
|
|
|
|
|
Stack layout naming characters: |
7958
|
|
|
|
|
|
|
A - Push the alternative index (starting from 0) on the stack. |
7959
|
|
|
|
|
|
|
Not pushed if there are no alternatives. |
7960
|
|
|
|
|
|
|
M - Any values pushed by the current alternative. Can be empty, or anything. |
7961
|
|
|
|
|
|
|
|
7962
|
|
|
|
|
|
|
The next list shows the possible content of a bracket: |
7963
|
|
|
|
|
|
|
(|) OP_*BRA | OP_ALT ... M A |
7964
|
|
|
|
|
|
|
(?()|) OP_*COND | OP_ALT M A |
7965
|
|
|
|
|
|
|
(?>|) OP_ONCE | OP_ALT ... [stack trace] M A |
7966
|
|
|
|
|
|
|
(?>|) OP_ONCE_NC | OP_ALT ... [stack trace] M A |
7967
|
|
|
|
|
|
|
Or nothing, if trace is unnecessary |
7968
|
|
|
|
|
|
|
*/ |
7969
|
|
|
|
|
|
|
|
7970
|
|
|
|
|
|
|
/* Emits the matching-path code for one bracket group and pushes a
bracket_backtrack entry for it.

Arguments:
  common   shared compiler state
  cc       points at the bracket opcode, or at an OP_BRAZERO / OP_BRAMINZERO
           prefix that precedes it
  parent   backtrack node that receives the entry pushed here; when the
           prefix is OP_BRAMINZERO it is also read as a braminzero_backtrack

Returns:   the byte code position after the closing ket plus repeat_length
           (the value read from the ket's private data, 0 when absent), or
           NULL when the sljit compiler reports an error.
*/
static pcre_uchar *compile_bracket_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
pcre_uchar opcode;
int private_data_ptr = 0;
int offset = 0;
int i, stacksize;
int repeat_ptr = 0, repeat_length = 0;
int repeat_type = 0, repeat_count = 0;
pcre_uchar *ccbegin;
pcre_uchar *matchingpath;
pcre_uchar *slot;
pcre_uchar bra = OP_BRA;
pcre_uchar ket;
assert_backtrack *assert;
BOOL has_alternatives;
BOOL needs_control_head = FALSE;
struct sljit_jump *jump;
struct sljit_jump *skip;
struct sljit_label *rmax_label = NULL;
struct sljit_jump *braminzero = NULL;

PUSH_BACKTRACK(sizeof(bracket_backtrack), cc, NULL);

/* Remember an optional zero-repeat prefix and step over it. */
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO)
  {
  bra = *cc;
  cc++;
  }

opcode = *cc;
ccbegin = cc;
matchingpath = bracketend(cc) - 1 - LINK_SIZE;
ket = *matchingpath;
/* A plain OP_KET with private data carries a counted repeat; bounded
repeats are then treated like KETRMAX / KETRMIN. */
if (ket == OP_KET && PRIVATE_DATA(matchingpath) != 0)
  {
  repeat_ptr = PRIVATE_DATA(matchingpath);
  repeat_length = PRIVATE_DATA(matchingpath + 1);
  repeat_type = PRIVATE_DATA(matchingpath + 2);
  repeat_count = PRIVATE_DATA(matchingpath + 3);
  SLJIT_ASSERT(repeat_length != 0 && repeat_type != 0 && repeat_count != 0);
  if (repeat_type == OP_UPTO)
    ket = OP_KETRMAX;
  if (repeat_type == OP_MINUPTO)
    ket = OP_KETRMIN;
  }

if ((opcode == OP_COND || opcode == OP_SCOND) && cc[1 + LINK_SIZE] == OP_DEF)
  {
  /* Drop this bracket_backtrack. */
  parent->top = backtrack->prev;
  return matchingpath + 1 + LINK_SIZE + repeat_length;
  }

matchingpath = ccbegin + 1 + LINK_SIZE;
SLJIT_ASSERT(ket == OP_KET || ket == OP_KETRMAX || ket == OP_KETRMIN);
SLJIT_ASSERT(!((bra == OP_BRAZERO && ket == OP_KETRMIN) || (bra == OP_BRAMINZERO && ket == OP_KETRMAX)));
cc += GET(cc, 1);

has_alternatives = *cc == OP_ALT;
if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
  has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF || *matchingpath == OP_FAIL) ? FALSE : TRUE;

if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
  opcode = OP_SCOND;
if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC))
  opcode = OP_ONCE;

if (opcode == OP_CBRA || opcode == OP_SCBRA)
  {
  /* Capturing brackets have a pre-allocated space. */
  offset = GET2(ccbegin, 1 + LINK_SIZE);
  if (common->optimized_cbracket[offset] == 0)
    {
    private_data_ptr = OVECTOR_PRIV(offset);
    offset <<= 1;
    }
  else
    {
    offset <<= 1;
    private_data_ptr = OVECTOR(offset);
    }
  BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
  matchingpath += IMM2_SIZE;
  }
else if (opcode == OP_ONCE || opcode == OP_SBRA || opcode == OP_SCOND)
  {
  /* Other brackets simply allocate the next entry. */
  private_data_ptr = PRIVATE_DATA(ccbegin);
  SLJIT_ASSERT(private_data_ptr != 0);
  BACKTRACK_AS(bracket_backtrack)->private_data_ptr = private_data_ptr;
  if (opcode == OP_ONCE)
    BACKTRACK_AS(bracket_backtrack)->u.framesize = get_framesize(common, ccbegin, NULL, FALSE, &needs_control_head);
  }

/* Instructions before the first alternative. */
stacksize = 0;
if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
  stacksize++;
if (bra == OP_BRAZERO)
  stacksize++;

if (stacksize > 0)
  allocate_stack(common, stacksize);

stacksize = 0;
if (ket == OP_KETRMAX || (ket == OP_KETRMIN && bra != OP_BRAMINZERO))
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
  stacksize++;
  }

if (bra == OP_BRAZERO)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);

if (bra == OP_BRAMINZERO)
  {
  /* This is a backtrack path! (Since the try-path of OP_BRAMINZERO matches to the empty string) */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  if (ket != OP_KETRMIN)
    {
    free_stack(common, 1);
    braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
    }
  else
    {
    if (opcode == OP_ONCE || opcode >= OP_SBRA)
      {
      jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
      /* Nothing stored during the first run. */
      skip = JUMP(SLJIT_JUMP);
      JUMPHERE(jump);
      /* Checking zero-length iteration. */
      if (opcode != OP_ONCE || BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
        {
        /* When we come from outside, private_data_ptr contains the previous STR_PTR. */
        braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
        }
      else
        {
        /* Except when the whole stack frame must be saved. */
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
        braminzero = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-BACKTRACK_AS(bracket_backtrack)->u.framesize - 2));
        }
      JUMPHERE(skip);
      }
    else
      {
      jump = CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
      JUMPHERE(jump);
      }
    }
  }

/* Counted repeats keep their remaining iteration count at repeat_ptr. */
if (repeat_type != 0)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, repeat_count);
  if (repeat_type == OP_EXACT)
    rmax_label = LABEL();
  }

if (ket == OP_KETRMIN)
  BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();

if (ket == OP_KETRMAX)
  {
  rmax_label = LABEL();
  if (has_alternatives && opcode != OP_ONCE && opcode < OP_SBRA && repeat_type == 0)
    BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = rmax_label;
  }

/* Handling capturing brackets and alternatives. */
if (opcode == OP_ONCE)
  {
  stacksize = 0;
  if (needs_control_head)
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    stacksize++;
    }

  if (BACKTRACK_AS(bracket_backtrack)->u.framesize < 0)
    {
    /* Neither capturing brackets nor recursions are found in the block. */
    if (ket == OP_KETRMIN)
      {
      stacksize += 2;
      if (!needs_control_head)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      }
    else
      {
      if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);
      if (ket == OP_KETRMAX || has_alternatives)
        stacksize++;
      }

    if (stacksize > 0)
      allocate_stack(common, stacksize);

    stacksize = 0;
    if (needs_control_head)
      {
      stacksize++;
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
      }

    if (ket == OP_KETRMIN)
      {
      if (needs_control_head)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
      if (BACKTRACK_AS(bracket_backtrack)->u.framesize == no_frame)
        OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, needs_control_head ? (2 * sizeof(sljit_sw)) : sizeof(sljit_sw));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize + 1), TMP2, 0);
      }
    else if (ket == OP_KETRMAX || has_alternatives)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
    }
  else
    {
    if (ket != OP_KET || has_alternatives)
      stacksize++;

    stacksize += BACKTRACK_AS(bracket_backtrack)->u.framesize + 1;
    allocate_stack(common, stacksize);

    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);

    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    OP2(SLJIT_ADD, TMP2, 0, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));

    stacksize = needs_control_head ? 1 : 0;
    if (ket != OP_KET || has_alternatives)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
      stacksize++;
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
      }
    else
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP2, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP1, 0);
      }
    init_frame(common, ccbegin, NULL, BACKTRACK_AS(bracket_backtrack)->u.framesize + stacksize, stacksize + 1, FALSE);
    }
  }
else if (opcode == OP_CBRA || opcode == OP_SCBRA)
  {
  /* Saving the previous values. */
  if (common->optimized_cbracket[offset >> 1] != 0)
    {
    SLJIT_ASSERT(private_data_ptr == OVECTOR(offset));
    allocate_stack(common, 2);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr + sizeof(sljit_sw));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    }
  else
    {
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
    }
  }
else if (opcode == OP_SBRA || opcode == OP_SCOND)
  {
  /* Saving the previous value. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  }
else if (has_alternatives)
  {
  /* Pushing the starting string pointer. */
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  }

/* Generating code for the first alternative. */
if (opcode == OP_COND || opcode == OP_SCOND)
  {
  if (*matchingpath == OP_CREF)
    {
    SLJIT_ASSERT(has_alternatives);
    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed),
      CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
    matchingpath += 1 + IMM2_SIZE;
    }
  else if (*matchingpath == OP_DNCREF)
    {
    SLJIT_ASSERT(has_alternatives);

    i = GET2(matchingpath, 1 + IMM2_SIZE);
    slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
    /* STR_PTR is used as a scratch register in the loop; TMP3 keeps its value. */
    OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1));
    OP2(SLJIT_SUB | SLJIT_SET_Z, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
    slot += common->name_entry_size;
    i--;
    while (i-- > 0)
      {
      OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
      OP2(SLJIT_OR | SLJIT_SET_Z, TMP2, 0, TMP2, 0, STR_PTR, 0);
      slot += common->name_entry_size;
      }
    OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
    add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_ZERO));
    matchingpath += 1 + 2 * IMM2_SIZE;
    }
  else if (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF || *matchingpath == OP_FAIL)
    {
    /* Never has other case. */
    BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
    SLJIT_ASSERT(!has_alternatives);

    /* These conditions are decided here, at JIT compile time; stacksize is
    reused as the boolean outcome (non-zero selects the first branch). */
    if (*matchingpath == OP_FAIL)
      stacksize = 0;
    else if (*matchingpath == OP_RREF)
      {
      stacksize = GET2(matchingpath, 1);
      if (common->currententry == NULL)
        stacksize = 0;
      else if (stacksize == RREF_ANY)
        stacksize = 1;
      else if (common->currententry->start == 0)
        stacksize = stacksize == 0;
      else
        stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);

      if (stacksize != 0)
        matchingpath += 1 + IMM2_SIZE;
      }
    else
      {
      if (common->currententry == NULL || common->currententry->start == 0)
        stacksize = 0;
      else
        {
        stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
        slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
        i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
        while (stacksize > 0)
          {
          if ((int)GET2(slot, 0) == i)
            break;
          slot += common->name_entry_size;
          stacksize--;
          }
        }

      if (stacksize != 0)
        matchingpath += 1 + 2 * IMM2_SIZE;
      }

    /* The stacksize == 0 is a common "else" case. */
    if (stacksize == 0)
      {
      if (*cc == OP_ALT)
        {
        matchingpath = cc + 1 + LINK_SIZE;
        cc += GET(cc, 1);
        }
      else
        matchingpath = cc;
      }
    }
  else
    {
    SLJIT_ASSERT(has_alternatives && *matchingpath >= OP_ASSERT && *matchingpath <= OP_ASSERTBACK_NOT);
    /* Similar code as PUSH_BACKTRACK macro. */
    assert = sljit_alloc_memory(compiler, sizeof(assert_backtrack));
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
      return NULL;
    memset(assert, 0, sizeof(assert_backtrack));
    assert->common.cc = matchingpath;
    BACKTRACK_AS(bracket_backtrack)->u.assert = assert;
    matchingpath = compile_assert_matchingpath(common, matchingpath, assert, TRUE);
    }
  }

compile_matchingpath(common, matchingpath, cc, backtrack);
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
  return NULL;

if (opcode == OP_ONCE)
  match_once_common(common, ket, BACKTRACK_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head);

/* First pass: count the stack slots needed after the first alternative. */
stacksize = 0;
if (repeat_type == OP_MINUPTO)
  {
  /* We need to preserve the counter. TMP2 will be used below. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
  stacksize++;
  }
if (ket != OP_KET || bra != OP_BRA)
  stacksize++;
if (offset != 0)
  {
  if (common->capture_last_ptr != 0)
    stacksize++;
  if (common->optimized_cbracket[offset >> 1] == 0)
    stacksize += 2;
  }
if (has_alternatives && opcode != OP_ONCE)
  stacksize++;

if (stacksize > 0)
  allocate_stack(common, stacksize);

/* Second pass: fill the slots in the same order as they were counted. */
stacksize = 0;
if (repeat_type == OP_MINUPTO)
  {
  /* TMP2 was set above. */
  OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1);
  stacksize++;
  }

if (ket != OP_KET || bra != OP_BRA)
  {
  if (ket != OP_KET)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0);
  else
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
  stacksize++;
  }

if (offset != 0)
  stacksize = match_capture_common(common, stacksize, offset, private_data_ptr);

if (has_alternatives)
  {
  if (opcode != OP_ONCE)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0);
  if (ket != OP_KETRMAX)
    BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
  }

/* Must be after the matchingpath label. */
if (offset != 0 && common->optimized_cbracket[offset >> 1] != 0)
  {
  SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
  }

if (ket == OP_KETRMAX)
  {
  if (repeat_type != 0)
    {
    if (has_alternatives)
      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
    OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
    JUMPTO(SLJIT_NOT_ZERO, rmax_label);
    /* Drop STR_PTR for greedy plus quantifier. */
    if (opcode != OP_ONCE)
      free_stack(common, 1);
    }
  else if (opcode == OP_ONCE || opcode >= OP_SBRA)
    {
    if (has_alternatives)
      BACKTRACK_AS(bracket_backtrack)->alternative_matchingpath = LABEL();
    /* Checking zero-length iteration. */
    if (opcode != OP_ONCE)
      {
      CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STR_PTR, 0, rmax_label);
      /* Drop STR_PTR for greedy plus quantifier. */
      if (bra != OP_BRAZERO)
        free_stack(common, 1);
      }
    else
      /* TMP2 must contain the starting STR_PTR. */
      CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, rmax_label);
    }
  else
    JUMPTO(SLJIT_JUMP, rmax_label);
  BACKTRACK_AS(bracket_backtrack)->recursive_matchingpath = LABEL();
  }

if (repeat_type == OP_EXACT)
  {
  count_match(common);
  OP2(SLJIT_SUB | SLJIT_SET_Z, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1);
  JUMPTO(SLJIT_NOT_ZERO, rmax_label);
  }
else if (repeat_type == OP_UPTO)
  {
  /* We need to preserve the counter. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr);
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
  }

if (bra == OP_BRAZERO)
  BACKTRACK_AS(bracket_backtrack)->zero_matchingpath = LABEL();

if (bra == OP_BRAMINZERO)
  {
  /* This is a backtrack path! (From the viewpoint of OP_BRAMINZERO) */
  JUMPTO(SLJIT_JUMP, ((braminzero_backtrack *)parent)->matchingpath);
  if (braminzero != NULL)
    {
    JUMPHERE(braminzero);
    /* We need to release the end pointer to perform the
    backtrack for the zero-length iteration. When
    framesize is < 0, OP_ONCE will do the release itself. */
    if (opcode == OP_ONCE && BACKTRACK_AS(bracket_backtrack)->u.framesize >= 0)
      {
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
      }
    else if (ket == OP_KETRMIN && opcode != OP_ONCE)
      free_stack(common, 1);
    }
  /* Continue to the normal backtrack. */
  }

if ((ket != OP_KET && bra != OP_BRAMINZERO) || bra == OP_BRAZERO)
  count_match(common);

/* Skip the other alternatives. */
while (*cc == OP_ALT)
  cc += GET(cc, 1);
cc += 1 + LINK_SIZE;

if (opcode == OP_ONCE)
  {
  /* We temporarily encode the needs_control_head in the lowest bit.
  The framesize can be negative here, and left-shifting a negative signed
  value is undefined behaviour in C, so it is doubled with a multiplication
  instead; the stored value is the same as (framesize << 1) | flag on the
  target architectures of SLJIT. */
  BACKTRACK_AS(bracket_backtrack)->u.framesize = (BACKTRACK_AS(bracket_backtrack)->u.framesize * 2) | (needs_control_head ? 1 : 0);
  }
return cc + repeat_length;
}
8514
|
|
|
|
|
|
|
|
8515
|
|
|
|
|
|
|
/* Emits the matching path of a BRAPOS-type bracket: OP_BRAPOS, OP_SBRAPOS,
OP_CBRAPOS or OP_SCBRAPOS, optionally preceded by OP_BRAPOSZERO, whose
alternatives are terminated by OP_KETRPOS.

The generated code runs the alternatives in a loop. Every time an alternative
matches, the saved position (and, for the capturing variants, the ovector
pair) is updated and control jumps back to the loop head. When no alternative
matches, the subject pointer is restored from the last saved position and
execution leaves the loop.

Unless OP_BRAPOSZERO was present, one extra stack slot holds a flag that is
initialised to 1 and cleared to 0 after each matched iteration. If it is still
non-zero when the loop is left, a jump is added to backtrack->topbacktracks.

For OP_SBRAPOS and OP_SCBRAPOS an iteration that ends where it started jumps
to the exit label directly (empty match check).

Arguments:
  common   the compiler state
  cc       points to the bracket opcode, or to the OP_BRAPOSZERO before it
  parent   parent backtrack descriptor; not referenced directly in this body
           (presumably consumed by PUSH_BACKTRACK - confirm against the macro)

Returns:   pointer to the byte code following OP_KETRPOS, or NULL when
           sljit_get_compiler_error() reports an error
*/
static pcre_uchar *compile_bracketpos_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
pcre_uchar opcode;
int private_data_ptr;
int cbraprivptr = 0;
BOOL needs_control_head;
int framesize;
int stacksize;
int offset = 0;
BOOL zero = FALSE;
pcre_uchar *ccbegin = NULL;
int stack; /* Also contains the offset of control head. */
struct sljit_label *loop = NULL;
struct jump_list *emptymatch = NULL;

PUSH_BACKTRACK(sizeof(bracketpos_backtrack), cc, NULL);
if (*cc == OP_BRAPOSZERO)
  {
  /* Zero iterations are acceptable: no "matched" flag slot is needed. */
  zero = TRUE;
  cc++;
  }

opcode = *cc;
private_data_ptr = PRIVATE_DATA(cc);
SLJIT_ASSERT(private_data_ptr != 0);
BACKTRACK_AS(bracketpos_backtrack)->private_data_ptr = private_data_ptr;
switch(opcode)
  {
  case OP_BRAPOS:
  case OP_SBRAPOS:
  ccbegin = cc + 1 + LINK_SIZE;
  break;

  case OP_CBRAPOS:
  case OP_SCBRAPOS:
  offset = GET2(cc, 1 + LINK_SIZE);
  /* This case cannot be optimized in the same way as
     normal capturing brackets. */
  SLJIT_ASSERT(common->optimized_cbracket[offset] == 0);
  cbraprivptr = OVECTOR_PRIV(offset);
  /* From here on offset indexes the ovector pair (start at offset,
     end at offset + 1); offset >> 1 is the bracket number again. */
  offset <<= 1;
  ccbegin = cc + 1 + LINK_SIZE + IMM2_SIZE;
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }

framesize = get_framesize(common, cc, NULL, FALSE, &needs_control_head);
BACKTRACK_AS(bracketpos_backtrack)->framesize = framesize;
if (framesize < 0)
  {
  /* No frame is built. Stack layout, from STACK(0) upwards:
       capturing:      ovector start, ovector end, [capture_last]
       non-capturing:  subject pointer
     followed by [control head] and finally [flag] (only if !zero). */
  if (offset != 0)
    {
    stacksize = 2;
    if (common->capture_last_ptr != 0)
      stacksize++;
    }
  else
    stacksize = 1;

  if (needs_control_head)
    stacksize++;
  if (!zero)
    stacksize++;

  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;
  allocate_stack(common, stacksize);
  if (framesize == no_frame)
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0);

  stack = 0;
  if (offset != 0)
    {
    stack = 2;
    /* The loads and stores are interleaved; TMP1 and TMP2 are reused for
       capture_last and the control head once their first value is stored. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
    if (common->capture_last_ptr != 0)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), TMP2, 0);
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    if (common->capture_last_ptr != 0)
      {
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), TMP1, 0);
      stack = 3;
      }
    }
  else
    {
    if (needs_control_head)
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
    stack = 1;
    }

  /* The flag goes into the slot after the control head (if any); stack is
     then moved back so that it ends up indexing the control head slot. */
  if (needs_control_head)
    stack++;
  if (!zero)
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), SLJIT_IMM, 1);
  if (needs_control_head)
    {
    stack--;
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
    }
  }
else
  {
  /* A frame is built. Stack layout, from STACK(0) upwards:
       [flag] (only if !zero), [control head],
       [subject pointer] (only if non-capturing),
       previous value of private_data_ptr, then the frame itself. */
  stacksize = framesize + 1;
  if (!zero)
    stacksize++;
  if (needs_control_head)
    stacksize++;
  if (offset == 0)
    stacksize++;
  BACKTRACK_AS(bracketpos_backtrack)->stacksize = stacksize;

  allocate_stack(common, stacksize);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
  if (needs_control_head)
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), private_data_ptr, STACK_TOP, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));

  stack = 0;
  if (!zero)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 1);
    stack = 1;
    }
  if (needs_control_head)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP2, 0);
    stack++;
    }
  if (offset == 0)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), STR_PTR, 0);
    stack++;
    }
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stack), TMP1, 0);
  init_frame(common, cc, NULL, stacksize - 1, stacksize - framesize, FALSE);
  /* Step back over the slots stored after the control head, so that stack
     indexes the control head slot when one was stored. */
  stack -= 1 + (offset == 0);
  }

/* Capturing variants remember the start of the current iteration. */
if (offset != 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);

loop = LABEL();
while (*cc != OP_KETRPOS)
  {
  backtrack->top = NULL;
  backtrack->topbacktracks = NULL;
  /* Advance cc to the end of the current alternative. */
  cc += GET(cc, 1);

  compile_matchingpath(common, ccbegin, cc, backtrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;

  /* Code executed when the alternative matched. */
  if (framesize < 0)
    {
    if (framesize == no_frame)
      OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);

    if (offset != 0)
      {
      /* TMP1 = start of this iteration; it becomes the ovector start and
         is also the value compared by the empty match check below. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
      if (common->capture_last_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
      }
    else
      {
      /* TMP1 is only needed for the empty match check of OP_SBRAPOS. */
      if (opcode == OP_SBRAPOS)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }

    /* Even if the match is empty, we need to reset the control head. */
    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));

    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));

    /* At least one iteration matched: clear the flag in the last slot. */
    if (!zero)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0);
    }
  else
    {
    if (offset != 0)
      {
      OP2(SLJIT_SUB, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_IMM, stacksize * sizeof(sljit_sw));
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), cbraprivptr, STR_PTR, 0);
      if (common->capture_last_ptr != 0)
        OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, offset >> 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP2(SLJIT_SUB, STACK_TOP, 0, TMP2, 0, SLJIT_IMM, stacksize * sizeof(sljit_sw));
      if (opcode == OP_SBRAPOS)
        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
      OP1(SLJIT_MOV, SLJIT_MEM1(TMP2), STACK(-framesize - 2), STR_PTR, 0);
      }

    /* Even if the match is empty, we need to reset the control head. */
    if (needs_control_head)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_MEM1(STACK_TOP), STACK(stack));

    if (opcode == OP_SBRAPOS || opcode == OP_SCBRAPOS)
      add_jump(compiler, &emptymatch, CMP(SLJIT_EQUAL, TMP1, 0, STR_PTR, 0));

    /* At least one iteration matched: clear the flag. In this layout the
       flag is stored in STACK(0); framesize is never negative here. */
    if (!zero)
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    }

  JUMPTO(SLJIT_JUMP, loop);
  flush_stubs(common);

  compile_backtrackingpath(common, backtrack->top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return NULL;
  set_jumps(backtrack->topbacktracks, LABEL());

  /* The alternative failed: restore the subject pointer to the position
     saved by the last successful iteration (or the initial position). */
  if (framesize < 0)
    {
    if (offset != 0)
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
    else
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    }
  else
    {
    if (offset != 0)
      {
      /* Last alternative. */
      if (*cc == OP_KETRPOS)
        OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), cbraprivptr);
      }
    else
      {
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr);
      OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP2), STACK(-framesize - 2));
      }
    }

  if (*cc == OP_KETRPOS)
    break;
  /* Body of the next alternative starts after OP_ALT and its link. */
  ccbegin = cc + 1 + LINK_SIZE;
  }

/* We don't have to restore the control head in case of a failed match. */

backtrack->topbacktracks = NULL;
if (!zero)
  {
  /* A flag that is still non-zero means no iteration matched. */
  if (framesize < 0)
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(stacksize - 1), SLJIT_IMM, 0));
  else /* TMP2 is set to [private_data_ptr] above. */
    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(TMP2), STACK(-stacksize), SLJIT_IMM, 0));
  }

/* None of them matched. */
set_jumps(emptymatch, LABEL());
count_match(common);
return cc + 1 + LINK_SIZE;
}
8797
|
|
|
|
|
|
|
|
8798
|
|
|
|
|
|
|
/* Decodes the repeat opcode at cc into a normalised description.

The opcode families for plain, caseless, negated, negated caseless and typed
single character repeats are all rebased onto the OP_STAR family, and the
repeat codes following a character class are converted to the same family.
The PLUS forms are reported as the matching STAR form with *exact set to 1.

Arguments:
  common   the compiler state (read only for the UTF extra length check)
  cc       points to the repeat opcode or to the class opcode
  opcode   receives the normalised repeat opcode
  type     receives the item type: OP_CHAR, OP_CHARI, OP_NOT, OP_NOTI, the
           character type opcode read from the byte code, or the class opcode
  max      written only for the UPTO forms and for class ranges; otherwise
           left untouched, so the caller has to initialise it
  exact    receives the number of mandatory repeats (0 if there is none)
  end      receives the address of the byte code following the whole item

Returns:   pointer to the repeated item: the class data for classes, the
           byte after the character type opcode for typed repeats, and the
           character itself otherwise
*/
static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, sljit_u32 *max, sljit_u32 *exact, pcre_uchar **end)
{
int class_len;
int family_first;
BOOL possessive;

*opcode = *cc;
*exact = 0;

/* Identify the opcode family; every family is laid out like OP_STAR. */
if (*opcode >= OP_STAR && *opcode <= OP_POSUPTO)
  {
  family_first = OP_STAR;
  *type = OP_CHAR;
  }
else if (*opcode >= OP_STARI && *opcode <= OP_POSUPTOI)
  {
  family_first = OP_STARI;
  *type = OP_CHARI;
  }
else if (*opcode >= OP_NOTSTAR && *opcode <= OP_NOTPOSUPTO)
  {
  family_first = OP_NOTSTAR;
  *type = OP_NOT;
  }
else if (*opcode >= OP_NOTSTARI && *opcode <= OP_NOTPOSUPTOI)
  {
  family_first = OP_NOTSTARI;
  *type = OP_NOTI;
  }
else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
  {
  family_first = OP_TYPESTAR;
  /* Placeholder: the real type is read once the count has been skipped. */
  *type = OP_END;
  }
else
  {
  /* Character class: the repeat opcode follows the class data. */
  SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
  *type = *opcode;
  cc++;
  class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
  *opcode = cc[class_len - 1];

  if (*opcode >= OP_CRSTAR && *opcode <= OP_CRMINQUERY)
    {
    *opcode -= OP_CRSTAR - OP_STAR;
    *end = cc + class_len;

    if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
      {
      *exact = 1;
      *opcode -= OP_PLUS - OP_STAR;
      }
    return cc;
    }

  if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
    {
    *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
    *end = cc + class_len;

    if (*opcode == OP_POSPLUS)
      {
      *exact = 1;
      *opcode = OP_POSSTAR;
      }
    return cc;
    }

  /* Explicit {min,max} range stored after the repeat opcode. */
  SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
  possessive = (*opcode == OP_CRPOSRANGE);
  *max = GET2(cc, (class_len + IMM2_SIZE));
  *exact = GET2(cc, class_len);

  if (*max == 0)
    {
    /* A stored maximum of zero is mapped to the STAR forms. */
    if (possessive)
      *opcode = OP_POSSTAR;
    else
      *opcode -= OP_CRRANGE - OP_STAR;
    }
  else
    {
    /* Keep only the number of repeats beyond the mandatory ones. */
    *max -= *exact;
    switch(*max)
      {
      case 0:
      *opcode = OP_EXACT;
      break;

      case 1:
      if (possessive)
        *opcode = OP_POSQUERY;
      else
        *opcode -= OP_CRRANGE - OP_QUERY;
      break;

      default:
      if (possessive)
        *opcode = OP_POSUPTO;
      else
        *opcode -= OP_CRRANGE - OP_UPTO;
      break;
      }
    }

  *end = cc + class_len + 2 * IMM2_SIZE;
  return cc;
  }

/* Single character and character type repeats: skip the opcode and rebase
it onto the OP_STAR family. */
cc++;
*opcode -= family_first - OP_STAR;

if (*opcode == OP_EXACT)
  {
  *exact = GET2(cc, 0);
  cc += IMM2_SIZE;
  }
else if (*opcode == OP_PLUS || *opcode == OP_MINPLUS)
  {
  *exact = 1;
  *opcode -= OP_PLUS - OP_STAR;
  }
else if (*opcode == OP_POSPLUS)
  {
  *exact = 1;
  *opcode = OP_POSSTAR;
  }
else if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_POSUPTO)
  {
  *max = GET2(cc, 0);
  cc += IMM2_SIZE;
  }

if (*type == OP_END)
  {
  /* Typed repeat: the character type opcode is the repeated item. */
  *type = *cc;
  *end = next_opcode(common, cc);
  return cc + 1;
  }

*end = cc + 1;
#ifdef SUPPORT_UTF
if (common->utf && HAS_EXTRALEN(*cc))
  *end += GET_EXTRALEN(*cc);
#endif
return cc;
}
8942
|
|
|
|
|
|
|
|
8943
|
|
|
|
|
|
|
static pcre_uchar *compile_iterator_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent) |
8944
|
|
|
|
|
|
|
{ |
8945
|
|
|
|
|
|
|
DEFINE_COMPILER; |
8946
|
|
|
|
|
|
|
backtrack_common *backtrack; |
8947
|
|
|
|
|
|
|
pcre_uchar opcode; |
8948
|
|
|
|
|
|
|
pcre_uchar type; |
8949
|
|
|
|
|
|
|
sljit_u32 max = 0, exact; |
8950
|
|
|
|
|
|
|
BOOL fast_fail; |
8951
|
|
|
|
|
|
|
sljit_s32 fast_str_ptr; |
8952
|
|
|
|
|
|
|
BOOL charpos_enabled; |
8953
|
|
|
|
|
|
|
pcre_uchar charpos_char; |
8954
|
|
|
|
|
|
|
unsigned int charpos_othercasebit; |
8955
|
|
|
|
|
|
|
pcre_uchar *end; |
8956
|
|
|
|
|
|
|
jump_list *no_match = NULL; |
8957
|
|
|
|
|
|
|
jump_list *no_char1_match = NULL; |
8958
|
|
|
|
|
|
|
struct sljit_jump *jump = NULL; |
8959
|
|
|
|
|
|
|
struct sljit_label *label; |
8960
|
|
|
|
|
|
|
int private_data_ptr = PRIVATE_DATA(cc); |
8961
|
|
|
|
|
|
|
int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_SP); |
8962
|
|
|
|
|
|
|
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr; |
8963
|
|
|
|
|
|
|
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw); |
8964
|
|
|
|
|
|
|
int tmp_base, tmp_offset; |
8965
|
|
|
|
|
|
|
|
8966
|
|
|
|
|
|
|
PUSH_BACKTRACK(sizeof(char_iterator_backtrack), cc, NULL); |
8967
|
|
|
|
|
|
|
|
8968
|
|
|
|
|
|
|
fast_str_ptr = PRIVATE_DATA(cc + 1); |
8969
|
|
|
|
|
|
|
fast_fail = TRUE; |
8970
|
|
|
|
|
|
|
|
8971
|
|
|
|
|
|
|
SLJIT_ASSERT(common->fast_forward_bc_ptr == NULL || fast_str_ptr == 0 || cc == common->fast_forward_bc_ptr); |
8972
|
|
|
|
|
|
|
|
8973
|
|
|
|
|
|
|
if (cc == common->fast_forward_bc_ptr) |
8974
|
|
|
|
|
|
|
fast_fail = FALSE; |
8975
|
|
|
|
|
|
|
else if (common->fast_fail_start_ptr == 0) |
8976
|
|
|
|
|
|
|
fast_str_ptr = 0; |
8977
|
|
|
|
|
|
|
|
8978
|
|
|
|
|
|
|
SLJIT_ASSERT(common->fast_forward_bc_ptr != NULL || fast_str_ptr == 0 |
8979
|
|
|
|
|
|
|
|| (fast_str_ptr >= common->fast_fail_start_ptr && fast_str_ptr <= common->fast_fail_end_ptr)); |
8980
|
|
|
|
|
|
|
|
8981
|
|
|
|
|
|
|
cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end); |
8982
|
|
|
|
|
|
|
|
8983
|
|
|
|
|
|
|
if (type != OP_EXTUNI) |
8984
|
|
|
|
|
|
|
{ |
8985
|
|
|
|
|
|
|
tmp_base = TMP3; |
8986
|
|
|
|
|
|
|
tmp_offset = 0; |
8987
|
|
|
|
|
|
|
} |
8988
|
|
|
|
|
|
|
else |
8989
|
|
|
|
|
|
|
{ |
8990
|
|
|
|
|
|
|
tmp_base = SLJIT_MEM1(SLJIT_SP); |
8991
|
|
|
|
|
|
|
tmp_offset = POSSESSIVE0; |
8992
|
|
|
|
|
|
|
} |
8993
|
|
|
|
|
|
|
|
8994
|
|
|
|
|
|
|
if (fast_fail && fast_str_ptr != 0) |
8995
|
|
|
|
|
|
|
add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), fast_str_ptr)); |
8996
|
|
|
|
|
|
|
|
8997
|
|
|
|
|
|
|
/* Handle fixed part first. */ |
8998
|
|
|
|
|
|
|
if (exact > 1) |
8999
|
|
|
|
|
|
|
{ |
9000
|
|
|
|
|
|
|
SLJIT_ASSERT(fast_str_ptr == 0); |
9001
|
|
|
|
|
|
|
if (common->mode == JIT_COMPILE |
9002
|
|
|
|
|
|
|
#ifdef SUPPORT_UTF |
9003
|
|
|
|
|
|
|
&& !common->utf |
9004
|
|
|
|
|
|
|
#endif |
9005
|
|
|
|
|
|
|
) |
9006
|
|
|
|
|
|
|
{ |
9007
|
|
|
|
|
|
|
OP2(SLJIT_ADD, TMP1, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(exact)); |
9008
|
|
|
|
|
|
|
add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_GREATER, TMP1, 0, STR_END, 0)); |
9009
|
|
|
|
|
|
|
OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact); |
9010
|
|
|
|
|
|
|
label = LABEL(); |
9011
|
|
|
|
|
|
|
compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE); |
9012
|
|
|
|
|
|
|
OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); |
9013
|
|
|
|
|
|
|
JUMPTO(SLJIT_NOT_ZERO, label); |
9014
|
|
|
|
|
|
|
} |
9015
|
|
|
|
|
|
|
else |
9016
|
|
|
|
|
|
|
{ |
9017
|
|
|
|
|
|
|
OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, exact); |
9018
|
|
|
|
|
|
|
label = LABEL(); |
9019
|
|
|
|
|
|
|
compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE); |
9020
|
|
|
|
|
|
|
OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); |
9021
|
|
|
|
|
|
|
JUMPTO(SLJIT_NOT_ZERO, label); |
9022
|
|
|
|
|
|
|
} |
9023
|
|
|
|
|
|
|
} |
9024
|
|
|
|
|
|
|
else if (exact == 1) |
9025
|
|
|
|
|
|
|
compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, TRUE); |
9026
|
|
|
|
|
|
|
|
9027
|
|
|
|
|
|
|
switch(opcode) |
9028
|
|
|
|
|
|
|
{ |
9029
|
|
|
|
|
|
|
case OP_STAR: |
9030
|
|
|
|
|
|
|
case OP_UPTO: |
9031
|
|
|
|
|
|
|
SLJIT_ASSERT(fast_str_ptr == 0 || opcode == OP_STAR); |
9032
|
|
|
|
|
|
|
|
9033
|
|
|
|
|
|
|
if (type == OP_ANYNL || type == OP_EXTUNI) |
9034
|
|
|
|
|
|
|
{ |
9035
|
|
|
|
|
|
|
SLJIT_ASSERT(private_data_ptr == 0); |
9036
|
|
|
|
|
|
|
SLJIT_ASSERT(fast_str_ptr == 0); |
9037
|
|
|
|
|
|
|
|
9038
|
|
|
|
|
|
|
allocate_stack(common, 2); |
9039
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); |
9040
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, 0); |
9041
|
|
|
|
|
|
|
|
9042
|
|
|
|
|
|
|
if (opcode == OP_UPTO) |
9043
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, SLJIT_IMM, max); |
9044
|
|
|
|
|
|
|
|
9045
|
|
|
|
|
|
|
label = LABEL(); |
9046
|
|
|
|
|
|
|
compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE); |
9047
|
|
|
|
|
|
|
if (opcode == OP_UPTO) |
9048
|
|
|
|
|
|
|
{ |
9049
|
|
|
|
|
|
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0); |
9050
|
|
|
|
|
|
|
OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); |
9051
|
|
|
|
|
|
|
jump = JUMP(SLJIT_ZERO); |
9052
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE0, TMP1, 0); |
9053
|
|
|
|
|
|
|
} |
9054
|
|
|
|
|
|
|
|
9055
|
|
|
|
|
|
|
/* We cannot use TMP3 because of this allocate_stack. */ |
9056
|
|
|
|
|
|
|
allocate_stack(common, 1); |
9057
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0); |
9058
|
|
|
|
|
|
|
JUMPTO(SLJIT_JUMP, label); |
9059
|
|
|
|
|
|
|
if (jump != NULL) |
9060
|
|
|
|
|
|
|
JUMPHERE(jump); |
9061
|
|
|
|
|
|
|
} |
9062
|
|
|
|
|
|
|
else |
9063
|
|
|
|
|
|
|
{ |
9064
|
|
|
|
|
|
|
charpos_enabled = FALSE; |
9065
|
|
|
|
|
|
|
charpos_char = 0; |
9066
|
|
|
|
|
|
|
charpos_othercasebit = 0; |
9067
|
|
|
|
|
|
|
|
9068
|
|
|
|
|
|
|
if ((type != OP_CHAR && type != OP_CHARI) && (*end == OP_CHAR || *end == OP_CHARI)) |
9069
|
|
|
|
|
|
|
{ |
9070
|
|
|
|
|
|
|
charpos_enabled = TRUE; |
9071
|
|
|
|
|
|
|
#ifdef SUPPORT_UTF |
9072
|
|
|
|
|
|
|
charpos_enabled = !common->utf || !HAS_EXTRALEN(end[1]); |
9073
|
|
|
|
|
|
|
#endif |
9074
|
|
|
|
|
|
|
if (charpos_enabled && *end == OP_CHARI && char_has_othercase(common, end + 1)) |
9075
|
|
|
|
|
|
|
{ |
9076
|
|
|
|
|
|
|
charpos_othercasebit = char_get_othercase_bit(common, end + 1); |
9077
|
|
|
|
|
|
|
if (charpos_othercasebit == 0) |
9078
|
|
|
|
|
|
|
charpos_enabled = FALSE; |
9079
|
|
|
|
|
|
|
} |
9080
|
|
|
|
|
|
|
|
9081
|
|
|
|
|
|
|
if (charpos_enabled) |
9082
|
|
|
|
|
|
|
{ |
9083
|
|
|
|
|
|
|
charpos_char = end[1]; |
9084
|
|
|
|
|
|
|
/* Consume the OP_CHAR opcode. */ |
9085
|
|
|
|
|
|
|
end += 2; |
9086
|
|
|
|
|
|
|
#if defined COMPILE_PCRE8 |
9087
|
|
|
|
|
|
|
SLJIT_ASSERT((charpos_othercasebit >> 8) == 0); |
9088
|
|
|
|
|
|
|
#elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32 |
9089
|
|
|
|
|
|
|
SLJIT_ASSERT((charpos_othercasebit >> 9) == 0); |
9090
|
|
|
|
|
|
|
if ((charpos_othercasebit & 0x100) != 0) |
9091
|
|
|
|
|
|
|
charpos_othercasebit = (charpos_othercasebit & 0xff) << 8; |
9092
|
|
|
|
|
|
|
#endif |
9093
|
|
|
|
|
|
|
if (charpos_othercasebit != 0) |
9094
|
|
|
|
|
|
|
charpos_char |= charpos_othercasebit; |
9095
|
|
|
|
|
|
|
|
9096
|
|
|
|
|
|
|
BACKTRACK_AS(char_iterator_backtrack)->u.charpos.enabled = TRUE; |
9097
|
|
|
|
|
|
|
BACKTRACK_AS(char_iterator_backtrack)->u.charpos.chr = charpos_char; |
9098
|
|
|
|
|
|
|
BACKTRACK_AS(char_iterator_backtrack)->u.charpos.othercasebit = charpos_othercasebit; |
9099
|
|
|
|
|
|
|
} |
9100
|
|
|
|
|
|
|
} |
9101
|
|
|
|
|
|
|
|
9102
|
|
|
|
|
|
|
if (charpos_enabled) |
9103
|
|
|
|
|
|
|
{ |
9104
|
|
|
|
|
|
|
if (opcode == OP_UPTO) |
9105
|
|
|
|
|
|
|
OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max + 1); |
9106
|
|
|
|
|
|
|
|
9107
|
|
|
|
|
|
|
/* Search the first instance of charpos_char. */ |
9108
|
|
|
|
|
|
|
jump = JUMP(SLJIT_JUMP); |
9109
|
|
|
|
|
|
|
label = LABEL(); |
9110
|
|
|
|
|
|
|
if (opcode == OP_UPTO) |
9111
|
|
|
|
|
|
|
{ |
9112
|
|
|
|
|
|
|
OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); |
9113
|
|
|
|
|
|
|
add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_ZERO)); |
9114
|
|
|
|
|
|
|
} |
9115
|
|
|
|
|
|
|
compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks, FALSE); |
9116
|
|
|
|
|
|
|
if (fast_str_ptr != 0) |
9117
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); |
9118
|
|
|
|
|
|
|
JUMPHERE(jump); |
9119
|
|
|
|
|
|
|
|
9120
|
|
|
|
|
|
|
detect_partial_match(common, &backtrack->topbacktracks); |
9121
|
|
|
|
|
|
|
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); |
9122
|
|
|
|
|
|
|
if (charpos_othercasebit != 0) |
9123
|
|
|
|
|
|
|
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit); |
9124
|
|
|
|
|
|
|
CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label); |
9125
|
|
|
|
|
|
|
|
9126
|
|
|
|
|
|
|
if (private_data_ptr == 0) |
9127
|
|
|
|
|
|
|
allocate_stack(common, 2); |
9128
|
|
|
|
|
|
|
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); |
9129
|
|
|
|
|
|
|
OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); |
9130
|
|
|
|
|
|
|
if (opcode == OP_UPTO) |
9131
|
|
|
|
|
|
|
{ |
9132
|
|
|
|
|
|
|
OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); |
9133
|
|
|
|
|
|
|
add_jump(compiler, &no_match, JUMP(SLJIT_ZERO)); |
9134
|
|
|
|
|
|
|
} |
9135
|
|
|
|
|
|
|
|
9136
|
|
|
|
|
|
|
/* Search the last instance of charpos_char. */ |
9137
|
|
|
|
|
|
|
label = LABEL(); |
9138
|
|
|
|
|
|
|
compile_char1_matchingpath(common, type, cc, &no_match, FALSE); |
9139
|
|
|
|
|
|
|
if (fast_str_ptr != 0) |
9140
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); |
9141
|
|
|
|
|
|
|
detect_partial_match(common, &no_match); |
9142
|
|
|
|
|
|
|
OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(0)); |
9143
|
|
|
|
|
|
|
if (charpos_othercasebit != 0) |
9144
|
|
|
|
|
|
|
OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, charpos_othercasebit); |
9145
|
|
|
|
|
|
|
if (opcode == OP_STAR) |
9146
|
|
|
|
|
|
|
{ |
9147
|
|
|
|
|
|
|
CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char, label); |
9148
|
|
|
|
|
|
|
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); |
9149
|
|
|
|
|
|
|
} |
9150
|
|
|
|
|
|
|
else |
9151
|
|
|
|
|
|
|
{ |
9152
|
|
|
|
|
|
|
jump = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, charpos_char); |
9153
|
|
|
|
|
|
|
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); |
9154
|
|
|
|
|
|
|
JUMPHERE(jump); |
9155
|
|
|
|
|
|
|
} |
9156
|
|
|
|
|
|
|
|
9157
|
|
|
|
|
|
|
if (opcode == OP_UPTO) |
9158
|
|
|
|
|
|
|
{ |
9159
|
|
|
|
|
|
|
OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); |
9160
|
|
|
|
|
|
|
JUMPTO(SLJIT_NOT_ZERO, label); |
9161
|
|
|
|
|
|
|
} |
9162
|
|
|
|
|
|
|
else |
9163
|
|
|
|
|
|
|
JUMPTO(SLJIT_JUMP, label); |
9164
|
|
|
|
|
|
|
|
9165
|
|
|
|
|
|
|
set_jumps(no_match, LABEL()); |
9166
|
|
|
|
|
|
|
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); |
9167
|
|
|
|
|
|
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
9168
|
|
|
|
|
|
|
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); |
9169
|
|
|
|
|
|
|
} |
9170
|
|
|
|
|
|
|
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 |
9171
|
|
|
|
|
|
|
else if (common->utf) |
9172
|
|
|
|
|
|
|
{ |
9173
|
|
|
|
|
|
|
if (private_data_ptr == 0) |
9174
|
|
|
|
|
|
|
allocate_stack(common, 2); |
9175
|
|
|
|
|
|
|
|
9176
|
|
|
|
|
|
|
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); |
9177
|
|
|
|
|
|
|
OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); |
9178
|
|
|
|
|
|
|
|
9179
|
|
|
|
|
|
|
if (opcode == OP_UPTO) |
9180
|
|
|
|
|
|
|
OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max); |
9181
|
|
|
|
|
|
|
|
9182
|
|
|
|
|
|
|
label = LABEL(); |
9183
|
|
|
|
|
|
|
compile_char1_matchingpath(common, type, cc, &no_match, TRUE); |
9184
|
|
|
|
|
|
|
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); |
9185
|
|
|
|
|
|
|
|
9186
|
|
|
|
|
|
|
if (opcode == OP_UPTO) |
9187
|
|
|
|
|
|
|
{ |
9188
|
|
|
|
|
|
|
OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); |
9189
|
|
|
|
|
|
|
JUMPTO(SLJIT_NOT_ZERO, label); |
9190
|
|
|
|
|
|
|
} |
9191
|
|
|
|
|
|
|
else |
9192
|
|
|
|
|
|
|
JUMPTO(SLJIT_JUMP, label); |
9193
|
|
|
|
|
|
|
|
9194
|
|
|
|
|
|
|
set_jumps(no_match, LABEL()); |
9195
|
|
|
|
|
|
|
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0); |
9196
|
|
|
|
|
|
|
if (fast_str_ptr != 0) |
9197
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); |
9198
|
|
|
|
|
|
|
} |
9199
|
|
|
|
|
|
|
#endif |
9200
|
|
|
|
|
|
|
else |
9201
|
|
|
|
|
|
|
{ |
9202
|
|
|
|
|
|
|
if (private_data_ptr == 0) |
9203
|
|
|
|
|
|
|
allocate_stack(common, 2); |
9204
|
|
|
|
|
|
|
|
9205
|
|
|
|
|
|
|
OP1(SLJIT_MOV, base, offset1, STR_PTR, 0); |
9206
|
|
|
|
|
|
|
if (opcode == OP_UPTO) |
9207
|
|
|
|
|
|
|
OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max); |
9208
|
|
|
|
|
|
|
|
9209
|
|
|
|
|
|
|
label = LABEL(); |
9210
|
|
|
|
|
|
|
detect_partial_match(common, &no_match); |
9211
|
|
|
|
|
|
|
compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE); |
9212
|
|
|
|
|
|
|
if (opcode == OP_UPTO) |
9213
|
|
|
|
|
|
|
{ |
9214
|
|
|
|
|
|
|
OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); |
9215
|
|
|
|
|
|
|
JUMPTO(SLJIT_NOT_ZERO, label); |
9216
|
|
|
|
|
|
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
9217
|
|
|
|
|
|
|
} |
9218
|
|
|
|
|
|
|
else |
9219
|
|
|
|
|
|
|
JUMPTO(SLJIT_JUMP, label); |
9220
|
|
|
|
|
|
|
|
9221
|
|
|
|
|
|
|
set_jumps(no_char1_match, LABEL()); |
9222
|
|
|
|
|
|
|
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
9223
|
|
|
|
|
|
|
set_jumps(no_match, LABEL()); |
9224
|
|
|
|
|
|
|
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); |
9225
|
|
|
|
|
|
|
if (fast_str_ptr != 0) |
9226
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); |
9227
|
|
|
|
|
|
|
} |
9228
|
|
|
|
|
|
|
} |
9229
|
|
|
|
|
|
|
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); |
9230
|
|
|
|
|
|
|
break; |
9231
|
|
|
|
|
|
|
|
9232
|
|
|
|
|
|
|
case OP_MINSTAR: |
9233
|
|
|
|
|
|
|
if (private_data_ptr == 0) |
9234
|
|
|
|
|
|
|
allocate_stack(common, 1); |
9235
|
|
|
|
|
|
|
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); |
9236
|
|
|
|
|
|
|
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); |
9237
|
|
|
|
|
|
|
if (fast_str_ptr != 0) |
9238
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); |
9239
|
|
|
|
|
|
|
break; |
9240
|
|
|
|
|
|
|
|
9241
|
|
|
|
|
|
|
case OP_MINUPTO: |
9242
|
|
|
|
|
|
|
SLJIT_ASSERT(fast_str_ptr == 0); |
9243
|
|
|
|
|
|
|
if (private_data_ptr == 0) |
9244
|
|
|
|
|
|
|
allocate_stack(common, 2); |
9245
|
|
|
|
|
|
|
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); |
9246
|
|
|
|
|
|
|
OP1(SLJIT_MOV, base, offset1, SLJIT_IMM, max + 1); |
9247
|
|
|
|
|
|
|
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); |
9248
|
|
|
|
|
|
|
break; |
9249
|
|
|
|
|
|
|
|
9250
|
|
|
|
|
|
|
case OP_QUERY: |
9251
|
|
|
|
|
|
|
case OP_MINQUERY: |
9252
|
|
|
|
|
|
|
SLJIT_ASSERT(fast_str_ptr == 0); |
9253
|
|
|
|
|
|
|
if (private_data_ptr == 0) |
9254
|
|
|
|
|
|
|
allocate_stack(common, 1); |
9255
|
|
|
|
|
|
|
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0); |
9256
|
|
|
|
|
|
|
if (opcode == OP_QUERY) |
9257
|
|
|
|
|
|
|
compile_char1_matchingpath(common, type, cc, &BACKTRACK_AS(char_iterator_backtrack)->u.backtracks, TRUE); |
9258
|
|
|
|
|
|
|
BACKTRACK_AS(char_iterator_backtrack)->matchingpath = LABEL(); |
9259
|
|
|
|
|
|
|
break; |
9260
|
|
|
|
|
|
|
|
9261
|
|
|
|
|
|
|
case OP_EXACT: |
9262
|
|
|
|
|
|
|
break; |
9263
|
|
|
|
|
|
|
|
9264
|
|
|
|
|
|
|
case OP_POSSTAR: |
9265
|
|
|
|
|
|
|
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 |
9266
|
|
|
|
|
|
|
if (common->utf) |
9267
|
|
|
|
|
|
|
{ |
9268
|
|
|
|
|
|
|
OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); |
9269
|
|
|
|
|
|
|
label = LABEL(); |
9270
|
|
|
|
|
|
|
compile_char1_matchingpath(common, type, cc, &no_match, TRUE); |
9271
|
|
|
|
|
|
|
OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); |
9272
|
|
|
|
|
|
|
JUMPTO(SLJIT_JUMP, label); |
9273
|
|
|
|
|
|
|
set_jumps(no_match, LABEL()); |
9274
|
|
|
|
|
|
|
OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset); |
9275
|
|
|
|
|
|
|
if (fast_str_ptr != 0) |
9276
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); |
9277
|
|
|
|
|
|
|
break; |
9278
|
|
|
|
|
|
|
} |
9279
|
|
|
|
|
|
|
#endif |
9280
|
|
|
|
|
|
|
label = LABEL(); |
9281
|
|
|
|
|
|
|
detect_partial_match(common, &no_match); |
9282
|
|
|
|
|
|
|
compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE); |
9283
|
|
|
|
|
|
|
JUMPTO(SLJIT_JUMP, label); |
9284
|
|
|
|
|
|
|
set_jumps(no_char1_match, LABEL()); |
9285
|
|
|
|
|
|
|
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
9286
|
|
|
|
|
|
|
set_jumps(no_match, LABEL()); |
9287
|
|
|
|
|
|
|
if (fast_str_ptr != 0) |
9288
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), fast_str_ptr, STR_PTR, 0); |
9289
|
|
|
|
|
|
|
break; |
9290
|
|
|
|
|
|
|
|
9291
|
|
|
|
|
|
|
case OP_POSUPTO: |
9292
|
|
|
|
|
|
|
SLJIT_ASSERT(fast_str_ptr == 0); |
9293
|
|
|
|
|
|
|
#if defined SUPPORT_UTF && !defined COMPILE_PCRE32 |
9294
|
|
|
|
|
|
|
if (common->utf) |
9295
|
|
|
|
|
|
|
{ |
9296
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0); |
9297
|
|
|
|
|
|
|
OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max); |
9298
|
|
|
|
|
|
|
label = LABEL(); |
9299
|
|
|
|
|
|
|
compile_char1_matchingpath(common, type, cc, &no_match, TRUE); |
9300
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1, STR_PTR, 0); |
9301
|
|
|
|
|
|
|
OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); |
9302
|
|
|
|
|
|
|
JUMPTO(SLJIT_NOT_ZERO, label); |
9303
|
|
|
|
|
|
|
set_jumps(no_match, LABEL()); |
9304
|
|
|
|
|
|
|
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), POSSESSIVE1); |
9305
|
|
|
|
|
|
|
break; |
9306
|
|
|
|
|
|
|
} |
9307
|
|
|
|
|
|
|
#endif |
9308
|
|
|
|
|
|
|
OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max); |
9309
|
|
|
|
|
|
|
label = LABEL(); |
9310
|
|
|
|
|
|
|
detect_partial_match(common, &no_match); |
9311
|
|
|
|
|
|
|
compile_char1_matchingpath(common, type, cc, &no_char1_match, FALSE); |
9312
|
|
|
|
|
|
|
OP2(SLJIT_SUB | SLJIT_SET_Z, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1); |
9313
|
|
|
|
|
|
|
JUMPTO(SLJIT_NOT_ZERO, label); |
9314
|
|
|
|
|
|
|
OP2(SLJIT_ADD, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
9315
|
|
|
|
|
|
|
set_jumps(no_char1_match, LABEL()); |
9316
|
|
|
|
|
|
|
OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1)); |
9317
|
|
|
|
|
|
|
set_jumps(no_match, LABEL()); |
9318
|
|
|
|
|
|
|
break; |
9319
|
|
|
|
|
|
|
|
9320
|
|
|
|
|
|
|
case OP_POSQUERY: |
9321
|
|
|
|
|
|
|
SLJIT_ASSERT(fast_str_ptr == 0); |
9322
|
|
|
|
|
|
|
OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); |
9323
|
|
|
|
|
|
|
compile_char1_matchingpath(common, type, cc, &no_match, TRUE); |
9324
|
|
|
|
|
|
|
OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0); |
9325
|
|
|
|
|
|
|
set_jumps(no_match, LABEL()); |
9326
|
|
|
|
|
|
|
OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset); |
9327
|
|
|
|
|
|
|
break; |
9328
|
|
|
|
|
|
|
|
9329
|
|
|
|
|
|
|
default: |
9330
|
|
|
|
|
|
|
SLJIT_UNREACHABLE(); |
9331
|
|
|
|
|
|
|
break; |
9332
|
|
|
|
|
|
|
} |
9333
|
|
|
|
|
|
|
|
9334
|
|
|
|
|
|
|
count_match(common); |
9335
|
|
|
|
|
|
|
return end; |
9336
|
|
|
|
|
|
|
} |
9337
|
|
|
|
|
|
|
|
9338
|
|
|
|
|
|
|
/* Emits the matching path for OP_FAIL, OP_ACCEPT and OP_ASSERT_ACCEPT.

OP_FAIL emits an unconditional jump that is queued on the topbacktracks list
of the backtrack record pushed here.

For the accepting opcodes every "accept" jump either targets
common->accept_label directly (when it is already set) or is queued on
common->accept. When the opcode is OP_ASSERT_ACCEPT, common->currententry is
set, or common->might_be_empty is false, a single unconditional accept jump is
emitted. Otherwise the generated code accepts when STR_PTR differs from the
OVECTOR(0) slot, fails when jit_arguments.notempty is non-zero, accepts when
jit_arguments.notempty_atstart is zero, accepts when STR_PTR differs from
jit_arguments.str, and fails in every remaining case.

Returns cc + 1. */
static SLJIT_INLINE pcre_uchar *compile_fail_accept_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
BOOL accept_is_pending;

PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);

if (*cc == OP_FAIL)
  {
  add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
  return cc + 1;
  }

/* TRUE while the accept label has not been created: jumps are then collected
on common->accept instead of being bound immediately. */
accept_is_pending = (common->accept_label == NULL);

if (*cc != OP_ASSERT_ACCEPT && common->currententry == NULL && common->might_be_empty)
  {
  /* A non-empty match (current position differs from OVECTOR(0)) is accepted. */
  if (accept_is_pending)
    {
    add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)));
    }
  else
    {
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), common->accept_label);
    }

  /* Empty match: notempty set means failure. */
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty));
  add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0));

  /* notempty_atstart clear means the empty match is accepted. */
  OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart));
  if (accept_is_pending)
    {
    add_jump(compiler, &common->accept, CMP(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0));
    }
  else
    {
    CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0, common->accept_label);
    }

  /* notempty_atstart set: accept only when STR_PTR differs from arguments->str. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str));
  if (accept_is_pending)
    {
    add_jump(compiler, &common->accept, CMP(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0));
    }
  else
    {
    CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, common->accept_label);
    }

  add_jump(compiler, &backtrack->topbacktracks, JUMP(SLJIT_JUMP));
  return cc + 1;
  }

/* No need to check notempty conditions. */
if (accept_is_pending)
  {
  add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));
  }
else
  {
  JUMPTO(SLJIT_JUMP, common->accept_label);
  }
return cc + 1;
}
9381
|
|
|
|
|
|
|
|
9382
|
|
|
|
|
|
|
/* Emits the matching path for OP_CLOSE.

The 2-unit operand at cc + 1 is the capture group number. Nothing is emitted
when common->currententry is set. Otherwise STR_PTR is stored into the
group's second OVECTOR slot and, when common->optimized_cbracket[group] is
zero, the value held in OVECTOR_PRIV(group) is copied into the group's first
OVECTOR slot.

Returns cc + 1 + IMM2_SIZE. */
static SLJIT_INLINE pcre_uchar *compile_close_matchingpath(compiler_common *common, pcre_uchar *cc)
{
DEFINE_COMPILER;
int group = GET2(cc, 1);
int slot = group << 1;
BOOL copy_private_start = (common->optimized_cbracket[group] == 0);
pcre_uchar *next = cc + 1 + IMM2_SIZE;

/* Data will be discarded anyway... */
if (common->currententry != NULL)
  return next;

/* TMP1 carries the private start value across the store of the end position. */
if (copy_private_start)
  {
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR_PRIV(group));
  }

OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(slot + 1), STR_PTR, 0);

if (copy_private_start)
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(slot), TMP1, 0);
  }

return next;
}
9400
|
|
|
|
|
|
|
|
9401
|
|
|
|
|
|
|
/* Emits the matching path for the backtracking control verbs dispatched here
by compile_matchingpath (OP_PRUNE, OP_SKIP, OP_THEN, OP_COMMIT and the _ARG
variants of the first three).

A backtrack record is pushed for every verb. OP_SKIP additionally saves
STR_PTR in one newly allocated stack slot. OP_PRUNE_ARG and OP_THEN_ARG store
the address cc + 2 both in the local common->mark_ptr slot and in
jit_arguments.mark_ptr.

Returns the address of the next opcode: cc + 1, or cc + 3 + cc[1] for the
_ARG variants. */
static SLJIT_INLINE pcre_uchar *compile_control_verb_matchingpath(compiler_common *common, pcre_uchar *cc, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
pcre_uchar verb = *cc;
pcre_uchar *next = cc + 1;
BOOL has_argument = (verb == OP_PRUNE_ARG || verb == OP_SKIP_ARG || verb == OP_THEN_ARG);

/* The _ARG forms are followed by a length unit (cc[1]) and that many units
plus one more, all of which are skipped. */
if (has_argument)
  next += 2 + cc[1];

PUSH_BACKTRACK(sizeof(backtrack_common), cc, NULL);

switch (verb)
  {
  case OP_SKIP:
  allocate_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
  break;

  case OP_PRUNE_ARG:
  case OP_THEN_ARG:
  OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
  break;

  default:
  /* Nothing else is emitted on the matching path for the other verbs. */
  break;
  }

return next;
}
9430
|
|
|
|
|
|
|
|
9431
|
|
|
|
|
|
|
/* One-element opcode buffer holding OP_THEN_TRAP. Then-trap backtrack records
point their common.cc field at it. */
static pcre_uchar then_trap_opcode[] = { OP_THEN_TRAP };
9432
|
|
|
|
|
|
|
|
9433
|
|
|
|
|
|
|
/* Pushes a then_trap_backtrack record for the code range cc..ccend, makes it
the current common->then_trap, and emits the code that links a trap entry
into the control head chain.

3 + max(framesize, 0) stack slots are allocated. The three highest-indexed
slots receive, from highest to lowest: the offset of cc from common->start,
the constant type_then_trap, and the previous value of the
common->control_head_ptr slot. The control head slot itself is set to
STACK_TOP, advanced by framesize machine words when framesize is positive.
When framesize is not negative, init_frame() is called with stack position
framesize - 1. */
static SLJIT_INLINE void compile_then_trap_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
then_trap_backtrack *trap;
BOOL needs_control_head;
int framesize;
int slots;

PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
trap = BACKTRACK_AS(then_trap_backtrack);
common->then_trap = trap;
trap->common.cc = then_trap_opcode;
trap->start = (sljit_sw)(cc - common->start);
trap->framesize = get_framesize(common, cc, ccend, FALSE, &needs_control_head);

framesize = trap->framesize;
slots = (framesize < 0) ? 3 : 3 + framesize;

/* TMP2 keeps the previous control head while the new entry is built. */
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
allocate_stack(common, slots);
if (slots > 3)
  {
  OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0, SLJIT_IMM, (slots - 3) * sizeof(sljit_sw));
  }
else
  {
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
  }
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(slots - 1), SLJIT_IMM, trap->start);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(slots - 2), SLJIT_IMM, type_then_trap);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(slots - 3), TMP2, 0);

if (framesize >= 0)
  init_frame(common, cc, ccend, framesize - 1, 0, FALSE);
}
9463
|
|
|
|
|
|
|
|
9464
|
|
|
|
|
|
|
/* Jump list that receives the failure jumps of the item being compiled: the
nextbacktracks list of the most recent backtrack under parent when there is
one, otherwise parent's own topbacktracks list. Evaluated at every use
because parent->top is read at that moment. */
#define MATCHINGPATH_FAIL_LIST \
  (parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks)

/* Emits the matching path for the opcodes in [cc, ccend).

Each opcode is dispatched to its compile_*_matchingpath routine, which returns
the address of the next opcode; a few short opcodes (OP_SET_SOM,
OP_BRAMINZERO, OP_MARK, OP_SKIPZERO) are handled inline. The function returns
early when a routine yields NULL or when an unknown opcode is met.

When common->has_then is set and common->then_offsets is non-zero at the
offset of cc, the range is wrapped in a then-trap: a tail record is pushed
before the loop and a head record after it, and common->then_trap is restored
to its previous value at the end. */
static void compile_matchingpath(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend, backtrack_common *parent)
{
DEFINE_COMPILER;
backtrack_common *backtrack;
BOOL has_then_trap = FALSE;
then_trap_backtrack *save_then_trap = NULL;
pcre_uchar repeat_op;

SLJIT_ASSERT(*ccend == OP_END || (*ccend >= OP_ALT && *ccend <= OP_KETRPOS));

if (common->has_then && common->then_offsets[cc - common->start] != 0)
  {
  SLJIT_ASSERT(*ccend != OP_END && common->control_head_ptr != 0);
  has_then_trap = TRUE;
  save_then_trap = common->then_trap;
  /* Tail item on backtrack. */
  compile_then_trap_matchingpath(common, cc, ccend, parent);
  }

while (cc < ccend)
  {
  switch(*cc)
    {
    /* Zero-width assertions. */
    case OP_SOD: case OP_SOM:
    case OP_NOT_WORD_BOUNDARY: case OP_WORD_BOUNDARY:
    case OP_EODN: case OP_EOD:
    case OP_DOLL: case OP_DOLLM:
    case OP_CIRC: case OP_CIRCM:
    case OP_REVERSE:
    cc = compile_simple_assertion_matchingpath(common, *cc, cc + 1, MATCHINGPATH_FAIL_LIST);
    break;

    /* Single character type matches. */
    case OP_NOT_DIGIT: case OP_DIGIT:
    case OP_NOT_WHITESPACE: case OP_WHITESPACE:
    case OP_NOT_WORDCHAR: case OP_WORDCHAR:
    case OP_ANY: case OP_ALLANY: case OP_ANYBYTE:
    case OP_NOTPROP: case OP_PROP:
    case OP_ANYNL:
    case OP_NOT_HSPACE: case OP_HSPACE:
    case OP_NOT_VSPACE: case OP_VSPACE:
    case OP_EXTUNI:
    case OP_NOT: case OP_NOTI:
    cc = compile_char1_matchingpath(common, *cc, cc + 1, MATCHINGPATH_FAIL_LIST, TRUE);
    break;

    case OP_SET_SOM:
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
    /* The previous OVECTOR(0) value is saved on the stack and replaced by STR_PTR. */
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0));
    allocate_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP2, 0);
    cc++;
    break;

    case OP_CHAR: case OP_CHARI:
    if (common->mode == JIT_COMPILE)
      {
      cc = compile_charn_matchingpath(common, cc, ccend, MATCHINGPATH_FAIL_LIST);
      }
    else
      {
      cc = compile_char1_matchingpath(common, *cc, cc + 1, MATCHINGPATH_FAIL_LIST, TRUE);
      }
    break;

    /* Character iterators. */
    case OP_STAR: case OP_MINSTAR: case OP_PLUS: case OP_MINPLUS:
    case OP_QUERY: case OP_MINQUERY: case OP_UPTO: case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR: case OP_POSPLUS: case OP_POSQUERY: case OP_POSUPTO:
    case OP_STARI: case OP_MINSTARI: case OP_PLUSI: case OP_MINPLUSI:
    case OP_QUERYI: case OP_MINQUERYI: case OP_UPTOI: case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI: case OP_POSPLUSI: case OP_POSQUERYI: case OP_POSUPTOI:
    case OP_NOTSTAR: case OP_NOTMINSTAR: case OP_NOTPLUS: case OP_NOTMINPLUS:
    case OP_NOTQUERY: case OP_NOTMINQUERY: case OP_NOTUPTO: case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR: case OP_NOTPOSPLUS: case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
    case OP_NOTSTARI: case OP_NOTMINSTARI: case OP_NOTPLUSI: case OP_NOTMINPLUSI:
    case OP_NOTQUERYI: case OP_NOTMINQUERYI: case OP_NOTUPTOI: case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI: case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
    case OP_TYPESTAR: case OP_TYPEMINSTAR: case OP_TYPEPLUS: case OP_TYPEMINPLUS:
    case OP_TYPEQUERY: case OP_TYPEMINQUERY: case OP_TYPEUPTO: case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS: case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
    cc = compile_iterator_matchingpath(common, cc, parent);
    break;

    case OP_CLASS: case OP_NCLASS:
    /* The opcode following the 32-byte class bitmap decides whether the class is repeated. */
    repeat_op = cc[1 + (32 / sizeof(pcre_uchar))];
    if (repeat_op >= OP_CRSTAR && repeat_op <= OP_CRPOSRANGE)
      {
      cc = compile_iterator_matchingpath(common, cc, parent);
      }
    else
      {
      cc = compile_char1_matchingpath(common, *cc, cc + 1, MATCHINGPATH_FAIL_LIST, TRUE);
      }
    break;

#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
    case OP_XCLASS:
    /* GET(cc, 1) is the offset from cc to the opcode that follows the class. */
    repeat_op = *(cc + GET(cc, 1));
    if (repeat_op >= OP_CRSTAR && repeat_op <= OP_CRPOSRANGE)
      {
      cc = compile_iterator_matchingpath(common, cc, parent);
      }
    else
      {
      cc = compile_char1_matchingpath(common, *cc, cc + 1, MATCHINGPATH_FAIL_LIST, TRUE);
      }
    break;
#endif

    case OP_REF: case OP_REFI:
    repeat_op = cc[1 + IMM2_SIZE];
    if (repeat_op >= OP_CRSTAR && repeat_op <= OP_CRPOSRANGE)
      {
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
      }
    else
      {
      compile_ref_matchingpath(common, cc, MATCHINGPATH_FAIL_LIST, TRUE, FALSE);
      cc += 1 + IMM2_SIZE;
      }
    break;

    case OP_DNREF: case OP_DNREFI:
    repeat_op = cc[1 + 2 * IMM2_SIZE];
    if (repeat_op >= OP_CRSTAR && repeat_op <= OP_CRPOSRANGE)
      {
      cc = compile_ref_iterator_matchingpath(common, cc, parent);
      }
    else
      {
      compile_dnref_search(common, cc, MATCHINGPATH_FAIL_LIST);
      compile_ref_matchingpath(common, cc, MATCHINGPATH_FAIL_LIST, TRUE, FALSE);
      cc += 1 + 2 * IMM2_SIZE;
      }
    break;

    case OP_RECURSE:
    cc = compile_recurse_matchingpath(common, cc, parent);
    break;

    case OP_CALLOUT:
    cc = compile_callout_matchingpath(common, cc, parent);
    break;

    case OP_ASSERT: case OP_ASSERT_NOT:
    case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
    PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
    cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
    break;

    case OP_BRAMINZERO:
    PUSH_BACKTRACK_NOVALUE(sizeof(braminzero_backtrack), cc);
    cc = bracketend(cc + 1);
    if (*(cc - 1 - LINK_SIZE) == OP_KETRMIN)
      {
      /* Two slots: a zero in STACK(0) and the current position in STACK(1). */
      allocate_stack(common, 2);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), STR_PTR, 0);
      }
    else
      {
      /* One slot holding the current position. */
      allocate_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);
      }
    BACKTRACK_AS(braminzero_backtrack)->matchingpath = LABEL();
    count_match(common);
    break;

    case OP_ONCE: case OP_ONCE_NC:
    case OP_BRA: case OP_CBRA: case OP_COND:
    case OP_SBRA: case OP_SCBRA: case OP_SCOND:
    cc = compile_bracket_matchingpath(common, cc, parent);
    break;

    case OP_BRAZERO:
    if (cc[1] <= OP_ASSERTBACK_NOT)
      {
      PUSH_BACKTRACK_NOVALUE(sizeof(assert_backtrack), cc);
      cc = compile_assert_matchingpath(common, cc, BACKTRACK_AS(assert_backtrack), FALSE);
      }
    else
      {
      cc = compile_bracket_matchingpath(common, cc, parent);
      }
    break;

    case OP_BRAPOS: case OP_CBRAPOS:
    case OP_SBRAPOS: case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    cc = compile_bracketpos_matchingpath(common, cc, parent);
    break;

    case OP_MARK:
    PUSH_BACKTRACK_NOVALUE(sizeof(backtrack_common), cc);
    SLJIT_ASSERT(common->mark_ptr != 0);
    /* Save the previous mark, then publish cc + 2 as the new mark both
    locally and in jit_arguments. */
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), common->mark_ptr);
    allocate_stack(common, common->has_skip_arg ? 5 : 1);
    OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(common->has_skip_arg ? 4 : 0), TMP2, 0);
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, (sljit_sw)(cc + 2));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, mark_ptr), TMP2, 0);
    if (common->has_skip_arg)
      {
      /* Link a type_mark entry (previous head, type, mark address, position)
      into the control head chain. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, STACK_TOP, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(1), SLJIT_IMM, type_mark);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(2), SLJIT_IMM, (sljit_sw)(cc + 2));
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(3), STR_PTR, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), TMP1, 0);
      }
    cc += 1 + 2 + cc[1];
    break;

    case OP_PRUNE: case OP_PRUNE_ARG:
    case OP_SKIP: case OP_SKIP_ARG:
    case OP_THEN: case OP_THEN_ARG:
    case OP_COMMIT:
    cc = compile_control_verb_matchingpath(common, cc, parent);
    break;

    case OP_FAIL: case OP_ACCEPT: case OP_ASSERT_ACCEPT:
    cc = compile_fail_accept_matchingpath(common, cc, parent);
    break;

    case OP_CLOSE:
    cc = compile_close_matchingpath(common, cc);
    break;

    case OP_SKIPZERO:
    cc = bracketend(cc + 1);
    break;

    default:
    SLJIT_UNREACHABLE();
    return;
    }

  /* A routine returned NULL: stop compiling this range. */
  if (cc == NULL)
    return;
  }

if (has_then_trap)
  {
  /* Head item on backtrack. */
  PUSH_BACKTRACK_NOVALUE(sizeof(then_trap_backtrack), cc);
  BACKTRACK_AS(then_trap_backtrack)->common.cc = then_trap_opcode;
  BACKTRACK_AS(then_trap_backtrack)->then_trap = common->then_trap;
  common->then_trap = save_then_trap;
  }
SLJIT_ASSERT(cc == ccend);
}

#undef MATCHINGPATH_FAIL_LIST
9774
|
|
|
|
|
|
|
|
9775
|
|
|
|
|
|
|
#undef PUSH_BACKTRACK |
9776
|
|
|
|
|
|
|
#undef PUSH_BACKTRACK_NOVALUE |
9777
|
|
|
|
|
|
|
#undef BACKTRACK_AS |
9778
|
|
|
|
|
|
|
|
9779
|
|
|
|
|
|
|
/* Compiles the backtracking path of `current` and returns from the enclosing
function (which must return void) when the sljit compiler reports an error
afterwards. `common` and `compiler` must be in scope where the macro is
used. */
#define COMPILE_BACKTRACKINGPATH(current) \
  do \
    { \
    compile_backtrackingpath(common, (current)); \
    if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) \
      return; \
    } \
  while (0)

/* Casts the local `current` backtrack pointer to a pointer to `type`. */
#define CURRENT_AS(type) ((type *)current)
9789
|
|
|
|
|
|
|
|
9790
|
|
|
|
|
|
|
/* Emits the backtracking path for the iterator described by "current".

   common   the JIT compiler state
   current  the backtrack record; it is accessed as a char_iterator_backtrack

   The iterator keeps up to two values (slot 0 and slot 1). When no private
   data is assigned to the opcode they live in the two topmost stack entries,
   otherwise they live in the private data area addressed through SLJIT_SP.
   Stack entries are released here only in the former case. */
static void compile_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
char_iterator_backtrack *iter = CURRENT_AS(char_iterator_backtrack);
pcre_uchar *cc = current->cc;
pcre_uchar opcode;
pcre_uchar type;
sljit_u32 max = 0, exact;
struct sljit_label *retry = NULL;
struct sljit_jump *done = NULL;
jump_list *failed = NULL;
pcre_uchar *end;
int private_data_ptr = PRIVATE_DATA(cc);
int on_stack = (private_data_ptr == 0);
int base, offset0, offset1;

/* Select the location of the two iterator slots. */
if (on_stack)
  {
  base = SLJIT_MEM1(STACK_TOP);
  offset0 = STACK(0);
  offset1 = STACK(1);
  }
else
  {
  base = SLJIT_MEM1(SLJIT_SP);
  offset0 = private_data_ptr;
  offset1 = private_data_ptr + (int)sizeof(sljit_sw);
  }

cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &exact, &end);

switch(opcode)
  {
  case OP_STAR:
  case OP_UPTO:
  if (type == OP_ANYNL || type == OP_EXTUNI)
    {
    /* These types always use the stack: pop the saved STR_PTR and go back
       to the matching path unless the popped value is zero. */
    SLJIT_ASSERT(private_data_ptr == 0);
    set_jumps(iter->u.backtracks, LABEL());
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, iter->matchingpath);
    break;
    }

  if (iter->u.charpos.enabled)
    {
    /* Step backwards until the character before STR_PTR equals the
       recorded one, or until the limit kept in slot 1 is reached. */
    OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
    OP1(SLJIT_MOV, TMP2, 0, base, offset1);
    OP2(SLJIT_SUB, STR_PTR, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(1));

    done = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, TMP2, 0);
    retry = LABEL();
    OP1(MOV_UCHAR, TMP1, 0, SLJIT_MEM1(STR_PTR), IN_UCHARS(-1));
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    if (iter->u.charpos.othercasebit != 0)
      OP2(SLJIT_OR, TMP1, 0, TMP1, 0, SLJIT_IMM, iter->u.charpos.othercasebit);
    CMPTO(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, iter->u.charpos.chr, iter->matchingpath);
    skip_char_back(common);
    CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP2, 0, retry);
    }
  else
    {
    /* Give back one character and retry, unless slot 0 is already at or
       below the limit kept in slot 1. */
    OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
    done = CMP(SLJIT_LESS_EQUAL, STR_PTR, 0, base, offset1);
    skip_char_back(common);
    OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
    JUMPTO(SLJIT_JUMP, iter->matchingpath);
    }

  JUMPHERE(done);
  if (on_stack)
    free_stack(common, 2);
  break;

  case OP_MINSTAR:
  /* Try to match one more character; on success store the new position
     and resume the matching path. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  compile_char1_matchingpath(common, type, cc, &failed, TRUE);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, iter->matchingpath);
  set_jumps(failed, LABEL());
  if (on_stack)
    free_stack(common, 1);
  break;

  case OP_MINUPTO:
  /* Same as OP_MINSTAR, but slot 1 holds a counter which is decremented
     first; the attempt is abandoned when the decrement yields zero. */
  OP1(SLJIT_MOV, TMP1, 0, base, offset1);
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP2(SLJIT_SUB | SLJIT_SET_Z, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
  add_jump(compiler, &failed, JUMP(SLJIT_ZERO));

  OP1(SLJIT_MOV, base, offset1, TMP1, 0);
  compile_char1_matchingpath(common, type, cc, &failed, TRUE);
  OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
  JUMPTO(SLJIT_JUMP, iter->matchingpath);

  set_jumps(failed, LABEL());
  if (on_stack)
    free_stack(common, 2);
  break;

  case OP_QUERY:
  /* Slot 0 is read and cleared; a non-zero value is a position to resume
     from. The collected backtracks enter after the unconditional jump and
     perform the same read-and-clear before resuming. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, iter->matchingpath);
  done = JUMP(SLJIT_JUMP);
  set_jumps(iter->u.backtracks, LABEL());
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  JUMPTO(SLJIT_JUMP, iter->matchingpath);
  JUMPHERE(done);
  if (on_stack)
    free_stack(common, 1);
  break;

  case OP_MINQUERY:
  /* Slot 0 is read and cleared; a zero value means nothing is left to
     try, otherwise one character is matched from the stored position. */
  OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
  OP1(SLJIT_MOV, base, offset0, SLJIT_IMM, 0);
  done = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  compile_char1_matchingpath(common, type, cc, &failed, TRUE);
  JUMPTO(SLJIT_JUMP, iter->matchingpath);
  set_jumps(failed, LABEL());
  JUMPHERE(done);
  if (on_stack)
    free_stack(common, 1);
  break;

  case OP_EXACT:
  case OP_POSSTAR:
  case OP_POSQUERY:
  case OP_POSUPTO:
  /* No code is generated for these opcodes. */
  break;

  default:
  SLJIT_UNREACHABLE();
  break;
  }

/* Every remaining top level backtrack continues from here. */
set_jumps(current->topbacktracks, LABEL());
}
9917
|
|
|
|
|
|
|
|
9918
|
|
|
|
|
|
|
/* Emits the backtracking path of a repeated back reference.

   common   the JIT compiler state
   current  the backtrack record; it is accessed as a ref_iterator_backtrack

   The repeat opcode follows the reference opcode and its immediate
   argument(s): one IMM2 value for OP_REF / OP_REFI, two for the other
   reference opcodes handled here. An even repeat opcode selects the
   maximizing variant, an odd one the other variant. */
static SLJIT_INLINE void compile_ref_iterator_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
ref_iterator_backtrack *iter = CURRENT_AS(ref_iterator_backtrack);
pcre_uchar *cc = current->cc;
BOOL single_imm = (*cc == OP_REF || *cc == OP_REFI);
pcre_uchar repeat_op = cc[single_imm ? 1 + IMM2_SIZE : 1 + 2 * IMM2_SIZE];

if ((repeat_op & 0x1) != 0)
  {
  /* Resume from the stored position when it is non-zero; otherwise (and
     for every collected backtrack) drop the saved stack entries. */
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, iter->matchingpath);
  set_jumps(current->topbacktracks, LABEL());
  free_stack(common, single_imm ? 2 : 3);
  }
else
  {
  /* Maximize case: pop one saved position and resume from it unless the
     popped value is zero. */
  set_jumps(current->topbacktracks, LABEL());
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, iter->matchingpath);
  }
}
9942
|
|
|
|
|
|
|
|
9943
|
|
|
|
|
|
|
/* Emits the backtracking path of a recursion.

   common   the JIT compiler state
   current  the backtrack record; it is accessed as a recurse_backtrack

   For an inlined pattern only the nested backtracking path is compiled.
   Otherwise the values found on the stack are written back: OVECTOR(0) when
   common->has_set_som is set and the mark slot when common->mark_ptr is
   non-zero. */
static SLJIT_INLINE void compile_recurse_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
recurse_backtrack *recurse = CURRENT_AS(recurse_backtrack);

if (recurse->inlined_pattern)
  compile_backtrackingpath(common, current->top);
set_jumps(current->topbacktracks, LABEL());
if (recurse->inlined_pattern)
  return;

if (common->has_set_som)
  {
  if (common->mark_ptr != 0)
    {
    /* Two values were saved: stack entry 0 goes to OVECTOR(0) and stack
       entry 1 goes to the mark slot. */
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    free_stack(common, 2);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP2, 0);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
    }
  else
    {
    /* Only OVECTOR(0) was saved. */
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), (int)(OVECTOR(0)), TMP2, 0);
    }
  }
else if (common->mark_ptr != 0)
  {
  /* Only the mark slot was saved. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  free_stack(common, 1);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP2, 0);
  }
}
9968
|
|
|
|
|
|
|
|
9969
|
|
|
|
|
|
|
/* Emits the backtracking path of an assertion.

   common   the JIT compiler state
   current  the backtrack record; it is accessed as an assert_backtrack

   The assertion may be preceded by OP_BRAZERO (OP_BRAMINZERO is never
   expected here). In that case a value saved on the top of the stack decides
   whether the matching path is entered again. */
static void compile_assert_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
assert_backtrack *backtrack = CURRENT_AS(assert_backtrack);
pcre_uchar *cc = current->cc;
int has_brazero;
struct sljit_jump *skip = NULL;

SLJIT_ASSERT(*cc != OP_BRAMINZERO);
has_brazero = (*cc == OP_BRAZERO);
if (has_brazero)
  {
  /* Step over the OP_BRAZERO and load the saved value into STR_PTR. */
  cc++;
  SLJIT_ASSERT(current->topbacktracks == NULL);
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  }

if (backtrack->framesize < 0)
  {
  /* Negative framesize: no frame has to be reverted. */
  set_jumps(current->topbacktracks, LABEL());

  if (has_brazero)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, backtrack->matchingpath);
    free_stack(common, 1);
    }
  return;
  }

if (has_brazero)
  {
  if (*cc == OP_ASSERT_NOT || *cc == OP_ASSERTBACK_NOT)
    {
    OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
    CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, backtrack->matchingpath);
    free_stack(common, 1);
    return;
    }
  free_stack(common, 1);
  skip = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0);
  }

if (*cc == OP_ASSERT || *cc == OP_ASSERTBACK)
  {
  /* Reload STACK_TOP from the private data slot, call the revertframes
     routine, then refill the private data slot from the stack. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), backtrack->private_data_ptr);
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), backtrack->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-backtrack->framesize - 1));
  }
set_jumps(current->topbacktracks, LABEL());

if (has_brazero)
  {
  /* We know there is enough place on the stack. */
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, sizeof(sljit_sw));
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
  JUMPTO(SLJIT_JUMP, backtrack->matchingpath);
  JUMPHERE(skip);
  }
}
10035
|
|
|
|
|
|
|
|
10036
|
|
|
|
|
|
|
static void compile_bracket_backtrackingpath(compiler_common *common, struct backtrack_common *current) |
10037
|
|
|
|
|
|
|
{ |
10038
|
|
|
|
|
|
|
DEFINE_COMPILER; |
10039
|
|
|
|
|
|
|
int opcode, stacksize, alt_count, alt_max; |
10040
|
|
|
|
|
|
|
int offset = 0; |
10041
|
|
|
|
|
|
|
int private_data_ptr = CURRENT_AS(bracket_backtrack)->private_data_ptr; |
10042
|
|
|
|
|
|
|
int repeat_ptr = 0, repeat_type = 0, repeat_count = 0; |
10043
|
|
|
|
|
|
|
pcre_uchar *cc = current->cc; |
10044
|
|
|
|
|
|
|
pcre_uchar *ccbegin; |
10045
|
|
|
|
|
|
|
pcre_uchar *ccprev; |
10046
|
|
|
|
|
|
|
pcre_uchar bra = OP_BRA; |
10047
|
|
|
|
|
|
|
pcre_uchar ket; |
10048
|
|
|
|
|
|
|
assert_backtrack *assert; |
10049
|
|
|
|
|
|
|
sljit_uw *next_update_addr = NULL; |
10050
|
|
|
|
|
|
|
BOOL has_alternatives; |
10051
|
|
|
|
|
|
|
BOOL needs_control_head = FALSE; |
10052
|
|
|
|
|
|
|
struct sljit_jump *brazero = NULL; |
10053
|
|
|
|
|
|
|
struct sljit_jump *alt1 = NULL; |
10054
|
|
|
|
|
|
|
struct sljit_jump *alt2 = NULL; |
10055
|
|
|
|
|
|
|
struct sljit_jump *once = NULL; |
10056
|
|
|
|
|
|
|
struct sljit_jump *cond = NULL; |
10057
|
|
|
|
|
|
|
struct sljit_label *rmin_label = NULL; |
10058
|
|
|
|
|
|
|
struct sljit_label *exact_label = NULL; |
10059
|
|
|
|
|
|
|
|
10060
|
|
|
|
|
|
|
if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO) |
10061
|
|
|
|
|
|
|
{ |
10062
|
|
|
|
|
|
|
bra = *cc; |
10063
|
|
|
|
|
|
|
cc++; |
10064
|
|
|
|
|
|
|
} |
10065
|
|
|
|
|
|
|
|
10066
|
|
|
|
|
|
|
opcode = *cc; |
10067
|
|
|
|
|
|
|
ccbegin = bracketend(cc) - 1 - LINK_SIZE; |
10068
|
|
|
|
|
|
|
ket = *ccbegin; |
10069
|
|
|
|
|
|
|
if (ket == OP_KET && PRIVATE_DATA(ccbegin) != 0) |
10070
|
|
|
|
|
|
|
{ |
10071
|
|
|
|
|
|
|
repeat_ptr = PRIVATE_DATA(ccbegin); |
10072
|
|
|
|
|
|
|
repeat_type = PRIVATE_DATA(ccbegin + 2); |
10073
|
|
|
|
|
|
|
repeat_count = PRIVATE_DATA(ccbegin + 3); |
10074
|
|
|
|
|
|
|
SLJIT_ASSERT(repeat_type != 0 && repeat_count != 0); |
10075
|
|
|
|
|
|
|
if (repeat_type == OP_UPTO) |
10076
|
|
|
|
|
|
|
ket = OP_KETRMAX; |
10077
|
|
|
|
|
|
|
if (repeat_type == OP_MINUPTO) |
10078
|
|
|
|
|
|
|
ket = OP_KETRMIN; |
10079
|
|
|
|
|
|
|
} |
10080
|
|
|
|
|
|
|
ccbegin = cc; |
10081
|
|
|
|
|
|
|
cc += GET(cc, 1); |
10082
|
|
|
|
|
|
|
has_alternatives = *cc == OP_ALT; |
10083
|
|
|
|
|
|
|
if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)) |
10084
|
|
|
|
|
|
|
has_alternatives = (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) || CURRENT_AS(bracket_backtrack)->u.condfailed != NULL; |
10085
|
|
|
|
|
|
|
if (opcode == OP_CBRA || opcode == OP_SCBRA) |
10086
|
|
|
|
|
|
|
offset = (GET2(ccbegin, 1 + LINK_SIZE)) << 1; |
10087
|
|
|
|
|
|
|
if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN)) |
10088
|
|
|
|
|
|
|
opcode = OP_SCOND; |
10089
|
|
|
|
|
|
|
if (SLJIT_UNLIKELY(opcode == OP_ONCE_NC)) |
10090
|
|
|
|
|
|
|
opcode = OP_ONCE; |
10091
|
|
|
|
|
|
|
|
10092
|
|
|
|
|
|
|
alt_max = has_alternatives ? no_alternatives(ccbegin) : 0; |
10093
|
|
|
|
|
|
|
|
10094
|
|
|
|
|
|
|
/* Decoding the needs_control_head in framesize. */ |
10095
|
|
|
|
|
|
|
if (opcode == OP_ONCE) |
10096
|
|
|
|
|
|
|
{ |
10097
|
|
|
|
|
|
|
needs_control_head = (CURRENT_AS(bracket_backtrack)->u.framesize & 0x1) != 0; |
10098
|
|
|
|
|
|
|
CURRENT_AS(bracket_backtrack)->u.framesize >>= 1; |
10099
|
|
|
|
|
|
|
} |
10100
|
|
|
|
|
|
|
|
10101
|
|
|
|
|
|
|
if (ket != OP_KET && repeat_type != 0) |
10102
|
|
|
|
|
|
|
{ |
10103
|
|
|
|
|
|
|
/* TMP1 is used in OP_KETRMIN below. */ |
10104
|
|
|
|
|
|
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); |
10105
|
|
|
|
|
|
|
free_stack(common, 1); |
10106
|
|
|
|
|
|
|
if (repeat_type == OP_UPTO) |
10107
|
|
|
|
|
|
|
OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0, SLJIT_IMM, 1); |
10108
|
|
|
|
|
|
|
else |
10109
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0); |
10110
|
|
|
|
|
|
|
} |
10111
|
|
|
|
|
|
|
|
10112
|
|
|
|
|
|
|
if (ket == OP_KETRMAX) |
10113
|
|
|
|
|
|
|
{ |
10114
|
|
|
|
|
|
|
if (bra == OP_BRAZERO) |
10115
|
|
|
|
|
|
|
{ |
10116
|
|
|
|
|
|
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); |
10117
|
|
|
|
|
|
|
free_stack(common, 1); |
10118
|
|
|
|
|
|
|
brazero = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, 0); |
10119
|
|
|
|
|
|
|
} |
10120
|
|
|
|
|
|
|
} |
10121
|
|
|
|
|
|
|
else if (ket == OP_KETRMIN) |
10122
|
|
|
|
|
|
|
{ |
10123
|
|
|
|
|
|
|
if (bra != OP_BRAMINZERO) |
10124
|
|
|
|
|
|
|
{ |
10125
|
|
|
|
|
|
|
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); |
10126
|
|
|
|
|
|
|
if (repeat_type != 0) |
10127
|
|
|
|
|
|
|
{ |
10128
|
|
|
|
|
|
|
/* TMP1 was set a few lines above. */ |
10129
|
|
|
|
|
|
|
CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath); |
10130
|
|
|
|
|
|
|
/* Drop STR_PTR for non-greedy plus quantifier. */ |
10131
|
|
|
|
|
|
|
if (opcode != OP_ONCE) |
10132
|
|
|
|
|
|
|
free_stack(common, 1); |
10133
|
|
|
|
|
|
|
} |
10134
|
|
|
|
|
|
|
else if (opcode >= OP_SBRA || opcode == OP_ONCE) |
10135
|
|
|
|
|
|
|
{ |
10136
|
|
|
|
|
|
|
/* Checking zero-length iteration. */ |
10137
|
|
|
|
|
|
|
if (opcode != OP_ONCE || CURRENT_AS(bracket_backtrack)->u.framesize < 0) |
10138
|
|
|
|
|
|
|
CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr, CURRENT_AS(bracket_backtrack)->recursive_matchingpath); |
10139
|
|
|
|
|
|
|
else |
10140
|
|
|
|
|
|
|
{ |
10141
|
|
|
|
|
|
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); |
10142
|
|
|
|
|
|
|
CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_MEM1(TMP1), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 2), CURRENT_AS(bracket_backtrack)->recursive_matchingpath); |
10143
|
|
|
|
|
|
|
} |
10144
|
|
|
|
|
|
|
/* Drop STR_PTR for non-greedy plus quantifier. */ |
10145
|
|
|
|
|
|
|
if (opcode != OP_ONCE) |
10146
|
|
|
|
|
|
|
free_stack(common, 1); |
10147
|
|
|
|
|
|
|
} |
10148
|
|
|
|
|
|
|
else |
10149
|
|
|
|
|
|
|
JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->recursive_matchingpath); |
10150
|
|
|
|
|
|
|
} |
10151
|
|
|
|
|
|
|
rmin_label = LABEL(); |
10152
|
|
|
|
|
|
|
if (repeat_type != 0) |
10153
|
|
|
|
|
|
|
OP2(SLJIT_ADD, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1); |
10154
|
|
|
|
|
|
|
} |
10155
|
|
|
|
|
|
|
else if (bra == OP_BRAZERO) |
10156
|
|
|
|
|
|
|
{ |
10157
|
|
|
|
|
|
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); |
10158
|
|
|
|
|
|
|
free_stack(common, 1); |
10159
|
|
|
|
|
|
|
brazero = CMP(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0); |
10160
|
|
|
|
|
|
|
} |
10161
|
|
|
|
|
|
|
else if (repeat_type == OP_EXACT) |
10162
|
|
|
|
|
|
|
{ |
10163
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1); |
10164
|
|
|
|
|
|
|
exact_label = LABEL(); |
10165
|
|
|
|
|
|
|
} |
10166
|
|
|
|
|
|
|
|
10167
|
|
|
|
|
|
|
if (offset != 0) |
10168
|
|
|
|
|
|
|
{ |
10169
|
|
|
|
|
|
|
if (common->capture_last_ptr != 0) |
10170
|
|
|
|
|
|
|
{ |
10171
|
|
|
|
|
|
|
SLJIT_ASSERT(common->optimized_cbracket[offset >> 1] == 0); |
10172
|
|
|
|
|
|
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); |
10173
|
|
|
|
|
|
|
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); |
10174
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0); |
10175
|
|
|
|
|
|
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2)); |
10176
|
|
|
|
|
|
|
free_stack(common, 3); |
10177
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP2, 0); |
10178
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP1, 0); |
10179
|
|
|
|
|
|
|
} |
10180
|
|
|
|
|
|
|
else if (common->optimized_cbracket[offset >> 1] == 0) |
10181
|
|
|
|
|
|
|
{ |
10182
|
|
|
|
|
|
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); |
10183
|
|
|
|
|
|
|
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); |
10184
|
|
|
|
|
|
|
free_stack(common, 2); |
10185
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); |
10186
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0); |
10187
|
|
|
|
|
|
|
} |
10188
|
|
|
|
|
|
|
} |
10189
|
|
|
|
|
|
|
|
10190
|
|
|
|
|
|
|
if (SLJIT_UNLIKELY(opcode == OP_ONCE)) |
10191
|
|
|
|
|
|
|
{ |
10192
|
|
|
|
|
|
|
if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0) |
10193
|
|
|
|
|
|
|
{ |
10194
|
|
|
|
|
|
|
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); |
10195
|
|
|
|
|
|
|
add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); |
10196
|
|
|
|
|
|
|
} |
10197
|
|
|
|
|
|
|
once = JUMP(SLJIT_JUMP); |
10198
|
|
|
|
|
|
|
} |
10199
|
|
|
|
|
|
|
else if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)) |
10200
|
|
|
|
|
|
|
{ |
10201
|
|
|
|
|
|
|
if (has_alternatives) |
10202
|
|
|
|
|
|
|
{ |
10203
|
|
|
|
|
|
|
/* Always exactly one alternative. */ |
10204
|
|
|
|
|
|
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); |
10205
|
|
|
|
|
|
|
free_stack(common, 1); |
10206
|
|
|
|
|
|
|
|
10207
|
|
|
|
|
|
|
alt_max = 2; |
10208
|
|
|
|
|
|
|
alt1 = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw)); |
10209
|
|
|
|
|
|
|
} |
10210
|
|
|
|
|
|
|
} |
10211
|
|
|
|
|
|
|
else if (has_alternatives) |
10212
|
|
|
|
|
|
|
{ |
10213
|
|
|
|
|
|
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); |
10214
|
|
|
|
|
|
|
free_stack(common, 1); |
10215
|
|
|
|
|
|
|
|
10216
|
|
|
|
|
|
|
if (alt_max > 4) |
10217
|
|
|
|
|
|
|
{ |
10218
|
|
|
|
|
|
|
/* Table jump if alt_max is greater than 4. */ |
10219
|
|
|
|
|
|
|
next_update_addr = allocate_read_only_data(common, alt_max * sizeof(sljit_uw)); |
10220
|
|
|
|
|
|
|
if (SLJIT_UNLIKELY(next_update_addr == NULL)) |
10221
|
|
|
|
|
|
|
return; |
10222
|
|
|
|
|
|
|
sljit_emit_ijump(compiler, SLJIT_JUMP, SLJIT_MEM1(TMP1), (sljit_sw)next_update_addr); |
10223
|
|
|
|
|
|
|
add_label_addr(common, next_update_addr++); |
10224
|
|
|
|
|
|
|
} |
10225
|
|
|
|
|
|
|
else |
10226
|
|
|
|
|
|
|
{ |
10227
|
|
|
|
|
|
|
if (alt_max == 4) |
10228
|
|
|
|
|
|
|
alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw)); |
10229
|
|
|
|
|
|
|
alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, sizeof(sljit_uw)); |
10230
|
|
|
|
|
|
|
} |
10231
|
|
|
|
|
|
|
} |
10232
|
|
|
|
|
|
|
|
10233
|
|
|
|
|
|
|
COMPILE_BACKTRACKINGPATH(current->top); |
10234
|
|
|
|
|
|
|
if (current->topbacktracks) |
10235
|
|
|
|
|
|
|
set_jumps(current->topbacktracks, LABEL()); |
10236
|
|
|
|
|
|
|
|
10237
|
|
|
|
|
|
|
if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND)) |
10238
|
|
|
|
|
|
|
{ |
10239
|
|
|
|
|
|
|
/* Conditional block always has at most one alternative. */ |
10240
|
|
|
|
|
|
|
if (ccbegin[1 + LINK_SIZE] >= OP_ASSERT && ccbegin[1 + LINK_SIZE] <= OP_ASSERTBACK_NOT) |
10241
|
|
|
|
|
|
|
{ |
10242
|
|
|
|
|
|
|
SLJIT_ASSERT(has_alternatives); |
10243
|
|
|
|
|
|
|
assert = CURRENT_AS(bracket_backtrack)->u.assert; |
10244
|
|
|
|
|
|
|
if (assert->framesize >= 0 && (ccbegin[1 + LINK_SIZE] == OP_ASSERT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK)) |
10245
|
|
|
|
|
|
|
{ |
10246
|
|
|
|
|
|
|
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr); |
10247
|
|
|
|
|
|
|
add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); |
10248
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-assert->framesize - 1)); |
10249
|
|
|
|
|
|
|
} |
10250
|
|
|
|
|
|
|
cond = JUMP(SLJIT_JUMP); |
10251
|
|
|
|
|
|
|
set_jumps(CURRENT_AS(bracket_backtrack)->u.assert->condfailed, LABEL()); |
10252
|
|
|
|
|
|
|
} |
10253
|
|
|
|
|
|
|
else if (CURRENT_AS(bracket_backtrack)->u.condfailed != NULL) |
10254
|
|
|
|
|
|
|
{ |
10255
|
|
|
|
|
|
|
SLJIT_ASSERT(has_alternatives); |
10256
|
|
|
|
|
|
|
cond = JUMP(SLJIT_JUMP); |
10257
|
|
|
|
|
|
|
set_jumps(CURRENT_AS(bracket_backtrack)->u.condfailed, LABEL()); |
10258
|
|
|
|
|
|
|
} |
10259
|
|
|
|
|
|
|
else |
10260
|
|
|
|
|
|
|
SLJIT_ASSERT(!has_alternatives); |
10261
|
|
|
|
|
|
|
} |
10262
|
|
|
|
|
|
|
|
10263
|
|
|
|
|
|
|
if (has_alternatives) |
10264
|
|
|
|
|
|
|
{ |
10265
|
|
|
|
|
|
|
alt_count = sizeof(sljit_uw); |
10266
|
|
|
|
|
|
|
do |
10267
|
|
|
|
|
|
|
{ |
10268
|
|
|
|
|
|
|
current->top = NULL; |
10269
|
|
|
|
|
|
|
current->topbacktracks = NULL; |
10270
|
|
|
|
|
|
|
current->nextbacktracks = NULL; |
10271
|
|
|
|
|
|
|
/* Conditional blocks always have an additional alternative, even if it is empty. */ |
10272
|
|
|
|
|
|
|
if (*cc == OP_ALT) |
10273
|
|
|
|
|
|
|
{ |
10274
|
|
|
|
|
|
|
ccprev = cc + 1 + LINK_SIZE; |
10275
|
|
|
|
|
|
|
cc += GET(cc, 1); |
10276
|
|
|
|
|
|
|
if (opcode != OP_COND && opcode != OP_SCOND) |
10277
|
|
|
|
|
|
|
{ |
10278
|
|
|
|
|
|
|
if (opcode != OP_ONCE) |
10279
|
|
|
|
|
|
|
{ |
10280
|
|
|
|
|
|
|
if (private_data_ptr != 0) |
10281
|
|
|
|
|
|
|
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), private_data_ptr); |
10282
|
|
|
|
|
|
|
else |
10283
|
|
|
|
|
|
|
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); |
10284
|
|
|
|
|
|
|
} |
10285
|
|
|
|
|
|
|
else |
10286
|
|
|
|
|
|
|
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(needs_control_head ? 1 : 0)); |
10287
|
|
|
|
|
|
|
} |
10288
|
|
|
|
|
|
|
compile_matchingpath(common, ccprev, cc, current); |
10289
|
|
|
|
|
|
|
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) |
10290
|
|
|
|
|
|
|
return; |
10291
|
|
|
|
|
|
|
} |
10292
|
|
|
|
|
|
|
|
10293
|
|
|
|
|
|
|
/* Instructions after the current alternative is successfully matched. */ |
10294
|
|
|
|
|
|
|
/* There is a similar code in compile_bracket_matchingpath. */ |
10295
|
|
|
|
|
|
|
if (opcode == OP_ONCE) |
10296
|
|
|
|
|
|
|
match_once_common(common, ket, CURRENT_AS(bracket_backtrack)->u.framesize, private_data_ptr, has_alternatives, needs_control_head); |
10297
|
|
|
|
|
|
|
|
10298
|
|
|
|
|
|
|
stacksize = 0; |
10299
|
|
|
|
|
|
|
if (repeat_type == OP_MINUPTO) |
10300
|
|
|
|
|
|
|
{ |
10301
|
|
|
|
|
|
|
/* We need to preserve the counter. TMP2 will be used below. */ |
10302
|
|
|
|
|
|
|
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr); |
10303
|
|
|
|
|
|
|
stacksize++; |
10304
|
|
|
|
|
|
|
} |
10305
|
|
|
|
|
|
|
if (ket != OP_KET || bra != OP_BRA) |
10306
|
|
|
|
|
|
|
stacksize++; |
10307
|
|
|
|
|
|
|
if (offset != 0) |
10308
|
|
|
|
|
|
|
{ |
10309
|
|
|
|
|
|
|
if (common->capture_last_ptr != 0) |
10310
|
|
|
|
|
|
|
stacksize++; |
10311
|
|
|
|
|
|
|
if (common->optimized_cbracket[offset >> 1] == 0) |
10312
|
|
|
|
|
|
|
stacksize += 2; |
10313
|
|
|
|
|
|
|
} |
10314
|
|
|
|
|
|
|
if (opcode != OP_ONCE) |
10315
|
|
|
|
|
|
|
stacksize++; |
10316
|
|
|
|
|
|
|
|
10317
|
|
|
|
|
|
|
if (stacksize > 0) |
10318
|
|
|
|
|
|
|
allocate_stack(common, stacksize); |
10319
|
|
|
|
|
|
|
|
10320
|
|
|
|
|
|
|
stacksize = 0; |
10321
|
|
|
|
|
|
|
if (repeat_type == OP_MINUPTO) |
10322
|
|
|
|
|
|
|
{ |
10323
|
|
|
|
|
|
|
/* TMP2 was set above. */ |
10324
|
|
|
|
|
|
|
OP2(SLJIT_SUB, SLJIT_MEM1(STACK_TOP), STACK(stacksize), TMP2, 0, SLJIT_IMM, 1); |
10325
|
|
|
|
|
|
|
stacksize++; |
10326
|
|
|
|
|
|
|
} |
10327
|
|
|
|
|
|
|
|
10328
|
|
|
|
|
|
|
if (ket != OP_KET || bra != OP_BRA) |
10329
|
|
|
|
|
|
|
{ |
10330
|
|
|
|
|
|
|
if (ket != OP_KET) |
10331
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), STR_PTR, 0); |
10332
|
|
|
|
|
|
|
else |
10333
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, 0); |
10334
|
|
|
|
|
|
|
stacksize++; |
10335
|
|
|
|
|
|
|
} |
10336
|
|
|
|
|
|
|
|
10337
|
|
|
|
|
|
|
if (offset != 0) |
10338
|
|
|
|
|
|
|
stacksize = match_capture_common(common, stacksize, offset, private_data_ptr); |
10339
|
|
|
|
|
|
|
|
10340
|
|
|
|
|
|
|
if (opcode != OP_ONCE) |
10341
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(stacksize), SLJIT_IMM, alt_count); |
10342
|
|
|
|
|
|
|
|
10343
|
|
|
|
|
|
|
if (offset != 0 && ket == OP_KETRMAX && common->optimized_cbracket[offset >> 1] != 0) |
10344
|
|
|
|
|
|
|
{ |
10345
|
|
|
|
|
|
|
/* If ket is not OP_KETRMAX, this code path is executed after the jump to alternative_matchingpath. */ |
10346
|
|
|
|
|
|
|
SLJIT_ASSERT(private_data_ptr == OVECTOR(offset + 0)); |
10347
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), STR_PTR, 0); |
10348
|
|
|
|
|
|
|
} |
10349
|
|
|
|
|
|
|
|
10350
|
|
|
|
|
|
|
JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->alternative_matchingpath); |
10351
|
|
|
|
|
|
|
|
10352
|
|
|
|
|
|
|
if (opcode != OP_ONCE) |
10353
|
|
|
|
|
|
|
{ |
10354
|
|
|
|
|
|
|
if (alt_max > 4) |
10355
|
|
|
|
|
|
|
add_label_addr(common, next_update_addr++); |
10356
|
|
|
|
|
|
|
else |
10357
|
|
|
|
|
|
|
{ |
10358
|
|
|
|
|
|
|
if (alt_count != 2 * sizeof(sljit_uw)) |
10359
|
|
|
|
|
|
|
{ |
10360
|
|
|
|
|
|
|
JUMPHERE(alt1); |
10361
|
|
|
|
|
|
|
if (alt_max == 3 && alt_count == sizeof(sljit_uw)) |
10362
|
|
|
|
|
|
|
alt2 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 2 * sizeof(sljit_uw)); |
10363
|
|
|
|
|
|
|
} |
10364
|
|
|
|
|
|
|
else |
10365
|
|
|
|
|
|
|
{ |
10366
|
|
|
|
|
|
|
JUMPHERE(alt2); |
10367
|
|
|
|
|
|
|
if (alt_max == 4) |
10368
|
|
|
|
|
|
|
alt1 = CMP(SLJIT_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, 3 * sizeof(sljit_uw)); |
10369
|
|
|
|
|
|
|
} |
10370
|
|
|
|
|
|
|
} |
10371
|
|
|
|
|
|
|
alt_count += sizeof(sljit_uw); |
10372
|
|
|
|
|
|
|
} |
10373
|
|
|
|
|
|
|
|
10374
|
|
|
|
|
|
|
COMPILE_BACKTRACKINGPATH(current->top); |
10375
|
|
|
|
|
|
|
if (current->topbacktracks) |
10376
|
|
|
|
|
|
|
set_jumps(current->topbacktracks, LABEL()); |
10377
|
|
|
|
|
|
|
SLJIT_ASSERT(!current->nextbacktracks); |
10378
|
|
|
|
|
|
|
} |
10379
|
|
|
|
|
|
|
while (*cc == OP_ALT); |
10380
|
|
|
|
|
|
|
|
10381
|
|
|
|
|
|
|
if (cond != NULL) |
10382
|
|
|
|
|
|
|
{ |
10383
|
|
|
|
|
|
|
SLJIT_ASSERT(opcode == OP_COND || opcode == OP_SCOND); |
10384
|
|
|
|
|
|
|
assert = CURRENT_AS(bracket_backtrack)->u.assert; |
10385
|
|
|
|
|
|
|
if ((ccbegin[1 + LINK_SIZE] == OP_ASSERT_NOT || ccbegin[1 + LINK_SIZE] == OP_ASSERTBACK_NOT) && assert->framesize >= 0) |
10386
|
|
|
|
|
|
|
{ |
10387
|
|
|
|
|
|
|
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr); |
10388
|
|
|
|
|
|
|
add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL)); |
10389
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), assert->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-assert->framesize - 1)); |
10390
|
|
|
|
|
|
|
} |
10391
|
|
|
|
|
|
|
JUMPHERE(cond); |
10392
|
|
|
|
|
|
|
} |
10393
|
|
|
|
|
|
|
|
10394
|
|
|
|
|
|
|
/* Free the STR_PTR. */ |
10395
|
|
|
|
|
|
|
if (private_data_ptr == 0) |
10396
|
|
|
|
|
|
|
free_stack(common, 1); |
10397
|
|
|
|
|
|
|
} |
10398
|
|
|
|
|
|
|
|
10399
|
|
|
|
|
|
|
if (offset != 0) |
10400
|
|
|
|
|
|
|
{ |
10401
|
|
|
|
|
|
|
/* Using both tmp register is better for instruction scheduling. */ |
10402
|
|
|
|
|
|
|
if (common->optimized_cbracket[offset >> 1] != 0) |
10403
|
|
|
|
|
|
|
{ |
10404
|
|
|
|
|
|
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); |
10405
|
|
|
|
|
|
|
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); |
10406
|
|
|
|
|
|
|
free_stack(common, 2); |
10407
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0); |
10408
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0); |
10409
|
|
|
|
|
|
|
} |
10410
|
|
|
|
|
|
|
else |
10411
|
|
|
|
|
|
|
{ |
10412
|
|
|
|
|
|
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); |
10413
|
|
|
|
|
|
|
free_stack(common, 1); |
10414
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0); |
10415
|
|
|
|
|
|
|
} |
10416
|
|
|
|
|
|
|
} |
10417
|
|
|
|
|
|
|
else if (opcode == OP_SBRA || opcode == OP_SCOND) |
10418
|
|
|
|
|
|
|
{ |
10419
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(0)); |
10420
|
|
|
|
|
|
|
free_stack(common, 1); |
10421
|
|
|
|
|
|
|
} |
10422
|
|
|
|
|
|
|
else if (opcode == OP_ONCE) |
10423
|
|
|
|
|
|
|
{ |
10424
|
|
|
|
|
|
|
cc = ccbegin + GET(ccbegin, 1); |
10425
|
|
|
|
|
|
|
stacksize = needs_control_head ? 1 : 0; |
10426
|
|
|
|
|
|
|
|
10427
|
|
|
|
|
|
|
if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0) |
10428
|
|
|
|
|
|
|
{ |
10429
|
|
|
|
|
|
|
/* Reset head and drop saved frame. */ |
10430
|
|
|
|
|
|
|
stacksize += CURRENT_AS(bracket_backtrack)->u.framesize + ((ket != OP_KET || *cc == OP_ALT) ? 2 : 1); |
10431
|
|
|
|
|
|
|
} |
10432
|
|
|
|
|
|
|
else if (ket == OP_KETRMAX || (*cc == OP_ALT && ket != OP_KETRMIN)) |
10433
|
|
|
|
|
|
|
{ |
10434
|
|
|
|
|
|
|
/* The STR_PTR must be released. */ |
10435
|
|
|
|
|
|
|
stacksize++; |
10436
|
|
|
|
|
|
|
} |
10437
|
|
|
|
|
|
|
|
10438
|
|
|
|
|
|
|
if (stacksize > 0) |
10439
|
|
|
|
|
|
|
free_stack(common, stacksize); |
10440
|
|
|
|
|
|
|
|
10441
|
|
|
|
|
|
|
JUMPHERE(once); |
10442
|
|
|
|
|
|
|
/* Restore previous private_data_ptr */ |
10443
|
|
|
|
|
|
|
if (CURRENT_AS(bracket_backtrack)->u.framesize >= 0) |
10444
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracket_backtrack)->u.framesize - 1)); |
10445
|
|
|
|
|
|
|
else if (ket == OP_KETRMIN) |
10446
|
|
|
|
|
|
|
{ |
10447
|
|
|
|
|
|
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); |
10448
|
|
|
|
|
|
|
/* See the comment below. */ |
10449
|
|
|
|
|
|
|
free_stack(common, 2); |
10450
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), private_data_ptr, TMP1, 0); |
10451
|
|
|
|
|
|
|
} |
10452
|
|
|
|
|
|
|
} |
10453
|
|
|
|
|
|
|
|
10454
|
|
|
|
|
|
|
if (repeat_type == OP_EXACT) |
10455
|
|
|
|
|
|
|
{ |
10456
|
|
|
|
|
|
|
OP2(SLJIT_ADD, TMP1, 0, SLJIT_MEM1(SLJIT_SP), repeat_ptr, SLJIT_IMM, 1); |
10457
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), repeat_ptr, TMP1, 0); |
10458
|
|
|
|
|
|
|
CMPTO(SLJIT_LESS_EQUAL, TMP1, 0, SLJIT_IMM, repeat_count, exact_label); |
10459
|
|
|
|
|
|
|
} |
10460
|
|
|
|
|
|
|
else if (ket == OP_KETRMAX) |
10461
|
|
|
|
|
|
|
{ |
10462
|
|
|
|
|
|
|
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); |
10463
|
|
|
|
|
|
|
if (bra != OP_BRAZERO) |
10464
|
|
|
|
|
|
|
free_stack(common, 1); |
10465
|
|
|
|
|
|
|
|
10466
|
|
|
|
|
|
|
CMPTO(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0, CURRENT_AS(bracket_backtrack)->recursive_matchingpath); |
10467
|
|
|
|
|
|
|
if (bra == OP_BRAZERO) |
10468
|
|
|
|
|
|
|
{ |
10469
|
|
|
|
|
|
|
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(1)); |
10470
|
|
|
|
|
|
|
JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath); |
10471
|
|
|
|
|
|
|
JUMPHERE(brazero); |
10472
|
|
|
|
|
|
|
free_stack(common, 1); |
10473
|
|
|
|
|
|
|
} |
10474
|
|
|
|
|
|
|
} |
10475
|
|
|
|
|
|
|
else if (ket == OP_KETRMIN) |
10476
|
|
|
|
|
|
|
{ |
10477
|
|
|
|
|
|
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); |
10478
|
|
|
|
|
|
|
|
10479
|
|
|
|
|
|
|
/* OP_ONCE removes everything in case of a backtrack, so we don't |
10480
|
|
|
|
|
|
|
need to explicitly release the STR_PTR. The extra release would |
10481
|
|
|
|
|
|
|
affect badly the free_stack(2) above. */ |
10482
|
|
|
|
|
|
|
if (opcode != OP_ONCE) |
10483
|
|
|
|
|
|
|
free_stack(common, 1); |
10484
|
|
|
|
|
|
|
CMPTO(SLJIT_NOT_EQUAL, TMP1, 0, SLJIT_IMM, 0, rmin_label); |
10485
|
|
|
|
|
|
|
if (opcode == OP_ONCE) |
10486
|
|
|
|
|
|
|
free_stack(common, bra == OP_BRAMINZERO ? 2 : 1); |
10487
|
|
|
|
|
|
|
else if (bra == OP_BRAMINZERO) |
10488
|
|
|
|
|
|
|
free_stack(common, 1); |
10489
|
|
|
|
|
|
|
} |
10490
|
|
|
|
|
|
|
else if (bra == OP_BRAZERO) |
10491
|
|
|
|
|
|
|
{ |
10492
|
|
|
|
|
|
|
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0)); |
10493
|
|
|
|
|
|
|
JUMPTO(SLJIT_JUMP, CURRENT_AS(bracket_backtrack)->zero_matchingpath); |
10494
|
|
|
|
|
|
|
JUMPHERE(brazero); |
10495
|
|
|
|
|
|
|
} |
10496
|
|
|
|
|
|
|
} |
10497
|
|
|
|
|
|
|
|
10498
|
|
|
|
|
|
|
/* Generates the backtracking path of a possessive bracket.

The dispatcher routes OP_BRAPOS, OP_CBRAPOS, OP_SBRAPOS, OP_SCBRAPOS and
OP_BRAPOSZERO to this function, so current->cc may point at an OP_BRAPOSZERO
prefix instead of the bracket opcode itself. The prefix is stepped over before
the bracket type is examined; otherwise a zero-prefixed capturing group would
never have its saved capture pair restored.

Arguments:
  common    shared state of the code generator
  current   the backtrack record, accessed as a bracketpos_backtrack

The order of the emitted instructions is significant. */
static SLJIT_INLINE void compile_bracketpos_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
int offset;
struct sljit_jump *jump;
pcre_uchar *cc;

/* No frame was saved: there is nothing to revert, just drop everything. */
if (CURRENT_AS(bracketpos_backtrack)->framesize < 0)
  {
  cc = current->cc;

  /* Step over the zero-repeat prefix so that the test below, and the
  capture number read by GET2, look at the bracket opcode. */
  if (*cc == OP_BRAPOSZERO)
    cc++;

  if (*cc == OP_CBRAPOS || *cc == OP_SCBRAPOS)
    {
    /* Each capture owns two consecutive ovector slots. */
    offset = (GET2(cc, 1 + LINK_SIZE)) << 1;
    /* The loads and stores are interleaved across TMP1 and TMP2; TMP1 is
    reused for the capture_last value once its first store is done. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(1));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset), TMP1, 0);
    if (common->capture_last_ptr != 0)
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(2));
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1), TMP2, 0);
    if (common->capture_last_ptr != 0)
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, TMP1, 0);
    }
  set_jumps(current->topbacktracks, LABEL());
  free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
  return;
  }

/* A frame was saved: reload STACK_TOP from the private data slot and call
the shared revertframes routine. */
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr);
add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));

if (current->topbacktracks)
  {
  /* The fall-through path jumps over the release; only the paths arriving
  through topbacktracks execute it. */
  jump = JUMP(SLJIT_JUMP);
  set_jumps(current->topbacktracks, LABEL());
  /* Drop the stack frame. */
  free_stack(common, CURRENT_AS(bracketpos_backtrack)->stacksize);
  JUMPHERE(jump);
  }
/* Restore the previous content of the private data slot. */
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), CURRENT_AS(bracketpos_backtrack)->private_data_ptr, SLJIT_MEM1(STACK_TOP), STACK(-CURRENT_AS(bracketpos_backtrack)->framesize - 1));
}
10536
|
|
|
|
|
|
|
|
10537
|
|
|
|
|
|
|
/* Generates the backtracking path of OP_BRAMINZERO.

The chain pointers of the record are cleared first, then the code for the
item that follows the one-unit prefix (current->cc[1]) is generated by calling
the matching compiler directly: the assertion compiler for opcodes up to
OP_ASSERTBACK_NOT, the bracket compilers for anything above it.

Arguments:
  common    shared state of the code generator
  current   the backtrack record, accessed as a braminzero_backtrack
*/
static SLJIT_INLINE void compile_braminzero_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
assert_backtrack assert_record;

current->top = NULL;
current->topbacktracks = NULL;
current->nextbacktracks = NULL;

if (current->cc[1] <= OP_ASSERTBACK_NOT)
  {
  /* Manual call of compile_assert_matchingpath, using a zeroed temporary
  record that only carries the code pointer and the matching path label. */
  memset(&assert_record, 0, sizeof(assert_record));
  assert_record.common.cc = current->cc;
  assert_record.matchingpath = CURRENT_AS(braminzero_backtrack)->matchingpath;
  compile_assert_matchingpath(common, current->cc, &assert_record, FALSE);
  }
else
  {
  /* Manual call of compile_bracket_matchingpath and
  compile_bracket_backtrackingpath. */
  compile_bracket_matchingpath(common, current->cc, current);
  compile_bracket_backtrackingpath(common, current->top);
  }

SLJIT_ASSERT(!current->nextbacktracks && !current->topbacktracks);
}
10560
|
|
|
|
|
|
|
|
10561
|
|
|
|
|
|
|
/* Generates the backtracking path of the control verbs dispatched here
(OP_THEN, OP_THEN_ARG, OP_PRUNE, OP_PRUNE_ARG, OP_SKIP, OP_SKIP_ARG).

Arguments:
  common    shared state of the code generator
  current   the backtrack record of the verb; current->cc points at it
*/
static SLJIT_INLINE void compile_control_verb_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
pcre_uchar verb = *current->cc;
BOOL is_then = (verb == OP_THEN || verb == OP_THEN_ARG);
struct sljit_label *next_entry;
struct sljit_jump *enter_scan;

if (is_then && common->then_trap != NULL)
  {
  SLJIT_ASSERT(common->control_head_ptr != 0);

  /* Start at the control head and follow the link stored in slot 0 of each
  entry until slot 1 holds type_then_trap and slot 2 holds the start value
  of the active trap. The first entry is tested without following a link. */
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, type_then_trap);
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, common->then_trap->start);
  enter_scan = JUMP(SLJIT_JUMP);

  next_entry = LABEL();
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
  JUMPHERE(enter_scan);
  CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(1), TMP1, 0, next_entry);
  CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(STACK_TOP), STACK(2), TMP2, 0, next_entry);
  add_jump(compiler, &common->then_trap->quit, JUMP(SLJIT_JUMP));
  return;
  }

if (is_then && common->positive_assert)
  {
  add_jump(compiler, &common->positive_assert_quit, JUMP(SLJIT_JUMP));
  return;
  }

if (common->local_exit)
  {
  /* Jump straight to the quit label when it already exists, otherwise
  queue the jump on the quit list. */
  if (common->quit_label != NULL)
    JUMPTO(SLJIT_JUMP, common->quit_label);
  else
    add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
  return;
  }

if (verb == OP_SKIP_ARG)
  {
  SLJIT_ASSERT(common->control_head_ptr != 0);

  /* STACK_TOP is parked in LOCALS0 around the call to do_search_mark so the
  register can carry the address of the verb argument (current->cc + 2). */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS0, STACK_TOP, 0);
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_IMM, (sljit_sw)(current->cc + 2));
  sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(do_search_mark));
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0);

  /* A non-zero value in TMP1 after the call takes the reset_match path. */
  OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
  add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, SLJIT_IMM, 0));
  return;
  }

/* Remaining verbs: OP_SKIP reloads STR_PTR from the top stack slot, every
other verb passes 0 in STR_PTR; both continue at reset_match. */
if (verb != OP_SKIP)
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_IMM, 0);
else
  OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
add_jump(compiler, &common->reset_match, JUMP(SLJIT_JUMP));
}
10623
|
|
|
|
|
|
|
|
10624
|
|
|
|
|
|
|
/* Generates the backtracking path of the virtual OP_THEN_TRAP opcode.

When the record carries a then_trap pointer, that pointer is only installed
as the current trap in common and no code is emitted. Otherwise the code
releasing the trap entry is generated for two arrival paths: normal
backtracking, and the jumps collected on the quit list of the record.

Arguments:
  common    shared state of the code generator
  current   the backtrack record, accessed as a then_trap_backtrack
*/
static SLJIT_INLINE void compile_then_trap_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
then_trap_backtrack *trap = CURRENT_AS(then_trap_backtrack);
struct sljit_jump *skip_quit_path;
int frame_slots;

if (trap->then_trap)
  {
  common->then_trap = trap->then_trap;
  return;
  }

/* The entry occupies three slots plus the frame, when there is one. */
frame_slots = trap->framesize;
if (frame_slots < 0)
  frame_slots = 0;

/* Normal backtracking: fetch the value stored just above the frame, then
release the frame and the three slots together. */
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(frame_slots));
free_stack(common, 3 + frame_slots);
skip_quit_path = JUMP(SLJIT_JUMP);

set_jumps(trap->quit, LABEL());
/* STACK_TOP is set by THEN. */
if (trap->framesize >= 0)
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
free_stack(common, 3);

/* Both paths meet here with the value for the control head in TMP1. */
JUMPHERE(skip_quit_path);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP1, 0);
}
10653
|
|
|
|
|
|
|
|
10654
|
|
|
|
|
|
|
/* Generates the backtracking code for a chain of backtrack records.

The chain is walked through the prev links starting at current. For every
record the pending nextbacktracks jumps are bound to the current position,
then the generator belonging to the opcode at record->cc is invoked. The
value of common->then_trap seen on entry is put back before returning.

Arguments:
  common    shared state of the code generator
  current   first record of the chain; may be NULL
*/
static void compile_backtrackingpath(compiler_common *common, struct backtrack_common *current)
{
DEFINE_COMPILER;
then_trap_backtrack *entry_then_trap = common->then_trap;

for (; current != NULL; current = current->prev)
  {
  if (current->nextbacktracks != NULL)
    set_jumps(current->nextbacktracks, LABEL());

  switch(*current->cc)
    {
    case OP_SET_SOM:
    /* Put the saved value back into ovector slot 0. */
    OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
    free_stack(common, 1);
    OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), TMP1, 0);
    break;

    /* Single character, negated character, character type and class
    repeats. */
    case OP_STAR: case OP_MINSTAR:
    case OP_PLUS: case OP_MINPLUS:
    case OP_QUERY: case OP_MINQUERY:
    case OP_UPTO: case OP_MINUPTO:
    case OP_EXACT:
    case OP_POSSTAR: case OP_POSPLUS:
    case OP_POSQUERY: case OP_POSUPTO:
    case OP_STARI: case OP_MINSTARI:
    case OP_PLUSI: case OP_MINPLUSI:
    case OP_QUERYI: case OP_MINQUERYI:
    case OP_UPTOI: case OP_MINUPTOI:
    case OP_EXACTI:
    case OP_POSSTARI: case OP_POSPLUSI:
    case OP_POSQUERYI: case OP_POSUPTOI:
    case OP_NOTSTAR: case OP_NOTMINSTAR:
    case OP_NOTPLUS: case OP_NOTMINPLUS:
    case OP_NOTQUERY: case OP_NOTMINQUERY:
    case OP_NOTUPTO: case OP_NOTMINUPTO:
    case OP_NOTEXACT:
    case OP_NOTPOSSTAR: case OP_NOTPOSPLUS:
    case OP_NOTPOSQUERY: case OP_NOTPOSUPTO:
    case OP_NOTSTARI: case OP_NOTMINSTARI:
    case OP_NOTPLUSI: case OP_NOTMINPLUSI:
    case OP_NOTQUERYI: case OP_NOTMINQUERYI:
    case OP_NOTUPTOI: case OP_NOTMINUPTOI:
    case OP_NOTEXACTI:
    case OP_NOTPOSSTARI: case OP_NOTPOSPLUSI:
    case OP_NOTPOSQUERYI: case OP_NOTPOSUPTOI:
    case OP_TYPESTAR: case OP_TYPEMINSTAR:
    case OP_TYPEPLUS: case OP_TYPEMINPLUS:
    case OP_TYPEQUERY: case OP_TYPEMINQUERY:
    case OP_TYPEUPTO: case OP_TYPEMINUPTO:
    case OP_TYPEEXACT:
    case OP_TYPEPOSSTAR: case OP_TYPEPOSPLUS:
    case OP_TYPEPOSQUERY: case OP_TYPEPOSUPTO:
    case OP_CLASS: case OP_NCLASS:
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
    case OP_XCLASS:
#endif
    compile_iterator_backtrackingpath(common, current);
    break;

    case OP_REF: case OP_REFI:
    case OP_DNREF: case OP_DNREFI:
    compile_ref_iterator_backtrackingpath(common, current);
    break;

    case OP_RECURSE:
    compile_recurse_backtrackingpath(common, current);
    break;

    case OP_ASSERT: case OP_ASSERT_NOT:
    case OP_ASSERTBACK: case OP_ASSERTBACK_NOT:
    compile_assert_backtrackingpath(common, current);
    break;

    case OP_ONCE: case OP_ONCE_NC:
    case OP_BRA: case OP_CBRA: case OP_COND:
    case OP_SBRA: case OP_SCBRA: case OP_SCOND:
    compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAZERO:
    /* The opcode after the prefix selects the generator. */
    if (current->cc[1] <= OP_ASSERTBACK_NOT)
      compile_assert_backtrackingpath(common, current);
    else
      compile_bracket_backtrackingpath(common, current);
    break;

    case OP_BRAPOS: case OP_CBRAPOS:
    case OP_SBRAPOS: case OP_SCBRAPOS:
    case OP_BRAPOSZERO:
    compile_bracketpos_backtrackingpath(common, current);
    break;

    case OP_BRAMINZERO:
    compile_braminzero_backtrackingpath(common, current);
    break;

    case OP_MARK:
    if (common->has_skip_arg)
      {
      /* Five slots are released: slot 4 holds the value for mark_ptr and
      slot 0 the value for the control head. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(4));
      OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 5);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
      }
    else
      {
      /* Only the value for mark_ptr was saved. */
      OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(0));
      free_stack(common, 1);
      OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, TMP1, 0);
      }
    break;

    case OP_THEN: case OP_THEN_ARG:
    case OP_PRUNE: case OP_PRUNE_ARG:
    case OP_SKIP: case OP_SKIP_ARG:
    compile_control_verb_backtrackingpath(common, current);
    break;

    case OP_COMMIT:
    /* Outside a local exit the return register is loaded with
    PCRE_ERROR_NOMATCH before leaving through the quit path. */
    if (!common->local_exit)
      OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH);
    if (common->quit_label != NULL)
      JUMPTO(SLJIT_JUMP, common->quit_label);
    else
      add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
    break;

    case OP_CALLOUT: case OP_FAIL:
    case OP_ACCEPT: case OP_ASSERT_ACCEPT:
    /* Nothing to undo; only bind the pending jumps. */
    set_jumps(current->topbacktracks, LABEL());
    break;

    case OP_THEN_TRAP:
    /* A virtual opcode for then traps. */
    compile_then_trap_backtrackingpath(common, current);
    break;

    default:
    SLJIT_UNREACHABLE();
    break;
    }
  }

common->then_trap = entry_then_trap;
}
10840
|
|
|
|
|
|
|
|
10841
|
|
|
|
|
|
|
/* Generates the fast-call routine for the group of common->currententry.

The routine allocates stack space for the copied private data, the optional
frame and the optional slot that keeps STR_PTR for further alternatives, then
compiles every alternative of the group in turn. On exit TMP1 receives the
result collected in TMP3: 1 when an alternative reached the accept list, 0
when none matched or the quit list was taken. Returns early, without
finishing the routine, if the sljit compiler reports an error.

Argument:
  common    shared state of the code generator; currententry selects the group
*/
static SLJIT_INLINE void compile_recurse(compiler_common *common)
{
DEFINE_COMPILER;
pcre_uchar *cc = common->start + common->currententry->start;
pcre_uchar *ccbegin = cc + 1 + LINK_SIZE + (*cc == OP_BRA ? 0 : IMM2_SIZE);
pcre_uchar *ccend = bracketend(cc) - (1 + LINK_SIZE);
BOOL needs_control_head;
BOOL needs_frame;
int framesize;
int private_data_size;
int alternativesize;
int head_size;
int total_size;
backtrack_common altbacktrack;
struct sljit_jump *no_match;

/* get_framesize fills in needs_control_head, which the length computation
consumes, so the two calls must stay in this order. */
framesize = get_framesize(common, cc, NULL, TRUE, &needs_control_head);
private_data_size = get_private_data_copy_length(common, ccbegin, ccend, needs_control_head);

/* Recurse captures then. */
common->then_trap = NULL;

SLJIT_ASSERT(*cc == OP_BRA || *cc == OP_CBRA || *cc == OP_CBRAPOS || *cc == OP_SCBRA || *cc == OP_SCBRAPOS);
needs_frame = framesize >= 0;
if (!needs_frame)
  framesize = 0;
/* One extra slot keeps STR_PTR when the group has more alternatives. */
alternativesize = (cc[GET(cc, 1)] == OP_ALT) ? 1 : 0;
head_size = framesize + alternativesize;
total_size = private_data_size + head_size;

SLJIT_ASSERT(common->currententry->entry == NULL && common->recursive_head_ptr != 0);
common->currententry->entry = LABEL();
set_jumps(common->currententry->calls, common->currententry->entry);

/* The return address arrives in TMP2 and is kept in the topmost slot. */
sljit_emit_fast_enter(compiler, TMP2, 0);
count_match(common);
allocate_stack(common, total_size);
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(total_size - 1), TMP2, 0);
copy_private_data(common, ccbegin, ccend, TRUE, head_size, total_size, needs_control_head);
if (needs_control_head)
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, STACK_TOP, 0);
if (needs_frame)
  init_frame(common, cc, NULL, head_size - 1, alternativesize, TRUE);

if (alternativesize > 0)
  OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), STR_PTR, 0);

memset(&altbacktrack, 0, sizeof(backtrack_common));
common->quit_label = NULL;
common->accept_label = NULL;
common->quit = NULL;
common->accept = NULL;
altbacktrack.cc = ccbegin;
cc += GET(cc, 1);

for (;;)
  {
  altbacktrack.top = NULL;
  altbacktrack.topbacktracks = NULL;

  /* Every alternative after the first restarts from the saved STR_PTR. */
  if (altbacktrack.cc != ccbegin)
    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(STACK_TOP), STACK(0));

  compile_matchingpath(common, altbacktrack.cc, cc, &altbacktrack);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return;

  add_jump(compiler, &common->accept, JUMP(SLJIT_JUMP));

  compile_backtrackingpath(common, altbacktrack.top);
  if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler)))
    return;
  set_jumps(altbacktrack.topbacktracks, LABEL());

  if (*cc != OP_ALT)
    break;

  altbacktrack.cc = cc + 1 + LINK_SIZE;
  cc += GET(cc, 1);
  }

/* None of them matched. */
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
no_match = JUMP(SLJIT_JUMP);

if (common->quit != NULL)
  {
  /* Quit path: recover STACK_TOP, revert the frame if there is one and
  report 0. The list is restarted with a single jump, bound further down. */
  set_jumps(common->quit, LABEL());
  OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
  if (needs_frame)
    {
    OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, head_size * sizeof(sljit_sw));
    add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
    OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, head_size * sizeof(sljit_sw));
    }
  OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 0);
  common->quit = NULL;
  add_jump(compiler, &common->quit, JUMP(SLJIT_JUMP));
  }

/* Accept path: same recovery, but report 1. */
set_jumps(common->accept, LABEL());
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr);
if (needs_frame)
  {
  OP2(SLJIT_ADD, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, head_size * sizeof(sljit_sw));
  add_jump(compiler, &common->revertframes, JUMP(SLJIT_FAST_CALL));
  OP2(SLJIT_SUB, STACK_TOP, 0, STACK_TOP, 0, SLJIT_IMM, head_size * sizeof(sljit_sw));
  }
OP1(SLJIT_MOV, TMP3, 0, SLJIT_IMM, 1);

/* Common exit of all three paths. */
JUMPHERE(no_match);
if (common->quit != NULL)
  set_jumps(common->quit, LABEL());
copy_private_data(common, ccbegin, ccend, FALSE, head_size, total_size, needs_control_head);
free_stack(common, total_size);
if (needs_control_head)
  {
  /* The slots just below the released top hold the previous recursive
  head (-3) and control head (-2). */
  OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(STACK_TOP), STACK(-3));
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP1, 0);
  OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, TMP2, 0);
  }
else
  {
  /* Without a control head the previous recursive head sits at -2. */
  OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(STACK_TOP), STACK(-2));
  OP1(SLJIT_MOV, TMP1, 0, TMP3, 0);
  OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->recursive_head_ptr, TMP2, 0);
  }
/* Return through the address stored by the entry sequence. */
sljit_emit_fast_return(compiler, SLJIT_MEM1(STACK_TOP), STACK(-1));
}
10965
|
|
|
|
|
|
|
|
10966
|
|
|
|
|
|
|
#undef COMPILE_BACKTRACKINGPATH |
10967
|
|
|
|
|
|
|
#undef CURRENT_AS |
10968
|
|
|
|
|
|
|
|
10969
|
|
|
|
|
|
|
void |
10970
|
|
|
|
|
|
|
PRIV(jit_compile)(const REAL_PCRE *re, PUBL(extra) *extra, int mode) |
10971
|
|
|
|
|
|
|
{ |
10972
|
|
|
|
|
|
|
struct sljit_compiler *compiler; |
10973
|
|
|
|
|
|
|
backtrack_common rootbacktrack; |
10974
|
|
|
|
|
|
|
compiler_common common_data; |
10975
|
|
|
|
|
|
|
compiler_common *common = &common_data; |
10976
|
|
|
|
|
|
|
const sljit_u8 *tables = re->tables; |
10977
|
|
|
|
|
|
|
pcre_study_data *study; |
10978
|
|
|
|
|
|
|
int private_data_size; |
10979
|
|
|
|
|
|
|
pcre_uchar *ccend; |
10980
|
|
|
|
|
|
|
executable_functions *functions; |
10981
|
|
|
|
|
|
|
void *executable_func; |
10982
|
|
|
|
|
|
|
sljit_uw executable_size; |
10983
|
|
|
|
|
|
|
sljit_uw total_length; |
10984
|
|
|
|
|
|
|
label_addr_list *label_addr; |
10985
|
|
|
|
|
|
|
struct sljit_label *mainloop_label = NULL; |
10986
|
|
|
|
|
|
|
struct sljit_label *continue_match_label; |
10987
|
|
|
|
|
|
|
struct sljit_label *empty_match_found_label = NULL; |
10988
|
|
|
|
|
|
|
struct sljit_label *empty_match_backtrack_label = NULL; |
10989
|
|
|
|
|
|
|
struct sljit_label *reset_match_label; |
10990
|
|
|
|
|
|
|
struct sljit_label *quit_label; |
10991
|
|
|
|
|
|
|
struct sljit_jump *jump; |
10992
|
|
|
|
|
|
|
struct sljit_jump *minlength_check_failed = NULL; |
10993
|
|
|
|
|
|
|
struct sljit_jump *reqbyte_notfound = NULL; |
10994
|
|
|
|
|
|
|
struct sljit_jump *empty_match = NULL; |
10995
|
|
|
|
|
|
|
|
10996
|
|
|
|
|
|
|
SLJIT_ASSERT((extra->flags & PCRE_EXTRA_STUDY_DATA) != 0); |
10997
|
|
|
|
|
|
|
study = extra->study_data; |
10998
|
|
|
|
|
|
|
|
10999
|
|
|
|
|
|
|
if (!tables) |
11000
|
|
|
|
|
|
|
tables = PRIV(default_tables); |
11001
|
|
|
|
|
|
|
|
11002
|
|
|
|
|
|
|
memset(&rootbacktrack, 0, sizeof(backtrack_common)); |
11003
|
|
|
|
|
|
|
memset(common, 0, sizeof(compiler_common)); |
11004
|
|
|
|
|
|
|
rootbacktrack.cc = (pcre_uchar *)re + re->name_table_offset + re->name_count * re->name_entry_size; |
11005
|
|
|
|
|
|
|
|
11006
|
|
|
|
|
|
|
common->start = rootbacktrack.cc; |
11007
|
|
|
|
|
|
|
common->read_only_data_head = NULL; |
11008
|
|
|
|
|
|
|
common->fcc = tables + fcc_offset; |
11009
|
|
|
|
|
|
|
common->lcc = (sljit_sw)(tables + lcc_offset); |
11010
|
|
|
|
|
|
|
common->mode = mode; |
11011
|
|
|
|
|
|
|
common->might_be_empty = study->minlength == 0; |
11012
|
|
|
|
|
|
|
common->nltype = NLTYPE_FIXED; |
11013
|
|
|
|
|
|
|
switch(re->options & PCRE_NEWLINE_BITS) |
11014
|
|
|
|
|
|
|
{ |
11015
|
|
|
|
|
|
|
case 0: |
11016
|
|
|
|
|
|
|
/* Compile-time default */ |
11017
|
|
|
|
|
|
|
switch(NEWLINE) |
11018
|
|
|
|
|
|
|
{ |
11019
|
|
|
|
|
|
|
case -1: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break; |
11020
|
|
|
|
|
|
|
case -2: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break; |
11021
|
|
|
|
|
|
|
default: common->newline = NEWLINE; break; |
11022
|
|
|
|
|
|
|
} |
11023
|
|
|
|
|
|
|
break; |
11024
|
|
|
|
|
|
|
case PCRE_NEWLINE_CR: common->newline = CHAR_CR; break; |
11025
|
|
|
|
|
|
|
case PCRE_NEWLINE_LF: common->newline = CHAR_NL; break; |
11026
|
|
|
|
|
|
|
case PCRE_NEWLINE_CR+ |
11027
|
|
|
|
|
|
|
PCRE_NEWLINE_LF: common->newline = (CHAR_CR << 8) | CHAR_NL; break; |
11028
|
|
|
|
|
|
|
case PCRE_NEWLINE_ANY: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANY; break; |
11029
|
|
|
|
|
|
|
case PCRE_NEWLINE_ANYCRLF: common->newline = (CHAR_CR << 8) | CHAR_NL; common->nltype = NLTYPE_ANYCRLF; break; |
11030
|
|
|
|
|
|
|
default: return; |
11031
|
|
|
|
|
|
|
} |
11032
|
|
|
|
|
|
|
common->nlmax = READ_CHAR_MAX; |
11033
|
|
|
|
|
|
|
common->nlmin = 0; |
11034
|
|
|
|
|
|
|
if ((re->options & PCRE_BSR_ANYCRLF) != 0) |
11035
|
|
|
|
|
|
|
common->bsr_nltype = NLTYPE_ANYCRLF; |
11036
|
|
|
|
|
|
|
else if ((re->options & PCRE_BSR_UNICODE) != 0) |
11037
|
|
|
|
|
|
|
common->bsr_nltype = NLTYPE_ANY; |
11038
|
|
|
|
|
|
|
else |
11039
|
|
|
|
|
|
|
{ |
11040
|
|
|
|
|
|
|
#ifdef BSR_ANYCRLF |
11041
|
|
|
|
|
|
|
common->bsr_nltype = NLTYPE_ANYCRLF; |
11042
|
|
|
|
|
|
|
#else |
11043
|
|
|
|
|
|
|
common->bsr_nltype = NLTYPE_ANY; |
11044
|
|
|
|
|
|
|
#endif |
11045
|
|
|
|
|
|
|
} |
11046
|
|
|
|
|
|
|
common->bsr_nlmax = READ_CHAR_MAX; |
11047
|
|
|
|
|
|
|
common->bsr_nlmin = 0; |
11048
|
|
|
|
|
|
|
common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0; |
11049
|
|
|
|
|
|
|
common->ctypes = (sljit_sw)(tables + ctypes_offset); |
11050
|
|
|
|
|
|
|
common->name_table = ((pcre_uchar *)re) + re->name_table_offset; |
11051
|
|
|
|
|
|
|
common->name_count = re->name_count; |
11052
|
|
|
|
|
|
|
common->name_entry_size = re->name_entry_size; |
11053
|
|
|
|
|
|
|
common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0; |
11054
|
|
|
|
|
|
|
#ifdef SUPPORT_UTF |
11055
|
|
|
|
|
|
|
/* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */ |
11056
|
|
|
|
|
|
|
common->utf = (re->options & PCRE_UTF8) != 0; |
11057
|
|
|
|
|
|
|
#ifdef SUPPORT_UCP |
11058
|
|
|
|
|
|
|
common->use_ucp = (re->options & PCRE_UCP) != 0; |
11059
|
|
|
|
|
|
|
#endif |
11060
|
|
|
|
|
|
|
if (common->utf) |
11061
|
|
|
|
|
|
|
{ |
11062
|
|
|
|
|
|
|
if (common->nltype == NLTYPE_ANY) |
11063
|
|
|
|
|
|
|
common->nlmax = 0x2029; |
11064
|
|
|
|
|
|
|
else if (common->nltype == NLTYPE_ANYCRLF) |
11065
|
|
|
|
|
|
|
common->nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL; |
11066
|
|
|
|
|
|
|
else |
11067
|
|
|
|
|
|
|
{ |
11068
|
|
|
|
|
|
|
/* We only care about the first newline character. */ |
11069
|
|
|
|
|
|
|
common->nlmax = common->newline & 0xff; |
11070
|
|
|
|
|
|
|
} |
11071
|
|
|
|
|
|
|
|
11072
|
|
|
|
|
|
|
if (common->nltype == NLTYPE_FIXED) |
11073
|
|
|
|
|
|
|
common->nlmin = common->newline & 0xff; |
11074
|
|
|
|
|
|
|
else |
11075
|
|
|
|
|
|
|
common->nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL; |
11076
|
|
|
|
|
|
|
|
11077
|
|
|
|
|
|
|
if (common->bsr_nltype == NLTYPE_ANY) |
11078
|
|
|
|
|
|
|
common->bsr_nlmax = 0x2029; |
11079
|
|
|
|
|
|
|
else |
11080
|
|
|
|
|
|
|
common->bsr_nlmax = (CHAR_CR > CHAR_NL) ? CHAR_CR : CHAR_NL; |
11081
|
|
|
|
|
|
|
common->bsr_nlmin = (CHAR_CR < CHAR_NL) ? CHAR_CR : CHAR_NL; |
11082
|
|
|
|
|
|
|
} |
11083
|
|
|
|
|
|
|
#endif /* SUPPORT_UTF */ |
11084
|
|
|
|
|
|
|
ccend = bracketend(common->start); |
11085
|
|
|
|
|
|
|
|
11086
|
|
|
|
|
|
|
/* Calculate the local space size on the stack. */ |
11087
|
|
|
|
|
|
|
common->ovector_start = LIMIT_MATCH + sizeof(sljit_sw); |
11088
|
|
|
|
|
|
|
common->optimized_cbracket = (sljit_u8 *)SLJIT_MALLOC(re->top_bracket + 1, compiler->allocator_data); |
11089
|
|
|
|
|
|
|
if (!common->optimized_cbracket) |
11090
|
|
|
|
|
|
|
return; |
11091
|
|
|
|
|
|
|
#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 1 |
11092
|
|
|
|
|
|
|
memset(common->optimized_cbracket, 0, re->top_bracket + 1); |
11093
|
|
|
|
|
|
|
#else |
11094
|
|
|
|
|
|
|
memset(common->optimized_cbracket, 1, re->top_bracket + 1); |
11095
|
|
|
|
|
|
|
#endif |
11096
|
|
|
|
|
|
|
|
11097
|
|
|
|
|
|
|
SLJIT_ASSERT(*common->start == OP_BRA && ccend[-(1 + LINK_SIZE)] == OP_KET); |
11098
|
|
|
|
|
|
|
#if defined DEBUG_FORCE_UNOPTIMIZED_CBRAS && DEBUG_FORCE_UNOPTIMIZED_CBRAS == 2 |
11099
|
|
|
|
|
|
|
common->capture_last_ptr = common->ovector_start; |
11100
|
|
|
|
|
|
|
common->ovector_start += sizeof(sljit_sw); |
11101
|
|
|
|
|
|
|
#endif |
11102
|
|
|
|
|
|
|
if (!check_opcode_types(common, common->start, ccend)) |
11103
|
|
|
|
|
|
|
{ |
11104
|
|
|
|
|
|
|
SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data); |
11105
|
|
|
|
|
|
|
return; |
11106
|
|
|
|
|
|
|
} |
11107
|
|
|
|
|
|
|
|
11108
|
|
|
|
|
|
|
/* Checking flags and updating ovector_start. */ |
11109
|
|
|
|
|
|
|
if (mode == JIT_COMPILE && (re->flags & PCRE_REQCHSET) != 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0) |
11110
|
|
|
|
|
|
|
{ |
11111
|
|
|
|
|
|
|
common->req_char_ptr = common->ovector_start; |
11112
|
|
|
|
|
|
|
common->ovector_start += sizeof(sljit_sw); |
11113
|
|
|
|
|
|
|
} |
11114
|
|
|
|
|
|
|
if (mode != JIT_COMPILE) |
11115
|
|
|
|
|
|
|
{ |
11116
|
|
|
|
|
|
|
common->start_used_ptr = common->ovector_start; |
11117
|
|
|
|
|
|
|
common->ovector_start += sizeof(sljit_sw); |
11118
|
|
|
|
|
|
|
if (mode == JIT_PARTIAL_SOFT_COMPILE) |
11119
|
|
|
|
|
|
|
{ |
11120
|
|
|
|
|
|
|
common->hit_start = common->ovector_start; |
11121
|
|
|
|
|
|
|
common->ovector_start += 2 * sizeof(sljit_sw); |
11122
|
|
|
|
|
|
|
} |
11123
|
|
|
|
|
|
|
} |
11124
|
|
|
|
|
|
|
if ((re->options & PCRE_FIRSTLINE) != 0) |
11125
|
|
|
|
|
|
|
{ |
11126
|
|
|
|
|
|
|
common->match_end_ptr = common->ovector_start; |
11127
|
|
|
|
|
|
|
common->ovector_start += sizeof(sljit_sw); |
11128
|
|
|
|
|
|
|
} |
11129
|
|
|
|
|
|
|
#if defined DEBUG_FORCE_CONTROL_HEAD && DEBUG_FORCE_CONTROL_HEAD |
11130
|
|
|
|
|
|
|
common->control_head_ptr = 1; |
11131
|
|
|
|
|
|
|
#endif |
11132
|
|
|
|
|
|
|
if (common->control_head_ptr != 0) |
11133
|
|
|
|
|
|
|
{ |
11134
|
|
|
|
|
|
|
common->control_head_ptr = common->ovector_start; |
11135
|
|
|
|
|
|
|
common->ovector_start += sizeof(sljit_sw); |
11136
|
|
|
|
|
|
|
} |
11137
|
|
|
|
|
|
|
if (common->has_set_som) |
11138
|
|
|
|
|
|
|
{ |
11139
|
|
|
|
|
|
|
/* Saving the real start pointer is necessary. */ |
11140
|
|
|
|
|
|
|
common->start_ptr = common->ovector_start; |
11141
|
|
|
|
|
|
|
common->ovector_start += sizeof(sljit_sw); |
11142
|
|
|
|
|
|
|
} |
11143
|
|
|
|
|
|
|
|
11144
|
|
|
|
|
|
|
/* Aligning ovector to even number of sljit words. */ |
11145
|
|
|
|
|
|
|
if ((common->ovector_start & sizeof(sljit_sw)) != 0) |
11146
|
|
|
|
|
|
|
common->ovector_start += sizeof(sljit_sw); |
11147
|
|
|
|
|
|
|
|
11148
|
|
|
|
|
|
|
if (common->start_ptr == 0) |
11149
|
|
|
|
|
|
|
common->start_ptr = OVECTOR(0); |
11150
|
|
|
|
|
|
|
|
11151
|
|
|
|
|
|
|
/* Capturing brackets cannot be optimized if callouts are allowed. */ |
11152
|
|
|
|
|
|
|
if (common->capture_last_ptr != 0) |
11153
|
|
|
|
|
|
|
memset(common->optimized_cbracket, 0, re->top_bracket + 1); |
11154
|
|
|
|
|
|
|
|
11155
|
|
|
|
|
|
|
SLJIT_ASSERT(!(common->req_char_ptr != 0 && common->start_used_ptr != 0)); |
11156
|
|
|
|
|
|
|
common->cbra_ptr = OVECTOR_START + (re->top_bracket + 1) * 2 * sizeof(sljit_sw); |
11157
|
|
|
|
|
|
|
|
11158
|
|
|
|
|
|
|
total_length = ccend - common->start; |
11159
|
|
|
|
|
|
|
common->private_data_ptrs = (sljit_s32 *)SLJIT_MALLOC(total_length * (sizeof(sljit_s32) + (common->has_then ? 1 : 0)), compiler->allocator_data); |
11160
|
|
|
|
|
|
|
if (!common->private_data_ptrs) |
11161
|
|
|
|
|
|
|
{ |
11162
|
|
|
|
|
|
|
SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data); |
11163
|
|
|
|
|
|
|
return; |
11164
|
|
|
|
|
|
|
} |
11165
|
|
|
|
|
|
|
memset(common->private_data_ptrs, 0, total_length * sizeof(sljit_s32)); |
11166
|
|
|
|
|
|
|
|
11167
|
|
|
|
|
|
|
private_data_size = common->cbra_ptr + (re->top_bracket + 1) * sizeof(sljit_sw); |
11168
|
|
|
|
|
|
|
set_private_data_ptrs(common, &private_data_size, ccend); |
11169
|
|
|
|
|
|
|
if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0) |
11170
|
|
|
|
|
|
|
{ |
11171
|
|
|
|
|
|
|
if (!detect_fast_forward_skip(common, &private_data_size) && !common->has_skip_in_assert_back) |
11172
|
|
|
|
|
|
|
detect_fast_fail(common, common->start, &private_data_size, 4); |
11173
|
|
|
|
|
|
|
} |
11174
|
|
|
|
|
|
|
|
11175
|
|
|
|
|
|
|
SLJIT_ASSERT(common->fast_fail_start_ptr <= common->fast_fail_end_ptr); |
11176
|
|
|
|
|
|
|
|
11177
|
|
|
|
|
|
|
if (private_data_size > SLJIT_MAX_LOCAL_SIZE) |
11178
|
|
|
|
|
|
|
{ |
11179
|
|
|
|
|
|
|
SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data); |
11180
|
|
|
|
|
|
|
SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data); |
11181
|
|
|
|
|
|
|
return; |
11182
|
|
|
|
|
|
|
} |
11183
|
|
|
|
|
|
|
|
11184
|
|
|
|
|
|
|
if (common->has_then) |
11185
|
|
|
|
|
|
|
{ |
11186
|
|
|
|
|
|
|
common->then_offsets = (sljit_u8 *)(common->private_data_ptrs + total_length); |
11187
|
|
|
|
|
|
|
memset(common->then_offsets, 0, total_length); |
11188
|
|
|
|
|
|
|
set_then_offsets(common, common->start, NULL); |
11189
|
|
|
|
|
|
|
} |
11190
|
|
|
|
|
|
|
|
11191
|
|
|
|
|
|
|
compiler = sljit_create_compiler(NULL); |
11192
|
|
|
|
|
|
|
if (!compiler) |
11193
|
|
|
|
|
|
|
{ |
11194
|
|
|
|
|
|
|
SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data); |
11195
|
|
|
|
|
|
|
SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data); |
11196
|
|
|
|
|
|
|
return; |
11197
|
|
|
|
|
|
|
} |
11198
|
|
|
|
|
|
|
common->compiler = compiler; |
11199
|
|
|
|
|
|
|
|
11200
|
|
|
|
|
|
|
/* Main pcre_jit_exec entry. */ |
11201
|
|
|
|
|
|
|
sljit_emit_enter(compiler, 0, SLJIT_ARG1(SW), 5, 5, 0, 0, private_data_size); |
11202
|
|
|
|
|
|
|
|
11203
|
|
|
|
|
|
|
/* Register init. */ |
11204
|
|
|
|
|
|
|
reset_ovector(common, (re->top_bracket + 1) * 2); |
11205
|
|
|
|
|
|
|
if (common->req_char_ptr != 0) |
11206
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->req_char_ptr, SLJIT_R0, 0); |
11207
|
|
|
|
|
|
|
|
11208
|
|
|
|
|
|
|
OP1(SLJIT_MOV, ARGUMENTS, 0, SLJIT_S0, 0); |
11209
|
|
|
|
|
|
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_S0, 0); |
11210
|
|
|
|
|
|
|
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); |
11211
|
|
|
|
|
|
|
OP1(SLJIT_MOV, STR_END, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, end)); |
11212
|
|
|
|
|
|
|
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, stack)); |
11213
|
|
|
|
|
|
|
OP1(SLJIT_MOV_U32, TMP1, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, limit_match)); |
11214
|
|
|
|
|
|
|
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, end)); |
11215
|
|
|
|
|
|
|
OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_MEM1(TMP2), SLJIT_OFFSETOF(struct sljit_stack, start)); |
11216
|
|
|
|
|
|
|
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1); |
11217
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH, TMP1, 0); |
11218
|
|
|
|
|
|
|
|
11219
|
|
|
|
|
|
|
if (common->fast_fail_start_ptr < common->fast_fail_end_ptr) |
11220
|
|
|
|
|
|
|
reset_fast_fail(common); |
11221
|
|
|
|
|
|
|
|
11222
|
|
|
|
|
|
|
if (mode == JIT_PARTIAL_SOFT_COMPILE) |
11223
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1); |
11224
|
|
|
|
|
|
|
if (common->mark_ptr != 0) |
11225
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->mark_ptr, SLJIT_IMM, 0); |
11226
|
|
|
|
|
|
|
if (common->control_head_ptr != 0) |
11227
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->control_head_ptr, SLJIT_IMM, 0); |
11228
|
|
|
|
|
|
|
|
11229
|
|
|
|
|
|
|
/* Main part of the matching */ |
11230
|
|
|
|
|
|
|
if ((re->options & PCRE_ANCHORED) == 0) |
11231
|
|
|
|
|
|
|
{ |
11232
|
|
|
|
|
|
|
mainloop_label = mainloop_entry(common, (re->flags & PCRE_HASCRORLF) != 0); |
11233
|
|
|
|
|
|
|
continue_match_label = LABEL(); |
11234
|
|
|
|
|
|
|
/* Forward search if possible. */ |
11235
|
|
|
|
|
|
|
if ((re->options & PCRE_NO_START_OPTIMIZE) == 0) |
11236
|
|
|
|
|
|
|
{ |
11237
|
|
|
|
|
|
|
if (mode == JIT_COMPILE && fast_forward_first_n_chars(common)) |
11238
|
|
|
|
|
|
|
; |
11239
|
|
|
|
|
|
|
else if ((re->flags & PCRE_FIRSTSET) != 0) |
11240
|
|
|
|
|
|
|
fast_forward_first_char(common, (pcre_uchar)re->first_char, (re->flags & PCRE_FCH_CASELESS) != 0); |
11241
|
|
|
|
|
|
|
else if ((re->flags & PCRE_STARTLINE) != 0) |
11242
|
|
|
|
|
|
|
fast_forward_newline(common); |
11243
|
|
|
|
|
|
|
else if (study != NULL && (study->flags & PCRE_STUDY_MAPPED) != 0) |
11244
|
|
|
|
|
|
|
fast_forward_start_bits(common, study->start_bits); |
11245
|
|
|
|
|
|
|
} |
11246
|
|
|
|
|
|
|
} |
11247
|
|
|
|
|
|
|
else |
11248
|
|
|
|
|
|
|
continue_match_label = LABEL(); |
11249
|
|
|
|
|
|
|
|
11250
|
|
|
|
|
|
|
if (mode == JIT_COMPILE && study->minlength > 0 && (re->options & PCRE_NO_START_OPTIMIZE) == 0) |
11251
|
|
|
|
|
|
|
{ |
11252
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH); |
11253
|
|
|
|
|
|
|
OP2(SLJIT_ADD, TMP2, 0, STR_PTR, 0, SLJIT_IMM, IN_UCHARS(study->minlength)); |
11254
|
|
|
|
|
|
|
minlength_check_failed = CMP(SLJIT_GREATER, TMP2, 0, STR_END, 0); |
11255
|
|
|
|
|
|
|
} |
11256
|
|
|
|
|
|
|
if (common->req_char_ptr != 0) |
11257
|
|
|
|
|
|
|
reqbyte_notfound = search_requested_char(common, (pcre_uchar)re->req_char, (re->flags & PCRE_RCH_CASELESS) != 0, (re->flags & PCRE_FIRSTSET) != 0); |
11258
|
|
|
|
|
|
|
|
11259
|
|
|
|
|
|
|
/* Store the current STR_PTR in OVECTOR(0). */ |
11260
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), OVECTOR(0), STR_PTR, 0); |
11261
|
|
|
|
|
|
|
/* Copy the limit of allowed recursions. */ |
11262
|
|
|
|
|
|
|
OP1(SLJIT_MOV, COUNT_MATCH, 0, SLJIT_MEM1(SLJIT_SP), LIMIT_MATCH); |
11263
|
|
|
|
|
|
|
if (common->capture_last_ptr != 0) |
11264
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->capture_last_ptr, SLJIT_IMM, -1); |
11265
|
|
|
|
|
|
|
if (common->fast_forward_bc_ptr != NULL) |
11266
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), PRIVATE_DATA(common->fast_forward_bc_ptr + 1), STR_PTR, 0); |
11267
|
|
|
|
|
|
|
|
11268
|
|
|
|
|
|
|
if (common->start_ptr != OVECTOR(0)) |
11269
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_ptr, STR_PTR, 0); |
11270
|
|
|
|
|
|
|
|
11271
|
|
|
|
|
|
|
/* Copy the beginning of the string. */ |
11272
|
|
|
|
|
|
|
if (mode == JIT_PARTIAL_SOFT_COMPILE) |
11273
|
|
|
|
|
|
|
{ |
11274
|
|
|
|
|
|
|
jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1); |
11275
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0); |
11276
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start + sizeof(sljit_sw), STR_PTR, 0); |
11277
|
|
|
|
|
|
|
JUMPHERE(jump); |
11278
|
|
|
|
|
|
|
} |
11279
|
|
|
|
|
|
|
else if (mode == JIT_PARTIAL_HARD_COMPILE) |
11280
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0); |
11281
|
|
|
|
|
|
|
|
11282
|
|
|
|
|
|
|
compile_matchingpath(common, common->start, ccend, &rootbacktrack); |
11283
|
|
|
|
|
|
|
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) |
11284
|
|
|
|
|
|
|
{ |
11285
|
|
|
|
|
|
|
sljit_free_compiler(compiler); |
11286
|
|
|
|
|
|
|
SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data); |
11287
|
|
|
|
|
|
|
SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data); |
11288
|
|
|
|
|
|
|
free_read_only_data(common->read_only_data_head, compiler->allocator_data); |
11289
|
|
|
|
|
|
|
return; |
11290
|
|
|
|
|
|
|
} |
11291
|
|
|
|
|
|
|
|
11292
|
|
|
|
|
|
|
if (common->might_be_empty) |
11293
|
|
|
|
|
|
|
{ |
11294
|
|
|
|
|
|
|
empty_match = CMP(SLJIT_EQUAL, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(0)); |
11295
|
|
|
|
|
|
|
empty_match_found_label = LABEL(); |
11296
|
|
|
|
|
|
|
} |
11297
|
|
|
|
|
|
|
|
11298
|
|
|
|
|
|
|
common->accept_label = LABEL(); |
11299
|
|
|
|
|
|
|
if (common->accept != NULL) |
11300
|
|
|
|
|
|
|
set_jumps(common->accept, common->accept_label); |
11301
|
|
|
|
|
|
|
|
11302
|
|
|
|
|
|
|
/* This means we have a match. Update the ovector. */ |
11303
|
|
|
|
|
|
|
copy_ovector(common, re->top_bracket + 1); |
11304
|
|
|
|
|
|
|
common->quit_label = common->forced_quit_label = LABEL(); |
11305
|
|
|
|
|
|
|
if (common->quit != NULL) |
11306
|
|
|
|
|
|
|
set_jumps(common->quit, common->quit_label); |
11307
|
|
|
|
|
|
|
if (common->forced_quit != NULL) |
11308
|
|
|
|
|
|
|
set_jumps(common->forced_quit, common->forced_quit_label); |
11309
|
|
|
|
|
|
|
if (minlength_check_failed != NULL) |
11310
|
|
|
|
|
|
|
SET_LABEL(minlength_check_failed, common->forced_quit_label); |
11311
|
|
|
|
|
|
|
sljit_emit_return(compiler, SLJIT_MOV, SLJIT_RETURN_REG, 0); |
11312
|
|
|
|
|
|
|
|
11313
|
|
|
|
|
|
|
if (mode != JIT_COMPILE) |
11314
|
|
|
|
|
|
|
{ |
11315
|
|
|
|
|
|
|
common->partialmatchlabel = LABEL(); |
11316
|
|
|
|
|
|
|
set_jumps(common->partialmatch, common->partialmatchlabel); |
11317
|
|
|
|
|
|
|
return_with_partial_match(common, common->quit_label); |
11318
|
|
|
|
|
|
|
} |
11319
|
|
|
|
|
|
|
|
11320
|
|
|
|
|
|
|
if (common->might_be_empty) |
11321
|
|
|
|
|
|
|
empty_match_backtrack_label = LABEL(); |
11322
|
|
|
|
|
|
|
compile_backtrackingpath(common, rootbacktrack.top); |
11323
|
|
|
|
|
|
|
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) |
11324
|
|
|
|
|
|
|
{ |
11325
|
|
|
|
|
|
|
sljit_free_compiler(compiler); |
11326
|
|
|
|
|
|
|
SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data); |
11327
|
|
|
|
|
|
|
SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data); |
11328
|
|
|
|
|
|
|
free_read_only_data(common->read_only_data_head, compiler->allocator_data); |
11329
|
|
|
|
|
|
|
return; |
11330
|
|
|
|
|
|
|
} |
11331
|
|
|
|
|
|
|
|
11332
|
|
|
|
|
|
|
SLJIT_ASSERT(rootbacktrack.prev == NULL); |
11333
|
|
|
|
|
|
|
reset_match_label = LABEL(); |
11334
|
|
|
|
|
|
|
|
11335
|
|
|
|
|
|
|
if (mode == JIT_PARTIAL_SOFT_COMPILE) |
11336
|
|
|
|
|
|
|
{ |
11337
|
|
|
|
|
|
|
/* Update hit_start only in the first time. */ |
11338
|
|
|
|
|
|
|
jump = CMP(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0); |
11339
|
|
|
|
|
|
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr); |
11340
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1); |
11341
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, TMP1, 0); |
11342
|
|
|
|
|
|
|
JUMPHERE(jump); |
11343
|
|
|
|
|
|
|
} |
11344
|
|
|
|
|
|
|
|
11345
|
|
|
|
|
|
|
/* Check we have remaining characters. */ |
11346
|
|
|
|
|
|
|
if ((re->options & PCRE_ANCHORED) == 0 && (re->options & PCRE_FIRSTLINE) != 0) |
11347
|
|
|
|
|
|
|
{ |
11348
|
|
|
|
|
|
|
SLJIT_ASSERT(common->match_end_ptr != 0); |
11349
|
|
|
|
|
|
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), common->match_end_ptr); |
11350
|
|
|
|
|
|
|
} |
11351
|
|
|
|
|
|
|
|
11352
|
|
|
|
|
|
|
OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), |
11353
|
|
|
|
|
|
|
(common->fast_forward_bc_ptr != NULL) ? (PRIVATE_DATA(common->fast_forward_bc_ptr + 1)) : common->start_ptr); |
11354
|
|
|
|
|
|
|
|
11355
|
|
|
|
|
|
|
if ((re->options & PCRE_ANCHORED) == 0) |
11356
|
|
|
|
|
|
|
{ |
11357
|
|
|
|
|
|
|
if (common->ff_newline_shortcut != NULL) |
11358
|
|
|
|
|
|
|
{ |
11359
|
|
|
|
|
|
|
if ((re->options & PCRE_FIRSTLINE) == 0) |
11360
|
|
|
|
|
|
|
CMPTO(SLJIT_LESS, STR_PTR, 0, STR_END, 0, common->ff_newline_shortcut); |
11361
|
|
|
|
|
|
|
/* There cannot be more newlines here. */ |
11362
|
|
|
|
|
|
|
} |
11363
|
|
|
|
|
|
|
else |
11364
|
|
|
|
|
|
|
CMPTO(SLJIT_LESS, STR_PTR, 0, ((re->options & PCRE_FIRSTLINE) == 0) ? STR_END : TMP1, 0, mainloop_label); |
11365
|
|
|
|
|
|
|
} |
11366
|
|
|
|
|
|
|
|
11367
|
|
|
|
|
|
|
/* No more remaining characters. */ |
11368
|
|
|
|
|
|
|
if (reqbyte_notfound != NULL) |
11369
|
|
|
|
|
|
|
JUMPHERE(reqbyte_notfound); |
11370
|
|
|
|
|
|
|
|
11371
|
|
|
|
|
|
|
if (mode == JIT_PARTIAL_SOFT_COMPILE) |
11372
|
|
|
|
|
|
|
CMPTO(SLJIT_NOT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, -1, common->partialmatchlabel); |
11373
|
|
|
|
|
|
|
|
11374
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_NOMATCH); |
11375
|
|
|
|
|
|
|
JUMPTO(SLJIT_JUMP, common->quit_label); |
11376
|
|
|
|
|
|
|
|
11377
|
|
|
|
|
|
|
flush_stubs(common); |
11378
|
|
|
|
|
|
|
|
11379
|
|
|
|
|
|
|
if (common->might_be_empty) |
11380
|
|
|
|
|
|
|
{ |
11381
|
|
|
|
|
|
|
JUMPHERE(empty_match); |
11382
|
|
|
|
|
|
|
OP1(SLJIT_MOV, TMP1, 0, ARGUMENTS, 0); |
11383
|
|
|
|
|
|
|
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty)); |
11384
|
|
|
|
|
|
|
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_backtrack_label); |
11385
|
|
|
|
|
|
|
OP1(SLJIT_MOV_U8, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, notempty_atstart)); |
11386
|
|
|
|
|
|
|
CMPTO(SLJIT_EQUAL, TMP2, 0, SLJIT_IMM, 0, empty_match_found_label); |
11387
|
|
|
|
|
|
|
OP1(SLJIT_MOV, TMP2, 0, SLJIT_MEM1(TMP1), SLJIT_OFFSETOF(jit_arguments, str)); |
11388
|
|
|
|
|
|
|
CMPTO(SLJIT_NOT_EQUAL, TMP2, 0, STR_PTR, 0, empty_match_found_label); |
11389
|
|
|
|
|
|
|
JUMPTO(SLJIT_JUMP, empty_match_backtrack_label); |
11390
|
|
|
|
|
|
|
} |
11391
|
|
|
|
|
|
|
|
11392
|
|
|
|
|
|
|
common->fast_forward_bc_ptr = NULL; |
11393
|
|
|
|
|
|
|
common->fast_fail_start_ptr = 0; |
11394
|
|
|
|
|
|
|
common->fast_fail_end_ptr = 0; |
11395
|
|
|
|
|
|
|
common->currententry = common->entries; |
11396
|
|
|
|
|
|
|
common->local_exit = TRUE; |
11397
|
|
|
|
|
|
|
quit_label = common->quit_label; |
11398
|
|
|
|
|
|
|
while (common->currententry != NULL) |
11399
|
|
|
|
|
|
|
{ |
11400
|
|
|
|
|
|
|
/* Might add new entries. */ |
11401
|
|
|
|
|
|
|
compile_recurse(common); |
11402
|
|
|
|
|
|
|
if (SLJIT_UNLIKELY(sljit_get_compiler_error(compiler))) |
11403
|
|
|
|
|
|
|
{ |
11404
|
|
|
|
|
|
|
sljit_free_compiler(compiler); |
11405
|
|
|
|
|
|
|
SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data); |
11406
|
|
|
|
|
|
|
SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data); |
11407
|
|
|
|
|
|
|
free_read_only_data(common->read_only_data_head, compiler->allocator_data); |
11408
|
|
|
|
|
|
|
return; |
11409
|
|
|
|
|
|
|
} |
11410
|
|
|
|
|
|
|
flush_stubs(common); |
11411
|
|
|
|
|
|
|
common->currententry = common->currententry->next; |
11412
|
|
|
|
|
|
|
} |
11413
|
|
|
|
|
|
|
common->local_exit = FALSE; |
11414
|
|
|
|
|
|
|
common->quit_label = quit_label; |
11415
|
|
|
|
|
|
|
|
11416
|
|
|
|
|
|
|
/* Allocating stack, returns with PCRE_ERROR_JIT_STACKLIMIT if fails. */ |
11417
|
|
|
|
|
|
|
/* This is a (really) rare case. */ |
11418
|
|
|
|
|
|
|
set_jumps(common->stackalloc, LABEL()); |
11419
|
|
|
|
|
|
|
/* RETURN_ADDR is not a saved register. */ |
11420
|
|
|
|
|
|
|
sljit_emit_fast_enter(compiler, SLJIT_MEM1(SLJIT_SP), LOCALS0); |
11421
|
|
|
|
|
|
|
|
11422
|
|
|
|
|
|
|
SLJIT_ASSERT(TMP1 == SLJIT_R0 && STACK_TOP == SLJIT_R1); |
11423
|
|
|
|
|
|
|
|
11424
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), LOCALS1, STACK_TOP, 0); |
11425
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_R0, 0, ARGUMENTS, 0); |
11426
|
|
|
|
|
|
|
OP2(SLJIT_SUB, SLJIT_R1, 0, STACK_LIMIT, 0, SLJIT_IMM, STACK_GROWTH_RATE); |
11427
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_R0), SLJIT_OFFSETOF(jit_arguments, stack)); |
11428
|
|
|
|
|
|
|
OP1(SLJIT_MOV, STACK_LIMIT, 0, TMP2, 0); |
11429
|
|
|
|
|
|
|
|
11430
|
|
|
|
|
|
|
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_RET(SW) | SLJIT_ARG1(SW) | SLJIT_ARG2(SW), SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_stack_resize)); |
11431
|
|
|
|
|
|
|
jump = CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0); |
11432
|
|
|
|
|
|
|
OP1(SLJIT_MOV, TMP2, 0, STACK_LIMIT, 0); |
11433
|
|
|
|
|
|
|
OP1(SLJIT_MOV, STACK_LIMIT, 0, SLJIT_RETURN_REG, 0); |
11434
|
|
|
|
|
|
|
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), LOCALS0); |
11435
|
|
|
|
|
|
|
OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_SP), LOCALS1); |
11436
|
|
|
|
|
|
|
sljit_emit_fast_return(compiler, TMP1, 0); |
11437
|
|
|
|
|
|
|
|
11438
|
|
|
|
|
|
|
/* Allocation failed. */ |
11439
|
|
|
|
|
|
|
JUMPHERE(jump); |
11440
|
|
|
|
|
|
|
/* We break the return address cache here, but this is a really rare case. */ |
11441
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_JIT_STACKLIMIT); |
11442
|
|
|
|
|
|
|
JUMPTO(SLJIT_JUMP, common->quit_label); |
11443
|
|
|
|
|
|
|
|
11444
|
|
|
|
|
|
|
/* Call limit reached. */ |
11445
|
|
|
|
|
|
|
set_jumps(common->calllimit, LABEL()); |
11446
|
|
|
|
|
|
|
OP1(SLJIT_MOV, SLJIT_RETURN_REG, 0, SLJIT_IMM, PCRE_ERROR_MATCHLIMIT); |
11447
|
|
|
|
|
|
|
JUMPTO(SLJIT_JUMP, common->quit_label); |
11448
|
|
|
|
|
|
|
|
11449
|
|
|
|
|
|
|
if (common->revertframes != NULL) |
11450
|
|
|
|
|
|
|
{ |
11451
|
|
|
|
|
|
|
set_jumps(common->revertframes, LABEL()); |
11452
|
|
|
|
|
|
|
do_revertframes(common); |
11453
|
|
|
|
|
|
|
} |
11454
|
|
|
|
|
|
|
if (common->wordboundary != NULL) |
11455
|
|
|
|
|
|
|
{ |
11456
|
|
|
|
|
|
|
set_jumps(common->wordboundary, LABEL()); |
11457
|
|
|
|
|
|
|
check_wordboundary(common); |
11458
|
|
|
|
|
|
|
} |
11459
|
|
|
|
|
|
|
if (common->anynewline != NULL) |
11460
|
|
|
|
|
|
|
{ |
11461
|
|
|
|
|
|
|
set_jumps(common->anynewline, LABEL()); |
11462
|
|
|
|
|
|
|
check_anynewline(common); |
11463
|
|
|
|
|
|
|
} |
11464
|
|
|
|
|
|
|
if (common->hspace != NULL) |
11465
|
|
|
|
|
|
|
{ |
11466
|
|
|
|
|
|
|
set_jumps(common->hspace, LABEL()); |
11467
|
|
|
|
|
|
|
check_hspace(common); |
11468
|
|
|
|
|
|
|
} |
11469
|
|
|
|
|
|
|
if (common->vspace != NULL) |
11470
|
|
|
|
|
|
|
{ |
11471
|
|
|
|
|
|
|
set_jumps(common->vspace, LABEL()); |
11472
|
|
|
|
|
|
|
check_vspace(common); |
11473
|
|
|
|
|
|
|
} |
11474
|
|
|
|
|
|
|
if (common->casefulcmp != NULL) |
11475
|
|
|
|
|
|
|
{ |
11476
|
|
|
|
|
|
|
set_jumps(common->casefulcmp, LABEL()); |
11477
|
|
|
|
|
|
|
do_casefulcmp(common); |
11478
|
|
|
|
|
|
|
} |
11479
|
|
|
|
|
|
|
if (common->caselesscmp != NULL) |
11480
|
|
|
|
|
|
|
{ |
11481
|
|
|
|
|
|
|
set_jumps(common->caselesscmp, LABEL()); |
11482
|
|
|
|
|
|
|
do_caselesscmp(common); |
11483
|
|
|
|
|
|
|
} |
11484
|
|
|
|
|
|
|
if (common->reset_match != NULL) |
11485
|
|
|
|
|
|
|
{ |
11486
|
|
|
|
|
|
|
set_jumps(common->reset_match, LABEL()); |
11487
|
|
|
|
|
|
|
do_reset_match(common, (re->top_bracket + 1) * 2); |
11488
|
|
|
|
|
|
|
CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label); |
11489
|
|
|
|
|
|
|
OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0); |
11490
|
|
|
|
|
|
|
JUMPTO(SLJIT_JUMP, reset_match_label); |
11491
|
|
|
|
|
|
|
} |
11492
|
|
|
|
|
|
|
#ifdef SUPPORT_UTF |
11493
|
|
|
|
|
|
|
#ifdef COMPILE_PCRE8 |
11494
|
|
|
|
|
|
|
if (common->utfreadchar != NULL) |
11495
|
|
|
|
|
|
|
{ |
11496
|
|
|
|
|
|
|
set_jumps(common->utfreadchar, LABEL()); |
11497
|
|
|
|
|
|
|
do_utfreadchar(common); |
11498
|
|
|
|
|
|
|
} |
11499
|
|
|
|
|
|
|
if (common->utfreadchar16 != NULL) |
11500
|
|
|
|
|
|
|
{ |
11501
|
|
|
|
|
|
|
set_jumps(common->utfreadchar16, LABEL()); |
11502
|
|
|
|
|
|
|
do_utfreadchar16(common); |
11503
|
|
|
|
|
|
|
} |
11504
|
|
|
|
|
|
|
if (common->utfreadtype8 != NULL) |
11505
|
|
|
|
|
|
|
{ |
11506
|
|
|
|
|
|
|
set_jumps(common->utfreadtype8, LABEL()); |
11507
|
|
|
|
|
|
|
do_utfreadtype8(common); |
11508
|
|
|
|
|
|
|
} |
11509
|
|
|
|
|
|
|
#endif /* COMPILE_PCRE8 */ |
11510
|
|
|
|
|
|
|
#endif /* SUPPORT_UTF */ |
11511
|
|
|
|
|
|
|
#ifdef SUPPORT_UCP |
11512
|
|
|
|
|
|
|
if (common->getucd != NULL) |
11513
|
|
|
|
|
|
|
{ |
11514
|
|
|
|
|
|
|
set_jumps(common->getucd, LABEL()); |
11515
|
|
|
|
|
|
|
do_getucd(common); |
11516
|
|
|
|
|
|
|
} |
11517
|
|
|
|
|
|
|
#endif |
11518
|
|
|
|
|
|
|
|
11519
|
|
|
|
|
|
|
SLJIT_FREE(common->optimized_cbracket, compiler->allocator_data); |
11520
|
|
|
|
|
|
|
SLJIT_FREE(common->private_data_ptrs, compiler->allocator_data); |
11521
|
|
|
|
|
|
|
|
11522
|
|
|
|
|
|
|
executable_func = sljit_generate_code(compiler); |
11523
|
|
|
|
|
|
|
executable_size = sljit_get_generated_code_size(compiler); |
11524
|
|
|
|
|
|
|
label_addr = common->label_addrs; |
11525
|
|
|
|
|
|
|
while (label_addr != NULL) |
11526
|
|
|
|
|
|
|
{ |
11527
|
|
|
|
|
|
|
*label_addr->update_addr = sljit_get_label_addr(label_addr->label); |
11528
|
|
|
|
|
|
|
label_addr = label_addr->next; |
11529
|
|
|
|
|
|
|
} |
11530
|
|
|
|
|
|
|
sljit_free_compiler(compiler); |
11531
|
|
|
|
|
|
|
if (executable_func == NULL) |
11532
|
|
|
|
|
|
|
{ |
11533
|
|
|
|
|
|
|
free_read_only_data(common->read_only_data_head, compiler->allocator_data); |
11534
|
|
|
|
|
|
|
return; |
11535
|
|
|
|
|
|
|
} |
11536
|
|
|
|
|
|
|
|
11537
|
|
|
|
|
|
|
/* Reuse the function descriptor if possible. */ |
11538
|
|
|
|
|
|
|
if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && extra->executable_jit != NULL) |
11539
|
|
|
|
|
|
|
functions = (executable_functions *)extra->executable_jit; |
11540
|
|
|
|
|
|
|
else |
11541
|
|
|
|
|
|
|
{ |
11542
|
|
|
|
|
|
|
/* Note: If your memory-checker has flagged the allocation below as a |
11543
|
|
|
|
|
|
|
* memory leak, it is probably because you either forgot to call |
11544
|
|
|
|
|
|
|
* pcre_free_study() (or pcre16_free_study()) on the pcre_extra (or |
11545
|
|
|
|
|
|
|
* pcre16_extra) object, or you called said function after having |
11546
|
|
|
|
|
|
|
* cleared the PCRE_EXTRA_EXECUTABLE_JIT bit from the "flags" field |
11547
|
|
|
|
|
|
|
* of the object. (The function will only free the JIT data if the |
11548
|
|
|
|
|
|
|
* bit remains set, as the bit indicates that the pointer to the data |
11549
|
|
|
|
|
|
|
* is valid.) |
11550
|
|
|
|
|
|
|
*/ |
11551
|
|
|
|
|
|
|
functions = SLJIT_MALLOC(sizeof(executable_functions), compiler->allocator_data); |
11552
|
|
|
|
|
|
|
if (functions == NULL) |
11553
|
|
|
|
|
|
|
{ |
11554
|
|
|
|
|
|
|
/* This case is highly unlikely since we just recently |
11555
|
|
|
|
|
|
|
freed a lot of memory. Not impossible though. */ |
11556
|
|
|
|
|
|
|
sljit_free_code(executable_func); |
11557
|
|
|
|
|
|
|
free_read_only_data(common->read_only_data_head, compiler->allocator_data); |
11558
|
|
|
|
|
|
|
return; |
11559
|
|
|
|
|
|
|
} |
11560
|
|
|
|
|
|
|
memset(functions, 0, sizeof(executable_functions)); |
11561
|
|
|
|
|
|
|
functions->top_bracket = (re->top_bracket + 1) * 2; |
11562
|
|
|
|
|
|
|
functions->limit_match = (re->flags & PCRE_MLSET) != 0 ? re->limit_match : 0; |
11563
|
|
|
|
|
|
|
extra->executable_jit = functions; |
11564
|
|
|
|
|
|
|
extra->flags |= PCRE_EXTRA_EXECUTABLE_JIT; |
11565
|
|
|
|
|
|
|
} |
11566
|
|
|
|
|
|
|
|
11567
|
|
|
|
|
|
|
functions->executable_funcs[mode] = executable_func; |
11568
|
|
|
|
|
|
|
functions->read_only_data_heads[mode] = common->read_only_data_head; |
11569
|
|
|
|
|
|
|
functions->executable_sizes[mode] = executable_size; |
11570
|
|
|
|
|
|
|
} |
11571
|
|
|
|
|
|
|
|
11572
|
|
|
|
|
|
|
/* Runs a piece of JIT-compiled code on a temporary stack that lives inside
this function's own frame.

Arguments:
  arguments        argument block handed to the compiled code; its "stack"
                   field is overwritten here
  executable_func  address of the compiled code, received as a data pointer

Returns:           the value returned by the compiled code

Note: on return, arguments->stack still holds the address of a local object
of this function, so callers must not use that field afterwards. */

static SLJIT_NOINLINE int jit_machine_stack_exec(jit_arguments *arguments, void *executable_func)
{
sljit_u8 stack_area[MACHINE_STACK_SIZE];
sljit_u8 *stack_area_end = stack_area + MACHINE_STACK_SIZE;
struct sljit_stack machine_stack;
union {
   void *data_ptr;
   jit_function code_ptr;
} entry;

/* The data pointer is turned into a callable pointer through the union
instead of a direct cast. */
entry.data_ptr = executable_func;

/* start and min_start are the first byte of the buffer; top and end are both
one past its last byte. */
machine_stack.top = stack_area_end;
machine_stack.end = stack_area_end;
machine_stack.start = stack_area;
machine_stack.min_start = stack_area;

arguments->stack = &machine_stack;
return entry.code_ptr(arguments);
}
11589
|
|
|
|
|
|
|
|
11590
|
|
|
|
|
|
|
int |
11591
|
|
|
|
|
|
|
PRIV(jit_exec)(const PUBL(extra) *extra_data, const pcre_uchar *subject, |
11592
|
|
|
|
|
|
|
int length, int start_offset, int options, int *offsets, int offset_count) |
11593
|
|
|
|
|
|
|
{ |
11594
|
|
|
|
|
|
|
executable_functions *functions = (executable_functions *)extra_data->executable_jit; |
11595
|
|
|
|
|
|
|
union { |
11596
|
|
|
|
|
|
|
void *executable_func; |
11597
|
|
|
|
|
|
|
jit_function call_executable_func; |
11598
|
|
|
|
|
|
|
} convert_executable_func; |
11599
|
|
|
|
|
|
|
jit_arguments arguments; |
11600
|
|
|
|
|
|
|
int max_offset_count; |
11601
|
|
|
|
|
|
|
int retval; |
11602
|
|
|
|
|
|
|
int mode = JIT_COMPILE; |
11603
|
|
|
|
|
|
|
|
11604
|
|
|
|
|
|
|
if ((options & PCRE_PARTIAL_HARD) != 0) |
11605
|
|
|
|
|
|
|
mode = JIT_PARTIAL_HARD_COMPILE; |
11606
|
|
|
|
|
|
|
else if ((options & PCRE_PARTIAL_SOFT) != 0) |
11607
|
|
|
|
|
|
|
mode = JIT_PARTIAL_SOFT_COMPILE; |
11608
|
|
|
|
|
|
|
|
11609
|
|
|
|
|
|
|
if (functions->executable_funcs[mode] == NULL) |
11610
|
|
|
|
|
|
|
return PCRE_ERROR_JIT_BADOPTION; |
11611
|
|
|
|
|
|
|
|
11612
|
|
|
|
|
|
|
/* Sanity checks should be handled by pcre_exec. */ |
11613
|
|
|
|
|
|
|
arguments.str = subject + start_offset; |
11614
|
|
|
|
|
|
|
arguments.begin = subject; |
11615
|
|
|
|
|
|
|
arguments.end = subject + length; |
11616
|
|
|
|
|
|
|
arguments.mark_ptr = NULL; |
11617
|
|
|
|
|
|
|
/* JIT decreases this value less frequently than the interpreter. */ |
11618
|
|
|
|
|
|
|
arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (sljit_u32)(extra_data->match_limit); |
11619
|
|
|
|
|
|
|
if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match) |
11620
|
|
|
|
|
|
|
arguments.limit_match = functions->limit_match; |
11621
|
|
|
|
|
|
|
arguments.notbol = (options & PCRE_NOTBOL) != 0; |
11622
|
|
|
|
|
|
|
arguments.noteol = (options & PCRE_NOTEOL) != 0; |
11623
|
|
|
|
|
|
|
arguments.notempty = (options & PCRE_NOTEMPTY) != 0; |
11624
|
|
|
|
|
|
|
arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0; |
11625
|
|
|
|
|
|
|
arguments.offsets = offsets; |
11626
|
|
|
|
|
|
|
arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL; |
11627
|
|
|
|
|
|
|
arguments.real_offset_count = offset_count; |
11628
|
|
|
|
|
|
|
|
11629
|
|
|
|
|
|
|
/* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of |
11630
|
|
|
|
|
|
|
the output vector for storing captured strings, with the remainder used as |
11631
|
|
|
|
|
|
|
workspace. We don't need the workspace here. For compatibility, we limit the |
11632
|
|
|
|
|
|
|
number of captured strings in the same way as pcre_exec(), so that the user |
11633
|
|
|
|
|
|
|
gets the same result with and without JIT. */ |
11634
|
|
|
|
|
|
|
|
11635
|
|
|
|
|
|
|
if (offset_count != 2) |
11636
|
|
|
|
|
|
|
offset_count = ((offset_count - (offset_count % 3)) * 2) / 3; |
11637
|
|
|
|
|
|
|
max_offset_count = functions->top_bracket; |
11638
|
|
|
|
|
|
|
if (offset_count > max_offset_count) |
11639
|
|
|
|
|
|
|
offset_count = max_offset_count; |
11640
|
|
|
|
|
|
|
arguments.offset_count = offset_count; |
11641
|
|
|
|
|
|
|
|
11642
|
|
|
|
|
|
|
if (functions->callback) |
11643
|
|
|
|
|
|
|
arguments.stack = (struct sljit_stack *)functions->callback(functions->userdata); |
11644
|
|
|
|
|
|
|
else |
11645
|
|
|
|
|
|
|
arguments.stack = (struct sljit_stack *)functions->userdata; |
11646
|
|
|
|
|
|
|
|
11647
|
|
|
|
|
|
|
if (arguments.stack == NULL) |
11648
|
|
|
|
|
|
|
retval = jit_machine_stack_exec(&arguments, functions->executable_funcs[mode]); |
11649
|
|
|
|
|
|
|
else |
11650
|
|
|
|
|
|
|
{ |
11651
|
|
|
|
|
|
|
convert_executable_func.executable_func = functions->executable_funcs[mode]; |
11652
|
|
|
|
|
|
|
retval = convert_executable_func.call_executable_func(&arguments); |
11653
|
|
|
|
|
|
|
} |
11654
|
|
|
|
|
|
|
|
11655
|
|
|
|
|
|
|
if (retval * 2 > offset_count) |
11656
|
|
|
|
|
|
|
retval = 0; |
11657
|
|
|
|
|
|
|
if ((extra_data->flags & PCRE_EXTRA_MARK) != 0) |
11658
|
|
|
|
|
|
|
*(extra_data->mark) = arguments.mark_ptr; |
11659
|
|
|
|
|
|
|
|
11660
|
|
|
|
|
|
|
return retval; |
11661
|
|
|
|
|
|
|
} |
11662
|
|
|
|
|
|
|
|
11663
|
|
|
|
|
|
|
#if defined COMPILE_PCRE8 |
11664
|
|
|
|
|
|
|
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
11665
|
|
|
|
|
|
|
pcre_jit_exec(const pcre *argument_re, const pcre_extra *extra_data, |
11666
|
|
|
|
|
|
|
PCRE_SPTR subject, int length, int start_offset, int options, |
11667
|
|
|
|
|
|
|
int *offsets, int offset_count, pcre_jit_stack *stack) |
11668
|
|
|
|
|
|
|
#elif defined COMPILE_PCRE16 |
11669
|
|
|
|
|
|
|
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
11670
|
|
|
|
|
|
|
pcre16_jit_exec(const pcre16 *argument_re, const pcre16_extra *extra_data, |
11671
|
|
|
|
|
|
|
PCRE_SPTR16 subject, int length, int start_offset, int options, |
11672
|
|
|
|
|
|
|
int *offsets, int offset_count, pcre16_jit_stack *stack) |
11673
|
|
|
|
|
|
|
#elif defined COMPILE_PCRE32 |
11674
|
|
|
|
|
|
|
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
11675
|
|
|
|
|
|
|
pcre32_jit_exec(const pcre32 *argument_re, const pcre32_extra *extra_data, |
11676
|
|
|
|
|
|
|
PCRE_SPTR32 subject, int length, int start_offset, int options, |
11677
|
|
|
|
|
|
|
int *offsets, int offset_count, pcre32_jit_stack *stack) |
11678
|
|
|
|
|
|
|
#endif |
11679
|
|
|
|
|
|
|
{ |
11680
|
|
|
|
|
|
|
pcre_uchar *subject_ptr = (pcre_uchar *)subject; |
11681
|
|
|
|
|
|
|
executable_functions *functions = (executable_functions *)extra_data->executable_jit; |
11682
|
|
|
|
|
|
|
union { |
11683
|
|
|
|
|
|
|
void *executable_func; |
11684
|
|
|
|
|
|
|
jit_function call_executable_func; |
11685
|
|
|
|
|
|
|
} convert_executable_func; |
11686
|
|
|
|
|
|
|
jit_arguments arguments; |
11687
|
|
|
|
|
|
|
int max_offset_count; |
11688
|
|
|
|
|
|
|
int retval; |
11689
|
|
|
|
|
|
|
int mode = JIT_COMPILE; |
11690
|
|
|
|
|
|
|
|
11691
|
|
|
|
|
|
|
SLJIT_UNUSED_ARG(argument_re); |
11692
|
|
|
|
|
|
|
|
11693
|
|
|
|
|
|
|
/* Plausibility checks */ |
11694
|
|
|
|
|
|
|
if ((options & ~PUBLIC_JIT_EXEC_OPTIONS) != 0) return PCRE_ERROR_JIT_BADOPTION; |
11695
|
|
|
|
|
|
|
|
11696
|
|
|
|
|
|
|
if ((options & PCRE_PARTIAL_HARD) != 0) |
11697
|
|
|
|
|
|
|
mode = JIT_PARTIAL_HARD_COMPILE; |
11698
|
|
|
|
|
|
|
else if ((options & PCRE_PARTIAL_SOFT) != 0) |
11699
|
|
|
|
|
|
|
mode = JIT_PARTIAL_SOFT_COMPILE; |
11700
|
|
|
|
|
|
|
|
11701
|
|
|
|
|
|
|
if (functions == NULL || functions->executable_funcs[mode] == NULL) |
11702
|
|
|
|
|
|
|
return PCRE_ERROR_JIT_BADOPTION; |
11703
|
|
|
|
|
|
|
|
11704
|
|
|
|
|
|
|
/* Sanity checks should be handled by pcre_exec. */ |
11705
|
|
|
|
|
|
|
arguments.stack = (struct sljit_stack *)stack; |
11706
|
|
|
|
|
|
|
arguments.str = subject_ptr + start_offset; |
11707
|
|
|
|
|
|
|
arguments.begin = subject_ptr; |
11708
|
|
|
|
|
|
|
arguments.end = subject_ptr + length; |
11709
|
|
|
|
|
|
|
arguments.mark_ptr = NULL; |
11710
|
|
|
|
|
|
|
/* JIT decreases this value less frequently than the interpreter. */ |
11711
|
|
|
|
|
|
|
arguments.limit_match = ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) ? MATCH_LIMIT : (sljit_u32)(extra_data->match_limit); |
11712
|
|
|
|
|
|
|
if (functions->limit_match != 0 && functions->limit_match < arguments.limit_match) |
11713
|
|
|
|
|
|
|
arguments.limit_match = functions->limit_match; |
11714
|
|
|
|
|
|
|
arguments.notbol = (options & PCRE_NOTBOL) != 0; |
11715
|
|
|
|
|
|
|
arguments.noteol = (options & PCRE_NOTEOL) != 0; |
11716
|
|
|
|
|
|
|
arguments.notempty = (options & PCRE_NOTEMPTY) != 0; |
11717
|
|
|
|
|
|
|
arguments.notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0; |
11718
|
|
|
|
|
|
|
arguments.offsets = offsets; |
11719
|
|
|
|
|
|
|
arguments.callout_data = (extra_data->flags & PCRE_EXTRA_CALLOUT_DATA) != 0 ? extra_data->callout_data : NULL; |
11720
|
|
|
|
|
|
|
arguments.real_offset_count = offset_count; |
11721
|
|
|
|
|
|
|
|
11722
|
|
|
|
|
|
|
/* pcre_exec() rounds offset_count to a multiple of 3, and then uses only 2/3 of |
11723
|
|
|
|
|
|
|
the output vector for storing captured strings, with the remainder used as |
11724
|
|
|
|
|
|
|
workspace. We don't need the workspace here. For compatibility, we limit the |
11725
|
|
|
|
|
|
|
number of captured strings in the same way as pcre_exec(), so that the user |
11726
|
|
|
|
|
|
|
gets the same result with and without JIT. */ |
11727
|
|
|
|
|
|
|
|
11728
|
|
|
|
|
|
|
if (offset_count != 2) |
11729
|
|
|
|
|
|
|
offset_count = ((offset_count - (offset_count % 3)) * 2) / 3; |
11730
|
|
|
|
|
|
|
max_offset_count = functions->top_bracket; |
11731
|
|
|
|
|
|
|
if (offset_count > max_offset_count) |
11732
|
|
|
|
|
|
|
offset_count = max_offset_count; |
11733
|
|
|
|
|
|
|
arguments.offset_count = offset_count; |
11734
|
|
|
|
|
|
|
|
11735
|
|
|
|
|
|
|
convert_executable_func.executable_func = functions->executable_funcs[mode]; |
11736
|
|
|
|
|
|
|
retval = convert_executable_func.call_executable_func(&arguments); |
11737
|
|
|
|
|
|
|
|
11738
|
|
|
|
|
|
|
if (retval * 2 > offset_count) |
11739
|
|
|
|
|
|
|
retval = 0; |
11740
|
|
|
|
|
|
|
if ((extra_data->flags & PCRE_EXTRA_MARK) != 0) |
11741
|
|
|
|
|
|
|
*(extra_data->mark) = arguments.mark_ptr; |
11742
|
|
|
|
|
|
|
|
11743
|
|
|
|
|
|
|
return retval; |
11744
|
|
|
|
|
|
|
} |
11745
|
|
|
|
|
|
|
|
11746
|
|
|
|
|
|
|
void |
11747
|
|
|
|
|
|
|
PRIV(jit_free)(void *executable_funcs) |
11748
|
|
|
|
|
|
|
{ |
11749
|
|
|
|
|
|
|
int i; |
11750
|
|
|
|
|
|
|
executable_functions *functions = (executable_functions *)executable_funcs; |
11751
|
|
|
|
|
|
|
for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++) |
11752
|
|
|
|
|
|
|
{ |
11753
|
|
|
|
|
|
|
if (functions->executable_funcs[i] != NULL) |
11754
|
|
|
|
|
|
|
sljit_free_code(functions->executable_funcs[i]); |
11755
|
|
|
|
|
|
|
free_read_only_data(functions->read_only_data_heads[i], NULL); |
11756
|
|
|
|
|
|
|
} |
11757
|
|
|
|
|
|
|
SLJIT_FREE(functions, compiler->allocator_data); |
11758
|
|
|
|
|
|
|
} |
11759
|
|
|
|
|
|
|
|
11760
|
|
|
|
|
|
|
int |
11761
|
|
|
|
|
|
|
PRIV(jit_get_size)(void *executable_funcs) |
11762
|
|
|
|
|
|
|
{ |
11763
|
|
|
|
|
|
|
int i; |
11764
|
|
|
|
|
|
|
sljit_uw size = 0; |
11765
|
|
|
|
|
|
|
sljit_uw *executable_sizes = ((executable_functions *)executable_funcs)->executable_sizes; |
11766
|
|
|
|
|
|
|
for (i = 0; i < JIT_NUMBER_OF_COMPILE_MODES; i++) |
11767
|
|
|
|
|
|
|
size += executable_sizes[i]; |
11768
|
|
|
|
|
|
|
return (int)size; |
11769
|
|
|
|
|
|
|
} |
11770
|
|
|
|
|
|
|
|
11771
|
|
|
|
|
|
|
const char* |
11772
|
|
|
|
|
|
|
PRIV(jit_get_target)(void) |
11773
|
|
|
|
|
|
|
{ |
11774
|
|
|
|
|
|
|
return sljit_get_platform_name(); |
11775
|
|
|
|
|
|
|
} |
11776
|
|
|
|
|
|
|
|
11777
|
|
|
|
|
|
|
#if defined COMPILE_PCRE8 |
11778
|
|
|
|
|
|
|
PCRE_EXP_DECL pcre_jit_stack * |
11779
|
|
|
|
|
|
|
pcre_jit_stack_alloc(int startsize, int maxsize) |
11780
|
|
|
|
|
|
|
#elif defined COMPILE_PCRE16 |
11781
|
|
|
|
|
|
|
PCRE_EXP_DECL pcre16_jit_stack * |
11782
|
|
|
|
|
|
|
pcre16_jit_stack_alloc(int startsize, int maxsize) |
11783
|
|
|
|
|
|
|
#elif defined COMPILE_PCRE32 |
11784
|
|
|
|
|
|
|
PCRE_EXP_DECL pcre32_jit_stack * |
11785
|
|
|
|
|
|
|
pcre32_jit_stack_alloc(int startsize, int maxsize) |
11786
|
|
|
|
|
|
|
#endif |
11787
|
|
|
|
|
|
|
{ |
11788
|
|
|
|
|
|
|
if (startsize < 1 || maxsize < 1) |
11789
|
|
|
|
|
|
|
return NULL; |
11790
|
|
|
|
|
|
|
if (startsize > maxsize) |
11791
|
|
|
|
|
|
|
startsize = maxsize; |
11792
|
|
|
|
|
|
|
startsize = (startsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1); |
11793
|
|
|
|
|
|
|
maxsize = (maxsize + STACK_GROWTH_RATE - 1) & ~(STACK_GROWTH_RATE - 1); |
11794
|
|
|
|
|
|
|
return (PUBL(jit_stack)*)sljit_allocate_stack(startsize, maxsize, NULL); |
11795
|
|
|
|
|
|
|
} |
11796
|
|
|
|
|
|
|
|
11797
|
|
|
|
|
|
|
#if defined COMPILE_PCRE8 |
11798
|
|
|
|
|
|
|
PCRE_EXP_DECL void |
11799
|
|
|
|
|
|
|
pcre_jit_stack_free(pcre_jit_stack *stack) |
11800
|
|
|
|
|
|
|
#elif defined COMPILE_PCRE16 |
11801
|
|
|
|
|
|
|
PCRE_EXP_DECL void |
11802
|
|
|
|
|
|
|
pcre16_jit_stack_free(pcre16_jit_stack *stack) |
11803
|
|
|
|
|
|
|
#elif defined COMPILE_PCRE32 |
11804
|
|
|
|
|
|
|
PCRE_EXP_DECL void |
11805
|
|
|
|
|
|
|
pcre32_jit_stack_free(pcre32_jit_stack *stack) |
11806
|
|
|
|
|
|
|
#endif |
11807
|
|
|
|
|
|
|
{ |
11808
|
|
|
|
|
|
|
sljit_free_stack((struct sljit_stack *)stack, NULL); |
11809
|
|
|
|
|
|
|
} |
11810
|
|
|
|
|
|
|
|
11811
|
|
|
|
|
|
|
#if defined COMPILE_PCRE8 |
11812
|
|
|
|
|
|
|
PCRE_EXP_DECL void |
11813
|
|
|
|
|
|
|
pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata) |
11814
|
|
|
|
|
|
|
#elif defined COMPILE_PCRE16 |
11815
|
|
|
|
|
|
|
PCRE_EXP_DECL void |
11816
|
|
|
|
|
|
|
pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata) |
11817
|
|
|
|
|
|
|
#elif defined COMPILE_PCRE32 |
11818
|
|
|
|
|
|
|
PCRE_EXP_DECL void |
11819
|
|
|
|
|
|
|
pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata) |
11820
|
|
|
|
|
|
|
#endif |
11821
|
|
|
|
|
|
|
{ |
11822
|
|
|
|
|
|
|
executable_functions *functions; |
11823
|
|
|
|
|
|
|
if (extra != NULL && |
11824
|
|
|
|
|
|
|
(extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && |
11825
|
|
|
|
|
|
|
extra->executable_jit != NULL) |
11826
|
|
|
|
|
|
|
{ |
11827
|
|
|
|
|
|
|
functions = (executable_functions *)extra->executable_jit; |
11828
|
|
|
|
|
|
|
functions->callback = callback; |
11829
|
|
|
|
|
|
|
functions->userdata = userdata; |
11830
|
|
|
|
|
|
|
} |
11831
|
|
|
|
|
|
|
} |
11832
|
|
|
|
|
|
|
|
11833
|
|
|
|
|
|
|
#if defined COMPILE_PCRE8 |
11834
|
|
|
|
|
|
|
PCRE_EXP_DECL void |
11835
|
|
|
|
|
|
|
pcre_jit_free_unused_memory(void) |
11836
|
|
|
|
|
|
|
#elif defined COMPILE_PCRE16 |
11837
|
|
|
|
|
|
|
PCRE_EXP_DECL void |
11838
|
|
|
|
|
|
|
pcre16_jit_free_unused_memory(void) |
11839
|
|
|
|
|
|
|
#elif defined COMPILE_PCRE32 |
11840
|
|
|
|
|
|
|
PCRE_EXP_DECL void |
11841
|
|
|
|
|
|
|
pcre32_jit_free_unused_memory(void) |
11842
|
|
|
|
|
|
|
#endif |
11843
|
|
|
|
|
|
|
{ |
11844
|
|
|
|
|
|
|
sljit_free_unused_memory_exec(); |
11845
|
|
|
|
|
|
|
} |
11846
|
|
|
|
|
|
|
|
11847
|
|
|
|
|
|
|
#else /* SUPPORT_JIT */ |
11848
|
|
|
|
|
|
|
|
11849
|
|
|
|
|
|
|
/* These are dummy functions to avoid linking errors when JIT support is not |
11850
|
|
|
|
|
|
|
being compiled. */ |
11851
|
|
|
|
|
|
|
|
11852
|
|
|
|
|
|
|
#if defined COMPILE_PCRE8 |
11853
|
|
|
|
|
|
|
PCRE_EXP_DECL pcre_jit_stack * |
11854
|
0
|
|
|
|
|
|
pcre_jit_stack_alloc(int startsize, int maxsize) |
11855
|
|
|
|
|
|
|
#elif defined COMPILE_PCRE16 |
11856
|
|
|
|
|
|
|
PCRE_EXP_DECL pcre16_jit_stack * |
11857
|
|
|
|
|
|
|
pcre16_jit_stack_alloc(int startsize, int maxsize) |
11858
|
|
|
|
|
|
|
#elif defined COMPILE_PCRE32 |
11859
|
|
|
|
|
|
|
PCRE_EXP_DECL pcre32_jit_stack * |
11860
|
|
|
|
|
|
|
pcre32_jit_stack_alloc(int startsize, int maxsize) |
11861
|
|
|
|
|
|
|
#endif |
11862
|
|
|
|
|
|
|
{ |
11863
|
|
|
|
|
|
|
(void)startsize; |
11864
|
|
|
|
|
|
|
(void)maxsize; |
11865
|
0
|
|
|
|
|
|
return NULL; |
11866
|
|
|
|
|
|
|
} |
11867
|
|
|
|
|
|
|
|
11868
|
|
|
|
|
|
|
#if defined COMPILE_PCRE8 |
11869
|
|
|
|
|
|
|
PCRE_EXP_DECL void |
11870
|
0
|
|
|
|
|
|
pcre_jit_stack_free(pcre_jit_stack *stack) |
11871
|
|
|
|
|
|
|
#elif defined COMPILE_PCRE16 |
11872
|
|
|
|
|
|
|
PCRE_EXP_DECL void |
11873
|
|
|
|
|
|
|
pcre16_jit_stack_free(pcre16_jit_stack *stack) |
11874
|
|
|
|
|
|
|
#elif defined COMPILE_PCRE32 |
11875
|
|
|
|
|
|
|
PCRE_EXP_DECL void |
11876
|
|
|
|
|
|
|
pcre32_jit_stack_free(pcre32_jit_stack *stack) |
11877
|
|
|
|
|
|
|
#endif |
11878
|
|
|
|
|
|
|
{ |
11879
|
|
|
|
|
|
|
(void)stack; |
11880
|
0
|
|
|
|
|
|
} |
11881
|
|
|
|
|
|
|
|
11882
|
|
|
|
|
|
|
#if defined COMPILE_PCRE8 |
11883
|
|
|
|
|
|
|
PCRE_EXP_DECL void |
11884
|
0
|
|
|
|
|
|
pcre_assign_jit_stack(pcre_extra *extra, pcre_jit_callback callback, void *userdata) |
11885
|
|
|
|
|
|
|
#elif defined COMPILE_PCRE16 |
11886
|
|
|
|
|
|
|
PCRE_EXP_DECL void |
11887
|
|
|
|
|
|
|
pcre16_assign_jit_stack(pcre16_extra *extra, pcre16_jit_callback callback, void *userdata) |
11888
|
|
|
|
|
|
|
#elif defined COMPILE_PCRE32 |
11889
|
|
|
|
|
|
|
PCRE_EXP_DECL void |
11890
|
|
|
|
|
|
|
pcre32_assign_jit_stack(pcre32_extra *extra, pcre32_jit_callback callback, void *userdata) |
11891
|
|
|
|
|
|
|
#endif |
11892
|
|
|
|
|
|
|
{ |
11893
|
|
|
|
|
|
|
(void)extra; |
11894
|
|
|
|
|
|
|
(void)callback; |
11895
|
|
|
|
|
|
|
(void)userdata; |
11896
|
0
|
|
|
|
|
|
} |
11897
|
|
|
|
|
|
|
|
11898
|
|
|
|
|
|
|
#if defined COMPILE_PCRE8 |
11899
|
|
|
|
|
|
|
PCRE_EXP_DECL void |
11900
|
0
|
|
|
|
|
|
pcre_jit_free_unused_memory(void) |
11901
|
|
|
|
|
|
|
#elif defined COMPILE_PCRE16 |
11902
|
|
|
|
|
|
|
PCRE_EXP_DECL void |
11903
|
|
|
|
|
|
|
pcre16_jit_free_unused_memory(void) |
11904
|
|
|
|
|
|
|
#elif defined COMPILE_PCRE32 |
11905
|
|
|
|
|
|
|
PCRE_EXP_DECL void |
11906
|
|
|
|
|
|
|
pcre32_jit_free_unused_memory(void) |
11907
|
|
|
|
|
|
|
#endif |
11908
|
|
|
|
|
|
|
{ |
11909
|
0
|
|
|
|
|
|
} |
11910
|
|
|
|
|
|
|
|
11911
|
|
|
|
|
|
|
#endif |
11912
|
|
|
|
|
|
|
|
11913
|
|
|
|
|
|
|
/* End of pcre_jit_compile.c */ |