File Coverage

deps/libgit2/deps/pcre/pcre_exec.c
Criterion Covered Total %
statement 250 1664 15.0
branch 154 1902 8.1
condition n/a
subroutine n/a
pod n/a
total 404 3566 11.3


line stmt bran cond sub pod time code
1             /*************************************************
2             * Perl-Compatible Regular Expressions *
3             *************************************************/
4              
5             /* PCRE is a library of functions to support regular expressions whose syntax
6             and semantics are as close as possible to those of the Perl 5 language.
7              
8             Written by Philip Hazel
9             Copyright (c) 1997-2018 University of Cambridge
10              
11             -----------------------------------------------------------------------------
12             Redistribution and use in source and binary forms, with or without
13             modification, are permitted provided that the following conditions are met:
14              
15             * Redistributions of source code must retain the above copyright notice,
16             this list of conditions and the following disclaimer.
17              
18             * Redistributions in binary form must reproduce the above copyright
19             notice, this list of conditions and the following disclaimer in the
20             documentation and/or other materials provided with the distribution.
21              
22             * Neither the name of the University of Cambridge nor the names of its
23             contributors may be used to endorse or promote products derived from
24             this software without specific prior written permission.
25              
26             THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27             AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28             IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29             ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30             LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31             CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32             SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33             INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34             CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35             ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36             POSSIBILITY OF SUCH DAMAGE.
37             -----------------------------------------------------------------------------
38             */
39              
40             /* This module contains pcre_exec(), the externally visible function that does
41             pattern matching using an NFA algorithm, trying to mimic Perl as closely as
42             possible. There are also some static supporting functions. */
43              
44             #ifdef HAVE_CONFIG_H
45             #include "config.h"
46             #endif
47              
48             #define NLBLOCK md /* Block containing newline information */
49             #define PSSTART start_subject /* Field containing processed string start */
50             #define PSEND end_subject /* Field containing processed string end */
51              
52             #include "pcre_internal.h"
53              
54             /* Undefine some potentially clashing cpp symbols */
55              
56             #undef min
57             #undef max
58              
59             /* The md->capture_last field uses the lower 16 bits for the last captured
60             substring (which can never be greater than 65535) and a bit in the top half
61             to mean "capture vector overflowed". This odd way of doing things was
62             implemented when it was realized that preserving and restoring the overflow bit
63             whenever the last capture number was saved/restored made for a neater
64             interface, and doing it this way saved on (a) another variable, which would
65             have increased the stack frame size (a big NO-NO in PCRE) and (b) another
66             separate set of save/restore instructions. The following defines are used in
67             implementing this. */
68              
69             #define CAPLMASK 0x0000ffff /* The bits used for last_capture */
70             #define OVFLMASK 0xffff0000 /* The bits used for the overflow flag */
71             #define OVFLBIT 0x00010000 /* The bit that is set for overflow */
/* Taken together CAPLMASK and OVFLMASK cover all 32 bits of the field; OVFLBIT
is the lowest bit of the OVFLMASK half. */
72              
73             /* Values for setting in md->match_function_type to indicate two special types
74             of call to match(). We do it this way to save on using another stack variable,
75             as stack usage is to be discouraged. */
76              
77             #define MATCH_CONDASSERT 1 /* Called to check a condition assertion */
78             #define MATCH_CBEGROUP 2 /* Could-be-empty unlimited repeat group */
79              
80             /* Non-error returns from the match() function. Error returns are externally
81             defined PCRE_ERROR_xxx codes, which are all negative. */
82              
83             #define MATCH_MATCH 1
84             #define MATCH_NOMATCH 0
85              
86             /* Special internal returns from the match() function. Make them sufficiently
87             negative to avoid the external error codes. */
88              
89             #define MATCH_ACCEPT (-999)
90             #define MATCH_KETRPOS (-998)
91             #define MATCH_ONCE (-997)
92             /* The next 5 must be kept together and in sequence so that a test that checks
93             for any one of them can use a range. */
94             #define MATCH_COMMIT (-996)
95             #define MATCH_PRUNE (-995)
96             #define MATCH_SKIP (-994)
97             #define MATCH_SKIP_ARG (-993)
98             #define MATCH_THEN (-992)
99             #define MATCH_BACKTRACK_MAX MATCH_THEN
100             #define MATCH_BACKTRACK_MIN MATCH_COMMIT
/* MATCH_BACKTRACK_MIN..MATCH_BACKTRACK_MAX is therefore the inclusive range
-996..-992 spanned by the five codes above; inserting a new code between them
requires keeping the values consecutive. */
101              
102             /* Maximum number of ints of offset to save on the stack for recursive calls.
103             If the offset vector is bigger, malloc is used. This should be a multiple of 3,
104             because the offset vector is always a multiple of 3 long. */
105              
106             #define REC_STACK_SAVE_MAX 30
107              
108             /* Min and max values for the common repeats; for the maxima, 0 => infinity */
109              
/* Two parallel tables of eleven entries each, giving the minimum and maximum
repeat count for the common repeats. A maximum of 0 stands for "no limit". */

static const char rep_min[] =
  { 0, 0,   1, 1,   0, 0,   0, 0,   0, 1, 0, };
static const char rep_max[] =
  { 0, 0,   0, 0,   1, 1,   0, 0,   0, 0, 1, };
112              
113             #ifdef PCRE_DEBUG
114             /*************************************************
115             * Debugging function to print chars *
116             *************************************************/
117              
118             /* Print a sequence of chars in printable format, stopping at the end of the
119             subject if requested.
120              
121             Arguments:
122             p points to characters
123             length number to print
124             is_subject TRUE if printing from within md->start_subject
125             md pointer to matching data block, if is_subject is TRUE
126              
127             Returns: nothing
128             */
129              
130             static void
131             pchars(const pcre_uchar *p, int length, BOOL is_subject, match_data *md)
132             {
133             pcre_uint32 c;
134             BOOL utf = md->utf;
135             if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
136             while (length-- > 0)
137             if (isprint(c = UCHAR21INCTEST(p))) printf("%c", (char)c); else printf("\\x{%02x}", c);
138             }
139             #endif
140              
141              
142              
143             /*************************************************
144             * Match a back-reference *
145             *************************************************/
146              
147             /* Normally, if a back reference hasn't been set, the length that is passed is
148             negative, so the match always fails. However, in JavaScript compatibility mode,
149             the length passed is zero. Note that in caseless UTF-8 mode, the number of
150             subject bytes matched may be different to the number of reference bytes.
151              
152             Arguments:
153             offset index into the offset vector
154             eptr pointer into the subject
155             length length of reference to be matched (number of bytes)
156             md points to match data block
157             caseless TRUE if caseless
158              
159             Returns: >= 0 the number of subject bytes matched
160             -1 no match
161             -2 partial match; always given if at end subject
162             */
163              
164             static int
165 0           match_ref(int offset, register PCRE_PUCHAR eptr, int length, match_data *md,
166             BOOL caseless)
167             {
168 0           PCRE_PUCHAR eptr_start = eptr;
169 0           register PCRE_PUCHAR p = md->start_subject + md->offset_vector[offset];
170             #if defined SUPPORT_UTF && defined SUPPORT_UCP
171             BOOL utf = md->utf;
172             #endif
173              
174             #ifdef PCRE_DEBUG
175             if (eptr >= md->end_subject)
176             printf("matching subject ");
177             else
178             {
179             printf("matching subject ");
180             pchars(eptr, length, TRUE, md);
181             }
182             printf(" against backref ");
183             pchars(p, length, FALSE, md);
184             printf("\n");
185             #endif
186              
187             /* Always fail if reference not set (and not JavaScript compatible - in that
188             case the length is passed as zero). */
189              
190 0 0         if (length < 0) return -1;
191              
192             /* Separate the caseless case for speed. In UTF-8 mode we can only do this
193             properly if Unicode properties are supported. Otherwise, we can check only
194             ASCII characters. */
195              
196 0 0         if (caseless)
197             {
198             #if defined SUPPORT_UTF && defined SUPPORT_UCP
199             if (utf)
200             {
201             /* Match characters up to the end of the reference. NOTE: the number of
202             data units matched may differ, because in UTF-8 there are some characters
203             whose upper and lower case versions code have different numbers of bytes.
204             For example, U+023A (2 bytes in UTF-8) is the upper case version of U+2C65
205             (3 bytes in UTF-8); a sequence of 3 of the former uses 6 bytes, as does a
206             sequence of two of the latter. It is important, therefore, to check the
207             length along the reference, not along the subject (earlier code did this
208             wrong). */
209              
210             PCRE_PUCHAR endptr = p + length;
211             while (p < endptr)
212             {
213             pcre_uint32 c, d;
214             const ucd_record *ur;
215             if (eptr >= md->end_subject) return -2; /* Partial match */
216             GETCHARINC(c, eptr);
217             GETCHARINC(d, p);
218             ur = GET_UCD(d);
219             if (c != d && c != d + ur->other_case)
220             {
221             const pcre_uint32 *pp = PRIV(ucd_caseless_sets) + ur->caseset;
222             for (;;)
223             {
224             if (c < *pp) return -1;
225             if (c == *pp++) break;
226             }
227             }
228             }
229             }
230             else
231             #endif
232              
233             /* The same code works when not in UTF-8 mode and in UTF-8 mode when there
234             is no UCP support. */
235             {
236 0 0         while (length-- > 0)
237             {
238             pcre_uint32 cc, cp;
239 0 0         if (eptr >= md->end_subject) return -2; /* Partial match */
240 0           cc = UCHAR21TEST(eptr);
241 0           cp = UCHAR21TEST(p);
242 0 0         if (TABLE_GET(cp, md->lcc, cp) != TABLE_GET(cc, md->lcc, cc)) return -1;
243 0           p++;
244 0           eptr++;
245             }
246             }
247             }
248              
249             /* In the caseful case, we can just compare the bytes, whether or not we
250             are in UTF-8 mode. */
251              
252             else
253             {
254 0 0         while (length-- > 0)
255             {
256 0 0         if (eptr >= md->end_subject) return -2; /* Partial match */
257 0 0         if (UCHAR21INCTEST(p) != UCHAR21INCTEST(eptr)) return -1;
258             }
259             }
260              
261 0           return (int)(eptr - eptr_start);
262             }
263              
264              
265              
266             /***************************************************************************
267             ****************************************************************************
268             RECURSION IN THE match() FUNCTION
269              
270             The match() function is highly recursive, though not every recursive call
271             increases the recursive depth. Nevertheless, some regular expressions can cause
272             it to recurse to a great depth. I was writing for Unix, so I just let it call
273             itself recursively. This uses the stack for saving everything that has to be
274             saved for a recursive call. On Unix, the stack can be large, and this works
275             fine.
276              
277             It turns out that on some non-Unix-like systems there are problems with
278             programs that use a lot of stack. (This despite the fact that every last chip
279             has oodles of memory these days, and techniques for extending the stack have
280             been known for decades.) So....
281              
282             There is a fudge, triggered by defining NO_RECURSE, which avoids recursive
283             calls by keeping local variables that need to be preserved in blocks of memory
284             obtained from malloc() instead of on the stack. Macros are used to
285             achieve this so that the actual code doesn't look very different to what it
286             always used to.
287              
288             The original heap-recursive code used longjmp(). However, it seems that this
289             can be very slow on some operating systems. Following a suggestion from Stan
290             Switzer, the use of longjmp() has been abolished, at the cost of having to
291             provide a unique number for each call to RMATCH. There is no way of generating
292             a sequence of numbers at compile time in C. I have given them names, to make
293             them stand out more clearly.
294              
295             Crude tests on x86 Linux show a small speedup of around 5-8%. However, on
296             FreeBSD, avoiding longjmp() more than halves the time taken to run the standard
297             tests. Furthermore, not using longjmp() means that local dynamic variables
298             don't have indeterminate values; this has meant that the frame size can be
299             reduced because the result can be "passed back" by straight setting of the
300             variable instead of being passed in the frame.
301             ****************************************************************************
302             ***************************************************************************/
303              
304             /* Numbers for RMATCH calls. When this list is changed, the code at HEAP_RETURN
305             below must be updated in sync. */
306              
/* One distinct, non-zero tag per RMATCH() call site: RM1 is 1 and the values
run consecutively up to RM67. */

enum
  {
  RM1 = 1,
  RM2,  RM3,  RM4,  RM5,  RM6,  RM7,  RM8,  RM9,  RM10, RM11, RM12,
  RM13, RM14, RM15, RM16, RM17, RM18, RM19, RM20, RM21, RM22, RM23,
  RM24, RM25, RM26, RM27, RM28, RM29, RM30, RM31, RM32, RM33, RM34,
  RM35, RM36, RM37, RM38, RM39, RM40, RM41, RM42, RM43, RM44, RM45,
  RM46, RM47, RM48, RM49, RM50, RM51, RM52, RM53, RM54, RM55, RM56,
  RM57, RM58, RM59, RM60, RM61, RM62, RM63, RM64, RM65, RM66, RM67
  };
314              
315             /* These versions of the macros use the stack, as normal. There are debugging
316             versions and production versions. Note that the "rw" argument of RMATCH isn't
317             actually used in this definition. */
318              
319             #ifndef NO_RECURSE
320             #define REGISTER register
/* REGISTER qualifies the eptr and ecode parameters of match(); with real
recursion it expands to the "register" storage class. */
321              
322             #ifdef PCRE_DEBUG
/* Debugging versions. RMATCH prints the calling line, stores the result of a
nested match() call in rrc (passing the caller's current mstart and a depth of
rdepth+1), then prints the line again. RRETURN prints the value and line before
returning. Both expand to a braced block, not to an expression. */
323             #define RMATCH(ra,rb,rc,rd,re,rw) \
324             { \
325             printf("match() called in line %d\n", __LINE__); \
326             rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1); \
327             printf("to line %d\n", __LINE__); \
328             }
329             #define RRETURN(ra) \
330             { \
331             printf("match() returned %d from line %d\n", ra, __LINE__); \
332             return ra; \
333             }
334             #else
/* Production versions: the same call and return with no tracing. RMATCH is a
single assignment to rrc and RRETURN a plain return statement. */
335             #define RMATCH(ra,rb,rc,rd,re,rw) \
336             rrc = match(ra,rb,mstart,rc,rd,re,rdepth+1)
337             #define RRETURN(ra) return ra
338             #endif
339              
340             #else
341              
342              
343             /* These versions of the macros manage a private stack on the heap. Note that
344             the "rd" argument of RMATCH isn't actually used in this definition. It's the md
345             argument of match(), which never changes. */
346              
347             #define REGISTER
348              
/* RMATCH (heap version) simulates a call to match() without using the C stack:
  1. Take the frame already linked at frame->Xnextframe, or obtain a new one
     through PUBL(stack_malloc) and link it in. If that allocation fails the
     macro leaves via RRETURN(PCRE_ERROR_NOMEMORY).
  2. Record the resume tag rw in the current frame's Xwhere.
  3. Fill the new frame with the "arguments" (ra, rb, the current mstart, rc,
     re) and a depth one greater than the current frame's.
  4. Make the new frame current and jump to HEAP_RECURSE.
The label L_<rw> pasted at the end is where execution continues once the
simulated call has returned. The rd argument is not used. */
349             #define RMATCH(ra,rb,rc,rd,re,rw)\
350             {\
351             heapframe *newframe = frame->Xnextframe;\
352             if (newframe == NULL)\
353             {\
354             newframe = (heapframe *)(PUBL(stack_malloc))(sizeof(heapframe));\
355             if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\
356             newframe->Xnextframe = NULL;\
357             frame->Xnextframe = newframe;\
358             }\
359             frame->Xwhere = rw;\
360             newframe->Xeptr = ra;\
361             newframe->Xecode = rb;\
362             newframe->Xmstart = mstart;\
363             newframe->Xoffset_top = rc;\
364             newframe->Xeptrb = re;\
365             newframe->Xrdepth = frame->Xrdepth + 1;\
366             newframe->Xprevframe = frame;\
367             frame = newframe;\
368             DPRINTF(("restarting from line %d\n", __LINE__));\
369             goto HEAP_RECURSE;\
370             L_##rw:\
371             DPRINTF(("jumped back to line %d\n", __LINE__));\
372             }
373              
/* RRETURN (heap version) steps back to the previous frame. If there is one,
the result is placed in rrc and control jumps to HEAP_RETURN; otherwise this
was the outermost frame and the function really returns ra. The frame being
left is not released here; it stays linked through Xnextframe of its
predecessor, where RMATCH looks for a frame before allocating. */
374             #define RRETURN(ra)\
375             {\
376             heapframe *oldframe = frame;\
377             frame = oldframe->Xprevframe;\
378             if (frame != NULL)\
379             {\
380             rrc = ra;\
381             goto HEAP_RETURN;\
382             }\
383             return ra;\
384             }
385              
386              
387             /* Structure for remembering the local variables in a private frame */
388              
/* One simulated stack frame of match(), used only when NO_RECURSE is defined.
Frames form a doubly linked list. Every member other than the two links and
Xwhere mirrors a parameter or local variable of match(), with an "X" prefix. */
389             typedef struct heapframe {
/* Xprevframe is the frame RRETURN goes back to (NULL ends the simulated
recursion); Xnextframe is the frame RMATCH uses for the next simulated call,
or NULL if none has been allocated yet. */
390             struct heapframe *Xprevframe;
391             struct heapframe *Xnextframe;
392              
393             /* Function arguments that may change */
394              
395             PCRE_PUCHAR Xeptr;
396             const pcre_uchar *Xecode;
397             PCRE_PUCHAR Xmstart;
398             int Xoffset_top;
399             eptrblock *Xeptrb;
400             unsigned int Xrdepth;
401              
402             /* Function local variables */
403              
404             PCRE_PUCHAR Xcallpat;
405             #ifdef SUPPORT_UTF
406             PCRE_PUCHAR Xcharptr;
407             #endif
408             PCRE_PUCHAR Xdata;
409             PCRE_PUCHAR Xnext;
410             PCRE_PUCHAR Xpp;
411             PCRE_PUCHAR Xprev;
412             PCRE_PUCHAR Xsaved_eptr;
413              
414             recursion_info Xnew_recursive;
415              
416             BOOL Xcur_is_word;
417             BOOL Xcondition;
418             BOOL Xprev_is_word;
419              
420             #ifdef SUPPORT_UCP
421             int Xprop_type;
422             unsigned int Xprop_value;
423             int Xprop_fail_result;
424             int Xoclength;
425             pcre_uchar Xocchars[6];
426             #endif
427              
428             int Xcodelink;
429             int Xctype;
430             unsigned int Xfc;
431             int Xfi;
432             int Xlength;
433             int Xmax;
434             int Xmin;
435             unsigned int Xnumber;
436             int Xoffset;
437             unsigned int Xop;
438             pcre_int32 Xsave_capture_last;
439             int Xsave_offset1, Xsave_offset2, Xsave_offset3;
440             int Xstacksave[REC_STACK_SAVE_MAX];
441              
442             eptrblock Xnewptrb;
443              
444             /* Where to jump back to */
/* Set by RMATCH to its rw argument (one of the RMn tags) just before the
simulated call is made. */
445              
446             int Xwhere;
447              
448             } heapframe;
449              
450             #endif
451              
452              
453             /***************************************************************************
454             ***************************************************************************/
455              
456              
457              
458             /*************************************************
459             * Match from current position *
460             *************************************************/
461              
462             /* This function is called recursively in many circumstances. Whenever it
463             returns a negative (error) response, the outer incarnation must also return the
464             same response. */
465              
466             /* These macros pack up tests that are used for partial matching, and which
467             appear several times in the code. We set the "hit end" flag if the pointer is
468             at the end of the subject and also past the start of the subject (i.e.
469             something has been matched). For hard partial matching, we then return
470             immediately. The second one is used when we already know we are past the end of
471             the subject. */
472              
/* CHECK_PARTIAL: does nothing unless md->partial is non-zero, eptr has reached
md->end_subject, and eptr is beyond md->start_used_ptr. In that case
md->hitend is set, and if md->partial is greater than 1 the macro leaves the
current match() level via RRETURN(PCRE_ERROR_PARTIAL).
NOTE(review): the expansion is a bare "if" statement, so an "else" written
directly after a use would attach to it. */
473             #define CHECK_PARTIAL()\
474             if (md->partial != 0 && eptr >= md->end_subject && \
475             eptr > md->start_used_ptr) \
476             { \
477             md->hitend = TRUE; \
478             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
479             }
480              
/* SCHECK_PARTIAL: the same action without the eptr >= md->end_subject test,
for call sites that have already established it. */
481             #define SCHECK_PARTIAL()\
482             if (md->partial != 0 && eptr > md->start_used_ptr) \
483             { \
484             md->hitend = TRUE; \
485             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL); \
486             }
487              
488              
489             /* Performance note: It might be tempting to extract commonly used fields from
490             the md structure (e.g. utf, end_subject) into individual variables to improve
491             performance. Tests using gcc on a SPARC disproved this; in the first case, it
492             made performance worse.
493              
494             Arguments:
495             eptr pointer to current character in subject
496             ecode pointer to current position in compiled code
497             mstart pointer to the current match start position (can be modified
498             by encountering \K)
499             offset_top current top pointer
500             md pointer to "static" info for the match
501             eptrb pointer to chain of blocks containing eptr at start of
502             brackets - for testing for empty matches
503             rdepth the recursion depth
504              
505             Returns: MATCH_MATCH if matched ) these values are >= 0
506             MATCH_NOMATCH if failed to match )
507             a negative MATCH_xxx value for PRUNE, SKIP, etc
508             a negative PCRE_ERROR_xxx value if aborted by an error condition
509             (e.g. stopped by repeated call or recursion limit)
510             */
511              
512             static int
513 172           match(REGISTER PCRE_PUCHAR eptr, REGISTER const pcre_uchar *ecode,
514             PCRE_PUCHAR mstart, int offset_top, match_data *md, eptrblock *eptrb,
515             unsigned int rdepth)
516             {
517             /* These variables do not need to be preserved over recursion in this function,
518             so they can be ordinary variables in all cases. Mark some of them with
519             "register" because they are used a lot in loops. */
520              
521             register int rrc; /* Returns from recursive calls */
522             register int i; /* Used for loops not involving calls to RMATCH() */
523             register pcre_uint32 c; /* Character values not kept over RMATCH() calls */
524             register BOOL utf; /* Local copy of UTF flag for speed */
525              
526             BOOL minimize, possessive; /* Quantifier options */
527             BOOL caseless;
528             int condcode;
529              
530             /* When recursion is not being used, all "local" variables that have to be
531             preserved over calls to RMATCH() are part of a "frame". We set up the top-level
532             frame on the stack here; subsequent instantiations are obtained from the heap
533             whenever RMATCH() does a "recursion". See the macro definitions above. Putting
534             the top-level on the stack rather than malloc-ing them all gives a performance
535             boost in many cases where there is not much "recursion". */
536              
537             #ifdef NO_RECURSE
538             heapframe *frame = (heapframe *)md->match_frames_base;
539              
540             /* Copy in the original argument variables */
541              
542             frame->Xeptr = eptr;
543             frame->Xecode = ecode;
544             frame->Xmstart = mstart;
545             frame->Xoffset_top = offset_top;
546             frame->Xeptrb = eptrb;
547             frame->Xrdepth = rdepth;
548              
549             /* This is where control jumps back to in order to effect "recursion" */
550              
551             HEAP_RECURSE:
552              
553             /* Macros make the argument variables come from the current frame */
554              
555             #define eptr frame->Xeptr
556             #define ecode frame->Xecode
557             #define mstart frame->Xmstart
558             #define offset_top frame->Xoffset_top
559             #define eptrb frame->Xeptrb
560             #define rdepth frame->Xrdepth
561              
562             /* Ditto for the local variables */
563              
564             #ifdef SUPPORT_UTF
565             #define charptr frame->Xcharptr
566             #endif
567             #define callpat frame->Xcallpat
568             #define codelink frame->Xcodelink
569             #define data frame->Xdata
570             #define next frame->Xnext
571             #define pp frame->Xpp
572             #define prev frame->Xprev
573             #define saved_eptr frame->Xsaved_eptr
574              
575             #define new_recursive frame->Xnew_recursive
576              
577             #define cur_is_word frame->Xcur_is_word
578             #define condition frame->Xcondition
579             #define prev_is_word frame->Xprev_is_word
580              
581             #ifdef SUPPORT_UCP
582             #define prop_type frame->Xprop_type
583             #define prop_value frame->Xprop_value
584             #define prop_fail_result frame->Xprop_fail_result
585             #define oclength frame->Xoclength
586             #define occhars frame->Xocchars
587             #endif
588              
589             #define ctype frame->Xctype
590             #define fc frame->Xfc
591             #define fi frame->Xfi
592             #define length frame->Xlength
593             #define max frame->Xmax
594             #define min frame->Xmin
595             #define number frame->Xnumber
596             #define offset frame->Xoffset
597             #define op frame->Xop
598             #define save_capture_last frame->Xsave_capture_last
599             #define save_offset1 frame->Xsave_offset1
600             #define save_offset2 frame->Xsave_offset2
601             #define save_offset3 frame->Xsave_offset3
602             #define stacksave frame->Xstacksave
603              
604             #define newptrb frame->Xnewptrb
605              
606             /* When recursion is being used, local variables are allocated on the stack and
607             get preserved during recursion in the normal way. In this environment, fi and
608             i, and fc and c, can be the same variables. */
609              
610             #else /* NO_RECURSE not defined */
611             #define fi i
612             #define fc c
613              
614             /* Many of the following variables are used only in small blocks of the code.
615             My normal style of coding would have declared them within each of those blocks.
616             However, in order to accommodate the version of this code that uses an external
617             "stack" implemented on the heap, it is easier to declare them all here, so the
618             declarations can be cut out in a block. The only declarations within blocks
619             below are for variables that do not have to be preserved over a recursive call
620             to RMATCH(). */
621              
622             #ifdef SUPPORT_UTF
623             const pcre_uchar *charptr;
624             #endif
625             const pcre_uchar *callpat;
626             const pcre_uchar *data;
627             const pcre_uchar *next;
628             PCRE_PUCHAR pp;
629             const pcre_uchar *prev;
630             PCRE_PUCHAR saved_eptr;
631              
632             recursion_info new_recursive;
633              
634             BOOL cur_is_word;
635             BOOL condition;
636             BOOL prev_is_word;
637              
638             #ifdef SUPPORT_UCP
639             int prop_type;
640             unsigned int prop_value;
641             int prop_fail_result;
642             int oclength;
643             pcre_uchar occhars[6];
644             #endif
645              
646             int codelink;
647             int ctype;
648             int length;
649             int max;
650             int min;
651             unsigned int number;
652             int offset;
653             unsigned int op;
654             pcre_int32 save_capture_last;
655             int save_offset1, save_offset2, save_offset3;
656             int stacksave[REC_STACK_SAVE_MAX];
657              
658             eptrblock newptrb;
659              
660             /* There is a special fudge for calling match() in a way that causes it to
661             measure the size of its basic stack frame when the stack is being used for
662             recursion. The second argument (ecode) being NULL triggers this behaviour. It
663             cannot normally ever be NULL. The return is the negated value of the frame
664             size. */
665              
666 172 50         if (ecode == NULL)
667             {
668 0 0         if (rdepth == 0)
669 0           return match((PCRE_PUCHAR)&rdepth, NULL, NULL, 0, NULL, NULL, 1);
670             else
671             {
672 0           int len = (int)((char *)&rdepth - (char *)eptr);
673 0           return (len > 0)? -len : len;
674             }
675             }
676             #endif /* NO_RECURSE */
677              
678             /* To save space on the stack and in the heap frame, I have doubled up on some
679             of the local variables that are used only in localised parts of the code, but
680             still need to be preserved over recursive calls of match(). These macros define
681             the alternative names that are used. */
682              
683             #define allow_zero cur_is_word
684             #define cbegroup condition
685             #define code_offset codelink
686             #define condassert condition
687             #define matched_once prev_is_word
688             #define foc number
689             #define save_mark data
690              
691             /* These statements are here to stop the compiler complaining about uninitialized
692             variables. */
693              
694             #ifdef SUPPORT_UCP
695             prop_value = 0;
696             prop_fail_result = 0;
697             #endif
698              
699              
700             /* This label is used for tail recursion, which is used in a few cases even
701             when NO_RECURSE is not defined, in order to reduce the amount of stack that is
702             used. Thanks to Ian Taylor for noticing this possibility and sending the
703             original patch. */
704              
705             TAIL_RECURSE:
706              
707             /* OK, now we can get on with the real code of the function. Recursive calls
708             are specified by the macro RMATCH and RRETURN is used to return. When
709             NO_RECURSE is *not* defined, these just turn into a recursive call to match()
710             and a "return", respectively (possibly with some debugging if PCRE_DEBUG is
711             defined). However, RMATCH isn't like a function call because it's quite a
712             complicated macro. It has to be used in one particular way. This shouldn't,
713             however, impact performance when true recursion is being used. */
714              
715             #ifdef SUPPORT_UTF
716             utf = md->utf; /* Local copy of the flag */
717             #else
718 282           utf = FALSE;
719             #endif
720              
721             /* First check that we haven't called match() too many times, or that we
722             haven't exceeded the recursive call limit. */
723              
724 282 50         if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT);
725 282 50         if (rdepth >= md->match_limit_recursion) RRETURN(PCRE_ERROR_RECURSIONLIMIT);
726              
727             /* At the start of a group with an unlimited repeat that may match an empty
728             string, the variable md->match_function_type is set to MATCH_CBEGROUP. It is
729             done this way to save having to use another function argument, which would take
730             up space on the stack. See also MATCH_CONDASSERT below.
731              
732             When MATCH_CBEGROUP is set, add the current subject pointer to the chain of
733             such remembered pointers, to be checked when we hit the closing ket, in order
734             to break infinite loops that match no characters. When match() is called in
735             other circumstances, don't add to the chain. The MATCH_CBEGROUP feature must
736             NOT be used with tail recursion, because the memory block that is used is on
737             the stack, so a new one may be required for each match(). */
738              
739 282 50         if (md->match_function_type == MATCH_CBEGROUP)
740             {
741 0           newptrb.epb_saved_eptr = eptr;
742 0           newptrb.epb_prev = eptrb;
743 0           eptrb = &newptrb;
744 0           md->match_function_type = 0;
745             }
746              
747             /* Now start processing the opcodes. */
748              
749             for (;;)
750             {
751 664           minimize = possessive = FALSE;
752 664           op = *ecode;
753              
754 664           switch(op)
755             {
756             case OP_MARK:
757 0           md->nomatch_mark = ecode + 2;
758 0           md->mark = NULL; /* In case previously set by assertion */
759 0           RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
760             eptrb, RM55);
761 0 0         if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
    0          
    0          
762 0           md->mark == NULL) md->mark = ecode + 2;
763              
764             /* A return of MATCH_SKIP_ARG means that matching failed at SKIP with an
765             argument, and we must check whether that argument matches this MARK's
766             argument. It is passed back in md->start_match_ptr (an overloading of that
767             variable). If it does match, we reset that variable to the current subject
768             position and return MATCH_SKIP. Otherwise, pass back the return code
769             unaltered. */
770              
771 0 0         else if (rrc == MATCH_SKIP_ARG &&
    0          
772 0           STRCMP_UC_UC_TEST(ecode + 2, md->start_match_ptr) == 0)
773             {
774 0           md->start_match_ptr = eptr;
775 0           RRETURN(MATCH_SKIP);
776             }
777 0           RRETURN(rrc);
778              
779             case OP_FAIL:
780 0           RRETURN(MATCH_NOMATCH);
781              
782             case OP_COMMIT:
783 0           RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
784             eptrb, RM52);
785 0 0         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
786 0           RRETURN(MATCH_COMMIT);
787              
788             case OP_PRUNE:
789 0           RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
790             eptrb, RM51);
791 0 0         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
792 0           RRETURN(MATCH_PRUNE);
793              
794             case OP_PRUNE_ARG:
795 0           md->nomatch_mark = ecode + 2;
796 0           md->mark = NULL; /* In case previously set by assertion */
797 0           RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
798             eptrb, RM56);
799 0 0         if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
    0          
    0          
800 0           md->mark == NULL) md->mark = ecode + 2;
801 0 0         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
802 0           RRETURN(MATCH_PRUNE);
803              
804             case OP_SKIP:
805 0           RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
806             eptrb, RM53);
807 0 0         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
808 0           md->start_match_ptr = eptr; /* Pass back current position */
809 0           RRETURN(MATCH_SKIP);
810              
811             /* Note that, for Perl compatibility, SKIP with an argument does NOT set
812             nomatch_mark. When a pattern match ends with a SKIP_ARG for which there was
813             not a matching mark, we have to re-run the match, ignoring the SKIP_ARG
814             that failed and any that precede it (either they also failed, or were not
815             triggered). To do this, we maintain a count of executed SKIP_ARGs. If a
816             SKIP_ARG gets to top level, the match is re-run with md->ignore_skip_arg
817             set to the count of the one that failed. */
818              
819             case OP_SKIP_ARG:
820 0           md->skip_arg_count++;
821 0 0         if (md->skip_arg_count <= md->ignore_skip_arg)
822             {
823 0           ecode += PRIV(OP_lengths)[*ecode] + ecode[1];
824 0           break;
825             }
826 0           RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top, md,
827             eptrb, RM57);
828 0 0         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
829              
830             /* Pass back the current skip name by overloading md->start_match_ptr and
831             returning the special MATCH_SKIP_ARG return code. This will either be
832             caught by a matching MARK, or get to the top, where it causes a rematch
833             with md->ignore_skip_arg set to the value of md->skip_arg_count. */
834              
835 0           md->start_match_ptr = ecode + 2;
836 0           RRETURN(MATCH_SKIP_ARG);
837              
838             /* For THEN (and THEN_ARG) we pass back the address of the opcode, so that
839             the branch in which it occurs can be determined. Overload the start of
840             match pointer to do this. */
841              
842             case OP_THEN:
843 0           RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
844             eptrb, RM54);
845 0 0         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
846 0           md->start_match_ptr = ecode;
847 0           RRETURN(MATCH_THEN);
848              
849             case OP_THEN_ARG:
850 0           md->nomatch_mark = ecode + 2;
851 0           md->mark = NULL; /* In case previously set by assertion */
852 0           RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode] + ecode[1], offset_top,
853             md, eptrb, RM58);
854 0 0         if ((rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) &&
    0          
    0          
855 0           md->mark == NULL) md->mark = ecode + 2;
856 0 0         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
857 0           md->start_match_ptr = ecode;
858 0           RRETURN(MATCH_THEN);
859              
860             /* Handle an atomic group that does not contain any capturing parentheses.
861             This can be handled like an assertion. Prior to 8.13, all atomic groups
862             were handled this way. In 8.13, the code was changed as below for ONCE, so
863             that backups pass through the group and thereby reset captured values.
864             However, this uses a lot more stack, so in 8.20, atomic groups that do not
865             contain any captures generate OP_ONCE_NC, which can be handled in the old,
866             less stack intensive way.
867              
868             Check the alternative branches in turn - the matching won't pass the KET
869             for this kind of subpattern. If any one branch matches, we carry on as at
870             the end of a normal bracket, leaving the subject pointer, but resetting
871             the start-of-match value in case it was changed by \K. */
872              
873             case OP_ONCE_NC:
874 0           prev = ecode;
875 0           saved_eptr = eptr;
876 0           save_mark = md->mark;
877             do
878             {
879 0           RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64);
880 0 0         if (rrc == MATCH_MATCH) /* Note: _not_ MATCH_ACCEPT */
881             {
882 0           mstart = md->start_match_ptr;
883 0           break;
884             }
885 0 0         if (rrc == MATCH_THEN)
886             {
887 0           next = ecode + GET(ecode,1);
888 0 0         if (md->start_match_ptr < next &&
    0          
889 0 0         (*ecode == OP_ALT || *next == OP_ALT))
890 0           rrc = MATCH_NOMATCH;
891             }
892              
893 0 0         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
894 0           ecode += GET(ecode,1);
895 0           md->mark = save_mark;
896             }
897 0 0         while (*ecode == OP_ALT);
898              
899             /* If we hit the end of the group (which could be repeated), fail */
900              
901 0 0         if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH);
    0          
902              
903             /* Continue as from after the group, updating the offsets high water
904             mark, since extracts may have been taken. */
905              
906 0 0         do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
907              
908 0           offset_top = md->end_offset_top;
909 0           eptr = md->end_match_ptr;
910              
911             /* For a non-repeating ket, just continue at this level. This also
912             happens for a repeating ket if no characters were matched in the group.
913             This is the forcible breaking of infinite loops as implemented in Perl
914             5.005. */
915              
916 0 0         if (*ecode == OP_KET || eptr == saved_eptr)
    0          
917             {
918 0           ecode += 1+LINK_SIZE;
919 0           break;
920             }
921              
922             /* The repeating kets try the rest of the pattern or restart from the
923             preceding bracket, in the appropriate order. The second "call" of match()
924             uses tail recursion, to avoid using another stack frame. */
925              
926 0 0         if (*ecode == OP_KETRMIN)
927             {
928 0           RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65);
929 0 0         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
930 0           ecode = prev;
931 0           goto TAIL_RECURSE;
932             }
933             else /* OP_KETRMAX */
934             {
935 0           RMATCH(eptr, prev, offset_top, md, eptrb, RM66);
936 0 0         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
937 0           ecode += 1 + LINK_SIZE;
938 0           goto TAIL_RECURSE;
939             }
940             /* Control never gets here */
941              
942             /* Handle a capturing bracket, other than those that are possessive with an
943             unlimited repeat. If there is space in the offset vector, save the current
944             subject position in the working slot at the top of the vector. We mustn't
945             change the current values of the data slot, because they may be set from a
946             previous iteration of this group, and be referred to by a reference inside
947             the group. A failure to match might occur after the group has succeeded,
948             if something later on doesn't match. For this reason, we need to restore
949             the working value and also the values of the final offsets, in case they
950             were set by a previous iteration of the same bracket.
951              
952             If there isn't enough space in the offset vector, treat this as if it were
953             a non-capturing bracket. Don't worry about setting the flag for the error
954             case here; that is handled in the code for KET. */
955              
956             case OP_CBRA:
957             case OP_SCBRA:
958 10           number = GET2(ecode, 1+LINK_SIZE);
959 10           offset = number << 1;
960              
961             #ifdef PCRE_DEBUG
962             printf("start bracket %d\n", number);
963             printf("subject=");
964             pchars(eptr, 16, TRUE, md);
965             printf("\n");
966             #endif
967              
968 10 50         if (offset < md->offset_max)
969             {
970 0           save_offset1 = md->offset_vector[offset];
971 0           save_offset2 = md->offset_vector[offset+1];
972 0           save_offset3 = md->offset_vector[md->offset_end - number];
973 0           save_capture_last = md->capture_last;
974 0           save_mark = md->mark;
975              
976             DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
977 0           md->offset_vector[md->offset_end - number] =
978 0           (int)(eptr - md->start_subject);
979              
980             for (;;)
981             {
982 0 0         if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
983 0           RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
984             eptrb, RM1);
985 0 0         if (rrc == MATCH_ONCE) break; /* Backing up through an atomic group */
986              
987             /* If we backed up to a THEN, check whether it is within the current
988             branch by comparing the address of the THEN that is passed back with
989             the end of the branch. If it is within the current branch, and the
990             branch is one of two or more alternatives (it either starts or ends
991             with OP_ALT), we have reached the limit of THEN's action, so convert
992             the return code to NOMATCH, which will cause normal backtracking to
993             happen from now on. Otherwise, THEN is passed back to an outer
994             alternative. This implements Perl's treatment of parenthesized groups,
995             where a group not containing | does not affect the current alternative,
996             that is, (X) is NOT the same as (X|(*F)). */
997              
998 0 0         if (rrc == MATCH_THEN)
999             {
1000 0           next = ecode + GET(ecode,1);
1001 0 0         if (md->start_match_ptr < next &&
    0          
1002 0 0         (*ecode == OP_ALT || *next == OP_ALT))
1003 0           rrc = MATCH_NOMATCH;
1004             }
1005              
1006             /* Anything other than NOMATCH is passed back. */
1007              
1008 0 0         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1009 0           md->capture_last = save_capture_last;
1010 0           ecode += GET(ecode, 1);
1011 0           md->mark = save_mark;
1012 0 0         if (*ecode != OP_ALT) break;
1013 0           }
1014              
1015             DPRINTF(("bracket %d failed\n", number));
1016 0           md->offset_vector[offset] = save_offset1;
1017 0           md->offset_vector[offset+1] = save_offset2;
1018 0           md->offset_vector[md->offset_end - number] = save_offset3;
1019              
1020             /* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */
1021              
1022 0           RRETURN(rrc);
1023             }
1024              
1025             /* FALL THROUGH ... Insufficient room for saving captured contents. Treat
1026             as a non-capturing bracket. */
1027              
1028             /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1029             /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1030              
1031             DPRINTF(("insufficient capture room: treat as non-capturing\n"));
1032              
1033             /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1034             /* VVVVVVVVVVVVVVVVVVVVVVVVV */
1035              
1036             /* Non-capturing or atomic group, except for possessive with unlimited
1037             repeat and ONCE group with no captures. Loop for all the alternatives.
1038              
1039             When we get to the final alternative within the brackets, we used to return
1040             the result of a recursive call to match() whatever happened so it was
1041             possible to reduce stack usage by turning this into a tail recursion,
1042             except in the case of a possibly empty group. However, now that there is
1043             the possibility of (*THEN) occurring in the final alternative, this
1044             optimization is no longer always possible.
1045              
1046             We can optimize if we know there are no (*THEN)s in the pattern; at present
1047             this is the best that can be done.
1048              
1049             MATCH_ONCE is returned when the end of an atomic group is successfully
1050             reached, but subsequent matching fails. It passes back up the tree (causing
1051             captured values to be reset) until the original atomic group level is
1052             reached. This is tested by comparing md->once_target with the start of the
1053             group. At this point, the return is converted into MATCH_NOMATCH so that
1054             previous backup points can be taken. */
1055              
1056             case OP_ONCE:
1057             case OP_BRA:
1058             case OP_SBRA:
1059             DPRINTF(("start non-capturing bracket\n"));
1060              
1061             for (;;)
1062             {
1063 110 50         if (op >= OP_SBRA || op == OP_ONCE)
    50          
1064 0           md->match_function_type = MATCH_CBEGROUP;
1065              
1066             /* If this is not a possibly empty group, and there are no (*THEN)s in
1067             the pattern, and this is the final alternative, optimize as described
1068             above. */
1069              
1070 110 50         else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT)
    50          
1071             {
1072 110           ecode += PRIV(OP_lengths)[*ecode];
1073 110           goto TAIL_RECURSE;
1074             }
1075              
1076             /* In all other cases, we have to make another call to match(). */
1077              
1078 0           save_mark = md->mark;
1079 0           save_capture_last = md->capture_last;
1080 0           RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md, eptrb,
1081             RM2);
1082              
1083             /* See comment in the code for capturing groups above about handling
1084             THEN. */
1085              
1086 0 0         if (rrc == MATCH_THEN)
1087             {
1088 0           next = ecode + GET(ecode,1);
1089 0 0         if (md->start_match_ptr < next &&
    0          
1090 0 0         (*ecode == OP_ALT || *next == OP_ALT))
1091 0           rrc = MATCH_NOMATCH;
1092             }
1093              
1094 0 0         if (rrc != MATCH_NOMATCH)
1095             {
1096 0 0         if (rrc == MATCH_ONCE)
1097             {
1098 0           const pcre_uchar *scode = ecode;
1099 0 0         if (*scode != OP_ONCE) /* If not at start, find it */
1100             {
1101 0 0         while (*scode == OP_ALT) scode += GET(scode, 1);
1102 0           scode -= GET(scode, 1);
1103             }
1104 0 0         if (md->once_target == scode) rrc = MATCH_NOMATCH;
1105             }
1106 0           RRETURN(rrc);
1107             }
1108 0           ecode += GET(ecode, 1);
1109 0           md->mark = save_mark;
1110 0 0         if (*ecode != OP_ALT) break;
1111 0           md->capture_last = save_capture_last;
1112 0           }
1113              
1114 0           RRETURN(MATCH_NOMATCH);
1115              
1116             /* Handle possessive capturing brackets with an unlimited repeat. We come
1117             here from BRAZERO with allow_zero set TRUE. The offset_vector values are
1118             handled similarly to the normal case above. However, the matching is
1119             different. The end of these brackets will always be OP_KETRPOS, which
1120             returns MATCH_KETRPOS without going further in the pattern. By this means
1121             we can handle the group by iteration rather than recursion, thereby
1122             reducing the amount of stack needed. */
1123              
1124             case OP_CBRAPOS:
1125             case OP_SCBRAPOS:
1126 0           allow_zero = FALSE;
1127              
1128             POSSESSIVE_CAPTURE:
1129 0           number = GET2(ecode, 1+LINK_SIZE);
1130 0           offset = number << 1;
1131              
1132             #ifdef PCRE_DEBUG
1133             printf("start possessive bracket %d\n", number);
1134             printf("subject=");
1135             pchars(eptr, 16, TRUE, md);
1136             printf("\n");
1137             #endif
1138              
1139 0 0         if (offset >= md->offset_max) goto POSSESSIVE_NON_CAPTURE;
1140              
1141 0           matched_once = FALSE;
1142 0           code_offset = (int)(ecode - md->start_code);
1143              
1144 0           save_offset1 = md->offset_vector[offset];
1145 0           save_offset2 = md->offset_vector[offset+1];
1146 0           save_offset3 = md->offset_vector[md->offset_end - number];
1147 0           save_capture_last = md->capture_last;
1148              
1149             DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
1150              
1151             /* Each time round the loop, save the current subject position for use
1152             when the group matches. For MATCH_MATCH, the group has matched, so we
1153             restart it with a new subject starting position, remembering that we had
1154             at least one match. For MATCH_NOMATCH, carry on with the alternatives, as
1155             usual. If we haven't matched any alternatives in any iteration, check to
1156             see if a previous iteration matched. If so, the group has matched;
1157             continue from afterwards. Otherwise it has failed; restore the previous
1158             capture values before returning NOMATCH. */
1159              
1160             for (;;)
1161             {
1162 0           md->offset_vector[md->offset_end - number] =
1163 0           (int)(eptr - md->start_subject);
1164 0 0         if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1165 0           RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1166             eptrb, RM63);
1167 0 0         if (rrc == MATCH_KETRPOS)
1168             {
1169 0           offset_top = md->end_offset_top;
1170 0           ecode = md->start_code + code_offset;
1171 0           save_capture_last = md->capture_last;
1172 0           matched_once = TRUE;
1173 0           mstart = md->start_match_ptr; /* In case \K changed it */
1174 0 0         if (eptr == md->end_match_ptr) /* Matched an empty string */
1175             {
1176 0 0         do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1177 0           break;
1178             }
1179 0           eptr = md->end_match_ptr;
1180 0           continue;
1181             }
1182              
1183             /* See comment in the code for capturing groups above about handling
1184             THEN. */
1185              
1186 0 0         if (rrc == MATCH_THEN)
1187             {
1188 0           next = ecode + GET(ecode,1);
1189 0 0         if (md->start_match_ptr < next &&
    0          
1190 0 0         (*ecode == OP_ALT || *next == OP_ALT))
1191 0           rrc = MATCH_NOMATCH;
1192             }
1193              
1194 0 0         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1195 0           md->capture_last = save_capture_last;
1196 0           ecode += GET(ecode, 1);
1197 0 0         if (*ecode != OP_ALT) break;
1198 0           }
1199              
1200 0 0         if (!matched_once)
1201             {
1202 0           md->offset_vector[offset] = save_offset1;
1203 0           md->offset_vector[offset+1] = save_offset2;
1204 0           md->offset_vector[md->offset_end - number] = save_offset3;
1205             }
1206              
1207 0 0         if (allow_zero || matched_once)
    0          
1208             {
1209 0           ecode += 1 + LINK_SIZE;
1210 0           break;
1211             }
1212              
1213 0           RRETURN(MATCH_NOMATCH);
1214              
1215             /* Non-capturing possessive bracket with unlimited repeat. We come here
1216             from BRAZERO with allow_zero = TRUE. The code is similar to the above,
1217             without the capturing complication. It is written out separately for speed
1218             and cleanliness. */
1219              
1220             case OP_BRAPOS:
1221             case OP_SBRAPOS:
1222 0           allow_zero = FALSE;
1223              
1224             POSSESSIVE_NON_CAPTURE:
1225 0           matched_once = FALSE;
1226 0           code_offset = (int)(ecode - md->start_code);
1227 0           save_capture_last = md->capture_last;
1228              
1229             for (;;)
1230             {
1231 0 0         if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP;
1232 0           RMATCH(eptr, ecode + PRIV(OP_lengths)[*ecode], offset_top, md,
1233             eptrb, RM48);
1234 0 0         if (rrc == MATCH_KETRPOS)
1235             {
1236 0           offset_top = md->end_offset_top;
1237 0           ecode = md->start_code + code_offset;
1238 0           matched_once = TRUE;
1239 0           mstart = md->start_match_ptr; /* In case \K reset it */
1240 0 0         if (eptr == md->end_match_ptr) /* Matched an empty string */
1241             {
1242 0 0         do ecode += GET(ecode, 1); while (*ecode == OP_ALT);
1243 0           break;
1244             }
1245 0           eptr = md->end_match_ptr;
1246 0           continue;
1247             }
1248              
1249             /* See comment in the code for capturing groups above about handling
1250             THEN. */
1251              
1252 0 0         if (rrc == MATCH_THEN)
1253             {
1254 0           next = ecode + GET(ecode,1);
1255 0 0         if (md->start_match_ptr < next &&
    0          
1256 0 0         (*ecode == OP_ALT || *next == OP_ALT))
1257 0           rrc = MATCH_NOMATCH;
1258             }
1259              
1260 0 0         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1261 0           ecode += GET(ecode, 1);
1262 0 0         if (*ecode != OP_ALT) break;
1263 0           md->capture_last = save_capture_last;
1264 0           }
1265              
1266 0 0         if (matched_once || allow_zero)
    0          
1267             {
1268 0           ecode += 1 + LINK_SIZE;
1269 0           break;
1270             }
1271 0           RRETURN(MATCH_NOMATCH);
1272              
1273             /* Control never reaches here. */
1274              
1275             /* Conditional group: compilation checked that there are no more than two
1276             branches. If the condition is false, skipping the first branch takes us
1277             past the end of the item if there is only one branch, but that's exactly
1278             what we want. */
1279              
1280             case OP_COND:
1281             case OP_SCOND:
1282              
1283             /* The variable codelink will be added to ecode when the condition is
1284             false, to get to the second branch. Setting it to the offset to the ALT
1285             or KET, then incrementing ecode achieves this effect. We now have ecode
1286             pointing to the condition or callout. */
1287              
1288 0           codelink = GET(ecode, 1); /* Offset to the second branch */
1289 0           ecode += 1 + LINK_SIZE; /* From this opcode */
1290              
1291             /* Because of the way auto-callout works during compile, a callout item is
1292             inserted between OP_COND and an assertion condition. */
1293              
1294 0 0         if (*ecode == OP_CALLOUT)
1295             {
1296 0 0         if (PUBL(callout) != NULL)
1297             {
1298             PUBL(callout_block) cb;
1299 0           cb.version = 2; /* Version 2 of the callout block */
1300 0           cb.callout_number = ecode[1];
1301 0           cb.offset_vector = md->offset_vector;
1302             #if defined COMPILE_PCRE8
1303 0           cb.subject = (PCRE_SPTR)md->start_subject;
1304             #elif defined COMPILE_PCRE16
1305             cb.subject = (PCRE_SPTR16)md->start_subject;
1306             #elif defined COMPILE_PCRE32
1307             cb.subject = (PCRE_SPTR32)md->start_subject;
1308             #endif
1309 0           cb.subject_length = (int)(md->end_subject - md->start_subject);
1310 0           cb.start_match = (int)(mstart - md->start_subject);
1311 0           cb.current_position = (int)(eptr - md->start_subject);
1312 0           cb.pattern_position = GET(ecode, 2);
1313 0           cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1314 0           cb.capture_top = offset_top/2;
1315 0           cb.capture_last = md->capture_last & CAPLMASK;
1316             /* Internal change requires this for API compatibility. */
1317 0 0         if (cb.capture_last == 0) cb.capture_last = -1;
1318 0           cb.callout_data = md->callout_data;
1319 0           cb.mark = md->nomatch_mark;
1320 0 0         if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1321 0 0         if (rrc < 0) RRETURN(rrc);
1322             }
1323              
1324             /* Advance ecode past the callout, so it now points to the condition. We
1325             must adjust codelink so that the value of ecode+codelink is unchanged. */
1326              
1327 0           ecode += PRIV(OP_lengths)[OP_CALLOUT];
1328 0           codelink -= PRIV(OP_lengths)[OP_CALLOUT];
1329             }
1330              
1331             /* Test the various possible conditions */
1332              
1333 0           condition = FALSE;
1334 0           switch(condcode = *ecode)
1335             {
1336             case OP_RREF: /* Numbered group recursion test */
1337 0 0         if (md->recursive != NULL) /* Not recursing => FALSE */
1338             {
1339 0           unsigned int recno = GET2(ecode, 1); /* Recursion group number*/
1340 0 0         condition = (recno == RREF_ANY || recno == md->recursive->group_num);
    0          
1341             }
1342 0           break;
1343              
1344             case OP_DNRREF: /* Duplicate named group recursion test */
1345 0 0         if (md->recursive != NULL)
1346             {
1347 0           int count = GET2(ecode, 1 + IMM2_SIZE);
1348 0           pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
1349 0 0         while (count-- > 0)
1350             {
1351 0           unsigned int recno = GET2(slot, 0);
1352 0           condition = recno == md->recursive->group_num;
1353 0 0         if (condition) break;
1354 0           slot += md->name_entry_size;
1355             }
1356             }
1357 0           break;
1358              
1359             case OP_CREF: /* Numbered group used test */
1360 0           offset = GET2(ecode, 1) << 1; /* Doubled ref number */
1361 0 0         condition = offset < offset_top && md->offset_vector[offset] >= 0;
    0          
1362 0           break;
1363              
1364             case OP_DNCREF: /* Duplicate named group used test */
1365             {
1366 0           int count = GET2(ecode, 1 + IMM2_SIZE);
1367 0           pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
1368 0 0         while (count-- > 0)
1369             {
1370 0           offset = GET2(slot, 0) << 1;
1371 0 0         condition = offset < offset_top && md->offset_vector[offset] >= 0;
    0          
1372 0 0         if (condition) break;
1373 0           slot += md->name_entry_size;
1374             }
1375             }
1376 0           break;
1377              
1378             case OP_DEF: /* DEFINE - always false */
1379             case OP_FAIL: /* From optimized (?!) condition */
1380 0           break;
1381              
1382             /* The condition is an assertion. Call match() to evaluate it - setting
1383             md->match_function_type to MATCH_CONDASSERT causes it to stop at the end
1384             of an assertion. */
1385              
1386             default:
1387 0           md->match_function_type = MATCH_CONDASSERT;
1388 0           RMATCH(eptr, ecode, offset_top, md, NULL, RM3);
1389 0 0         if (rrc == MATCH_MATCH)
1390             {
1391 0 0         if (md->end_offset_top > offset_top)
1392 0           offset_top = md->end_offset_top; /* Captures may have happened */
1393 0           condition = TRUE;
1394              
1395             /* Advance ecode past the assertion to the start of the first branch,
1396             but adjust it so that the general choosing code below works. If the
1397             assertion has a quantifier that allows zero repeats we must skip over
1398             the BRAZERO. This is a lunatic thing to do, but somebody did! */
1399              
1400 0 0         if (*ecode == OP_BRAZERO) ecode++;
1401 0           ecode += GET(ecode, 1);
1402 0 0         while (*ecode == OP_ALT) ecode += GET(ecode, 1);
1403 0           ecode += 1 + LINK_SIZE - PRIV(OP_lengths)[condcode];
1404             }
1405              
1406             /* PCRE doesn't allow the effect of (*THEN) to escape beyond an
1407             assertion; it is therefore treated as NOMATCH. Any other return is an
1408             error. */
1409              
1410 0 0         else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN)
    0          
1411             {
1412 0           RRETURN(rrc); /* Need braces because of following else */
1413             }
1414 0           break;
1415             }
1416              
1417             /* Choose branch according to the condition */
1418              
1419 0 0         ecode += condition? PRIV(OP_lengths)[condcode] : codelink;
1420              
1421             /* We are now at the branch that is to be obeyed. As there is only one, we
1422             can use tail recursion to avoid using another stack frame, except when
1423             there is unlimited repeat of a possibly empty group. In the latter case, a
1424             recursive call to match() is always required, unless the second alternative
1425             doesn't exist, in which case we can just plough on. Note that, for
1426             compatibility with Perl, the | in a conditional group is NOT treated as
1427             creating two alternatives. If a THEN is encountered in the branch, it
1428             propagates out to the enclosing alternative (unless nested in a deeper set
1429             of alternatives, of course). */
1430              
1431 0 0         if (condition || ecode[-(1+LINK_SIZE)] == OP_ALT)
    0          
1432             {
1433 0 0         if (op != OP_SCOND)
1434             {
1435 0           goto TAIL_RECURSE;
1436             }
1437              
1438 0           md->match_function_type = MATCH_CBEGROUP;
1439 0           RMATCH(eptr, ecode, offset_top, md, eptrb, RM49);
1440 0           RRETURN(rrc);
1441             }
1442              
1443             /* Condition false & no alternative; continue after the group. */
1444              
1445             else
1446             {
1447             }
1448 0           break;
1449              
1450              
1451             /* Before OP_ACCEPT there may be any number of OP_CLOSE opcodes,
1452             to close any currently open capturing brackets. */
1453              
1454             case OP_CLOSE:
1455 0           number = GET2(ecode, 1); /* Must be less than 65536 */
1456 0           offset = number << 1;
1457              
1458             #ifdef PCRE_DEBUG
1459             printf("end bracket %d at *ACCEPT", number);
1460             printf("\n");
1461             #endif
1462              
1463 0           md->capture_last = (md->capture_last & OVFLMASK) | number;
1464 0 0         if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
1465             {
1466 0           md->offset_vector[offset] =
1467 0           md->offset_vector[md->offset_end - number];
1468 0           md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1469              
1470             /* If this group is at or above the current highwater mark, ensure that
1471             any groups between the current high water mark and this group are marked
1472             unset and then update the high water mark. */
1473              
1474 0 0         if (offset >= offset_top)
1475             {
1476 0           register int *iptr = md->offset_vector + offset_top;
1477 0           register int *iend = md->offset_vector + offset;
1478 0 0         while (iptr < iend) *iptr++ = -1;
1479 0           offset_top = offset + 2;
1480             }
1481             }
1482 0           ecode += 1 + IMM2_SIZE;
1483 0           break;
1484              
1485              
1486             /* End of the pattern, either real or forced. */
1487              
1488             case OP_END:
1489             case OP_ACCEPT:
1490             case OP_ASSERT_ACCEPT:
1491              
1492             /* If we have matched an empty string, fail if not in an assertion and not
1493             in a recursion if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART
1494             is set and we have matched at the start of the subject. In both cases,
1495             backtracking will then try other alternatives, if any. */
1496              
1497 14 50         if (eptr == mstart && op != OP_ASSERT_ACCEPT &&
    0          
    0          
1498 0 0         md->recursive == NULL &&
1499 0 0         (md->notempty ||
1500 0 0         (md->notempty_atstart &&
1501 0           mstart == md->start_subject + md->start_offset)))
1502 0           RRETURN(MATCH_NOMATCH);
1503              
1504             /* Otherwise, we have a match. */
1505              
1506 14           md->end_match_ptr = eptr; /* Record where we ended */
1507 14           md->end_offset_top = offset_top; /* and how many extracts were taken */
1508 14           md->start_match_ptr = mstart; /* and the start (\K can modify) */
1509              
1510             /* For some reason, the macros don't work properly if an expression is
1511             given as the argument to RRETURN when the heap is in use. */
1512              
1513 14 50         rrc = (op == OP_END)? MATCH_MATCH : MATCH_ACCEPT;
1514 14           RRETURN(rrc);
1515              
1516             /* Assertion brackets. Check the alternative branches in turn - the
1517             matching won't pass the KET for an assertion. If any one branch matches,
1518             the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
1519             start of each branch to move the current point backwards, so the code at
1520             this level is identical to the lookahead case. When the assertion is part
1521             of a condition, we want to return immediately afterwards. The caller of
1522             this incarnation of the match() function will have set MATCH_CONDASSERT in
1523             md->match_function_type, and one of these opcodes will be the first opcode
1524             that is processed. We use a local variable that is preserved over calls to
1525             match() to remember this case. */
1526              
1527             case OP_ASSERT:
1528             case OP_ASSERTBACK:
1529 0           save_mark = md->mark;
1530 0 0         if (md->match_function_type == MATCH_CONDASSERT)
1531             {
1532 0           condassert = TRUE;
1533 0           md->match_function_type = 0;
1534             }
1535 0           else condassert = FALSE;
1536              
1537             /* Loop for each branch */
1538              
1539             do
1540             {
1541 0           RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM4);
1542              
1543             /* A match means that the assertion is true; break out of the loop
1544             that matches its alternatives. */
1545              
1546 0 0         if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
    0          
1547             {
1548 0           mstart = md->start_match_ptr; /* In case \K reset it */
1549 0           break;
1550             }
1551              
1552             /* If not matched, restore the previous mark setting. */
1553              
1554 0           md->mark = save_mark;
1555              
1556             /* See comment in the code for capturing groups above about handling
1557             THEN. */
1558              
1559 0 0         if (rrc == MATCH_THEN)
1560             {
1561 0           next = ecode + GET(ecode,1);
1562 0 0         if (md->start_match_ptr < next &&
    0          
1563 0 0         (*ecode == OP_ALT || *next == OP_ALT))
1564 0           rrc = MATCH_NOMATCH;
1565             }
1566              
1567             /* Anything other than NOMATCH causes the entire assertion to fail,
1568             passing back the return code. This includes COMMIT, SKIP, PRUNE and an
1569             uncaptured THEN, which means they take their normal effect. This
1570             consistent approach does not always have exactly the same effect as in
1571             Perl. */
1572              
1573 0 0         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1574 0           ecode += GET(ecode, 1);
1575             }
1576 0 0         while (*ecode == OP_ALT); /* Continue for next alternative */
1577              
1578             /* If we have tried all the alternative branches, the assertion has
1579             failed. If not, we broke out after a match. */
1580              
1581 0 0         if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH);
1582              
1583             /* If checking an assertion for a condition, return MATCH_MATCH. */
1584              
1585 0 0         if (condassert) RRETURN(MATCH_MATCH);
1586              
1587             /* Continue from after a successful assertion, updating the offsets high
1588             water mark, since extracts may have been taken during the assertion. */
1589              
1590 0 0         do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1591 0           ecode += 1 + LINK_SIZE;
1592 0           offset_top = md->end_offset_top;
1593 0           continue;
1594              
1595             /* Negative assertion: all branches must fail to match for the assertion to
1596             succeed. */
1597              
1598             case OP_ASSERT_NOT:
1599             case OP_ASSERTBACK_NOT:
1600 0           save_mark = md->mark;
1601 0 0         if (md->match_function_type == MATCH_CONDASSERT)
1602             {
1603 0           condassert = TRUE;
1604 0           md->match_function_type = 0;
1605             }
1606 0           else condassert = FALSE;
1607              
1608             /* Loop for each alternative branch. */
1609              
1610             do
1611             {
1612 0           RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM5);
1613 0           md->mark = save_mark; /* Always restore the mark setting */
1614              
1615 0           switch(rrc)
1616             {
1617             case MATCH_MATCH: /* A successful match means */
1618             case MATCH_ACCEPT: /* the assertion has failed. */
1619 0           RRETURN(MATCH_NOMATCH);
1620              
1621             case MATCH_NOMATCH: /* Carry on with next branch */
1622 0           break;
1623              
1624             /* See comment in the code for capturing groups above about handling
1625             THEN. */
1626              
1627             case MATCH_THEN:
1628 0           next = ecode + GET(ecode,1);
1629 0 0         if (md->start_match_ptr < next &&
    0          
1630 0 0         (*ecode == OP_ALT || *next == OP_ALT))
1631             {
1632 0           rrc = MATCH_NOMATCH;
1633 0           break;
1634             }
1635             /* Otherwise fall through. */
1636              
1637             /* COMMIT, SKIP, PRUNE, and an uncaptured THEN cause the whole
1638             assertion to fail to match, without considering any more alternatives.
1639             Failing to match means the assertion is true. This is a consistent
1640             approach, but does not always have the same effect as in Perl. */
1641              
1642             case MATCH_COMMIT:
1643             case MATCH_SKIP:
1644             case MATCH_SKIP_ARG:
1645             case MATCH_PRUNE:
1646 0 0         do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1647 0           goto NEG_ASSERT_TRUE; /* Break out of alternation loop */
1648              
1649             /* Anything else is an error */
1650              
1651             default:
1652 0           RRETURN(rrc);
1653             }
1654              
1655             /* Continue with next branch */
1656              
1657 0           ecode += GET(ecode,1);
1658             }
1659 0 0         while (*ecode == OP_ALT);
1660              
1661             /* All branches in the assertion failed to match. */
1662              
1663             NEG_ASSERT_TRUE:
1664 0 0         if (condassert) RRETURN(MATCH_MATCH); /* Condition assertion */
1665 0           ecode += 1 + LINK_SIZE; /* Continue with current branch */
1666 0           continue;
1667              
1668             /* Move the subject pointer back. This occurs only at the start of
1669             each branch of a lookbehind assertion. If we are too close to the start to
1670             move back, this match function fails. When working with UTF-8 we move
1671             back a number of characters, not bytes. */
1672              
1673             case OP_REVERSE:
1674             #ifdef SUPPORT_UTF
1675             if (utf)
1676             {
1677             i = GET(ecode, 1);
1678             while (i-- > 0)
1679             {
1680             eptr--;
1681             if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1682             BACKCHAR(eptr);
1683             }
1684             }
1685             else
1686             #endif
1687              
1688             /* No UTF-8 support, or not in UTF-8 mode: count is byte count */
1689              
1690             {
1691 0           eptr -= GET(ecode, 1);
1692 0 0         if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH);
1693             }
1694              
1695             /* Save the earliest consulted character, then skip to next op code */
1696              
1697 0 0         if (eptr < md->start_used_ptr) md->start_used_ptr = eptr;
1698 0           ecode += 1 + LINK_SIZE;
1699 0           break;
1700              
1701             /* The callout item calls an external function, if one is provided, passing
1702             details of the match so far. This is mainly for debugging, though the
1703             function is able to force a failure. */
1704              
1705             case OP_CALLOUT:
1706 0 0         if (PUBL(callout) != NULL)
1707             {
1708             PUBL(callout_block) cb;
1709 0           cb.version = 2; /* Version 2 of the callout block */
1710 0           cb.callout_number = ecode[1];
1711 0           cb.offset_vector = md->offset_vector;
1712             #if defined COMPILE_PCRE8
1713 0           cb.subject = (PCRE_SPTR)md->start_subject;
1714             #elif defined COMPILE_PCRE16
1715             cb.subject = (PCRE_SPTR16)md->start_subject;
1716             #elif defined COMPILE_PCRE32
1717             cb.subject = (PCRE_SPTR32)md->start_subject;
1718             #endif
1719 0           cb.subject_length = (int)(md->end_subject - md->start_subject);
1720 0           cb.start_match = (int)(mstart - md->start_subject);
1721 0           cb.current_position = (int)(eptr - md->start_subject);
1722 0           cb.pattern_position = GET(ecode, 2);
1723 0           cb.next_item_length = GET(ecode, 2 + LINK_SIZE);
1724 0           cb.capture_top = offset_top/2;
1725 0           cb.capture_last = md->capture_last & CAPLMASK;
1726             /* Internal change requires this for API compatibility. */
1727 0 0         if (cb.capture_last == 0) cb.capture_last = -1;
1728 0           cb.callout_data = md->callout_data;
1729 0           cb.mark = md->nomatch_mark;
1730 0 0         if ((rrc = (*PUBL(callout))(&cb)) > 0) RRETURN(MATCH_NOMATCH);
1731 0 0         if (rrc < 0) RRETURN(rrc);
1732             }
1733 0           ecode += 2 + 2*LINK_SIZE;
1734 0           break;
1735              
1736             /* Recursion either matches the current regex, or some subexpression. The
1737             offset data is the offset to the starting bracket from the start of the
1738             whole pattern. (This is so that it works from duplicated subpatterns.)
1739              
1740             The state of the capturing groups is preserved over recursion, and
1741             re-instated afterwards. We don't know how many are started and not yet
1742             finished (offset_top records the completed total) so we just have to save
1743             all the potential data. There may be up to 65535 such values, which is too
1744             large to put on the stack, but using malloc for small numbers seems
1745             expensive. As a compromise, the stack is used when there are no more than
1746             REC_STACK_SAVE_MAX values to store; otherwise malloc is used.
1747              
1748             There are also other values that have to be saved. We use a chained
1749             sequence of blocks that actually live on the stack. Thanks to Robin Houston
1750             for the original version of this logic. It has, however, been hacked around
1751             a lot, so he is not to blame for the current way it works. */
1752              
1753             case OP_RECURSE:
1754             {
1755             recursion_info *ri;
1756             unsigned int recno;
1757              
1758 0           callpat = md->start_code + GET(ecode, 1);
1759 0 0         recno = (callpat == md->start_code)? 0 :
1760 0           GET2(callpat, 1 + LINK_SIZE);
1761              
1762             /* Check for repeating a recursion without advancing the subject pointer.
1763             This should catch convoluted mutual recursions. (Some simple cases are
1764             caught at compile time.) */
1765              
1766 0 0         for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
1767 0 0         if (recno == ri->group_num && eptr == ri->subject_position)
    0          
1768 0           RRETURN(PCRE_ERROR_RECURSELOOP);
1769              
1770             /* Add to "recursing stack" */
1771              
1772 0           new_recursive.group_num = recno;
1773 0           new_recursive.saved_capture_last = md->capture_last;
1774 0           new_recursive.subject_position = eptr;
1775 0           new_recursive.prevrec = md->recursive;
1776 0           md->recursive = &new_recursive;
1777              
1778             /* Where to continue from afterwards */
1779              
1780 0           ecode += 1 + LINK_SIZE;
1781              
1782             /* Now save the offset data */
1783              
1784 0           new_recursive.saved_max = md->offset_end;
1785 0 0         if (new_recursive.saved_max <= REC_STACK_SAVE_MAX)
1786 0           new_recursive.offset_save = stacksave;
1787             else
1788             {
1789 0           new_recursive.offset_save =
1790 0           (int *)(PUBL(malloc))(new_recursive.saved_max * sizeof(int));
1791 0 0         if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY);
1792             }
1793 0           memcpy(new_recursive.offset_save, md->offset_vector,
1794 0           new_recursive.saved_max * sizeof(int));
1795              
1796             /* OK, now we can do the recursion. After processing each alternative,
1797             restore the offset data and the last captured value. If there were nested
1798             recursions, md->recursive might be changed, so reset it before looping.
1799             */
1800              
1801             DPRINTF(("Recursing into group %d\n", new_recursive.group_num));
1802 0           cbegroup = (*callpat >= OP_SBRA);
1803             do
1804             {
1805 0 0         if (cbegroup) md->match_function_type = MATCH_CBEGROUP;
1806 0           RMATCH(eptr, callpat + PRIV(OP_lengths)[*callpat], offset_top,
1807             md, eptrb, RM6);
1808 0           memcpy(md->offset_vector, new_recursive.offset_save,
1809 0           new_recursive.saved_max * sizeof(int));
1810 0           md->capture_last = new_recursive.saved_capture_last;
1811 0           md->recursive = new_recursive.prevrec;
1812 0 0         if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT)
    0          
1813             {
1814             DPRINTF(("Recursion matched\n"));
1815 0 0         if (new_recursive.offset_save != stacksave)
1816 0           (PUBL(free))(new_recursive.offset_save);
1817              
1818             /* Set where we got to in the subject, and reset the start in case
1819             it was changed by \K. This *is* propagated back out of a recursion,
1820             for Perl compatibility. */
1821              
1822 0           eptr = md->end_match_ptr;
1823 0           mstart = md->start_match_ptr;
1824 0           goto RECURSION_MATCHED; /* Exit loop; end processing */
1825             }
1826              
1827             /* PCRE does not allow THEN, SKIP, PRUNE or COMMIT to escape beyond a
1828             recursion; they cause a NOMATCH for the entire recursion. These codes
1829             are defined in a range that can be tested for. */
1830              
1831 0 0         if (rrc >= MATCH_BACKTRACK_MIN && rrc <= MATCH_BACKTRACK_MAX)
    0          
1832             {
1833 0 0         if (new_recursive.offset_save != stacksave)
1834 0           (PUBL(free))(new_recursive.offset_save);
1835 0           RRETURN(MATCH_NOMATCH);
1836             }
1837              
1838             /* Any return code other than NOMATCH is an error. */
1839              
1840 0 0         if (rrc != MATCH_NOMATCH)
1841             {
1842             DPRINTF(("Recursion gave error %d\n", rrc));
1843 0 0         if (new_recursive.offset_save != stacksave)
1844 0           (PUBL(free))(new_recursive.offset_save);
1845 0           RRETURN(rrc);
1846             }
1847              
1848 0           md->recursive = &new_recursive;
1849 0           callpat += GET(callpat, 1);
1850             }
1851 0 0         while (*callpat == OP_ALT);
1852              
1853             DPRINTF(("Recursion didn't match\n"));
1854 0           md->recursive = new_recursive.prevrec;
1855 0 0         if (new_recursive.offset_save != stacksave)
1856 0           (PUBL(free))(new_recursive.offset_save);
1857 0           RRETURN(MATCH_NOMATCH);
1858             }
1859              
1860             RECURSION_MATCHED:
1861 0           break;
1862              
1863             /* An alternation is the end of a branch; scan along to find the end of the
1864             bracketed group and go to there. */
1865              
1866             case OP_ALT:
1867 0 0         do ecode += GET(ecode,1); while (*ecode == OP_ALT);
1868 0           break;
1869              
1870             /* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group,
1871             indicating that it may occur zero times. It may repeat infinitely, or not
1872             at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets
1873             with fixed upper repeat limits are compiled as a number of copies, with the
1874             optional ones preceded by BRAZERO or BRAMINZERO. */
1875              
1876             case OP_BRAZERO:
1877 10           next = ecode + 1;
1878 10           RMATCH(eptr, next, offset_top, md, eptrb, RM10);
1879 10 100         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1880 7 50         do next += GET(next, 1); while (*next == OP_ALT);
1881 7           ecode = next + 1 + LINK_SIZE;
1882 7           break;
1883              
1884             case OP_BRAMINZERO:
1885 0           next = ecode + 1;
1886 0 0         do next += GET(next, 1); while (*next == OP_ALT);
1887 0           RMATCH(eptr, next + 1+LINK_SIZE, offset_top, md, eptrb, RM11);
1888 0 0         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
1889 0           ecode++;
1890 0           break;
1891              
1892             case OP_SKIPZERO:
1893 0           next = ecode+1;
1894 0 0         do next += GET(next,1); while (*next == OP_ALT);
1895 0           ecode = next + 1 + LINK_SIZE;
1896 0           break;
1897              
1898             /* BRAPOSZERO occurs before a possessive bracket group. Don't do anything
1899             here; just jump to the group, with allow_zero set TRUE. */
1900              
1901             case OP_BRAPOSZERO:
1902 0           op = *(++ecode);
1903 0           allow_zero = TRUE;
1904 0 0         if (op == OP_CBRAPOS || op == OP_SCBRAPOS) goto POSSESSIVE_CAPTURE;
    0          
1905 0           goto POSSESSIVE_NON_CAPTURE;
1906              
1907             /* End of a group, repeated or non-repeating. */
1908              
1909             case OP_KET:
1910             case OP_KETRMIN:
1911             case OP_KETRMAX:
1912             case OP_KETRPOS:
1913 17           prev = ecode - GET(ecode, 1);
1914              
1915             /* If this was a group that remembered the subject start, in order to break
1916             infinite repeats of empty string matches, retrieve the subject start from
1917             the chain. Otherwise, set it NULL. */
1918              
1919 17 50         if (*prev >= OP_SBRA || *prev == OP_ONCE)
    50          
1920             {
1921 0           saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */
1922 0           eptrb = eptrb->epb_prev; /* Backup to previous group */
1923             }
1924 17           else saved_eptr = NULL;
1925              
1926             /* If we are at the end of an assertion group or a non-capturing atomic
1927             group, stop matching and return MATCH_MATCH, but record the current high
1928             water mark for use by positive assertions. We also need to record the match
1929             start in case it was changed by \K. */
1930              
1931 17 50         if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) ||
    50          
    50          
1932 17           *prev == OP_ONCE_NC)
1933             {
1934 0           md->end_match_ptr = eptr; /* For ONCE_NC */
1935 0           md->end_offset_top = offset_top;
1936 0           md->start_match_ptr = mstart;
1937 0           RRETURN(MATCH_MATCH); /* Sets md->mark */
1938             }
1939              
1940             /* For capturing groups we have to check the group number back at the start
1941             and if necessary complete handling an extraction by setting the offsets and
1942             bumping the high water mark. Whole-pattern recursion is coded as a recurse
1943             into group 0, so it won't be picked up here. Instead, we catch it when the
1944             OP_END is reached. Other recursion is handled here. We just have to record
1945             the current subject position and start match pointer and give a MATCH
1946             return. */
1947              
1948 17 100         if (*prev == OP_CBRA || *prev == OP_SCBRA ||
    50          
    50          
1949 14 50         *prev == OP_CBRAPOS || *prev == OP_SCBRAPOS)
1950             {
1951 3           number = GET2(prev, 1+LINK_SIZE);
1952 3           offset = number << 1;
1953              
1954             #ifdef PCRE_DEBUG
1955             printf("end bracket %d", number);
1956             printf("\n");
1957             #endif
1958              
1959             /* Handle a recursively called group. */
1960              
1961 3 50         if (md->recursive != NULL && md->recursive->group_num == number)
    0          
1962             {
1963 0           md->end_match_ptr = eptr;
1964 0           md->start_match_ptr = mstart;
1965 0           RRETURN(MATCH_MATCH);
1966             }
1967              
1968             /* Deal with capturing */
1969              
1970 3           md->capture_last = (md->capture_last & OVFLMASK) | number;
1971 3 50         if (offset >= md->offset_max) md->capture_last |= OVFLBIT; else
1972             {
1973             /* If offset is greater than offset_top, it means that we are
1974             "skipping" a capturing group, and that group's offsets must be marked
1975             unset. In earlier versions of PCRE, all the offsets were unset at the
1976             start of matching, but this doesn't work because atomic groups and
1977             assertions can cause a value to be set that should later be unset.
1978             Example: matching /(?>(a))b|(a)c/ against "ac". This sets group 1 as
1979             part of the atomic group, but this is not on the final matching path,
1980             so must be unset when 2 is set. (If there is no group 2, there is no
1981             problem, because offset_top will then be 2, indicating no capture.) */
1982              
1983 0 0         if (offset > offset_top)
1984             {
1985 0           register int *iptr = md->offset_vector + offset_top;
1986 0           register int *iend = md->offset_vector + offset;
1987 0 0         while (iptr < iend) *iptr++ = -1;
1988             }
1989              
1990             /* Now make the extraction */
1991              
1992 0           md->offset_vector[offset] =
1993 0           md->offset_vector[md->offset_end - number];
1994 0           md->offset_vector[offset+1] = (int)(eptr - md->start_subject);
1995 0 0         if (offset_top <= offset) offset_top = offset + 2;
1996             }
1997             }
1998              
1999             /* OP_KETRPOS is a possessive repeating ket. Remember the current position,
2000             and return the MATCH_KETRPOS. This makes it possible to do the repeats one
2001             at a time from the outer level, thus saving stack. This must precede the
2002             empty string test - in this case that test is done at the outer level. */
2003              
2004 17 50         if (*ecode == OP_KETRPOS)
2005             {
2006 0           md->start_match_ptr = mstart; /* In case \K reset it */
2007 0           md->end_match_ptr = eptr;
2008 0           md->end_offset_top = offset_top;
2009 0           RRETURN(MATCH_KETRPOS);
2010             }
2011              
2012             /* For an ordinary non-repeating ket, just continue at this level. This
2013             also happens for a repeating ket if no characters were matched in the
2014             group. This is the forcible breaking of infinite loops as implemented in
2015             Perl 5.005. For a non-repeating atomic group that includes captures,
2016             establish a backup point by processing the rest of the pattern at a lower
2017             level. If this results in a NOMATCH return, pass MATCH_ONCE back to the
2018             original OP_ONCE level, thereby bypassing intermediate backup points, but
2019             resetting any captures that happened along the way. */
2020              
2021 17 50         if (*ecode == OP_KET || eptr == saved_eptr)
    0          
2022             {
2023 17 50         if (*prev == OP_ONCE)
2024             {
2025 0           RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12);
2026 0 0         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2027 0           md->once_target = prev; /* Level at which to change to MATCH_NOMATCH */
2028 0           RRETURN(MATCH_ONCE);
2029             }
2030 17           ecode += 1 + LINK_SIZE; /* Carry on at this level */
2031 17           break;
2032             }
2033              
2034             /* The normal repeating kets try the rest of the pattern or restart from
2035             the preceding bracket, in the appropriate order. In the second case, we can
2036             use tail recursion to avoid using another stack frame, unless we have an
2037             atomic group or an unlimited repeat of a group that can match an empty
2038             string. */
2039              
2040 0 0         if (*ecode == OP_KETRMIN)
2041             {
2042 0           RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7);
2043 0 0         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2044 0 0         if (*prev == OP_ONCE)
2045             {
2046 0           RMATCH(eptr, prev, offset_top, md, eptrb, RM8);
2047 0 0         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2048 0           md->once_target = prev; /* Level at which to change to MATCH_NOMATCH */
2049 0           RRETURN(MATCH_ONCE);
2050             }
2051 0 0         if (*prev >= OP_SBRA) /* Could match an empty string */
2052             {
2053 0           RMATCH(eptr, prev, offset_top, md, eptrb, RM50);
2054 0           RRETURN(rrc);
2055             }
2056 0           ecode = prev;
2057 0           goto TAIL_RECURSE;
2058             }
2059             else /* OP_KETRMAX */
2060             {
2061 0           RMATCH(eptr, prev, offset_top, md, eptrb, RM13);
2062 0 0         if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH;
    0          
2063 0 0         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2064 0 0         if (*prev == OP_ONCE)
2065             {
2066 0           RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM9);
2067 0 0         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2068 0           md->once_target = prev;
2069 0           RRETURN(MATCH_ONCE);
2070             }
2071 0           ecode += 1 + LINK_SIZE;
2072 0           goto TAIL_RECURSE;
2073             }
2074             /* Control never gets here */
2075              
2076             /* Not multiline mode: start of subject assertion, unless notbol. */
2077              
2078             case OP_CIRC:
2079 32 50         if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
    0          
2080              
2081             /* Start of subject assertion */
2082              
2083             case OP_SOD:
2084 32 50         if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH);
2085 32           ecode++;
2086 32           break;
2087              
2088             /* Multiline mode: start of subject unless notbol, or after any newline. */
2089              
2090             case OP_CIRCM:
2091 0 0         if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH);
    0          
2092 0 0         if (eptr != md->start_subject &&
    0          
    0          
2093 0 0         (eptr == md->end_subject || !WAS_NEWLINE(eptr)))
    0          
    0          
    0          
    0          
    0          
    0          
    0          
2094 0           RRETURN(MATCH_NOMATCH);
2095 0           ecode++;
2096 0           break;
2097              
2098             /* Start of match assertion */
2099              
2100             case OP_SOM:
2101 0 0         if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH);
2102 0           ecode++;
2103 0           break;
2104              
2105             /* Reset the start of match point */
2106              
2107             case OP_SET_SOM:
2108 0           mstart = eptr;
2109 0           ecode++;
2110 0           break;
2111              
2112             /* Multiline mode: assert before any newline, or before end of subject
2113             unless noteol is set. */
2114              
2115             case OP_DOLLM:
2116 0 0         if (eptr < md->end_subject)
2117             {
2118 0 0         if (!IS_NEWLINE(eptr))
    0          
    0          
    0          
    0          
    0          
    0          
    0          
2119             {
2120 0 0         if (md->partial != 0 &&
    0          
2121 0 0         eptr + 1 >= md->end_subject &&
2122 0 0         NLBLOCK->nltype == NLTYPE_FIXED &&
2123 0 0         NLBLOCK->nllen == 2 &&
2124 0           UCHAR21TEST(eptr) == NLBLOCK->nl[0])
2125             {
2126 0           md->hitend = TRUE;
2127 0 0         if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2128             }
2129 0           RRETURN(MATCH_NOMATCH);
2130             }
2131             }
2132             else
2133             {
2134 0 0         if (md->noteol) RRETURN(MATCH_NOMATCH);
2135 0 0         SCHECK_PARTIAL();
    0          
    0          
2136             }
2137 0           ecode++;
2138 0           break;
2139              
2140             /* Not multiline mode: assert before a terminating newline or before end of
2141             subject unless noteol is set. */
2142              
2143             case OP_DOLL:
2144 11 50         if (md->noteol) RRETURN(MATCH_NOMATCH);
2145 11 50         if (!md->endonly) goto ASSERT_NL_OR_EOS;
2146              
2147             /* ... else fall through for endonly */
2148              
2149             /* End of subject assertion (\z) */
2150              
2151             case OP_EOD:
2152 0 0         if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH);
2153 0 0         SCHECK_PARTIAL();
    0          
    0          
2154 0           ecode++;
2155 0           break;
2156              
2157             /* End of subject or ending \n assertion (\Z) */
2158              
2159             case OP_EODN:
2160             ASSERT_NL_OR_EOS:
2161 12 100         if (eptr < md->end_subject &&
    50          
    50          
2162 1 50         (!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen))
    50          
    0          
    0          
    0          
    0          
    0          
    0          
2163             {
2164 1 50         if (md->partial != 0 &&
    0          
2165 0 0         eptr + 1 >= md->end_subject &&
2166 0 0         NLBLOCK->nltype == NLTYPE_FIXED &&
2167 0 0         NLBLOCK->nllen == 2 &&
2168 0           UCHAR21TEST(eptr) == NLBLOCK->nl[0])
2169             {
2170 0           md->hitend = TRUE;
2171 0 0         if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2172             }
2173 1           RRETURN(MATCH_NOMATCH);
2174             }
2175              
2176             /* Either at end of string or \n before end. */
2177              
2178 10 50         SCHECK_PARTIAL();
    0          
    0          
2179 10           ecode++;
2180 10           break;
2181              
2182             /* Word boundary assertions */
2183              
2184             case OP_NOT_WORD_BOUNDARY:
2185             case OP_WORD_BOUNDARY:
2186             {
2187              
2188             /* Find out if the previous and current characters are "word" characters.
2189             It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to
2190             be "non-word" characters. Remember the earliest consulted character for
2191             partial matching. */
2192              
2193             #ifdef SUPPORT_UTF
2194             if (utf)
2195             {
2196             /* Get status of previous character */
2197              
2198             if (eptr == md->start_subject) prev_is_word = FALSE; else
2199             {
2200             PCRE_PUCHAR lastptr = eptr - 1;
2201             BACKCHAR(lastptr);
2202             if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr;
2203             GETCHAR(c, lastptr);
2204             #ifdef SUPPORT_UCP
2205             if (md->use_ucp)
2206             {
2207             if (c == '_') prev_is_word = TRUE; else
2208             {
2209             int cat = UCD_CATEGORY(c);
2210             prev_is_word = (cat == ucp_L || cat == ucp_N);
2211             }
2212             }
2213             else
2214             #endif
2215             prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
2216             }
2217              
2218             /* Get status of next character */
2219              
2220             if (eptr >= md->end_subject)
2221             {
2222             SCHECK_PARTIAL();
2223             cur_is_word = FALSE;
2224             }
2225             else
2226             {
2227             GETCHAR(c, eptr);
2228             #ifdef SUPPORT_UCP
2229             if (md->use_ucp)
2230             {
2231             if (c == '_') cur_is_word = TRUE; else
2232             {
2233             int cat = UCD_CATEGORY(c);
2234             cur_is_word = (cat == ucp_L || cat == ucp_N);
2235             }
2236             }
2237             else
2238             #endif
2239             cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0;
2240             }
2241             }
2242             else
2243             #endif
2244              
2245             /* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for
2246             consistency with the behaviour of \w we do use it in this case. */
2247              
2248             {
2249             /* Get status of previous character */
2250              
2251 0 0         if (eptr == md->start_subject) prev_is_word = FALSE; else
2252             {
2253 0 0         if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1;
2254             #ifdef SUPPORT_UCP
2255             if (md->use_ucp)
2256             {
2257             c = eptr[-1];
2258             if (c == '_') prev_is_word = TRUE; else
2259             {
2260             int cat = UCD_CATEGORY(c);
2261             prev_is_word = (cat == ucp_L || cat == ucp_N);
2262             }
2263             }
2264             else
2265             #endif
2266 0           prev_is_word = MAX_255(eptr[-1])
2267 0           && ((md->ctypes[eptr[-1]] & ctype_word) != 0);
2268             }
2269              
2270             /* Get status of next character */
2271              
2272 0 0         if (eptr >= md->end_subject)
2273             {
2274 0 0         SCHECK_PARTIAL();
    0          
    0          
2275 0           cur_is_word = FALSE;
2276             }
2277             else
2278             #ifdef SUPPORT_UCP
2279             if (md->use_ucp)
2280             {
2281             c = *eptr;
2282             if (c == '_') cur_is_word = TRUE; else
2283             {
2284             int cat = UCD_CATEGORY(c);
2285             cur_is_word = (cat == ucp_L || cat == ucp_N);
2286             }
2287             }
2288             else
2289             #endif
2290 0           cur_is_word = MAX_255(*eptr)
2291 0           && ((md->ctypes[*eptr] & ctype_word) != 0);
2292             }
2293              
2294             /* Now see if the situation is what we want */
2295              
2296 0 0         if ((*ecode++ == OP_WORD_BOUNDARY)?
    0          
2297             cur_is_word == prev_is_word : cur_is_word != prev_is_word)
2298 0           RRETURN(MATCH_NOMATCH);
2299             }
2300 0           break;
2301              
2302             /* Match any single character type except newline; have to take care with
2303             CRLF newlines and partial matching. */
2304              
2305             case OP_ANY:
2306 0 0         if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
    0          
    0          
    0          
    0          
    0          
    0          
    0          
2307 0 0         if (md->partial != 0 &&
    0          
2308 0 0         eptr == md->end_subject - 1 &&
2309 0 0         NLBLOCK->nltype == NLTYPE_FIXED &&
2310 0 0         NLBLOCK->nllen == 2 &&
2311 0           UCHAR21TEST(eptr) == NLBLOCK->nl[0])
2312             {
2313 0           md->hitend = TRUE;
2314 0 0         if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2315             }
2316              
2317             /* Fall through */
2318              
2319             /* Match any single character whatsoever. */
2320              
2321             case OP_ALLANY:
2322 0 0         if (eptr >= md->end_subject) /* DO NOT merge the eptr++ here; it must */
2323             { /* not be updated before SCHECK_PARTIAL. */
2324 0 0         SCHECK_PARTIAL();
    0          
    0          
2325 0           RRETURN(MATCH_NOMATCH);
2326             }
2327 0           eptr++;
2328             #ifdef SUPPORT_UTF
2329             if (utf) ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
2330             #endif
2331 0           ecode++;
2332 0           break;
2333              
2334             /* Match a single byte, even in UTF-8 mode. This opcode really does match
2335             any byte, even newline, independent of the setting of PCRE_DOTALL. */
2336              
2337             case OP_ANYBYTE:
2338 0 0         if (eptr >= md->end_subject) /* DO NOT merge the eptr++ here; it must */
2339             { /* not be updated before SCHECK_PARTIAL. */
2340 0 0         SCHECK_PARTIAL();
    0          
    0          
2341 0           RRETURN(MATCH_NOMATCH);
2342             }
2343 0           eptr++;
2344 0           ecode++;
2345 0           break;
2346              
2347             case OP_NOT_DIGIT:
2348 0 0         if (eptr >= md->end_subject)
2349             {
2350 0 0         SCHECK_PARTIAL();
    0          
    0          
2351 0           RRETURN(MATCH_NOMATCH);
2352             }
2353 0           GETCHARINCTEST(c, eptr);
2354 0 0         if (
2355             #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2356             c < 256 &&
2357             #endif
2358 0           (md->ctypes[c] & ctype_digit) != 0
2359             )
2360 0           RRETURN(MATCH_NOMATCH);
2361 0           ecode++;
2362 0           break;
2363              
2364             case OP_DIGIT:
2365 0 0         if (eptr >= md->end_subject)
2366             {
2367 0 0         SCHECK_PARTIAL();
    0          
    0          
2368 0           RRETURN(MATCH_NOMATCH);
2369             }
2370 0           GETCHARINCTEST(c, eptr);
2371 0 0         if (
2372             #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2373             c > 255 ||
2374             #endif
2375 0           (md->ctypes[c] & ctype_digit) == 0
2376             )
2377 0           RRETURN(MATCH_NOMATCH);
2378 0           ecode++;
2379 0           break;
2380              
2381             case OP_NOT_WHITESPACE:
2382 0 0         if (eptr >= md->end_subject)
2383             {
2384 0 0         SCHECK_PARTIAL();
    0          
    0          
2385 0           RRETURN(MATCH_NOMATCH);
2386             }
2387 0           GETCHARINCTEST(c, eptr);
2388 0 0         if (
2389             #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2390             c < 256 &&
2391             #endif
2392 0           (md->ctypes[c] & ctype_space) != 0
2393             )
2394 0           RRETURN(MATCH_NOMATCH);
2395 0           ecode++;
2396 0           break;
2397              
2398             case OP_WHITESPACE:
2399 0 0         if (eptr >= md->end_subject)
2400             {
2401 0 0         SCHECK_PARTIAL();
    0          
    0          
2402 0           RRETURN(MATCH_NOMATCH);
2403             }
2404 0           GETCHARINCTEST(c, eptr);
2405 0 0         if (
2406             #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2407             c > 255 ||
2408             #endif
2409 0           (md->ctypes[c] & ctype_space) == 0
2410             )
2411 0           RRETURN(MATCH_NOMATCH);
2412 0           ecode++;
2413 0           break;
2414              
2415             case OP_NOT_WORDCHAR:
2416 0 0         if (eptr >= md->end_subject)
2417             {
2418 0 0         SCHECK_PARTIAL();
    0          
    0          
2419 0           RRETURN(MATCH_NOMATCH);
2420             }
2421 0           GETCHARINCTEST(c, eptr);
2422 0 0         if (
2423             #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2424             c < 256 &&
2425             #endif
2426 0           (md->ctypes[c] & ctype_word) != 0
2427             )
2428 0           RRETURN(MATCH_NOMATCH);
2429 0           ecode++;
2430 0           break;
2431              
2432             case OP_WORDCHAR:
2433 0 0         if (eptr >= md->end_subject)
2434             {
2435 0 0         SCHECK_PARTIAL();
    0          
    0          
2436 0           RRETURN(MATCH_NOMATCH);
2437             }
2438 0           GETCHARINCTEST(c, eptr);
2439 0 0         if (
2440             #if defined SUPPORT_UTF || !(defined COMPILE_PCRE8)
2441             c > 255 ||
2442             #endif
2443 0           (md->ctypes[c] & ctype_word) == 0
2444             )
2445 0           RRETURN(MATCH_NOMATCH);
2446 0           ecode++;
2447 0           break;
2448              
2449             case OP_ANYNL:
2450 0 0         if (eptr >= md->end_subject)
2451             {
2452 0 0         SCHECK_PARTIAL();
    0          
    0          
2453 0           RRETURN(MATCH_NOMATCH);
2454             }
2455 0           GETCHARINCTEST(c, eptr);
2456 0           switch(c)
2457             {
2458 0           default: RRETURN(MATCH_NOMATCH);
2459              
2460             case CHAR_CR:
2461 0 0         if (eptr >= md->end_subject)
2462             {
2463 0 0         SCHECK_PARTIAL();
    0          
    0          
2464             }
2465 0 0         else if (UCHAR21TEST(eptr) == CHAR_LF) eptr++;
2466 0           break;
2467              
2468             case CHAR_LF:
2469 0           break;
2470              
2471             case CHAR_VT:
2472             case CHAR_FF:
2473             case CHAR_NEL:
2474             #ifndef EBCDIC
2475             case 0x2028:
2476             case 0x2029:
2477             #endif /* Not EBCDIC */
2478 0 0         if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
2479 0           break;
2480             }
2481 0           ecode++;
2482 0           break;
2483              
2484             case OP_NOT_HSPACE:
2485 0 0         if (eptr >= md->end_subject)
2486             {
2487 0 0         SCHECK_PARTIAL();
    0          
    0          
2488 0           RRETURN(MATCH_NOMATCH);
2489             }
2490 0           GETCHARINCTEST(c, eptr);
2491 0 0         switch(c)
2492             {
2493 0           HSPACE_CASES: RRETURN(MATCH_NOMATCH); /* Byte and multibyte cases */
2494 0           default: break;
2495             }
2496 0           ecode++;
2497 0           break;
2498              
2499             case OP_HSPACE:
2500 0 0         if (eptr >= md->end_subject)
2501             {
2502 0 0         SCHECK_PARTIAL();
    0          
    0          
2503 0           RRETURN(MATCH_NOMATCH);
2504             }
2505 0           GETCHARINCTEST(c, eptr);
2506 0 0         switch(c)
2507             {
2508 0           HSPACE_CASES: break; /* Byte and multibyte cases */
2509 0           default: RRETURN(MATCH_NOMATCH);
2510             }
2511 0           ecode++;
2512 0           break;
2513              
2514             case OP_NOT_VSPACE:
2515 0 0         if (eptr >= md->end_subject)
2516             {
2517 0 0         SCHECK_PARTIAL();
    0          
    0          
2518 0           RRETURN(MATCH_NOMATCH);
2519             }
2520 0           GETCHARINCTEST(c, eptr);
2521 0 0         switch(c)
2522             {
2523 0           VSPACE_CASES: RRETURN(MATCH_NOMATCH);
2524 0           default: break;
2525             }
2526 0           ecode++;
2527 0           break;
2528              
2529             case OP_VSPACE:
2530 0 0         if (eptr >= md->end_subject)
2531             {
2532 0 0         SCHECK_PARTIAL();
    0          
    0          
2533 0           RRETURN(MATCH_NOMATCH);
2534             }
2535 0           GETCHARINCTEST(c, eptr);
2536 0 0         switch(c)
2537             {
2538 0           VSPACE_CASES: break;
2539 0           default: RRETURN(MATCH_NOMATCH);
2540             }
2541 0           ecode++;
2542 0           break;
2543              
2544             #ifdef SUPPORT_UCP
2545             /* Check the next character by Unicode property. We will get here only
2546             if the support is in the binary; otherwise a compile-time error occurs. */
2547              
2548             case OP_PROP:
2549             case OP_NOTPROP:
2550             if (eptr >= md->end_subject)
2551             {
2552             SCHECK_PARTIAL();
2553             RRETURN(MATCH_NOMATCH);
2554             }
2555             GETCHARINCTEST(c, eptr);
2556             {
2557             const pcre_uint32 *cp;
2558             const ucd_record *prop = GET_UCD(c);
2559              
2560             switch(ecode[1])
2561             {
2562             case PT_ANY:
2563             if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2564             break;
2565              
2566             case PT_LAMP:
2567             if ((prop->chartype == ucp_Lu ||
2568             prop->chartype == ucp_Ll ||
2569             prop->chartype == ucp_Lt) == (op == OP_NOTPROP))
2570             RRETURN(MATCH_NOMATCH);
2571             break;
2572              
2573             case PT_GC:
2574             if ((ecode[2] != PRIV(ucp_gentype)[prop->chartype]) == (op == OP_PROP))
2575             RRETURN(MATCH_NOMATCH);
2576             break;
2577              
2578             case PT_PC:
2579             if ((ecode[2] != prop->chartype) == (op == OP_PROP))
2580             RRETURN(MATCH_NOMATCH);
2581             break;
2582              
2583             case PT_SC:
2584             if ((ecode[2] != prop->script) == (op == OP_PROP))
2585             RRETURN(MATCH_NOMATCH);
2586             break;
2587              
2588             /* These are specials */
2589              
2590             case PT_ALNUM:
2591             if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2592             PRIV(ucp_gentype)[prop->chartype] == ucp_N) == (op == OP_NOTPROP))
2593             RRETURN(MATCH_NOMATCH);
2594             break;
2595              
2596             /* Perl space used to exclude VT, but from Perl 5.18 it is included,
2597             which means that Perl space and POSIX space are now identical. PCRE
2598             was changed at release 8.34. */
2599              
2600             case PT_SPACE: /* Perl space */
2601             case PT_PXSPACE: /* POSIX space */
2602             switch(c)
2603             {
2604             HSPACE_CASES:
2605             VSPACE_CASES:
2606             if (op == OP_NOTPROP) RRETURN(MATCH_NOMATCH);
2607             break;
2608              
2609             default:
2610             if ((PRIV(ucp_gentype)[prop->chartype] == ucp_Z) ==
2611             (op == OP_NOTPROP)) RRETURN(MATCH_NOMATCH);
2612             break;
2613             }
2614             break;
2615              
2616             case PT_WORD:
2617             if ((PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
2618             PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
2619             c == CHAR_UNDERSCORE) == (op == OP_NOTPROP))
2620             RRETURN(MATCH_NOMATCH);
2621             break;
2622              
2623             case PT_CLIST:
2624             cp = PRIV(ucd_caseless_sets) + ecode[2];
2625             for (;;)
2626             {
2627             if (c < *cp)
2628             { if (op == OP_PROP) { RRETURN(MATCH_NOMATCH); } else break; }
2629             if (c == *cp++)
2630             { if (op == OP_PROP) break; else { RRETURN(MATCH_NOMATCH); } }
2631             }
2632             break;
2633              
2634             case PT_UCNC:
2635             if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
2636             c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
2637             c >= 0xe000) == (op == OP_NOTPROP))
2638             RRETURN(MATCH_NOMATCH);
2639             break;
2640              
2641             /* This should never occur */
2642              
2643             default:
2644             RRETURN(PCRE_ERROR_INTERNAL);
2645             }
2646              
2647             ecode += 3;
2648             }
2649             break;
2650              
2651             /* Match an extended Unicode sequence. We will get here only if the support
2652             is in the binary; otherwise a compile-time error occurs. */
2653              
2654             case OP_EXTUNI:
2655             if (eptr >= md->end_subject)
2656             {
2657             SCHECK_PARTIAL();
2658             RRETURN(MATCH_NOMATCH);
2659             }
2660             else
2661             {
2662             int lgb, rgb;
2663             GETCHARINCTEST(c, eptr);
2664             lgb = UCD_GRAPHBREAK(c);
2665             while (eptr < md->end_subject)
2666             {
2667             int len = 1;
2668             if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
2669             rgb = UCD_GRAPHBREAK(c);
2670             if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
2671             lgb = rgb;
2672             eptr += len;
2673             }
2674             }
2675             CHECK_PARTIAL();
2676             ecode++;
2677             break;
2678             #endif /* SUPPORT_UCP */
2679              
2680              
2681             /* Match a back reference, possibly repeatedly. Look past the end of the
2682             item to see if there is repeat information following. The code is similar
2683             to that for character classes, but repeated for efficiency. Then obey
2684             similar code to character type repeats - written out again for speed.
2685             However, if the referenced string is the empty string, always treat
2686             it as matched, any number of times (otherwise there could be infinite
2687             loops). If the reference is unset, there are two possibilities:
2688              
2689             (a) In the default, Perl-compatible state, set the length negative;
2690             this ensures that every attempt at a match fails. We can't just fail
2691             here, because of the possibility of quantifiers with zero minima.
2692              
2693             (b) If the JavaScript compatibility flag is set, set the length to zero
2694             so that the back reference matches an empty string.
2695              
2696             Otherwise, set the length to the length of what was matched by the
2697             referenced subpattern.
2698              
2699             The OP_REF and OP_REFI opcodes are used for a reference to a numbered group
2700             or to a non-duplicated named group. For a duplicated named group, OP_DNREF
2701             and OP_DNREFI are used. In this case we must scan the list of groups to
2702             which the name refers, and use the first one that is set. */
2703              
2704             case OP_DNREF:
2705             case OP_DNREFI:
2706 0           caseless = op == OP_DNREFI;
2707             {
2708 0           int count = GET2(ecode, 1+IMM2_SIZE);
2709 0           pcre_uchar *slot = md->name_table + GET2(ecode, 1) * md->name_entry_size;
2710 0           ecode += 1 + 2*IMM2_SIZE;
2711              
2712             /* Setting the default length first and initializing 'offset' avoids
2713             compiler warnings in the REF_REPEAT code. */
2714              
2715 0 0         length = (md->jscript_compat)? 0 : -1;
2716 0           offset = 0;
2717              
2718 0 0         while (count-- > 0)
2719             {
2720 0           offset = GET2(slot, 0) << 1;
2721 0 0         if (offset < offset_top && md->offset_vector[offset] >= 0)
    0          
2722             {
2723 0           length = md->offset_vector[offset+1] - md->offset_vector[offset];
2724 0           break;
2725             }
2726 0           slot += md->name_entry_size;
2727             }
2728             }
2729 0           goto REF_REPEAT;
2730              
2731             case OP_REF:
2732             case OP_REFI:
2733 0           caseless = op == OP_REFI;
2734 0           offset = GET2(ecode, 1) << 1; /* Doubled ref number */
2735 0           ecode += 1 + IMM2_SIZE;
2736 0 0         if (offset >= offset_top || md->offset_vector[offset] < 0)
    0          
2737 0 0         length = (md->jscript_compat)? 0 : -1;
2738             else
2739 0           length = md->offset_vector[offset+1] - md->offset_vector[offset];
2740              
2741             /* Set up for repetition, or handle the non-repeated case */
2742              
2743             REF_REPEAT:
2744 0           switch (*ecode)
2745             {
2746             case OP_CRSTAR:
2747             case OP_CRMINSTAR:
2748             case OP_CRPLUS:
2749             case OP_CRMINPLUS:
2750             case OP_CRQUERY:
2751             case OP_CRMINQUERY:
2752 0           c = *ecode++ - OP_CRSTAR;
2753 0           minimize = (c & 1) != 0;
2754 0           min = rep_min[c]; /* Pick up values from tables; */
2755 0           max = rep_max[c]; /* zero for max => infinity */
2756 0 0         if (max == 0) max = INT_MAX;
2757 0           break;
2758              
2759             case OP_CRRANGE:
2760             case OP_CRMINRANGE:
2761 0           minimize = (*ecode == OP_CRMINRANGE);
2762 0           min = GET2(ecode, 1);
2763 0           max = GET2(ecode, 1 + IMM2_SIZE);
2764 0 0         if (max == 0) max = INT_MAX;
2765 0           ecode += 1 + 2 * IMM2_SIZE;
2766 0           break;
2767              
2768             default: /* No repeat follows */
2769 0 0         if ((length = match_ref(offset, eptr, length, md, caseless)) < 0)
2770             {
2771 0 0         if (length == -2) eptr = md->end_subject; /* Partial match */
2772 0 0         CHECK_PARTIAL();
    0          
    0          
    0          
2773 0           RRETURN(MATCH_NOMATCH);
2774             }
2775 0           eptr += length;
2776 0           continue; /* With the main loop */
2777             }
2778              
2779             /* Handle repeated back references. If the length of the reference is
2780             zero, just continue with the main loop. If the length is negative, it
2781             means the reference is unset in non-JavaScript-compatible mode. If the minimum is
2782             zero, we can continue at the same level without recursion. For any other
2783             minimum, carrying on will result in NOMATCH. */
2784              
2785 0 0         if (length == 0) continue;
2786 0 0         if (length < 0 && min == 0) continue;
    0          
2787              
2788             /* First, ensure the minimum number of matches are present. We get back
2789             the length of the reference string explicitly rather than passing the
2790             address of eptr, so that eptr can be a register variable. */
2791              
2792 0 0         for (i = 1; i <= min; i++)
2793             {
2794             int slength;
2795 0 0         if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2796             {
2797 0 0         if (slength == -2) eptr = md->end_subject; /* Partial match */
2798 0 0         CHECK_PARTIAL();
    0          
    0          
    0          
2799 0           RRETURN(MATCH_NOMATCH);
2800             }
2801 0           eptr += slength;
2802             }
2803              
2804             /* If min = max, continue at the same level without recursion.
2805             They are not both allowed to be zero. */
2806              
2807 0 0         if (min == max) continue;
2808              
2809             /* If minimizing, keep trying and advancing the pointer */
2810              
2811 0 0         if (minimize)
2812             {
2813 0           for (fi = min;; fi++)
2814             {
2815             int slength;
2816 0           RMATCH(eptr, ecode, offset_top, md, eptrb, RM14);
2817 0 0         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2818 0 0         if (fi >= max) RRETURN(MATCH_NOMATCH);
2819 0 0         if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2820             {
2821 0 0         if (slength == -2) eptr = md->end_subject; /* Partial match */
2822 0 0         CHECK_PARTIAL();
    0          
    0          
    0          
2823 0           RRETURN(MATCH_NOMATCH);
2824             }
2825 0           eptr += slength;
2826 0           }
2827             /* Control never gets here */
2828             }
2829              
2830             /* If maximizing, find the longest string and work backwards */
2831              
2832             else
2833             {
2834 0           pp = eptr;
2835 0 0         for (i = min; i < max; i++)
2836             {
2837             int slength;
2838 0 0         if ((slength = match_ref(offset, eptr, length, md, caseless)) < 0)
2839             {
2840             /* Can't use CHECK_PARTIAL because we don't want to update eptr in
2841             the soft partial matching case. */
2842              
2843 0 0         if (slength == -2 && md->partial != 0 &&
    0          
    0          
2844 0           md->end_subject > md->start_used_ptr)
2845             {
2846 0           md->hitend = TRUE;
2847 0 0         if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
2848             }
2849 0           break;
2850             }
2851 0           eptr += slength;
2852             }
2853              
2854 0 0         while (eptr >= pp)
2855             {
2856 0           RMATCH(eptr, ecode, offset_top, md, eptrb, RM15);
2857 0 0         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2858 0           eptr -= length;
2859             }
2860 0           RRETURN(MATCH_NOMATCH);
2861             }
2862             /* Control never gets here */
2863              
2864             /* Match a bit-mapped character class, possibly repeatedly. This op code is
2865             used when all the characters in the class have values in the range 0-255,
2866             and either the matching is caseful, or the characters are in the range
2867             0-127 when UTF-8 processing is enabled. The only difference between
2868             OP_CLASS and OP_NCLASS occurs when a data character outside the range is
2869             encountered.
2870              
2871             First, look past the end of the item to see if there is repeat information
2872             following. Then obey similar code to character type repeats - written out
2873             again for speed. */
2874              
2875             case OP_NCLASS:
2876             case OP_CLASS:
2877             {
2878             /* The data variable is saved across frames, so the byte map needs to
2879             be stored there. */
2880             #define BYTE_MAP ((pcre_uint8 *)data)
2881 0           data = ecode + 1; /* Save for matching */
2882 0           ecode += 1 + (32 / sizeof(pcre_uchar)); /* Advance past the item */
2883              
2884 0           switch (*ecode)
2885             {
2886             case OP_CRSTAR:
2887             case OP_CRMINSTAR:
2888             case OP_CRPLUS:
2889             case OP_CRMINPLUS:
2890             case OP_CRQUERY:
2891             case OP_CRMINQUERY:
2892             case OP_CRPOSSTAR:
2893             case OP_CRPOSPLUS:
2894             case OP_CRPOSQUERY:
2895 0           c = *ecode++ - OP_CRSTAR;
2896 0 0         if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
2897 0           else possessive = TRUE;
2898 0           min = rep_min[c]; /* Pick up values from tables; */
2899 0           max = rep_max[c]; /* zero for max => infinity */
2900 0 0         if (max == 0) max = INT_MAX;
2901 0           break;
2902              
2903             case OP_CRRANGE:
2904             case OP_CRMINRANGE:
2905             case OP_CRPOSRANGE:
2906 0           minimize = (*ecode == OP_CRMINRANGE);
2907 0           possessive = (*ecode == OP_CRPOSRANGE);
2908 0           min = GET2(ecode, 1);
2909 0           max = GET2(ecode, 1 + IMM2_SIZE);
2910 0 0         if (max == 0) max = INT_MAX;
2911 0           ecode += 1 + 2 * IMM2_SIZE;
2912 0           break;
2913              
2914             default: /* No repeat follows */
2915 0           min = max = 1;
2916 0           break;
2917             }
2918              
2919             /* First, ensure the minimum number of matches are present. */
2920              
2921             #ifdef SUPPORT_UTF
2922             if (utf)
2923             {
2924             for (i = 1; i <= min; i++)
2925             {
2926             if (eptr >= md->end_subject)
2927             {
2928             SCHECK_PARTIAL();
2929             RRETURN(MATCH_NOMATCH);
2930             }
2931             GETCHARINC(c, eptr);
2932             if (c > 255)
2933             {
2934             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2935             }
2936             else
2937             if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2938             }
2939             }
2940             else
2941             #endif
2942             /* Not UTF mode */
2943             {
2944 0 0         for (i = 1; i <= min; i++)
2945             {
2946 0 0         if (eptr >= md->end_subject)
2947             {
2948 0 0         SCHECK_PARTIAL();
    0          
    0          
2949 0           RRETURN(MATCH_NOMATCH);
2950             }
2951 0           c = *eptr++;
2952             #ifndef COMPILE_PCRE8
2953             if (c > 255)
2954             {
2955             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2956             }
2957             else
2958             #endif
2959 0 0         if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2960             }
2961             }
2962              
2963             /* If max == min we can continue with the main loop without the
2964             need to recurse. */
2965              
2966 0 0         if (min == max) continue;
2967              
2968             /* If minimizing, keep testing the rest of the expression and advancing
2969             the pointer while it matches the class. */
2970              
2971 0 0         if (minimize)
2972             {
2973             #ifdef SUPPORT_UTF
2974             if (utf)
2975             {
2976             for (fi = min;; fi++)
2977             {
2978             RMATCH(eptr, ecode, offset_top, md, eptrb, RM16);
2979             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
2980             if (fi >= max) RRETURN(MATCH_NOMATCH);
2981             if (eptr >= md->end_subject)
2982             {
2983             SCHECK_PARTIAL();
2984             RRETURN(MATCH_NOMATCH);
2985             }
2986             GETCHARINC(c, eptr);
2987             if (c > 255)
2988             {
2989             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
2990             }
2991             else
2992             if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
2993             }
2994             }
2995             else
2996             #endif
2997             /* Not UTF mode */
2998             {
2999 0           for (fi = min;; fi++)
3000             {
3001 0           RMATCH(eptr, ecode, offset_top, md, eptrb, RM17);
3002 0 0         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3003 0 0         if (fi >= max) RRETURN(MATCH_NOMATCH);
3004 0 0         if (eptr >= md->end_subject)
3005             {
3006 0 0         SCHECK_PARTIAL();
    0          
    0          
3007 0           RRETURN(MATCH_NOMATCH);
3008             }
3009 0           c = *eptr++;
3010             #ifndef COMPILE_PCRE8
3011             if (c > 255)
3012             {
3013             if (op == OP_CLASS) RRETURN(MATCH_NOMATCH);
3014             }
3015             else
3016             #endif
3017 0 0         if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH);
3018 0           }
3019             }
3020             /* Control never gets here */
3021             }
3022              
3023             /* If maximizing, find the longest possible run, then work backwards. */
3024              
3025             else
3026             {
3027 0           pp = eptr;
3028              
3029             #ifdef SUPPORT_UTF
3030             if (utf)
3031             {
3032             for (i = min; i < max; i++)
3033             {
3034             int len = 1;
3035             if (eptr >= md->end_subject)
3036             {
3037             SCHECK_PARTIAL();
3038             break;
3039             }
3040             GETCHARLEN(c, eptr, len);
3041             if (c > 255)
3042             {
3043             if (op == OP_CLASS) break;
3044             }
3045             else
3046             if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
3047             eptr += len;
3048             }
3049              
3050             if (possessive) continue; /* No backtracking */
3051              
3052             for (;;)
3053             {
3054             RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
3055             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3056             if (eptr-- <= pp) break; /* Stop if tried at original pos */
3057             BACKCHAR(eptr);
3058             }
3059             }
3060             else
3061             #endif
3062             /* Not UTF mode */
3063             {
3064 0 0         for (i = min; i < max; i++)
3065             {
3066 0 0         if (eptr >= md->end_subject)
3067             {
3068 0 0         SCHECK_PARTIAL();
    0          
    0          
3069 0           break;
3070             }
3071 0           c = *eptr;
3072             #ifndef COMPILE_PCRE8
3073             if (c > 255)
3074             {
3075             if (op == OP_CLASS) break;
3076             }
3077             else
3078             #endif
3079 0 0         if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
3080 0           eptr++;
3081             }
3082              
3083 0 0         if (possessive) continue; /* No backtracking */
3084              
3085 0 0         while (eptr >= pp)
3086             {
3087 0           RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);
3088 0 0         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3089 0           eptr--;
3090             }
3091             }
3092              
3093 0           RRETURN(MATCH_NOMATCH);
3094             }
3095             #undef BYTE_MAP
3096             }
3097             /* Control never gets here */
3098              
3099              
3100             /* Match an extended character class. In the 8-bit library, this opcode is
3101             encountered only when UTF-8 mode is supported. In the 16-bit and
3102             32-bit libraries, codepoints greater than 255 may be encountered even when
3103             UTF is not supported. */
3104              
3105             #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
3106             case OP_XCLASS:
3107             {
3108             data = ecode + 1 + LINK_SIZE; /* Save for matching */
3109             ecode += GET(ecode, 1); /* Advance past the item */
3110              
3111             switch (*ecode)
3112             {
3113             case OP_CRSTAR:
3114             case OP_CRMINSTAR:
3115             case OP_CRPLUS:
3116             case OP_CRMINPLUS:
3117             case OP_CRQUERY:
3118             case OP_CRMINQUERY:
3119             case OP_CRPOSSTAR:
3120             case OP_CRPOSPLUS:
3121             case OP_CRPOSQUERY:
3122             c = *ecode++ - OP_CRSTAR;
3123             if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
3124             else possessive = TRUE;
3125             min = rep_min[c]; /* Pick up values from tables; */
3126             max = rep_max[c]; /* zero for max => infinity */
3127             if (max == 0) max = INT_MAX;
3128             break;
3129              
3130             case OP_CRRANGE:
3131             case OP_CRMINRANGE:
3132             case OP_CRPOSRANGE:
3133             minimize = (*ecode == OP_CRMINRANGE);
3134             possessive = (*ecode == OP_CRPOSRANGE);
3135             min = GET2(ecode, 1);
3136             max = GET2(ecode, 1 + IMM2_SIZE);
3137             if (max == 0) max = INT_MAX;
3138             ecode += 1 + 2 * IMM2_SIZE;
3139             break;
3140              
3141             default: /* No repeat follows */
3142             min = max = 1;
3143             break;
3144             }
3145              
3146             /* First, ensure the minimum number of matches are present. */
3147              
3148             for (i = 1; i <= min; i++)
3149             {
3150             if (eptr >= md->end_subject)
3151             {
3152             SCHECK_PARTIAL();
3153             RRETURN(MATCH_NOMATCH);
3154             }
3155             GETCHARINCTEST(c, eptr);
3156             if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3157             }
3158              
3159             /* If max == min we can continue with the main loop without the
3160             need to recurse. */
3161              
3162             if (min == max) continue;
3163              
3164             /* If minimizing, keep testing the rest of the expression and advancing
3165             the pointer while it matches the class. */
3166              
3167             if (minimize)
3168             {
3169             for (fi = min;; fi++)
3170             {
3171             RMATCH(eptr, ecode, offset_top, md, eptrb, RM20);
3172             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3173             if (fi >= max) RRETURN(MATCH_NOMATCH);
3174             if (eptr >= md->end_subject)
3175             {
3176             SCHECK_PARTIAL();
3177             RRETURN(MATCH_NOMATCH);
3178             }
3179             GETCHARINCTEST(c, eptr);
3180             if (!PRIV(xclass)(c, data, utf)) RRETURN(MATCH_NOMATCH);
3181             }
3182             /* Control never gets here */
3183             }
3184              
3185             /* If maximizing, find the longest possible run, then work backwards. */
3186              
3187             else
3188             {
3189             pp = eptr;
3190             for (i = min; i < max; i++)
3191             {
3192             int len = 1;
3193             if (eptr >= md->end_subject)
3194             {
3195             SCHECK_PARTIAL();
3196             break;
3197             }
3198             #ifdef SUPPORT_UTF
3199             GETCHARLENTEST(c, eptr, len);
3200             #else
3201             c = *eptr;
3202             #endif
3203             if (!PRIV(xclass)(c, data, utf)) break;
3204             eptr += len;
3205             }
3206              
3207             if (possessive) continue; /* No backtracking */
3208              
3209             for(;;)
3210             {
3211             RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
3212             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3213             if (eptr-- <= pp) break; /* Stop if tried at original pos */
3214             #ifdef SUPPORT_UTF
3215             if (utf) BACKCHAR(eptr);
3216             #endif
3217             }
3218             RRETURN(MATCH_NOMATCH);
3219             }
3220              
3221             /* Control never gets here */
3222             }
3223             #endif /* End of XCLASS */
3224              
3225             /* Match a single character, casefully */
3226              
3227             case OP_CHAR:
3228             #ifdef SUPPORT_UTF
3229             if (utf)
3230             {
3231             length = 1;
3232             ecode++;
3233             GETCHARLEN(fc, ecode, length);
3234             if (length > md->end_subject - eptr)
3235             {
3236             CHECK_PARTIAL(); /* Not SCHECK_PARTIAL() */
3237             RRETURN(MATCH_NOMATCH);
3238             }
3239             while (length-- > 0) if (*ecode++ != UCHAR21INC(eptr)) RRETURN(MATCH_NOMATCH);
3240             }
3241             else
3242             #endif
3243             /* Not UTF mode */
3244             {
3245 456 100         if (md->end_subject - eptr < 1)
3246             {
3247 10 50         SCHECK_PARTIAL(); /* This one can use SCHECK_PARTIAL() */
    0          
    0          
3248 10           RRETURN(MATCH_NOMATCH);
3249             }
3250 446 100         if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH);
3251 312           ecode += 2;
3252             }
3253 312           break;
3254              
3255             /* Match a single character, caselessly. If we are at the end of the
3256             subject, give up immediately. */
3257              
3258             case OP_CHARI:
3259 0 0         if (eptr >= md->end_subject)
3260             {
3261 0 0         SCHECK_PARTIAL();
    0          
    0          
3262 0           RRETURN(MATCH_NOMATCH);
3263             }
3264              
3265             #ifdef SUPPORT_UTF
3266             if (utf)
3267             {
3268             length = 1;
3269             ecode++;
3270             GETCHARLEN(fc, ecode, length);
3271              
3272             /* If the pattern character's value is < 128, we have only one byte, and
3273             we know that its other case must also be one byte long, so we can use the
3274             fast lookup table. We know that there is at least one byte left in the
3275             subject. */
3276              
3277             if (fc < 128)
3278             {
3279             pcre_uint32 cc = UCHAR21(eptr);
3280             if (md->lcc[fc] != TABLE_GET(cc, md->lcc, cc)) RRETURN(MATCH_NOMATCH);
3281             ecode++;
3282             eptr++;
3283             }
3284              
3285             /* Otherwise we must pick up the subject character. Note that we cannot
3286             use the value of "length" to check for sufficient bytes left, because the
3287             other case of the character may have more or fewer bytes. */
3288              
3289             else
3290             {
3291             pcre_uint32 dc;
3292             GETCHARINC(dc, eptr);
3293             ecode += length;
3294              
3295             /* If we have Unicode property support, we can use it to test the other
3296             case of the character, if there is one. */
3297              
3298             if (fc != dc)
3299             {
3300             #ifdef SUPPORT_UCP
3301             if (dc != UCD_OTHERCASE(fc))
3302             #endif
3303             RRETURN(MATCH_NOMATCH);
3304             }
3305             }
3306             }
3307             else
3308             #endif /* SUPPORT_UTF */
3309              
3310             /* Not UTF mode */
3311             {
3312 0 0         if (TABLE_GET(ecode[1], md->lcc, ecode[1])
3313 0           != TABLE_GET(*eptr, md->lcc, *eptr)) RRETURN(MATCH_NOMATCH);
3314 0           eptr++;
3315 0           ecode += 2;
3316             }
3317 0           break;
3318              
3319             /* Match a single character repeatedly. */
3320              
3321             case OP_EXACT:
3322             case OP_EXACTI:
3323 0           min = max = GET2(ecode, 1);
3324 0           ecode += 1 + IMM2_SIZE;
3325 0           goto REPEATCHAR;
3326              
3327             case OP_POSUPTO:
3328             case OP_POSUPTOI:
3329 0           possessive = TRUE;
3330             /* Fall through */
3331              
3332             case OP_UPTO:
3333             case OP_UPTOI:
3334             case OP_MINUPTO:
3335             case OP_MINUPTOI:
3336 0           min = 0;
3337 0           max = GET2(ecode, 1);
3338 0 0         minimize = *ecode == OP_MINUPTO || *ecode == OP_MINUPTOI;
    0          
3339 0           ecode += 1 + IMM2_SIZE;
3340 0           goto REPEATCHAR;
3341              
3342             case OP_POSSTAR:
3343             case OP_POSSTARI:
3344 0           possessive = TRUE;
3345 0           min = 0;
3346 0           max = INT_MAX;
3347 0           ecode++;
3348 0           goto REPEATCHAR;
3349              
3350             case OP_POSPLUS:
3351             case OP_POSPLUSI:
3352 0           possessive = TRUE;
3353 0           min = 1;
3354 0           max = INT_MAX;
3355 0           ecode++;
3356 0           goto REPEATCHAR;
3357              
3358             case OP_POSQUERY:
3359             case OP_POSQUERYI:
3360 0           possessive = TRUE;
3361 0           min = 0;
3362 0           max = 1;
3363 0           ecode++;
3364 0           goto REPEATCHAR;
3365              
3366             case OP_STAR:
3367             case OP_STARI:
3368             case OP_MINSTAR:
3369             case OP_MINSTARI:
3370             case OP_PLUS:
3371             case OP_PLUSI:
3372             case OP_MINPLUS:
3373             case OP_MINPLUSI:
3374             case OP_QUERY:
3375             case OP_QUERYI:
3376             case OP_MINQUERY:
3377             case OP_MINQUERYI:
3378 0 0         c = *ecode++ - ((op < OP_STARI)? OP_STAR : OP_STARI);
3379 0           minimize = (c & 1) != 0;
3380 0           min = rep_min[c]; /* Pick up values from tables; */
3381 0           max = rep_max[c]; /* zero for max => infinity */
3382 0 0         if (max == 0) max = INT_MAX;
3383              
3384             /* Common code for all repeated single-character matches. We first check
3385             for the minimum number of characters. If the minimum equals the maximum, we
3386             are done. Otherwise, if minimizing, check the rest of the pattern for a
3387             match; if there isn't one, advance up to the maximum, one character at a
3388             time.
3389              
3390             If maximizing, advance up to the maximum number of matching characters,
3391             until eptr is past the end of the maximum run. If possessive, we are
3392             then done (no backing up). Otherwise, match at this position; anything
3393             other than no match is immediately returned. For nomatch, back up one
3394             character, unless we are matching \R and the last thing matched was
3395             \r\n, in which case, back up two bytes. When we reach the first optional
3396             character position, we can save stack by doing a tail recurse.
3397              
3398             The various UTF/non-UTF and caseful/caseless cases are handled separately,
3399             for speed. */
3400              
3401             REPEATCHAR:
3402             #ifdef SUPPORT_UTF
3403             if (utf)
3404             {
3405             length = 1;
3406             charptr = ecode;
3407             GETCHARLEN(fc, ecode, length);
3408             ecode += length;
3409              
3410             /* Handle multibyte character matching specially here. There is
3411             support for caseless matching if UCP support is present. */
3412              
3413             if (length > 1)
3414             {
3415             #ifdef SUPPORT_UCP
3416             pcre_uint32 othercase;
3417             if (op >= OP_STARI && /* Caseless */
3418             (othercase = UCD_OTHERCASE(fc)) != fc)
3419             oclength = PRIV(ord2utf)(othercase, occhars);
3420             else oclength = 0;
3421             #endif /* SUPPORT_UCP */
3422              
3423             for (i = 1; i <= min; i++)
3424             {
3425             if (eptr <= md->end_subject - length &&
3426             memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3427             #ifdef SUPPORT_UCP
3428             else if (oclength > 0 &&
3429             eptr <= md->end_subject - oclength &&
3430             memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3431             #endif /* SUPPORT_UCP */
3432             else
3433             {
3434             CHECK_PARTIAL();
3435             RRETURN(MATCH_NOMATCH);
3436             }
3437             }
3438              
3439             if (min == max) continue;
3440              
3441             if (minimize)
3442             {
3443             for (fi = min;; fi++)
3444             {
3445             RMATCH(eptr, ecode, offset_top, md, eptrb, RM22);
3446             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3447             if (fi >= max) RRETURN(MATCH_NOMATCH);
3448             if (eptr <= md->end_subject - length &&
3449             memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3450             #ifdef SUPPORT_UCP
3451             else if (oclength > 0 &&
3452             eptr <= md->end_subject - oclength &&
3453             memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3454             #endif /* SUPPORT_UCP */
3455             else
3456             {
3457             CHECK_PARTIAL();
3458             RRETURN(MATCH_NOMATCH);
3459             }
3460             }
3461             /* Control never gets here */
3462             }
3463              
3464             else /* Maximize */
3465             {
3466             pp = eptr;
3467             for (i = min; i < max; i++)
3468             {
3469             if (eptr <= md->end_subject - length &&
3470             memcmp(eptr, charptr, IN_UCHARS(length)) == 0) eptr += length;
3471             #ifdef SUPPORT_UCP
3472             else if (oclength > 0 &&
3473             eptr <= md->end_subject - oclength &&
3474             memcmp(eptr, occhars, IN_UCHARS(oclength)) == 0) eptr += oclength;
3475             #endif /* SUPPORT_UCP */
3476             else
3477             {
3478             CHECK_PARTIAL();
3479             break;
3480             }
3481             }
3482              
3483             if (possessive) continue; /* No backtracking */
3484             for(;;)
3485             {
3486             if (eptr <= pp) goto TAIL_RECURSE;
3487             RMATCH(eptr, ecode, offset_top, md, eptrb, RM23);
3488             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3489             #ifdef SUPPORT_UCP
3490             eptr--;
3491             BACKCHAR(eptr);
3492             #else /* without SUPPORT_UCP */
3493             eptr -= length;
3494             #endif /* SUPPORT_UCP */
3495             }
3496             }
3497             /* Control never gets here */
3498             }
3499              
3500             /* If the length of a UTF-8 character is 1, we fall through here, and
3501             obey the code as for non-UTF-8 characters below, though in this case the
3502             value of fc will always be < 128. */
3503             }
3504             else
3505             #endif /* SUPPORT_UTF */
3506             /* When not in UTF-8 mode, load a single-byte character. */
3507 0           fc = *ecode++;
3508              
3509             /* The value of fc at this point is always one character, though we may
3510             or may not be in UTF mode. The code is duplicated for the caseless and
3511             caseful cases, for speed, since matching characters is likely to be quite
3512             common. First, ensure the minimum number of matches are present. If min =
3513             max, continue at the same level without recursing. Otherwise, if
3514             minimizing, keep trying the rest of the expression and advancing one
3515             matching character if failing, up to the maximum. Alternatively, if
3516             maximizing, find the maximum number of characters and work backwards. */
3517              
3518             DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3519             max, (char *)eptr));
3520              
3521 0 0         if (op >= OP_STARI) /* Caseless */
3522             {
3523             #ifdef COMPILE_PCRE8
3524             /* fc must be < 128 if UTF is enabled. */
3525 0           foc = md->fcc[fc];
3526             #else
3527             #ifdef SUPPORT_UTF
3528             #ifdef SUPPORT_UCP
3529             if (utf && fc > 127)
3530             foc = UCD_OTHERCASE(fc);
3531             #else
3532             if (utf && fc > 127)
3533             foc = fc;
3534             #endif /* SUPPORT_UCP */
3535             else
3536             #endif /* SUPPORT_UTF */
3537             foc = TABLE_GET(fc, md->fcc, fc);
3538             #endif /* COMPILE_PCRE8 */
3539              
3540 0 0         for (i = 1; i <= min; i++)
3541             {
3542             pcre_uint32 cc; /* Faster than pcre_uchar */
3543 0 0         if (eptr >= md->end_subject)
3544             {
3545 0 0         SCHECK_PARTIAL();
    0          
    0          
3546 0           RRETURN(MATCH_NOMATCH);
3547             }
3548 0           cc = UCHAR21TEST(eptr);
3549 0 0         if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
    0          
3550 0           eptr++;
3551             }
3552 0 0         if (min == max) continue;
3553 0 0         if (minimize)
3554             {
3555 0           for (fi = min;; fi++)
3556             {
3557             pcre_uint32 cc; /* Faster than pcre_uchar */
3558 0           RMATCH(eptr, ecode, offset_top, md, eptrb, RM24);
3559 0 0         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3560 0 0         if (fi >= max) RRETURN(MATCH_NOMATCH);
3561 0 0         if (eptr >= md->end_subject)
3562             {
3563 0 0         SCHECK_PARTIAL();
    0          
    0          
3564 0           RRETURN(MATCH_NOMATCH);
3565             }
3566 0           cc = UCHAR21TEST(eptr);
3567 0 0         if (fc != cc && foc != cc) RRETURN(MATCH_NOMATCH);
    0          
3568 0           eptr++;
3569 0           }
3570             /* Control never gets here */
3571             }
3572             else /* Maximize */
3573             {
3574 0           pp = eptr;
3575 0 0         for (i = min; i < max; i++)
3576             {
3577             pcre_uint32 cc; /* Faster than pcre_uchar */
3578 0 0         if (eptr >= md->end_subject)
3579             {
3580 0 0         SCHECK_PARTIAL();
    0          
    0          
3581 0           break;
3582             }
3583 0           cc = UCHAR21TEST(eptr);
3584 0 0         if (fc != cc && foc != cc) break;
    0          
3585 0           eptr++;
3586             }
3587 0 0         if (possessive) continue; /* No backtracking */
3588             for (;;)
3589             {
3590 0 0         if (eptr == pp) goto TAIL_RECURSE;
3591 0           RMATCH(eptr, ecode, offset_top, md, eptrb, RM25);
3592 0           eptr--;
3593 0 0         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3594 0           }
3595             /* Control never gets here */
3596             }
3597             }
3598              
3599             /* Caseful comparisons (includes all multi-byte characters) */
3600              
3601             else
3602             {
3603 0 0         for (i = 1; i <= min; i++)
3604             {
3605 0 0         if (eptr >= md->end_subject)
3606             {
3607 0 0         SCHECK_PARTIAL();
    0          
    0          
3608 0           RRETURN(MATCH_NOMATCH);
3609             }
3610 0 0         if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH);
3611             }
3612              
3613 0 0         if (min == max) continue;
3614              
3615 0 0         if (minimize)
3616             {
3617 0           for (fi = min;; fi++)
3618             {
3619 0           RMATCH(eptr, ecode, offset_top, md, eptrb, RM26);
3620 0 0         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3621 0 0         if (fi >= max) RRETURN(MATCH_NOMATCH);
3622 0 0         if (eptr >= md->end_subject)
3623             {
3624 0 0         SCHECK_PARTIAL();
    0          
    0          
3625 0           RRETURN(MATCH_NOMATCH);
3626             }
3627 0 0         if (fc != UCHAR21INCTEST(eptr)) RRETURN(MATCH_NOMATCH);
3628 0           }
3629             /* Control never gets here */
3630             }
3631             else /* Maximize */
3632             {
3633 0           pp = eptr;
3634 0 0         for (i = min; i < max; i++)
3635             {
3636 0 0         if (eptr >= md->end_subject)
3637             {
3638 0 0         SCHECK_PARTIAL();
    0          
    0          
3639 0           break;
3640             }
3641 0 0         if (fc != UCHAR21TEST(eptr)) break;
3642 0           eptr++;
3643             }
3644 0 0         if (possessive) continue; /* No backtracking */
3645             for (;;)
3646             {
3647 0 0         if (eptr == pp) goto TAIL_RECURSE;
3648 0           RMATCH(eptr, ecode, offset_top, md, eptrb, RM27);
3649 0           eptr--;
3650 0 0         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3651 0           }
3652             /* Control never gets here */
3653             }
3654             }
3655             /* Control never gets here */
3656              
3657             /* Match a negated single one-byte character. The character we are
3658             checking can be multibyte. */
3659              
3660             case OP_NOT:
3661             case OP_NOTI:
3662 0 0         if (eptr >= md->end_subject)
3663             {
3664 0 0         SCHECK_PARTIAL();
    0          
    0          
3665 0           RRETURN(MATCH_NOMATCH);
3666             }
3667             #ifdef SUPPORT_UTF
3668             if (utf)
3669             {
3670             register pcre_uint32 ch, och;
3671              
3672             ecode++;
3673             GETCHARINC(ch, ecode);
3674             GETCHARINC(c, eptr);
3675              
3676             if (op == OP_NOT)
3677             {
3678             if (ch == c) RRETURN(MATCH_NOMATCH);
3679             }
3680             else
3681             {
3682             #ifdef SUPPORT_UCP
3683             if (ch > 127)
3684             och = UCD_OTHERCASE(ch);
3685             #else
3686             if (ch > 127)
3687             och = ch;
3688             #endif /* SUPPORT_UCP */
3689             else
3690             och = TABLE_GET(ch, md->fcc, ch);
3691             if (ch == c || och == c) RRETURN(MATCH_NOMATCH);
3692             }
3693             }
3694             else
3695             #endif
3696             {
3697 0           register pcre_uint32 ch = ecode[1];
3698 0           c = *eptr++;
3699 0 0         if (ch == c || (op == OP_NOTI && TABLE_GET(ch, md->fcc, ch) == c))
    0          
    0          
3700 0           RRETURN(MATCH_NOMATCH);
3701 0           ecode += 2;
3702             }
3703 0           break;
3704              
3705             /* Match a negated single one-byte character repeatedly. This is almost a
3706             repeat of the code for a repeated single character, but I haven't found a
3707             nice way of commoning these up that doesn't require a test of the
3708             positive/negative option for each character match. Maybe that wouldn't add
3709             very much to the time taken, but character matching *is* what this is all
3710             about... */
3711              
3712             case OP_NOTEXACT:
3713             case OP_NOTEXACTI:
3714 0           min = max = GET2(ecode, 1);
3715 0           ecode += 1 + IMM2_SIZE;
3716 0           goto REPEATNOTCHAR;
3717              
3718             case OP_NOTUPTO:
3719             case OP_NOTUPTOI:
3720             case OP_NOTMINUPTO:
3721             case OP_NOTMINUPTOI:
3722 0           min = 0;
3723 0           max = GET2(ecode, 1);
3724 0 0         minimize = *ecode == OP_NOTMINUPTO || *ecode == OP_NOTMINUPTOI;
    0          
3725 0           ecode += 1 + IMM2_SIZE;
3726 0           goto REPEATNOTCHAR;
3727              
3728             case OP_NOTPOSSTAR:
3729             case OP_NOTPOSSTARI:
3730 0           possessive = TRUE;
3731 0           min = 0;
3732 0           max = INT_MAX;
3733 0           ecode++;
3734 0           goto REPEATNOTCHAR;
3735              
3736             case OP_NOTPOSPLUS:
3737             case OP_NOTPOSPLUSI:
3738 0           possessive = TRUE;
3739 0           min = 1;
3740 0           max = INT_MAX;
3741 0           ecode++;
3742 0           goto REPEATNOTCHAR;
3743              
3744             case OP_NOTPOSQUERY:
3745             case OP_NOTPOSQUERYI:
3746 0           possessive = TRUE;
3747 0           min = 0;
3748 0           max = 1;
3749 0           ecode++;
3750 0           goto REPEATNOTCHAR;
3751              
3752             case OP_NOTPOSUPTO:
3753             case OP_NOTPOSUPTOI:
3754 0           possessive = TRUE;
3755 0           min = 0;
3756 0           max = GET2(ecode, 1);
3757 0           ecode += 1 + IMM2_SIZE;
3758 0           goto REPEATNOTCHAR;
3759              
3760             case OP_NOTSTAR:
3761             case OP_NOTSTARI:
3762             case OP_NOTMINSTAR:
3763             case OP_NOTMINSTARI:
3764             case OP_NOTPLUS:
3765             case OP_NOTPLUSI:
3766             case OP_NOTMINPLUS:
3767             case OP_NOTMINPLUSI:
3768             case OP_NOTQUERY:
3769             case OP_NOTQUERYI:
3770             case OP_NOTMINQUERY:
3771             case OP_NOTMINQUERYI:
3772 0 0         c = *ecode++ - ((op >= OP_NOTSTARI)? OP_NOTSTARI: OP_NOTSTAR);
3773 0           minimize = (c & 1) != 0;
3774 0           min = rep_min[c]; /* Pick up values from tables; */
3775 0           max = rep_max[c]; /* zero for max => infinity */
3776 0 0         if (max == 0) max = INT_MAX;
3777              
3778             /* Common code for all repeated single-byte matches. */
3779              
3780             REPEATNOTCHAR:
3781 0           GETCHARINCTEST(fc, ecode);
3782              
3783             /* The code is duplicated for the caseless and caseful cases, for speed,
3784             since matching characters is likely to be quite common. First, ensure the
3785             minimum number of matches are present. If min = max, continue at the same
3786             level without recursing. Otherwise, if minimizing, keep trying the rest of
3787             the expression and advancing one matching character if failing, up to the
3788             maximum. Alternatively, if maximizing, find the maximum number of
3789             characters and work backwards. */
3790              
3791             DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max,
3792             max, (char *)eptr));
3793              
3794 0 0         if (op >= OP_NOTSTARI) /* Caseless */
3795             {
3796             #ifdef SUPPORT_UTF
3797             #ifdef SUPPORT_UCP
3798             if (utf && fc > 127)
3799             foc = UCD_OTHERCASE(fc);
3800             #else
3801             if (utf && fc > 127)
3802             foc = fc;
3803             #endif /* SUPPORT_UCP */
3804             else
3805             #endif /* SUPPORT_UTF */
3806 0           foc = TABLE_GET(fc, md->fcc, fc);
3807              
3808             #ifdef SUPPORT_UTF
3809             if (utf)
3810             {
3811             register pcre_uint32 d;
3812             for (i = 1; i <= min; i++)
3813             {
3814             if (eptr >= md->end_subject)
3815             {
3816             SCHECK_PARTIAL();
3817             RRETURN(MATCH_NOMATCH);
3818             }
3819             GETCHARINC(d, eptr);
3820             if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
3821             }
3822             }
3823             else
3824             #endif /* SUPPORT_UTF */
3825             /* Not UTF mode */
3826             {
3827 0 0         for (i = 1; i <= min; i++)
3828             {
3829 0 0         if (eptr >= md->end_subject)
3830             {
3831 0 0         SCHECK_PARTIAL();
    0          
    0          
3832 0           RRETURN(MATCH_NOMATCH);
3833             }
3834 0 0         if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
    0          
3835 0           eptr++;
3836             }
3837             }
3838              
3839 0 0         if (min == max) continue;
3840              
3841 0 0         if (minimize)
3842             {
3843             #ifdef SUPPORT_UTF
3844             if (utf)
3845             {
3846             register pcre_uint32 d;
3847             for (fi = min;; fi++)
3848             {
3849             RMATCH(eptr, ecode, offset_top, md, eptrb, RM28);
3850             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3851             if (fi >= max) RRETURN(MATCH_NOMATCH);
3852             if (eptr >= md->end_subject)
3853             {
3854             SCHECK_PARTIAL();
3855             RRETURN(MATCH_NOMATCH);
3856             }
3857             GETCHARINC(d, eptr);
3858             if (fc == d || (unsigned int)foc == d) RRETURN(MATCH_NOMATCH);
3859             }
3860             }
3861             else
3862             #endif /*SUPPORT_UTF */
3863             /* Not UTF mode */
3864             {
3865 0           for (fi = min;; fi++)
3866             {
3867 0           RMATCH(eptr, ecode, offset_top, md, eptrb, RM29);
3868 0 0         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3869 0 0         if (fi >= max) RRETURN(MATCH_NOMATCH);
3870 0 0         if (eptr >= md->end_subject)
3871             {
3872 0 0         SCHECK_PARTIAL();
    0          
    0          
3873 0           RRETURN(MATCH_NOMATCH);
3874             }
3875 0 0         if (fc == *eptr || foc == *eptr) RRETURN(MATCH_NOMATCH);
    0          
3876 0           eptr++;
3877 0           }
3878             }
3879             /* Control never gets here */
3880             }
3881              
3882             /* Maximize case */
3883              
3884             else
3885             {
3886 0           pp = eptr;
3887              
3888             #ifdef SUPPORT_UTF
3889             if (utf)
3890             {
3891             register pcre_uint32 d;
3892             for (i = min; i < max; i++)
3893             {
3894             int len = 1;
3895             if (eptr >= md->end_subject)
3896             {
3897             SCHECK_PARTIAL();
3898             break;
3899             }
3900             GETCHARLEN(d, eptr, len);
3901             if (fc == d || (unsigned int)foc == d) break;
3902             eptr += len;
3903             }
3904             if (possessive) continue; /* No backtracking */
3905             for(;;)
3906             {
3907             if (eptr <= pp) goto TAIL_RECURSE;
3908             RMATCH(eptr, ecode, offset_top, md, eptrb, RM30);
3909             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3910             eptr--;
3911             BACKCHAR(eptr);
3912             }
3913             }
3914             else
3915             #endif /* SUPPORT_UTF */
3916             /* Not UTF mode */
3917             {
3918 0 0         for (i = min; i < max; i++)
3919             {
3920 0 0         if (eptr >= md->end_subject)
3921             {
3922 0 0         SCHECK_PARTIAL();
    0          
    0          
3923 0           break;
3924             }
3925 0 0         if (fc == *eptr || foc == *eptr) break;
    0          
3926 0           eptr++;
3927             }
3928 0 0         if (possessive) continue; /* No backtracking */
3929             for (;;)
3930             {
3931 0 0         if (eptr == pp) goto TAIL_RECURSE;
3932 0           RMATCH(eptr, ecode, offset_top, md, eptrb, RM31);
3933 0 0         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3934 0           eptr--;
3935 0           }
3936             }
3937             /* Control never gets here */
3938             }
3939             }
3940              
3941             /* Caseful comparisons */
3942              
3943             else
3944             {
3945             #ifdef SUPPORT_UTF
3946             if (utf)
3947             {
3948             register pcre_uint32 d;
3949             for (i = 1; i <= min; i++)
3950             {
3951             if (eptr >= md->end_subject)
3952             {
3953             SCHECK_PARTIAL();
3954             RRETURN(MATCH_NOMATCH);
3955             }
3956             GETCHARINC(d, eptr);
3957             if (fc == d) RRETURN(MATCH_NOMATCH);
3958             }
3959             }
3960             else
3961             #endif
3962             /* Not UTF mode */
3963             {
3964 0 0         for (i = 1; i <= min; i++)
3965             {
3966 0 0         if (eptr >= md->end_subject)
3967             {
3968 0 0         SCHECK_PARTIAL();
    0          
    0          
3969 0           RRETURN(MATCH_NOMATCH);
3970             }
3971 0 0         if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
3972             }
3973             }
3974              
3975 0 0         if (min == max) continue;
3976              
3977 0 0         if (minimize)
3978             {
3979             #ifdef SUPPORT_UTF
3980             if (utf)
3981             {
3982             register pcre_uint32 d;
3983             for (fi = min;; fi++)
3984             {
3985             RMATCH(eptr, ecode, offset_top, md, eptrb, RM32);
3986             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
3987             if (fi >= max) RRETURN(MATCH_NOMATCH);
3988             if (eptr >= md->end_subject)
3989             {
3990             SCHECK_PARTIAL();
3991             RRETURN(MATCH_NOMATCH);
3992             }
3993             GETCHARINC(d, eptr);
3994             if (fc == d) RRETURN(MATCH_NOMATCH);
3995             }
3996             }
3997             else
3998             #endif
3999             /* Not UTF mode */
4000             {
4001 0           for (fi = min;; fi++)
4002             {
4003 0           RMATCH(eptr, ecode, offset_top, md, eptrb, RM33);
4004 0 0         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4005 0 0         if (fi >= max) RRETURN(MATCH_NOMATCH);
4006 0 0         if (eptr >= md->end_subject)
4007             {
4008 0 0         SCHECK_PARTIAL();
    0          
    0          
4009 0           RRETURN(MATCH_NOMATCH);
4010             }
4011 0 0         if (fc == *eptr++) RRETURN(MATCH_NOMATCH);
4012 0           }
4013             }
4014             /* Control never gets here */
4015             }
4016              
4017             /* Maximize case */
4018              
4019             else
4020             {
4021 0           pp = eptr;
4022              
4023             #ifdef SUPPORT_UTF
4024             if (utf)
4025             {
4026             register pcre_uint32 d;
4027             for (i = min; i < max; i++)
4028             {
4029             int len = 1;
4030             if (eptr >= md->end_subject)
4031             {
4032             SCHECK_PARTIAL();
4033             break;
4034             }
4035             GETCHARLEN(d, eptr, len);
4036             if (fc == d) break;
4037             eptr += len;
4038             }
4039             if (possessive) continue; /* No backtracking */
4040             for(;;)
4041             {
4042             if (eptr <= pp) goto TAIL_RECURSE;
4043             RMATCH(eptr, ecode, offset_top, md, eptrb, RM34);
4044             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4045             eptr--;
4046             BACKCHAR(eptr);
4047             }
4048             }
4049             else
4050             #endif
4051             /* Not UTF mode */
4052             {
4053 0 0         for (i = min; i < max; i++)
4054             {
4055 0 0         if (eptr >= md->end_subject)
4056             {
4057 0 0         SCHECK_PARTIAL();
    0          
    0          
4058 0           break;
4059             }
4060 0 0         if (fc == *eptr) break;
4061 0           eptr++;
4062             }
4063 0 0         if (possessive) continue; /* No backtracking */
4064             for (;;)
4065             {
4066 0 0         if (eptr == pp) goto TAIL_RECURSE;
4067 0           RMATCH(eptr, ecode, offset_top, md, eptrb, RM35);
4068 0 0         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4069 0           eptr--;
4070 0           }
4071             }
4072             /* Control never gets here */
4073             }
4074             }
4075             /* Control never gets here */
4076              
4077             /* Match a single character type repeatedly; several different opcodes
4078             share code. This is very similar to the code for single characters, but we
4079             repeat it in the interests of efficiency. */
4080              
4081             case OP_TYPEEXACT:
4082 0           min = max = GET2(ecode, 1);
4083 0           minimize = TRUE;
4084 0           ecode += 1 + IMM2_SIZE;
4085 0           goto REPEATTYPE;
4086              
4087             case OP_TYPEUPTO:
4088             case OP_TYPEMINUPTO:
4089 0           min = 0;
4090 0           max = GET2(ecode, 1);
4091 0           minimize = *ecode == OP_TYPEMINUPTO;
4092 0           ecode += 1 + IMM2_SIZE;
4093 0           goto REPEATTYPE;
4094              
4095             case OP_TYPEPOSSTAR:
4096 0           possessive = TRUE;
4097 0           min = 0;
4098 0           max = INT_MAX;
4099 0           ecode++;
4100 0           goto REPEATTYPE;
4101              
4102             case OP_TYPEPOSPLUS:
4103 4           possessive = TRUE;
4104 4           min = 1;
4105 4           max = INT_MAX;
4106 4           ecode++;
4107 4           goto REPEATTYPE;
4108              
4109             case OP_TYPEPOSQUERY:
4110 0           possessive = TRUE;
4111 0           min = 0;
4112 0           max = 1;
4113 0           ecode++;
4114 0           goto REPEATTYPE;
4115              
4116             case OP_TYPEPOSUPTO:
4117 0           possessive = TRUE;
4118 0           min = 0;
4119 0           max = GET2(ecode, 1);
4120 0           ecode += 1 + IMM2_SIZE;
4121 0           goto REPEATTYPE;
4122              
4123             case OP_TYPESTAR:
4124             case OP_TYPEMINSTAR:
4125             case OP_TYPEPLUS:
4126             case OP_TYPEMINPLUS:
4127             case OP_TYPEQUERY:
4128             case OP_TYPEMINQUERY:
4129 10           c = *ecode++ - OP_TYPESTAR;
4130 10           minimize = (c & 1) != 0;
4131 10           min = rep_min[c]; /* Pick up values from tables; */
4132 10           max = rep_max[c]; /* zero for max => infinity */
4133 10 50         if (max == 0) max = INT_MAX;
4134              
4135             /* Common code for all repeated single character type matches. Note that
4136             in UTF-8 mode, '.' matches a character of any length, but for the other
4137             character types, the valid characters are all one-byte long. */
4138              
4139             REPEATTYPE:
4140 14           ctype = *ecode++; /* Code for the character type */
4141              
4142             #ifdef SUPPORT_UCP
4143             if (ctype == OP_PROP || ctype == OP_NOTPROP)
4144             {
4145             prop_fail_result = ctype == OP_NOTPROP;
4146             prop_type = *ecode++;
4147             prop_value = *ecode++;
4148             }
4149             else prop_type = -1;
4150             #endif
4151              
4152             /* First, ensure the minimum number of matches are present. Use inline
4153             code for maximizing the speed, and do the type test once at the start
4154             (i.e. keep it out of the loop). Separate the UTF-8 code completely as that
4155             is tidier. Also separate the UCP code, which can be the same for both UTF-8
4156             and single-bytes. */
4157              
4158 14 100         if (min > 0)
4159             {
4160             #ifdef SUPPORT_UCP
4161             if (prop_type >= 0)
4162             {
4163             switch(prop_type)
4164             {
4165             case PT_ANY:
4166             if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4167             for (i = 1; i <= min; i++)
4168             {
4169             if (eptr >= md->end_subject)
4170             {
4171             SCHECK_PARTIAL();
4172             RRETURN(MATCH_NOMATCH);
4173             }
4174             GETCHARINCTEST(c, eptr);
4175             }
4176             break;
4177              
4178             case PT_LAMP:
4179             for (i = 1; i <= min; i++)
4180             {
4181             int chartype;
4182             if (eptr >= md->end_subject)
4183             {
4184             SCHECK_PARTIAL();
4185             RRETURN(MATCH_NOMATCH);
4186             }
4187             GETCHARINCTEST(c, eptr);
4188             chartype = UCD_CHARTYPE(c);
4189             if ((chartype == ucp_Lu ||
4190             chartype == ucp_Ll ||
4191             chartype == ucp_Lt) == prop_fail_result)
4192             RRETURN(MATCH_NOMATCH);
4193             }
4194             break;
4195              
4196             case PT_GC:
4197             for (i = 1; i <= min; i++)
4198             {
4199             if (eptr >= md->end_subject)
4200             {
4201             SCHECK_PARTIAL();
4202             RRETURN(MATCH_NOMATCH);
4203             }
4204             GETCHARINCTEST(c, eptr);
4205             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4206             RRETURN(MATCH_NOMATCH);
4207             }
4208             break;
4209              
4210             case PT_PC:
4211             for (i = 1; i <= min; i++)
4212             {
4213             if (eptr >= md->end_subject)
4214             {
4215             SCHECK_PARTIAL();
4216             RRETURN(MATCH_NOMATCH);
4217             }
4218             GETCHARINCTEST(c, eptr);
4219             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4220             RRETURN(MATCH_NOMATCH);
4221             }
4222             break;
4223              
4224             case PT_SC:
4225             for (i = 1; i <= min; i++)
4226             {
4227             if (eptr >= md->end_subject)
4228             {
4229             SCHECK_PARTIAL();
4230             RRETURN(MATCH_NOMATCH);
4231             }
4232             GETCHARINCTEST(c, eptr);
4233             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4234             RRETURN(MATCH_NOMATCH);
4235             }
4236             break;
4237              
4238             case PT_ALNUM:
4239             for (i = 1; i <= min; i++)
4240             {
4241             int category;
4242             if (eptr >= md->end_subject)
4243             {
4244             SCHECK_PARTIAL();
4245             RRETURN(MATCH_NOMATCH);
4246             }
4247             GETCHARINCTEST(c, eptr);
4248             category = UCD_CATEGORY(c);
4249             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
4250             RRETURN(MATCH_NOMATCH);
4251             }
4252             break;
4253              
4254             /* Perl space used to exclude VT, but from Perl 5.18 it is included,
4255             which means that Perl space and POSIX space are now identical. PCRE
4256             was changed at release 8.34. */
4257              
4258             case PT_SPACE: /* Perl space */
4259             case PT_PXSPACE: /* POSIX space */
4260             for (i = 1; i <= min; i++)
4261             {
4262             if (eptr >= md->end_subject)
4263             {
4264             SCHECK_PARTIAL();
4265             RRETURN(MATCH_NOMATCH);
4266             }
4267             GETCHARINCTEST(c, eptr);
4268             switch(c)
4269             {
4270             HSPACE_CASES:
4271             VSPACE_CASES:
4272             if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4273             break;
4274              
4275             default:
4276             if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
4277             RRETURN(MATCH_NOMATCH);
4278             break;
4279             }
4280             }
4281             break;
4282              
4283             case PT_WORD:
4284             for (i = 1; i <= min; i++)
4285             {
4286             int category;
4287             if (eptr >= md->end_subject)
4288             {
4289             SCHECK_PARTIAL();
4290             RRETURN(MATCH_NOMATCH);
4291             }
4292             GETCHARINCTEST(c, eptr);
4293             category = UCD_CATEGORY(c);
4294             if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE)
4295             == prop_fail_result)
4296             RRETURN(MATCH_NOMATCH);
4297             }
4298             break;
4299              
4300             case PT_CLIST:
4301             for (i = 1; i <= min; i++)
4302             {
4303             const pcre_uint32 *cp;
4304             if (eptr >= md->end_subject)
4305             {
4306             SCHECK_PARTIAL();
4307             RRETURN(MATCH_NOMATCH);
4308             }
4309             GETCHARINCTEST(c, eptr);
4310             cp = PRIV(ucd_caseless_sets) + prop_value;
4311             for (;;)
4312             {
4313             if (c < *cp)
4314             { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
4315             if (c == *cp++)
4316             { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
4317             }
4318             }
4319             break;
4320              
4321             case PT_UCNC:
4322             for (i = 1; i <= min; i++)
4323             {
4324             if (eptr >= md->end_subject)
4325             {
4326             SCHECK_PARTIAL();
4327             RRETURN(MATCH_NOMATCH);
4328             }
4329             GETCHARINCTEST(c, eptr);
4330             if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
4331             c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
4332             c >= 0xe000) == prop_fail_result)
4333             RRETURN(MATCH_NOMATCH);
4334             }
4335             break;
4336              
4337             /* This should not occur */
4338              
4339             default:
4340             RRETURN(PCRE_ERROR_INTERNAL);
4341             }
4342             }
4343              
4344             /* Match extended Unicode sequences. We will get here only if the
4345             support is in the binary; otherwise a compile-time error occurs. */
4346              
4347             else if (ctype == OP_EXTUNI)
4348             {
4349             for (i = 1; i <= min; i++)
4350             {
4351             if (eptr >= md->end_subject)
4352             {
4353             SCHECK_PARTIAL();
4354             RRETURN(MATCH_NOMATCH);
4355             }
4356             else
4357             {
4358             int lgb, rgb;
4359             GETCHARINCTEST(c, eptr);
4360             lgb = UCD_GRAPHBREAK(c);
4361             while (eptr < md->end_subject)
4362             {
4363             int len = 1;
4364             if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
4365             rgb = UCD_GRAPHBREAK(c);
4366             if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
4367             lgb = rgb;
4368             eptr += len;
4369             }
4370             }
4371             CHECK_PARTIAL();
4372             }
4373             }
4374              
4375             else
4376             #endif /* SUPPORT_UCP */
4377              
4378             /* Handle all other cases when the coding is UTF-8 */
4379              
4380             #ifdef SUPPORT_UTF
4381             if (utf) switch(ctype)
4382             {
4383             case OP_ANY:
4384             for (i = 1; i <= min; i++)
4385             {
4386             if (eptr >= md->end_subject)
4387             {
4388             SCHECK_PARTIAL();
4389             RRETURN(MATCH_NOMATCH);
4390             }
4391             if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
4392             if (md->partial != 0 &&
4393             eptr + 1 >= md->end_subject &&
4394             NLBLOCK->nltype == NLTYPE_FIXED &&
4395             NLBLOCK->nllen == 2 &&
4396             UCHAR21(eptr) == NLBLOCK->nl[0])
4397             {
4398             md->hitend = TRUE;
4399             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
4400             }
4401             eptr++;
4402             ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4403             }
4404             break;
4405              
4406             case OP_ALLANY:
4407             for (i = 1; i <= min; i++)
4408             {
4409             if (eptr >= md->end_subject)
4410             {
4411             SCHECK_PARTIAL();
4412             RRETURN(MATCH_NOMATCH);
4413             }
4414             eptr++;
4415             ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4416             }
4417             break;
4418              
4419             case OP_ANYBYTE:
4420             if (eptr > md->end_subject - min) RRETURN(MATCH_NOMATCH);
4421             eptr += min;
4422             break;
4423              
4424             case OP_ANYNL:
4425             for (i = 1; i <= min; i++)
4426             {
4427             if (eptr >= md->end_subject)
4428             {
4429             SCHECK_PARTIAL();
4430             RRETURN(MATCH_NOMATCH);
4431             }
4432             GETCHARINC(c, eptr);
4433             switch(c)
4434             {
4435             default: RRETURN(MATCH_NOMATCH);
4436              
4437             case CHAR_CR:
4438             if (eptr < md->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++;
4439             break;
4440              
4441             case CHAR_LF:
4442             break;
4443              
4444             case CHAR_VT:
4445             case CHAR_FF:
4446             case CHAR_NEL:
4447             #ifndef EBCDIC
4448             case 0x2028:
4449             case 0x2029:
4450             #endif /* Not EBCDIC */
4451             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4452             break;
4453             }
4454             }
4455             break;
4456              
4457             case OP_NOT_HSPACE:
4458             for (i = 1; i <= min; i++)
4459             {
4460             if (eptr >= md->end_subject)
4461             {
4462             SCHECK_PARTIAL();
4463             RRETURN(MATCH_NOMATCH);
4464             }
4465             GETCHARINC(c, eptr);
4466             switch(c)
4467             {
4468             HSPACE_CASES: RRETURN(MATCH_NOMATCH); /* Byte and multibyte cases */
4469             default: break;
4470             }
4471             }
4472             break;
4473              
4474             case OP_HSPACE:
4475             for (i = 1; i <= min; i++)
4476             {
4477             if (eptr >= md->end_subject)
4478             {
4479             SCHECK_PARTIAL();
4480             RRETURN(MATCH_NOMATCH);
4481             }
4482             GETCHARINC(c, eptr);
4483             switch(c)
4484             {
4485             HSPACE_CASES: break; /* Byte and multibyte cases */
4486             default: RRETURN(MATCH_NOMATCH);
4487             }
4488             }
4489             break;
4490              
4491             case OP_NOT_VSPACE:
4492             for (i = 1; i <= min; i++)
4493             {
4494             if (eptr >= md->end_subject)
4495             {
4496             SCHECK_PARTIAL();
4497             RRETURN(MATCH_NOMATCH);
4498             }
4499             GETCHARINC(c, eptr);
4500             switch(c)
4501             {
4502             VSPACE_CASES: RRETURN(MATCH_NOMATCH);
4503             default: break;
4504             }
4505             }
4506             break;
4507              
4508             case OP_VSPACE:
4509             for (i = 1; i <= min; i++)
4510             {
4511             if (eptr >= md->end_subject)
4512             {
4513             SCHECK_PARTIAL();
4514             RRETURN(MATCH_NOMATCH);
4515             }
4516             GETCHARINC(c, eptr);
4517             switch(c)
4518             {
4519             VSPACE_CASES: break;
4520             default: RRETURN(MATCH_NOMATCH);
4521             }
4522             }
4523             break;
4524              
4525             case OP_NOT_DIGIT:
4526             for (i = 1; i <= min; i++)
4527             {
4528             if (eptr >= md->end_subject)
4529             {
4530             SCHECK_PARTIAL();
4531             RRETURN(MATCH_NOMATCH);
4532             }
4533             GETCHARINC(c, eptr);
4534             if (c < 128 && (md->ctypes[c] & ctype_digit) != 0)
4535             RRETURN(MATCH_NOMATCH);
4536             }
4537             break;
4538              
4539             case OP_DIGIT:
4540             for (i = 1; i <= min; i++)
4541             {
4542             pcre_uint32 cc;
4543             if (eptr >= md->end_subject)
4544             {
4545             SCHECK_PARTIAL();
4546             RRETURN(MATCH_NOMATCH);
4547             }
4548             cc = UCHAR21(eptr);
4549             if (cc >= 128 || (md->ctypes[cc] & ctype_digit) == 0)
4550             RRETURN(MATCH_NOMATCH);
4551             eptr++;
4552             /* No need to skip more bytes - we know it's a 1-byte character */
4553             }
4554             break;
4555              
4556             case OP_NOT_WHITESPACE:
4557             for (i = 1; i <= min; i++)
4558             {
4559             pcre_uint32 cc;
4560             if (eptr >= md->end_subject)
4561             {
4562             SCHECK_PARTIAL();
4563             RRETURN(MATCH_NOMATCH);
4564             }
4565             cc = UCHAR21(eptr);
4566             if (cc < 128 && (md->ctypes[cc] & ctype_space) != 0)
4567             RRETURN(MATCH_NOMATCH);
4568             eptr++;
4569             ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4570             }
4571             break;
4572              
4573             case OP_WHITESPACE:
4574             for (i = 1; i <= min; i++)
4575             {
4576             pcre_uint32 cc;
4577             if (eptr >= md->end_subject)
4578             {
4579             SCHECK_PARTIAL();
4580             RRETURN(MATCH_NOMATCH);
4581             }
4582             cc = UCHAR21(eptr);
4583             if (cc >= 128 || (md->ctypes[cc] & ctype_space) == 0)
4584             RRETURN(MATCH_NOMATCH);
4585             eptr++;
4586             /* No need to skip more bytes - we know it's a 1-byte character */
4587             }
4588             break;
4589              
4590             case OP_NOT_WORDCHAR:
4591             for (i = 1; i <= min; i++)
4592             {
4593             pcre_uint32 cc;
4594             if (eptr >= md->end_subject)
4595             {
4596             SCHECK_PARTIAL();
4597             RRETURN(MATCH_NOMATCH);
4598             }
4599             cc = UCHAR21(eptr);
4600             if (cc < 128 && (md->ctypes[cc] & ctype_word) != 0)
4601             RRETURN(MATCH_NOMATCH);
4602             eptr++;
4603             ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
4604             }
4605             break;
4606              
4607             case OP_WORDCHAR:
4608             for (i = 1; i <= min; i++)
4609             {
4610             pcre_uint32 cc;
4611             if (eptr >= md->end_subject)
4612             {
4613             SCHECK_PARTIAL();
4614             RRETURN(MATCH_NOMATCH);
4615             }
4616             cc = UCHAR21(eptr);
4617             if (cc >= 128 || (md->ctypes[cc] & ctype_word) == 0)
4618             RRETURN(MATCH_NOMATCH);
4619             eptr++;
4620             /* No need to skip more bytes - we know it's a 1-byte character */
4621             }
4622             break;
4623              
4624             default:
4625             RRETURN(PCRE_ERROR_INTERNAL);
4626             } /* End switch(ctype) */
4627              
4628             else
4629             #endif /* SUPPORT_UTF */
4630              
4631             /* Code for the non-UTF-8 case for minimum matching of operators other
4632             than OP_PROP and OP_NOTPROP. */
4633              
4634 4           switch(ctype)
4635             {
4636             case OP_ANY:
4637 8 100         for (i = 1; i <= min; i++)
4638             {
4639 4 50         if (eptr >= md->end_subject)
4640             {
4641 0 0         SCHECK_PARTIAL();
    0          
    0          
4642 0           RRETURN(MATCH_NOMATCH);
4643             }
4644 4 50         if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH);
    50          
    50          
    0          
    0          
    0          
    0          
    50          
4645 4 50         if (md->partial != 0 &&
    0          
4646 0 0         eptr + 1 >= md->end_subject &&
4647 0 0         NLBLOCK->nltype == NLTYPE_FIXED &&
4648 0 0         NLBLOCK->nllen == 2 &&
4649 0           *eptr == NLBLOCK->nl[0])
4650             {
4651 0           md->hitend = TRUE;
4652 0 0         if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
4653             }
4654 4           eptr++;
4655             }
4656 4           break;
4657              
4658             case OP_ALLANY:
4659 0 0         if (eptr > md->end_subject - min)
4660             {
4661 0 0         SCHECK_PARTIAL();
    0          
    0          
4662 0           RRETURN(MATCH_NOMATCH);
4663             }
4664 0           eptr += min;
4665 0           break;
4666              
4667             case OP_ANYBYTE:
4668 0 0         if (eptr > md->end_subject - min)
4669             {
4670 0 0         SCHECK_PARTIAL();
    0          
    0          
4671 0           RRETURN(MATCH_NOMATCH);
4672             }
4673 0           eptr += min;
4674 0           break;
4675              
4676             case OP_ANYNL:
4677 0 0         for (i = 1; i <= min; i++)
4678             {
4679 0 0         if (eptr >= md->end_subject)
4680             {
4681 0 0         SCHECK_PARTIAL();
    0          
    0          
4682 0           RRETURN(MATCH_NOMATCH);
4683             }
4684 0           switch(*eptr++)
4685             {
4686 0           default: RRETURN(MATCH_NOMATCH);
4687              
4688             case CHAR_CR:
4689 0 0         if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
    0          
4690 0           break;
4691              
4692             case CHAR_LF:
4693 0           break;
4694              
4695             case CHAR_VT:
4696             case CHAR_FF:
4697             case CHAR_NEL:
4698             #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4699             case 0x2028:
4700             case 0x2029:
4701             #endif
4702 0 0         if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
4703 0           break;
4704             }
4705             }
4706 0           break;
4707              
4708             case OP_NOT_HSPACE:
4709 0 0         for (i = 1; i <= min; i++)
4710             {
4711 0 0         if (eptr >= md->end_subject)
4712             {
4713 0 0         SCHECK_PARTIAL();
    0          
    0          
4714 0           RRETURN(MATCH_NOMATCH);
4715             }
4716 0 0         switch(*eptr++)
4717             {
4718 0           default: break;
4719             HSPACE_BYTE_CASES:
4720             #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4721             HSPACE_MULTIBYTE_CASES:
4722             #endif
4723 0           RRETURN(MATCH_NOMATCH);
4724             }
4725             }
4726 0           break;
4727              
4728             case OP_HSPACE:
4729 0 0         for (i = 1; i <= min; i++)
4730             {
4731 0 0         if (eptr >= md->end_subject)
4732             {
4733 0 0         SCHECK_PARTIAL();
    0          
    0          
4734 0           RRETURN(MATCH_NOMATCH);
4735             }
4736 0 0         switch(*eptr++)
4737             {
4738 0           default: RRETURN(MATCH_NOMATCH);
4739             HSPACE_BYTE_CASES:
4740             #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4741             HSPACE_MULTIBYTE_CASES:
4742             #endif
4743 0           break;
4744             }
4745             }
4746 0           break;
4747              
4748             case OP_NOT_VSPACE:
4749 0 0         for (i = 1; i <= min; i++)
4750             {
4751 0 0         if (eptr >= md->end_subject)
4752             {
4753 0 0         SCHECK_PARTIAL();
    0          
    0          
4754 0           RRETURN(MATCH_NOMATCH);
4755             }
4756 0 0         switch(*eptr++)
4757             {
4758             VSPACE_BYTE_CASES:
4759             #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4760             VSPACE_MULTIBYTE_CASES:
4761             #endif
4762 0           RRETURN(MATCH_NOMATCH);
4763 0           default: break;
4764             }
4765             }
4766 0           break;
4767              
4768             case OP_VSPACE:
4769 0 0         for (i = 1; i <= min; i++)
4770             {
4771 0 0         if (eptr >= md->end_subject)
4772             {
4773 0 0         SCHECK_PARTIAL();
    0          
    0          
4774 0           RRETURN(MATCH_NOMATCH);
4775             }
4776 0 0         switch(*eptr++)
4777             {
4778 0           default: RRETURN(MATCH_NOMATCH);
4779             VSPACE_BYTE_CASES:
4780             #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
4781             VSPACE_MULTIBYTE_CASES:
4782             #endif
4783 0           break;
4784             }
4785             }
4786 0           break;
4787              
4788             case OP_NOT_DIGIT:
4789 0 0         for (i = 1; i <= min; i++)
4790             {
4791 0 0         if (eptr >= md->end_subject)
4792             {
4793 0 0         SCHECK_PARTIAL();
    0          
    0          
4794 0           RRETURN(MATCH_NOMATCH);
4795             }
4796 0 0         if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0)
4797 0           RRETURN(MATCH_NOMATCH);
4798 0           eptr++;
4799             }
4800 0           break;
4801              
4802             case OP_DIGIT:
4803 0 0         for (i = 1; i <= min; i++)
4804             {
4805 0 0         if (eptr >= md->end_subject)
4806             {
4807 0 0         SCHECK_PARTIAL();
    0          
    0          
4808 0           RRETURN(MATCH_NOMATCH);
4809             }
4810 0 0         if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0)
4811 0           RRETURN(MATCH_NOMATCH);
4812 0           eptr++;
4813             }
4814 0           break;
4815              
4816             case OP_NOT_WHITESPACE:
4817 0 0         for (i = 1; i <= min; i++)
4818             {
4819 0 0         if (eptr >= md->end_subject)
4820             {
4821 0 0         SCHECK_PARTIAL();
    0          
    0          
4822 0           RRETURN(MATCH_NOMATCH);
4823             }
4824 0 0         if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0)
4825 0           RRETURN(MATCH_NOMATCH);
4826 0           eptr++;
4827             }
4828 0           break;
4829              
4830             case OP_WHITESPACE:
4831 0 0         for (i = 1; i <= min; i++)
4832             {
4833 0 0         if (eptr >= md->end_subject)
4834             {
4835 0 0         SCHECK_PARTIAL();
    0          
    0          
4836 0           RRETURN(MATCH_NOMATCH);
4837             }
4838 0 0         if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0)
4839 0           RRETURN(MATCH_NOMATCH);
4840 0           eptr++;
4841             }
4842 0           break;
4843              
4844             case OP_NOT_WORDCHAR:
4845 0 0         for (i = 1; i <= min; i++)
4846             {
4847 0 0         if (eptr >= md->end_subject)
4848             {
4849 0 0         SCHECK_PARTIAL();
    0          
    0          
4850 0           RRETURN(MATCH_NOMATCH);
4851             }
4852 0 0         if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0)
4853 0           RRETURN(MATCH_NOMATCH);
4854 0           eptr++;
4855             }
4856 0           break;
4857              
4858             case OP_WORDCHAR:
4859 0 0         for (i = 1; i <= min; i++)
4860             {
4861 0 0         if (eptr >= md->end_subject)
4862             {
4863 0 0         SCHECK_PARTIAL();
    0          
    0          
4864 0           RRETURN(MATCH_NOMATCH);
4865             }
4866 0 0         if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0)
4867 0           RRETURN(MATCH_NOMATCH);
4868 0           eptr++;
4869             }
4870 0           break;
4871              
4872             default:
4873 0           RRETURN(PCRE_ERROR_INTERNAL);
4874             }
4875             }
4876              
4877             /* If min = max, continue at the same level without recursing */
4878              
4879 14 50         if (min == max) continue;
4880              
4881             /* If minimizing, we have to test the rest of the pattern before each
4882             subsequent match. Again, separate the UTF-8 case for speed, and also
4883             separate the UCP cases. */
4884              
4885 14 50         if (minimize)
4886             {
4887             #ifdef SUPPORT_UCP
4888             if (prop_type >= 0)
4889             {
4890             switch(prop_type)
4891             {
4892             case PT_ANY:
4893             for (fi = min;; fi++)
4894             {
4895             RMATCH(eptr, ecode, offset_top, md, eptrb, RM36);
4896             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4897             if (fi >= max) RRETURN(MATCH_NOMATCH);
4898             if (eptr >= md->end_subject)
4899             {
4900             SCHECK_PARTIAL();
4901             RRETURN(MATCH_NOMATCH);
4902             }
4903             GETCHARINCTEST(c, eptr);
4904             if (prop_fail_result) RRETURN(MATCH_NOMATCH);
4905             }
4906             /* Control never gets here */
4907              
4908             case PT_LAMP:
4909             for (fi = min;; fi++)
4910             {
4911             int chartype;
4912             RMATCH(eptr, ecode, offset_top, md, eptrb, RM37);
4913             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4914             if (fi >= max) RRETURN(MATCH_NOMATCH);
4915             if (eptr >= md->end_subject)
4916             {
4917             SCHECK_PARTIAL();
4918             RRETURN(MATCH_NOMATCH);
4919             }
4920             GETCHARINCTEST(c, eptr);
4921             chartype = UCD_CHARTYPE(c);
4922             if ((chartype == ucp_Lu ||
4923             chartype == ucp_Ll ||
4924             chartype == ucp_Lt) == prop_fail_result)
4925             RRETURN(MATCH_NOMATCH);
4926             }
4927             /* Control never gets here */
4928              
4929             case PT_GC:
4930             for (fi = min;; fi++)
4931             {
4932             RMATCH(eptr, ecode, offset_top, md, eptrb, RM38);
4933             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4934             if (fi >= max) RRETURN(MATCH_NOMATCH);
4935             if (eptr >= md->end_subject)
4936             {
4937             SCHECK_PARTIAL();
4938             RRETURN(MATCH_NOMATCH);
4939             }
4940             GETCHARINCTEST(c, eptr);
4941             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result)
4942             RRETURN(MATCH_NOMATCH);
4943             }
4944             /* Control never gets here */
4945              
4946             case PT_PC:
4947             for (fi = min;; fi++)
4948             {
4949             RMATCH(eptr, ecode, offset_top, md, eptrb, RM39);
4950             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4951             if (fi >= max) RRETURN(MATCH_NOMATCH);
4952             if (eptr >= md->end_subject)
4953             {
4954             SCHECK_PARTIAL();
4955             RRETURN(MATCH_NOMATCH);
4956             }
4957             GETCHARINCTEST(c, eptr);
4958             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result)
4959             RRETURN(MATCH_NOMATCH);
4960             }
4961             /* Control never gets here */
4962              
4963             case PT_SC:
4964             for (fi = min;; fi++)
4965             {
4966             RMATCH(eptr, ecode, offset_top, md, eptrb, RM40);
4967             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4968             if (fi >= max) RRETURN(MATCH_NOMATCH);
4969             if (eptr >= md->end_subject)
4970             {
4971             SCHECK_PARTIAL();
4972             RRETURN(MATCH_NOMATCH);
4973             }
4974             GETCHARINCTEST(c, eptr);
4975             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result)
4976             RRETURN(MATCH_NOMATCH);
4977             }
4978             /* Control never gets here */
4979              
4980             case PT_ALNUM:
4981             for (fi = min;; fi++)
4982             {
4983             int category;
4984             RMATCH(eptr, ecode, offset_top, md, eptrb, RM59);
4985             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
4986             if (fi >= max) RRETURN(MATCH_NOMATCH);
4987             if (eptr >= md->end_subject)
4988             {
4989             SCHECK_PARTIAL();
4990             RRETURN(MATCH_NOMATCH);
4991             }
4992             GETCHARINCTEST(c, eptr);
4993             category = UCD_CATEGORY(c);
4994             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
4995             RRETURN(MATCH_NOMATCH);
4996             }
4997             /* Control never gets here */
4998              
4999             /* Perl space used to exclude VT, but from Perl 5.18 it is included,
5000             which means that Perl space and POSIX space are now identical. PCRE
5001             was changed at release 8.34. */
5002              
5003             case PT_SPACE: /* Perl space */
5004             case PT_PXSPACE: /* POSIX space */
5005             for (fi = min;; fi++)
5006             {
5007             RMATCH(eptr, ecode, offset_top, md, eptrb, RM61);
5008             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5009             if (fi >= max) RRETURN(MATCH_NOMATCH);
5010             if (eptr >= md->end_subject)
5011             {
5012             SCHECK_PARTIAL();
5013             RRETURN(MATCH_NOMATCH);
5014             }
5015             GETCHARINCTEST(c, eptr);
5016             switch(c)
5017             {
5018             HSPACE_CASES:
5019             VSPACE_CASES:
5020             if (prop_fail_result) RRETURN(MATCH_NOMATCH);
5021             break;
5022              
5023             default:
5024             if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
5025             RRETURN(MATCH_NOMATCH);
5026             break;
5027             }
5028             }
5029             /* Control never gets here */
5030              
5031             case PT_WORD:
5032             for (fi = min;; fi++)
5033             {
5034             int category;
5035             RMATCH(eptr, ecode, offset_top, md, eptrb, RM62);
5036             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5037             if (fi >= max) RRETURN(MATCH_NOMATCH);
5038             if (eptr >= md->end_subject)
5039             {
5040             SCHECK_PARTIAL();
5041             RRETURN(MATCH_NOMATCH);
5042             }
5043             GETCHARINCTEST(c, eptr);
5044             category = UCD_CATEGORY(c);
5045             if ((category == ucp_L ||
5046             category == ucp_N ||
5047             c == CHAR_UNDERSCORE)
5048             == prop_fail_result)
5049             RRETURN(MATCH_NOMATCH);
5050             }
5051             /* Control never gets here */
5052              
5053             case PT_CLIST:
5054             for (fi = min;; fi++)
5055             {
5056             const pcre_uint32 *cp;
5057             RMATCH(eptr, ecode, offset_top, md, eptrb, RM67);
5058             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5059             if (fi >= max) RRETURN(MATCH_NOMATCH);
5060             if (eptr >= md->end_subject)
5061             {
5062             SCHECK_PARTIAL();
5063             RRETURN(MATCH_NOMATCH);
5064             }
5065             GETCHARINCTEST(c, eptr);
5066             cp = PRIV(ucd_caseless_sets) + prop_value;
5067             for (;;)
5068             {
5069             if (c < *cp)
5070             { if (prop_fail_result) break; else { RRETURN(MATCH_NOMATCH); } }
5071             if (c == *cp++)
5072             { if (prop_fail_result) { RRETURN(MATCH_NOMATCH); } else break; }
5073             }
5074             }
5075             /* Control never gets here */
5076              
5077             case PT_UCNC:
5078             for (fi = min;; fi++)
5079             {
5080             RMATCH(eptr, ecode, offset_top, md, eptrb, RM60);
5081             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5082             if (fi >= max) RRETURN(MATCH_NOMATCH);
5083             if (eptr >= md->end_subject)
5084             {
5085             SCHECK_PARTIAL();
5086             RRETURN(MATCH_NOMATCH);
5087             }
5088             GETCHARINCTEST(c, eptr);
5089             if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
5090             c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
5091             c >= 0xe000) == prop_fail_result)
5092             RRETURN(MATCH_NOMATCH);
5093             }
5094             /* Control never gets here */
5095              
5096             /* This should never occur */
5097             default:
5098             RRETURN(PCRE_ERROR_INTERNAL);
5099             }
5100             }
5101              
5102             /* Match extended Unicode sequences. We will get here only if the
5103             support is in the binary; otherwise a compile-time error occurs. */
5104              
5105             else if (ctype == OP_EXTUNI)
5106             {
5107             for (fi = min;; fi++)
5108             {
5109             RMATCH(eptr, ecode, offset_top, md, eptrb, RM41);
5110             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5111             if (fi >= max) RRETURN(MATCH_NOMATCH);
5112             if (eptr >= md->end_subject)
5113             {
5114             SCHECK_PARTIAL();
5115             RRETURN(MATCH_NOMATCH);
5116             }
5117             else
5118             {
5119             int lgb, rgb;
5120             GETCHARINCTEST(c, eptr);
5121             lgb = UCD_GRAPHBREAK(c);
5122             while (eptr < md->end_subject)
5123             {
5124             int len = 1;
5125             if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5126             rgb = UCD_GRAPHBREAK(c);
5127             if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
5128             lgb = rgb;
5129             eptr += len;
5130             }
5131             }
5132             CHECK_PARTIAL();
5133             }
5134             }
5135             else
5136             #endif /* SUPPORT_UCP */
5137              
5138             #ifdef SUPPORT_UTF
5139             if (utf)
5140             {
5141             for (fi = min;; fi++)
5142             {
5143             RMATCH(eptr, ecode, offset_top, md, eptrb, RM42);
5144             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5145             if (fi >= max) RRETURN(MATCH_NOMATCH);
5146             if (eptr >= md->end_subject)
5147             {
5148             SCHECK_PARTIAL();
5149             RRETURN(MATCH_NOMATCH);
5150             }
5151             if (ctype == OP_ANY && IS_NEWLINE(eptr))
5152             RRETURN(MATCH_NOMATCH);
5153             GETCHARINC(c, eptr);
5154             switch(ctype)
5155             {
5156             case OP_ANY: /* This is the non-NL case */
5157             if (md->partial != 0 && /* Take care with CRLF partial */
5158             eptr >= md->end_subject &&
5159             NLBLOCK->nltype == NLTYPE_FIXED &&
5160             NLBLOCK->nllen == 2 &&
5161             c == NLBLOCK->nl[0])
5162             {
5163             md->hitend = TRUE;
5164             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5165             }
5166             break;
5167              
5168             case OP_ALLANY:
5169             case OP_ANYBYTE:
5170             break;
5171              
5172             case OP_ANYNL:
5173             switch(c)
5174             {
5175             default: RRETURN(MATCH_NOMATCH);
5176             case CHAR_CR:
5177             if (eptr < md->end_subject && UCHAR21(eptr) == CHAR_LF) eptr++;
5178             break;
5179              
5180             case CHAR_LF:
5181             break;
5182              
5183             case CHAR_VT:
5184             case CHAR_FF:
5185             case CHAR_NEL:
5186             #ifndef EBCDIC
5187             case 0x2028:
5188             case 0x2029:
5189             #endif /* Not EBCDIC */
5190             if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5191             break;
5192             }
5193             break;
5194              
5195             case OP_NOT_HSPACE:
5196             switch(c)
5197             {
5198             HSPACE_CASES: RRETURN(MATCH_NOMATCH);
5199             default: break;
5200             }
5201             break;
5202              
5203             case OP_HSPACE:
5204             switch(c)
5205             {
5206             HSPACE_CASES: break;
5207             default: RRETURN(MATCH_NOMATCH);
5208             }
5209             break;
5210              
5211             case OP_NOT_VSPACE:
5212             switch(c)
5213             {
5214             VSPACE_CASES: RRETURN(MATCH_NOMATCH);
5215             default: break;
5216             }
5217             break;
5218              
5219             case OP_VSPACE:
5220             switch(c)
5221             {
5222             VSPACE_CASES: break;
5223             default: RRETURN(MATCH_NOMATCH);
5224             }
5225             break;
5226              
5227             case OP_NOT_DIGIT:
5228             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0)
5229             RRETURN(MATCH_NOMATCH);
5230             break;
5231              
5232             case OP_DIGIT:
5233             if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0)
5234             RRETURN(MATCH_NOMATCH);
5235             break;
5236              
5237             case OP_NOT_WHITESPACE:
5238             if (c < 256 && (md->ctypes[c] & ctype_space) != 0)
5239             RRETURN(MATCH_NOMATCH);
5240             break;
5241              
5242             case OP_WHITESPACE:
5243             if (c >= 256 || (md->ctypes[c] & ctype_space) == 0)
5244             RRETURN(MATCH_NOMATCH);
5245             break;
5246              
5247             case OP_NOT_WORDCHAR:
5248             if (c < 256 && (md->ctypes[c] & ctype_word) != 0)
5249             RRETURN(MATCH_NOMATCH);
5250             break;
5251              
5252             case OP_WORDCHAR:
5253             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0)
5254             RRETURN(MATCH_NOMATCH);
5255             break;
5256              
5257             default:
5258             RRETURN(PCRE_ERROR_INTERNAL);
5259             }
5260             }
5261             }
5262             else
5263             #endif
5264             /* Not UTF mode */
5265             {
5266 0           for (fi = min;; fi++)
5267             {
5268 0           RMATCH(eptr, ecode, offset_top, md, eptrb, RM43);
5269 0 0         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5270 0 0         if (fi >= max) RRETURN(MATCH_NOMATCH);
5271 0 0         if (eptr >= md->end_subject)
5272             {
5273 0 0         SCHECK_PARTIAL();
    0          
    0          
5274 0           RRETURN(MATCH_NOMATCH);
5275             }
5276 0 0         if (ctype == OP_ANY && IS_NEWLINE(eptr))
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
5277 0           RRETURN(MATCH_NOMATCH);
5278 0           c = *eptr++;
5279 0           switch(ctype)
5280             {
5281             case OP_ANY: /* This is the non-NL case */
5282 0 0         if (md->partial != 0 && /* Take care with CRLF partial */
    0          
5283 0 0         eptr >= md->end_subject &&
5284 0 0         NLBLOCK->nltype == NLTYPE_FIXED &&
5285 0 0         NLBLOCK->nllen == 2 &&
5286 0           c == NLBLOCK->nl[0])
5287             {
5288 0           md->hitend = TRUE;
5289 0 0         if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5290             }
5291 0           break;
5292              
5293             case OP_ALLANY:
5294             case OP_ANYBYTE:
5295 0           break;
5296              
5297             case OP_ANYNL:
5298 0           switch(c)
5299             {
5300 0           default: RRETURN(MATCH_NOMATCH);
5301             case CHAR_CR:
5302 0 0         if (eptr < md->end_subject && *eptr == CHAR_LF) eptr++;
    0          
5303 0           break;
5304              
5305             case CHAR_LF:
5306 0           break;
5307              
5308             case CHAR_VT:
5309             case CHAR_FF:
5310             case CHAR_NEL:
5311             #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5312             case 0x2028:
5313             case 0x2029:
5314             #endif
5315 0 0         if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH);
5316 0           break;
5317             }
5318 0           break;
5319              
5320             case OP_NOT_HSPACE:
5321 0 0         switch(c)
5322             {
5323 0           default: break;
5324             HSPACE_BYTE_CASES:
5325             #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5326             HSPACE_MULTIBYTE_CASES:
5327             #endif
5328 0           RRETURN(MATCH_NOMATCH);
5329             }
5330 0           break;
5331              
5332             case OP_HSPACE:
5333 0 0         switch(c)
5334             {
5335 0           default: RRETURN(MATCH_NOMATCH);
5336             HSPACE_BYTE_CASES:
5337             #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5338             HSPACE_MULTIBYTE_CASES:
5339             #endif
5340 0           break;
5341             }
5342 0           break;
5343              
5344             case OP_NOT_VSPACE:
5345 0 0         switch(c)
5346             {
5347 0           default: break;
5348             VSPACE_BYTE_CASES:
5349             #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5350             VSPACE_MULTIBYTE_CASES:
5351             #endif
5352 0           RRETURN(MATCH_NOMATCH);
5353             }
5354 0           break;
5355              
5356             case OP_VSPACE:
5357 0 0         switch(c)
5358             {
5359 0           default: RRETURN(MATCH_NOMATCH);
5360             VSPACE_BYTE_CASES:
5361             #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
5362             VSPACE_MULTIBYTE_CASES:
5363             #endif
5364 0           break;
5365             }
5366 0           break;
5367              
5368             case OP_NOT_DIGIT:
5369 0 0         if (MAX_255(c) && (md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH);
5370 0           break;
5371              
5372             case OP_DIGIT:
5373 0 0         if (!MAX_255(c) || (md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH);
5374 0           break;
5375              
5376             case OP_NOT_WHITESPACE:
5377 0 0         if (MAX_255(c) && (md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH);
5378 0           break;
5379              
5380             case OP_WHITESPACE:
5381 0 0         if (!MAX_255(c) || (md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH);
5382 0           break;
5383              
5384             case OP_NOT_WORDCHAR:
5385 0 0         if (MAX_255(c) && (md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH);
5386 0           break;
5387              
5388             case OP_WORDCHAR:
5389 0 0         if (!MAX_255(c) || (md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH);
5390 0           break;
5391              
5392             default:
5393 0           RRETURN(PCRE_ERROR_INTERNAL);
5394             }
5395 0           }
5396             }
5397             /* Control never gets here */
5398             }
5399              
5400             /* If maximizing, it is worth using inline code for speed, doing the type
5401             test once at the start (i.e. keep it out of the loop). Again, keep the
5402             UTF-8 and UCP stuff separate. */
5403              
5404             else
5405             {
5406 14           pp = eptr; /* Remember where we started */
5407              
5408             #ifdef SUPPORT_UCP
5409             if (prop_type >= 0)
5410             {
5411             switch(prop_type)
5412             {
5413             case PT_ANY:
5414             for (i = min; i < max; i++)
5415             {
5416             int len = 1;
5417             if (eptr >= md->end_subject)
5418             {
5419             SCHECK_PARTIAL();
5420             break;
5421             }
5422             GETCHARLENTEST(c, eptr, len);
5423             if (prop_fail_result) break;
5424             eptr+= len;
5425             }
5426             break;
5427              
5428             case PT_LAMP:
5429             for (i = min; i < max; i++)
5430             {
5431             int chartype;
5432             int len = 1;
5433             if (eptr >= md->end_subject)
5434             {
5435             SCHECK_PARTIAL();
5436             break;
5437             }
5438             GETCHARLENTEST(c, eptr, len);
5439             chartype = UCD_CHARTYPE(c);
5440             if ((chartype == ucp_Lu ||
5441             chartype == ucp_Ll ||
5442             chartype == ucp_Lt) == prop_fail_result)
5443             break;
5444             eptr+= len;
5445             }
5446             break;
5447              
5448             case PT_GC:
5449             for (i = min; i < max; i++)
5450             {
5451             int len = 1;
5452             if (eptr >= md->end_subject)
5453             {
5454             SCHECK_PARTIAL();
5455             break;
5456             }
5457             GETCHARLENTEST(c, eptr, len);
5458             if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) break;
5459             eptr+= len;
5460             }
5461             break;
5462              
5463             case PT_PC:
5464             for (i = min; i < max; i++)
5465             {
5466             int len = 1;
5467             if (eptr >= md->end_subject)
5468             {
5469             SCHECK_PARTIAL();
5470             break;
5471             }
5472             GETCHARLENTEST(c, eptr, len);
5473             if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) break;
5474             eptr+= len;
5475             }
5476             break;
5477              
5478             case PT_SC:
5479             for (i = min; i < max; i++)
5480             {
5481             int len = 1;
5482             if (eptr >= md->end_subject)
5483             {
5484             SCHECK_PARTIAL();
5485             break;
5486             }
5487             GETCHARLENTEST(c, eptr, len);
5488             if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) break;
5489             eptr+= len;
5490             }
5491             break;
5492              
5493             case PT_ALNUM:
5494             for (i = min; i < max; i++)
5495             {
5496             int category;
5497             int len = 1;
5498             if (eptr >= md->end_subject)
5499             {
5500             SCHECK_PARTIAL();
5501             break;
5502             }
5503             GETCHARLENTEST(c, eptr, len);
5504             category = UCD_CATEGORY(c);
5505             if ((category == ucp_L || category == ucp_N) == prop_fail_result)
5506             break;
5507             eptr+= len;
5508             }
5509             break;
5510              
5511             /* Perl space used to exclude VT, but from Perl 5.18 it is included,
5512             which means that Perl space and POSIX space are now identical. PCRE
5513             was changed at release 8.34. */
5514              
5515             case PT_SPACE: /* Perl space */
5516             case PT_PXSPACE: /* POSIX space */
5517             for (i = min; i < max; i++)
5518             {
5519             int len = 1;
5520             if (eptr >= md->end_subject)
5521             {
5522             SCHECK_PARTIAL();
5523             break;
5524             }
5525             GETCHARLENTEST(c, eptr, len);
5526             switch(c)
5527             {
5528             HSPACE_CASES:
5529             VSPACE_CASES:
5530             if (prop_fail_result) goto ENDLOOP99; /* Break the loop */
5531             break;
5532              
5533             default:
5534             if ((UCD_CATEGORY(c) == ucp_Z) == prop_fail_result)
5535             goto ENDLOOP99; /* Break the loop */
5536             break;
5537             }
5538             eptr+= len;
5539             }
5540             ENDLOOP99:
5541             break;
5542              
5543             case PT_WORD:
5544             for (i = min; i < max; i++)
5545             {
5546             int category;
5547             int len = 1;
5548             if (eptr >= md->end_subject)
5549             {
5550             SCHECK_PARTIAL();
5551             break;
5552             }
5553             GETCHARLENTEST(c, eptr, len);
5554             category = UCD_CATEGORY(c);
5555             if ((category == ucp_L || category == ucp_N ||
5556             c == CHAR_UNDERSCORE) == prop_fail_result)
5557             break;
5558             eptr+= len;
5559             }
5560             break;
5561              
5562             case PT_CLIST:
5563             for (i = min; i < max; i++)
5564             {
5565             const pcre_uint32 *cp;
5566             int len = 1;
5567             if (eptr >= md->end_subject)
5568             {
5569             SCHECK_PARTIAL();
5570             break;
5571             }
5572             GETCHARLENTEST(c, eptr, len);
5573             cp = PRIV(ucd_caseless_sets) + prop_value;
5574             for (;;)
5575             {
5576             if (c < *cp)
5577             { if (prop_fail_result) break; else goto GOT_MAX; }
5578             if (c == *cp++)
5579             { if (prop_fail_result) goto GOT_MAX; else break; }
5580             }
5581             eptr += len;
5582             }
5583             GOT_MAX:
5584             break;
5585              
5586             case PT_UCNC:
5587             for (i = min; i < max; i++)
5588             {
5589             int len = 1;
5590             if (eptr >= md->end_subject)
5591             {
5592             SCHECK_PARTIAL();
5593             break;
5594             }
5595             GETCHARLENTEST(c, eptr, len);
5596             if ((c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
5597             c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
5598             c >= 0xe000) == prop_fail_result)
5599             break;
5600             eptr += len;
5601             }
5602             break;
5603              
5604             default:
5605             RRETURN(PCRE_ERROR_INTERNAL);
5606             }
5607              
5608             /* eptr is now past the end of the maximum run */
5609              
5610             if (possessive) continue; /* No backtracking */
5611             for(;;)
5612             {
5613             if (eptr <= pp) goto TAIL_RECURSE;
5614             RMATCH(eptr, ecode, offset_top, md, eptrb, RM44);
5615             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5616             eptr--;
5617             if (utf) BACKCHAR(eptr);
5618             }
5619             }
5620              
5621             /* Match extended Unicode grapheme clusters. We will get here only if the
5622             support is in the binary; otherwise a compile-time error occurs. */
5623              
5624             else if (ctype == OP_EXTUNI)
5625             {
5626             for (i = min; i < max; i++)
5627             {
5628             if (eptr >= md->end_subject)
5629             {
5630             SCHECK_PARTIAL();
5631             break;
5632             }
5633             else
5634             {
5635             int lgb, rgb;
5636             GETCHARINCTEST(c, eptr);
5637             lgb = UCD_GRAPHBREAK(c);
5638             while (eptr < md->end_subject)
5639             {
5640             int len = 1;
5641             if (!utf) c = *eptr; else { GETCHARLEN(c, eptr, len); }
5642             rgb = UCD_GRAPHBREAK(c);
5643             if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
5644             lgb = rgb;
5645             eptr += len;
5646             }
5647             }
5648             CHECK_PARTIAL();
5649             }
5650              
5651             /* eptr is now past the end of the maximum run */
5652              
5653             if (possessive) continue; /* No backtracking */
5654              
5655             /* We use <= pp rather than == pp to detect the start of the run while
5656             backtracking because the use of \C in UTF mode can cause BACKCHAR to
5657             move back past pp. This is just palliative; the use of \C in UTF mode
5658             is fraught with danger. */
5659              
5660             for(;;)
5661             {
5662             int lgb, rgb;
5663             PCRE_PUCHAR fptr;
5664              
5665             if (eptr <= pp) goto TAIL_RECURSE; /* At start of char run */
5666             RMATCH(eptr, ecode, offset_top, md, eptrb, RM45);
5667             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5668              
5669             /* Backtracking over an extended grapheme cluster involves inspecting
5670             the previous two characters (if present) to see if a break is
5671             permitted between them. */
5672              
5673             eptr--;
5674             if (!utf) c = *eptr; else
5675             {
5676             BACKCHAR(eptr);
5677             GETCHAR(c, eptr);
5678             }
5679             rgb = UCD_GRAPHBREAK(c);
5680              
5681             for (;;)
5682             {
5683             if (eptr <= pp) goto TAIL_RECURSE; /* At start of char run */
5684             fptr = eptr - 1;
5685             if (!utf) c = *fptr; else
5686             {
5687             BACKCHAR(fptr);
5688             GETCHAR(c, fptr);
5689             }
5690             lgb = UCD_GRAPHBREAK(c);
5691             if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
5692             eptr = fptr;
5693             rgb = lgb;
5694             }
5695             }
5696             }
5697              
5698             else
5699             #endif /* SUPPORT_UCP */
5700              
5701             #ifdef SUPPORT_UTF
5702             if (utf)
5703             {
5704             switch(ctype)
5705             {
5706             case OP_ANY:
5707             for (i = min; i < max; i++)
5708             {
5709             if (eptr >= md->end_subject)
5710             {
5711             SCHECK_PARTIAL();
5712             break;
5713             }
5714             if (IS_NEWLINE(eptr)) break;
5715             if (md->partial != 0 && /* Take care with CRLF partial */
5716             eptr + 1 >= md->end_subject &&
5717             NLBLOCK->nltype == NLTYPE_FIXED &&
5718             NLBLOCK->nllen == 2 &&
5719             UCHAR21(eptr) == NLBLOCK->nl[0])
5720             {
5721             md->hitend = TRUE;
5722             if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5723             }
5724             eptr++;
5725             ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5726             }
5727             break;
5728              
5729             case OP_ALLANY:
5730             if (max < INT_MAX)
5731             {
5732             for (i = min; i < max; i++)
5733             {
5734             if (eptr >= md->end_subject)
5735             {
5736             SCHECK_PARTIAL();
5737             break;
5738             }
5739             eptr++;
5740             ACROSSCHAR(eptr < md->end_subject, *eptr, eptr++);
5741             }
5742             }
5743             else
5744             {
5745             eptr = md->end_subject; /* Unlimited UTF-8 repeat */
5746             SCHECK_PARTIAL();
5747             }
5748             break;
5749              
5750             /* The byte case is the same as non-UTF8 */
5751              
5752             case OP_ANYBYTE:
5753             c = max - min;
5754             if (c > (unsigned int)(md->end_subject - eptr))
5755             {
5756             eptr = md->end_subject;
5757             SCHECK_PARTIAL();
5758             }
5759             else eptr += c;
5760             break;
5761              
5762             case OP_ANYNL:
5763             for (i = min; i < max; i++)
5764             {
5765             int len = 1;
5766             if (eptr >= md->end_subject)
5767             {
5768             SCHECK_PARTIAL();
5769             break;
5770             }
5771             GETCHARLEN(c, eptr, len);
5772             if (c == CHAR_CR)
5773             {
5774             if (++eptr >= md->end_subject) break;
5775             if (UCHAR21(eptr) == CHAR_LF) eptr++;
5776             }
5777             else
5778             {
5779             if (c != CHAR_LF &&
5780             (md->bsr_anycrlf ||
5781             (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
5782             #ifndef EBCDIC
5783             && c != 0x2028 && c != 0x2029
5784             #endif /* Not EBCDIC */
5785             )))
5786             break;
5787             eptr += len;
5788             }
5789             }
5790             break;
5791              
5792             case OP_NOT_HSPACE:
5793             case OP_HSPACE:
5794             for (i = min; i < max; i++)
5795             {
5796             BOOL gotspace;
5797             int len = 1;
5798             if (eptr >= md->end_subject)
5799             {
5800             SCHECK_PARTIAL();
5801             break;
5802             }
5803             GETCHARLEN(c, eptr, len);
5804             switch(c)
5805             {
5806             HSPACE_CASES: gotspace = TRUE; break;
5807             default: gotspace = FALSE; break;
5808             }
5809             if (gotspace == (ctype == OP_NOT_HSPACE)) break;
5810             eptr += len;
5811             }
5812             break;
5813              
5814             case OP_NOT_VSPACE:
5815             case OP_VSPACE:
5816             for (i = min; i < max; i++)
5817             {
5818             BOOL gotspace;
5819             int len = 1;
5820             if (eptr >= md->end_subject)
5821             {
5822             SCHECK_PARTIAL();
5823             break;
5824             }
5825             GETCHARLEN(c, eptr, len);
5826             switch(c)
5827             {
5828             VSPACE_CASES: gotspace = TRUE; break;
5829             default: gotspace = FALSE; break;
5830             }
5831             if (gotspace == (ctype == OP_NOT_VSPACE)) break;
5832             eptr += len;
5833             }
5834             break;
5835              
5836             case OP_NOT_DIGIT:
5837             for (i = min; i < max; i++)
5838             {
5839             int len = 1;
5840             if (eptr >= md->end_subject)
5841             {
5842             SCHECK_PARTIAL();
5843             break;
5844             }
5845             GETCHARLEN(c, eptr, len);
5846             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
5847             eptr+= len;
5848             }
5849             break;
5850              
5851             case OP_DIGIT:
5852             for (i = min; i < max; i++)
5853             {
5854             int len = 1;
5855             if (eptr >= md->end_subject)
5856             {
5857             SCHECK_PARTIAL();
5858             break;
5859             }
5860             GETCHARLEN(c, eptr, len);
5861             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
5862             eptr+= len;
5863             }
5864             break;
5865              
5866             case OP_NOT_WHITESPACE:
5867             for (i = min; i < max; i++)
5868             {
5869             int len = 1;
5870             if (eptr >= md->end_subject)
5871             {
5872             SCHECK_PARTIAL();
5873             break;
5874             }
5875             GETCHARLEN(c, eptr, len);
5876             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
5877             eptr+= len;
5878             }
5879             break;
5880              
5881             case OP_WHITESPACE:
5882             for (i = min; i < max; i++)
5883             {
5884             int len = 1;
5885             if (eptr >= md->end_subject)
5886             {
5887             SCHECK_PARTIAL();
5888             break;
5889             }
5890             GETCHARLEN(c, eptr, len);
5891             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
5892             eptr+= len;
5893             }
5894             break;
5895              
5896             case OP_NOT_WORDCHAR:
5897             for (i = min; i < max; i++)
5898             {
5899             int len = 1;
5900             if (eptr >= md->end_subject)
5901             {
5902             SCHECK_PARTIAL();
5903             break;
5904             }
5905             GETCHARLEN(c, eptr, len);
5906             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
5907             eptr+= len;
5908             }
5909             break;
5910              
5911             case OP_WORDCHAR:
5912             for (i = min; i < max; i++)
5913             {
5914             int len = 1;
5915             if (eptr >= md->end_subject)
5916             {
5917             SCHECK_PARTIAL();
5918             break;
5919             }
5920             GETCHARLEN(c, eptr, len);
5921             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
5922             eptr+= len;
5923             }
5924             break;
5925              
5926             default:
5927             RRETURN(PCRE_ERROR_INTERNAL);
5928             }
5929              
5930             if (possessive) continue; /* No backtracking */
5931             for(;;)
5932             {
5933             if (eptr <= pp) goto TAIL_RECURSE;
5934             RMATCH(eptr, ecode, offset_top, md, eptrb, RM46);
5935             if (rrc != MATCH_NOMATCH) RRETURN(rrc);
5936             eptr--;
5937             BACKCHAR(eptr);
5938             if (ctype == OP_ANYNL && eptr > pp && UCHAR21(eptr) == CHAR_NL &&
5939             UCHAR21(eptr - 1) == CHAR_CR) eptr--;
5940             }
5941             }
5942             else
5943             #endif /* SUPPORT_UTF */
5944             /* Not UTF mode */
5945             {
5946 14           switch(ctype)
5947             {
5948             case OP_ANY:
5949 155 50         for (i = min; i < max; i++)
5950             {
5951 155 100         if (eptr >= md->end_subject)
5952             {
5953 14 50         SCHECK_PARTIAL();
    0          
    0          
5954 14           break;
5955             }
5956 141 50         if (IS_NEWLINE(eptr)) break;
    50          
    50          
    0          
    0          
    0          
    0          
    50          
5957 141 50         if (md->partial != 0 && /* Take care with CRLF partial */
    0          
5958 0 0         eptr + 1 >= md->end_subject &&
5959 0 0         NLBLOCK->nltype == NLTYPE_FIXED &&
5960 0 0         NLBLOCK->nllen == 2 &&
5961 0           *eptr == NLBLOCK->nl[0])
5962             {
5963 0           md->hitend = TRUE;
5964 0 0         if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);
5965             }
5966 141           eptr++;
5967             }
5968 14           break;
5969              
5970             case OP_ALLANY:
5971             case OP_ANYBYTE:
5972 0           c = max - min;
5973 0 0         if (c > (unsigned int)(md->end_subject - eptr))
5974             {
5975 0           eptr = md->end_subject;
5976 0 0         SCHECK_PARTIAL();
    0          
    0          
5977             }
5978 0           else eptr += c;
5979 0           break;
5980              
5981             case OP_ANYNL:
5982 0 0         for (i = min; i < max; i++)
5983             {
5984 0 0         if (eptr >= md->end_subject)
5985             {
5986 0 0         SCHECK_PARTIAL();
    0          
    0          
5987 0           break;
5988             }
5989 0           c = *eptr;
5990 0 0         if (c == CHAR_CR)
5991             {
5992 0 0         if (++eptr >= md->end_subject) break;
5993 0 0         if (*eptr == CHAR_LF) eptr++;
5994             }
5995             else
5996             {
5997 0 0         if (c != CHAR_LF && (md->bsr_anycrlf ||
    0          
    0          
5998 0 0         (c != CHAR_VT && c != CHAR_FF && c != CHAR_NEL
    0          
5999             #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6000             && c != 0x2028 && c != 0x2029
6001             #endif
6002             ))) break;
6003 0           eptr++;
6004             }
6005             }
6006 0           break;
6007              
6008             case OP_NOT_HSPACE:
6009 0 0         for (i = min; i < max; i++)
6010             {
6011 0 0         if (eptr >= md->end_subject)
6012             {
6013 0 0         SCHECK_PARTIAL();
    0          
    0          
6014 0           break;
6015             }
6016 0 0         switch(*eptr)
6017             {
6018 0           default: eptr++; break;
6019             HSPACE_BYTE_CASES:
6020             #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6021             HSPACE_MULTIBYTE_CASES:
6022             #endif
6023 0           goto ENDLOOP00;
6024             }
6025             }
6026             ENDLOOP00:
6027 0           break;
6028              
6029             case OP_HSPACE:
6030 0 0         for (i = min; i < max; i++)
6031             {
6032 0 0         if (eptr >= md->end_subject)
6033             {
6034 0 0         SCHECK_PARTIAL();
    0          
    0          
6035 0           break;
6036             }
6037 0 0         switch(*eptr)
6038             {
6039 0           default: goto ENDLOOP01;
6040             HSPACE_BYTE_CASES:
6041             #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6042             HSPACE_MULTIBYTE_CASES:
6043             #endif
6044 0           eptr++; break;
6045             }
6046             }
6047             ENDLOOP01:
6048 0           break;
6049              
6050             case OP_NOT_VSPACE:
6051 0 0         for (i = min; i < max; i++)
6052             {
6053 0 0         if (eptr >= md->end_subject)
6054             {
6055 0 0         SCHECK_PARTIAL();
    0          
    0          
6056 0           break;
6057             }
6058 0 0         switch(*eptr)
6059             {
6060 0           default: eptr++; break;
6061             VSPACE_BYTE_CASES:
6062             #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6063             VSPACE_MULTIBYTE_CASES:
6064             #endif
6065 0           goto ENDLOOP02;
6066             }
6067             }
6068             ENDLOOP02:
6069 0           break;
6070              
6071             case OP_VSPACE:
6072 0 0         for (i = min; i < max; i++)
6073             {
6074 0 0         if (eptr >= md->end_subject)
6075             {
6076 0 0         SCHECK_PARTIAL();
    0          
    0          
6077 0           break;
6078             }
6079 0 0         switch(*eptr)
6080             {
6081 0           default: goto ENDLOOP03;
6082             VSPACE_BYTE_CASES:
6083             #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
6084             VSPACE_MULTIBYTE_CASES:
6085             #endif
6086 0           eptr++; break;
6087             }
6088             }
6089             ENDLOOP03:
6090 0           break;
6091              
6092             case OP_NOT_DIGIT:
6093 0 0         for (i = min; i < max; i++)
6094             {
6095 0 0         if (eptr >= md->end_subject)
6096             {
6097 0 0         SCHECK_PARTIAL();
    0          
    0          
6098 0           break;
6099             }
6100 0 0         if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_digit) != 0) break;
6101 0           eptr++;
6102             }
6103 0           break;
6104              
6105             case OP_DIGIT:
6106 0 0         for (i = min; i < max; i++)
6107             {
6108 0 0         if (eptr >= md->end_subject)
6109             {
6110 0 0         SCHECK_PARTIAL();
    0          
    0          
6111 0           break;
6112             }
6113 0 0         if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_digit) == 0) break;
6114 0           eptr++;
6115             }
6116 0           break;
6117              
6118             case OP_NOT_WHITESPACE:
6119 0 0         for (i = min; i < max; i++)
6120             {
6121 0 0         if (eptr >= md->end_subject)
6122             {
6123 0 0         SCHECK_PARTIAL();
    0          
    0          
6124 0           break;
6125             }
6126 0 0         if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_space) != 0) break;
6127 0           eptr++;
6128             }
6129 0           break;
6130              
6131             case OP_WHITESPACE:
6132 0 0         for (i = min; i < max; i++)
6133             {
6134 0 0         if (eptr >= md->end_subject)
6135             {
6136 0 0         SCHECK_PARTIAL();
    0          
    0          
6137 0           break;
6138             }
6139 0 0         if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_space) == 0) break;
6140 0           eptr++;
6141             }
6142 0           break;
6143              
6144             case OP_NOT_WORDCHAR:
6145 0 0         for (i = min; i < max; i++)
6146             {
6147 0 0         if (eptr >= md->end_subject)
6148             {
6149 0 0         SCHECK_PARTIAL();
    0          
    0          
6150 0           break;
6151             }
6152 0 0         if (MAX_255(*eptr) && (md->ctypes[*eptr] & ctype_word) != 0) break;
6153 0           eptr++;
6154             }
6155 0           break;
6156              
6157             case OP_WORDCHAR:
6158 0 0         for (i = min; i < max; i++)
6159             {
6160 0 0         if (eptr >= md->end_subject)
6161             {
6162 0 0         SCHECK_PARTIAL();
    0          
    0          
6163 0           break;
6164             }
6165 0 0         if (!MAX_255(*eptr) || (md->ctypes[*eptr] & ctype_word) == 0) break;
6166 0           eptr++;
6167             }
6168 0           break;
6169              
6170             default:
6171 0           RRETURN(PCRE_ERROR_INTERNAL);
6172             }
6173              
6174 14 100         if (possessive) continue; /* No backtracking */
6175             for (;;)
6176             {
6177 62 50         if (eptr == pp) goto TAIL_RECURSE;
6178 62           RMATCH(eptr, ecode, offset_top, md, eptrb, RM47);
6179 62 100         if (rrc != MATCH_NOMATCH) RRETURN(rrc);
6180 52           eptr--;
6181 52 50         if (ctype == OP_ANYNL && eptr > pp && *eptr == CHAR_LF &&
    0          
    0          
    0          
6182 0           eptr[-1] == CHAR_CR) eptr--;
6183 52           }
6184             }
6185              
6186             /* Control never gets here */
6187             }
6188              
6189             /* There's been some horrible disaster. Arrival here can only mean there is
6190             something seriously wrong in the code above or the OP_xxx definitions. */
6191              
6192             default:
6193             DPRINTF(("Unknown opcode %d\n", *ecode));
6194 0           RRETURN(PCRE_ERROR_UNKNOWN_OPCODE);
6195             }
6196              
6197             /* Do not stick any code in here without much thought; it is assumed
6198             that "continue" in the code above comes out to here to repeat the main
6199             loop. */
6200              
6201 554           } /* End of main loop */
6202             /* Control never reaches here */
6203              
6204              
6205             /* When compiling to use the heap rather than the stack for recursive calls to
6206             match(), the RRETURN() macro jumps here. The number that is saved in
6207             frame->Xwhere indicates which label we actually want to return to. */
6208              
6209             #ifdef NO_RECURSE
6210             #define LBL(val) case val: goto L_RM##val;
6211             HEAP_RETURN:
6212             switch (frame->Xwhere)
6213             {
6214             LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8)
6215             LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17)
6216             LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33)
6217             LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52)
6218             LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64)
6219             LBL(65) LBL(66)
6220             #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
6221             LBL(20) LBL(21)
6222             #endif
6223             #ifdef SUPPORT_UTF
6224             LBL(16) LBL(18)
6225             LBL(22) LBL(23) LBL(28) LBL(30)
6226             LBL(32) LBL(34) LBL(42) LBL(46)
6227             #ifdef SUPPORT_UCP
6228             LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45)
6229             LBL(59) LBL(60) LBL(61) LBL(62) LBL(67)
6230             #endif /* SUPPORT_UCP */
6231             #endif /* SUPPORT_UTF */
6232             default:
6233             DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere));
6234             return PCRE_ERROR_INTERNAL;
6235             }
6236             #undef LBL
6237             #endif /* NO_RECURSE */
6238             }
6239              
6240              
6241             /***************************************************************************
6242             ****************************************************************************
6243             RECURSION IN THE match() FUNCTION
6244              
6245             Undefine all the macros that were defined above to handle this. */
6246              
6247             #ifdef NO_RECURSE /* The names in this group are macros only in the NO_RECURSE (heap rather than stack) build */
6248             #undef eptr
6249             #undef ecode
6250             #undef mstart
6251             #undef offset_top
6252             #undef eptrb
6253             #undef flags
6254              
6255             #undef callpat
6256             #undef charptr
6257             #undef data
6258             #undef next
6259             #undef pp
6260             #undef prev
6261             #undef saved_eptr
6262              
6263             #undef new_recursive
6264              
6265             #undef cur_is_word
6266             #undef condition
6267             #undef prev_is_word
6268              
6269             #undef ctype
6270             #undef length
6271             #undef max
6272             #undef min
6273             #undef number
6274             #undef offset
6275             #undef op
6276             #undef save_capture_last
6277             #undef save_offset1
6278             #undef save_offset2
6279             #undef save_offset3
6280             #undef stacksave
6281              
6282             #undef newptrb
6283              
6284             #endif /* NO_RECURSE */
6285              
6286             /* fc and fi are defined as macros in both cases (with and without NO_RECURSE), so they are undefined unconditionally */
6287              
6288             #undef fc
6289             #undef fi
6290              
6291             /***************************************************************************
6292             ***************************************************************************/
6293              
6294              
6295             #ifdef NO_RECURSE
6296             /*************************************************
6297             * Release allocated heap frames *
6298             *************************************************/
6299              
6300             /* This function releases all the allocated frames. The base frame is on the
6301             machine stack, and so must not be freed.
6302              
6303             Argument: the address of the base frame
6304             Returns: nothing
6305             */
6306              
6307             static void
6308             release_match_heapframes (heapframe *frame_base)
6309             {
6310             heapframe *nextframe = frame_base->Xnextframe;
6311             while (nextframe != NULL)
6312             {
6313             heapframe *oldframe = nextframe;
6314             nextframe = nextframe->Xnextframe;
6315             (PUBL(stack_free))(oldframe);
6316             }
6317             }
6318             #endif
6319              
6320              
6321             /*************************************************
6322             * Execute a Regular Expression *
6323             *************************************************/
6324              
6325             /* This function applies a compiled re to a subject string and picks out
6326             portions of the string if it matches. Two elements in the vector are set for
6327             each substring: the offsets to the start and end of the substring.
6328              
6329             Arguments:
6330             argument_re points to the compiled expression
6331             extra_data points to extra data or is NULL
6332             subject points to the subject string
6333             length length of subject string (may contain binary zeros)
6334             start_offset where to start in the subject string
6335             options option bits
6336             offsets points to a vector of ints to be filled in with offsets
6337             offsetcount the number of elements in the vector
6338              
6339             Returns: > 0 => success; value is the number of elements filled in
6340             = 0 => success, but offsets is not big enough
6341             -1 => failed to match
6342             < -1 => some kind of unexpected problem
6343             */
6344              
6345             #if defined COMPILE_PCRE8
6346             PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6347 753           pcre_exec(const pcre *argument_re, const pcre_extra *extra_data,
6348             PCRE_SPTR subject, int length, int start_offset, int options, int *offsets,
6349             int offsetcount)
6350             #elif defined COMPILE_PCRE16
6351             PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6352             pcre16_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
6353             PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
6354             int offsetcount)
6355             #elif defined COMPILE_PCRE32
6356             PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
6357             pcre32_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
6358             PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
6359             int offsetcount)
6360             #endif
6361             {
6362             int rc, ocount, arg_offset_max;
6363             int newline;
6364 753           BOOL using_temporary_offsets = FALSE;
6365             BOOL anchored;
6366             BOOL startline;
6367             BOOL firstline;
6368             BOOL utf;
6369 753           BOOL has_first_char = FALSE;
6370 753           BOOL has_req_char = FALSE;
6371 753           pcre_uchar first_char = 0;
6372 753           pcre_uchar first_char2 = 0;
6373 753           pcre_uchar req_char = 0;
6374 753           pcre_uchar req_char2 = 0;
6375             match_data match_block;
6376 753           match_data *md = &match_block;
6377             const pcre_uint8 *tables;
6378 753           const pcre_uint8 *start_bits = NULL;
6379 753           PCRE_PUCHAR start_match = (PCRE_PUCHAR)subject + start_offset;
6380             PCRE_PUCHAR end_subject;
6381 753           PCRE_PUCHAR start_partial = NULL;
6382 753           PCRE_PUCHAR match_partial = NULL;
6383 753           PCRE_PUCHAR req_char_ptr = start_match - 1;
6384              
6385             const pcre_study_data *study;
6386 753           const REAL_PCRE *re = (const REAL_PCRE *)argument_re;
6387              
6388             #ifdef NO_RECURSE
6389             heapframe frame_zero;
6390             frame_zero.Xprevframe = NULL; /* Marks the top level */
6391             frame_zero.Xnextframe = NULL; /* None are allocated yet */
6392             md->match_frames_base = &frame_zero;
6393             #endif
6394              
6395             /* Check for the special magic call that measures the size of the stack used
6396             per recursive call of match(). Without the funny casting for sizeof, a Windows
6397             compiler gave this error: "unary minus operator applied to unsigned type,
6398             result still unsigned". Hopefully the cast fixes that. */
6399              
6400 753 50         if (re == NULL && extra_data == NULL && subject == NULL && length == -999 &&
    0          
    0          
    0          
    0          
6401             start_offset == -999)
6402             #ifdef NO_RECURSE
6403             return -((int)sizeof(heapframe));
6404             #else
6405 0           return match(NULL, NULL, NULL, 0, NULL, NULL, 0);
6406             #endif
6407              
6408             /* Plausibility checks */
6409              
6410 753 50         if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
6411 753 50         if (re == NULL || subject == NULL || (offsets == NULL && offsetcount > 0))
    50          
    50          
    50          
6412 0           return PCRE_ERROR_NULL;
6413 753 50         if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
6414 753 50         if (length < 0) return PCRE_ERROR_BADLENGTH;
6415 753 50         if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
    50          
6416              
6417             /* Check that the first field in the block is the magic number. If it is not,
6418             return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
6419             REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
6420             means that the pattern is likely compiled with different endianness. */
6421              
6422 753 50         if (re->magic_number != MAGIC_NUMBER)
6423 0           return re->magic_number == REVERSED_MAGIC_NUMBER?
6424 0 0         PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
6425 753 50         if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
6426              
6427             /* These two settings are used in the code for checking a UTF-8 string that
6428             follows immediately afterwards. Other values in the md block are used only
6429             during "normal" pcre_exec() processing, not when the JIT support is in use,
6430             so they are set up later. */
6431              
6432             /* PCRE_UTF16 has the same value as PCRE_UTF8. */
6433 753           utf = md->utf = (re->options & PCRE_UTF8) != 0;
6434 753 50         md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 :
6435 753           ((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0;
6436              
6437             /* Check a UTF-8 string if required. Pass back the character offset and error
6438             code for an invalid string if a results vector is available. */
6439              
6440             #ifdef SUPPORT_UTF
6441             if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
6442             {
6443             int erroroffset;
6444             int errorcode = PRIV(valid_utf)((PCRE_PUCHAR)subject, length, &erroroffset);
6445             if (errorcode != 0)
6446             {
6447             if (offsetcount >= 2)
6448             {
6449             offsets[0] = erroroffset;
6450             offsets[1] = errorcode;
6451             }
6452             #if defined COMPILE_PCRE8
6453             return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)?
6454             PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
6455             #elif defined COMPILE_PCRE16
6456             return (errorcode <= PCRE_UTF16_ERR1 && md->partial > 1)?
6457             PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
6458             #elif defined COMPILE_PCRE32
6459             return PCRE_ERROR_BADUTF32;
6460             #endif
6461             }
6462             #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
6463             /* Check that a start_offset points to the start of a UTF character. */
6464             if (start_offset > 0 && start_offset < length &&
6465             NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
6466             return PCRE_ERROR_BADUTF8_OFFSET;
6467             #endif
6468             }
6469             #endif
6470              
6471             /* If the pattern was successfully studied with JIT support, run the JIT
6472             executable instead of the rest of this function. Most options must be set at
6473             compile time for the JIT code to be usable. Fallback to the normal code path if
6474             an unsupported flag is set. */
6475              
6476             #ifdef SUPPORT_JIT
6477             if (extra_data != NULL
6478             && (extra_data->flags & (PCRE_EXTRA_EXECUTABLE_JIT |
6479             PCRE_EXTRA_TABLES)) == PCRE_EXTRA_EXECUTABLE_JIT
6480             && extra_data->executable_jit != NULL
6481             && (options & ~PUBLIC_JIT_EXEC_OPTIONS) == 0)
6482             {
6483             rc = PRIV(jit_exec)(extra_data, (const pcre_uchar *)subject, length,
6484             start_offset, options, offsets, offsetcount);
6485              
6486             /* PCRE_ERROR_JIT_BADOPTION means that the selected normal or partial matching
6487             mode is not compiled. In this case we simply fall back to the interpreter. */
6488              
6489             if (rc != PCRE_ERROR_JIT_BADOPTION) return rc;
6490             }
6491             #endif
6492              
6493             /* Carry on with non-JIT matching. This information is for finding all the
6494             numbers associated with a given name, for condition testing. */
6495              
6496 753           md->name_table = (pcre_uchar *)re + re->name_table_offset;
6497 753           md->name_count = re->name_count;
6498 753           md->name_entry_size = re->name_entry_size;
6499              
6500             /* Fish out the optional data from the extra_data structure, first setting
6501             the default values. */
6502              
6503 753           study = NULL;
6504 753           md->match_limit = MATCH_LIMIT;
6505 753           md->match_limit_recursion = MATCH_LIMIT_RECURSION;
6506 753           md->callout_data = NULL;
6507              
6508             /* The table pointer is always in native byte order. */
6509              
6510 753           tables = re->tables;
6511              
6512             /* The two limit values override the defaults, whatever their value. */
6513              
6514 753 50         if (extra_data != NULL)
6515             {
6516 0           unsigned long int flags = extra_data->flags;
6517 0 0         if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
6518 0           study = (const pcre_study_data *)extra_data->study_data;
6519 0 0         if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0)
6520 0           md->match_limit = extra_data->match_limit;
6521 0 0         if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
6522 0           md->match_limit_recursion = extra_data->match_limit_recursion;
6523 0 0         if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
6524 0           md->callout_data = extra_data->callout_data;
6525 0 0         if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables;
6526             }
6527              
6528             /* Limits in the regex override only if they are smaller. */
6529              
6530 753 50         if ((re->flags & PCRE_MLSET) != 0 && re->limit_match < md->match_limit)
    0          
6531 0           md->match_limit = re->limit_match;
6532              
6533 753 50         if ((re->flags & PCRE_RLSET) != 0 &&
    0          
6534 0           re->limit_recursion < md->match_limit_recursion)
6535 0           md->match_limit_recursion = re->limit_recursion;
6536              
6537             /* If the exec call supplied NULL for tables, use the inbuilt ones. This
6538             is a feature that makes it possible to save compiled regex and re-use them
6539             in other programs later. */
6540              
6541 753 50         if (tables == NULL) tables = PRIV(default_tables);
6542              
6543             /* Set up other data */
6544              
6545 753           anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
6546 753           startline = (re->flags & PCRE_STARTLINE) != 0;
6547 753           firstline = (re->options & PCRE_FIRSTLINE) != 0;
6548              
6549             /* The code starts after the real_pcre block and the capture name table. */
6550              
6551 1506           md->start_code = (const pcre_uchar *)re + re->name_table_offset +
6552 753           re->name_count * re->name_entry_size;
6553              
6554 753           md->start_subject = (PCRE_PUCHAR)subject;
6555 753           md->start_offset = start_offset;
6556 753           md->end_subject = md->start_subject + length;
6557 753           end_subject = md->end_subject;
6558              
6559 753           md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
6560 753           md->use_ucp = (re->options & PCRE_UCP) != 0;
6561 753           md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;
6562 753           md->ignore_skip_arg = 0;
6563              
6564             /* Some options are unpacked into BOOL variables in the hope that testing
6565             them will be faster than individual option bits. */
6566              
6567 753           md->notbol = (options & PCRE_NOTBOL) != 0;
6568 753           md->noteol = (options & PCRE_NOTEOL) != 0;
6569 753           md->notempty = (options & PCRE_NOTEMPTY) != 0;
6570 753           md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0;
6571              
6572 753           md->hitend = FALSE;
6573 753           md->mark = md->nomatch_mark = NULL; /* In case never set */
6574              
6575 753           md->recursive = NULL; /* No recursion at top level */
6576 753           md->hasthen = (re->flags & PCRE_HASTHEN) != 0;
6577              
6578 753           md->lcc = tables + lcc_offset;
6579 753           md->fcc = tables + fcc_offset;
6580 753           md->ctypes = tables + ctypes_offset;
6581              
6582             /* Handle different \R options. */
6583              
6584 753           switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
6585             {
6586             case 0:
6587 753 50         if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
6588 0           md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0;
6589             else
6590             #ifdef BSR_ANYCRLF
6591             md->bsr_anycrlf = TRUE;
6592             #else
6593 753           md->bsr_anycrlf = FALSE;
6594             #endif
6595 753           break;
6596              
6597             case PCRE_BSR_ANYCRLF:
6598 0           md->bsr_anycrlf = TRUE;
6599 0           break;
6600              
6601             case PCRE_BSR_UNICODE:
6602 0           md->bsr_anycrlf = FALSE;
6603 0           break;
6604              
6605 0           default: return PCRE_ERROR_BADNEWLINE;
6606             }
6607              
6608             /* Handle different types of newline. The three bits give eight cases. If
6609             nothing is set at run time, whatever was used at compile time applies. */
6610              
6611 753           switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options :
6612 753 50         (pcre_uint32)options) & PCRE_NEWLINE_BITS)
6613             {
6614 753           case 0: newline = NEWLINE; break; /* Compile-time default */
6615 0           case PCRE_NEWLINE_CR: newline = CHAR_CR; break;
6616 0           case PCRE_NEWLINE_LF: newline = CHAR_NL; break;
6617             case PCRE_NEWLINE_CR+
6618 0           PCRE_NEWLINE_LF: newline = (CHAR_CR << 8) | CHAR_NL; break;
6619 0           case PCRE_NEWLINE_ANY: newline = -1; break;
6620 0           case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
6621 0           default: return PCRE_ERROR_BADNEWLINE;
6622             }
6623              
6624 753 50         if (newline == -2)
6625             {
6626 753           md->nltype = NLTYPE_ANYCRLF;
6627             }
6628 0 0         else if (newline < 0)
6629             {
6630 0           md->nltype = NLTYPE_ANY;
6631             }
6632             else
6633             {
6634 0           md->nltype = NLTYPE_FIXED;
6635 0 0         if (newline > 255)
6636             {
6637 0           md->nllen = 2;
6638 0           md->nl[0] = (newline >> 8) & 255;
6639 0           md->nl[1] = newline & 255;
6640             }
6641             else
6642             {
6643 0           md->nllen = 1;
6644 0           md->nl[0] = newline;
6645             }
6646             }
6647              
6648             /* Partial matching was originally supported only for a restricted set of
6649             regexes; from release 8.00 there are no restrictions, but the bits are still
6650             defined (though never set). So there's no harm in leaving this code. */
6651              
6652 753 50         if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0)
    0          
6653 0           return PCRE_ERROR_BADPARTIAL;
6654              
6655             /* If the expression has got more back references than the offsets supplied can
6656             hold, we get a temporary chunk of working store to use during the matching.
6657             Otherwise, we can use the vector supplied, rounding down its size to a multiple
6658             of 3. */
6659              
6660 753           ocount = offsetcount - (offsetcount % 3);
6661 753           arg_offset_max = (2*ocount)/3;
6662              
6663 753 50         if (re->top_backref > 0 && re->top_backref >= ocount/3)
    0          
6664             {
6665 0           ocount = re->top_backref * 3 + 3;
6666 0           md->offset_vector = (int *)(PUBL(malloc))(ocount * sizeof(int));
6667 0 0         if (md->offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
6668 0           using_temporary_offsets = TRUE;
6669             DPRINTF(("Got memory to hold back references\n"));
6670             }
6671 753           else md->offset_vector = offsets;
6672 753           md->offset_end = ocount;
6673 753           md->offset_max = (2*ocount)/3;
6674 753           md->capture_last = 0;
6675              
6676             /* Reset the working variable associated with each extraction. These should
6677             never be used unless previously set, but they get saved and restored, and so we
6678             initialize them to avoid reading uninitialized locations. Also, unset the
6679             offsets for the matched string. This is really just for tidiness with callouts,
6680             in case they inspect these fields. */
6681              
6682 753 50         if (md->offset_vector != NULL)
6683             {
6684 0           register int *iptr = md->offset_vector + ocount;
6685 0           register int *iend = iptr - re->top_bracket;
6686 0 0         if (iend < md->offset_vector + 2) iend = md->offset_vector + 2;
6687 0 0         while (--iptr >= iend) *iptr = -1;
6688 0 0         if (offsetcount > 0) md->offset_vector[0] = -1;
6689 0 0         if (offsetcount > 1) md->offset_vector[1] = -1;
6690             }
6691              
6692             /* Set up the first character to match, if available. The first_char value is
6693             never set for an anchored regular expression, but the anchoring may be forced
6694             at run time, so we have to test for anchoring. The first char may be unset for
6695             an unanchored pattern, of course. If there's no first char and the pattern was
6696             studied, there may be a bitmap of possible first characters. */
6697              
6698 753 100         if (!anchored)
6699             {
6700 696 50         if ((re->flags & PCRE_FIRSTSET) != 0)
6701             {
6702 696           has_first_char = TRUE;
6703 696           first_char = first_char2 = (pcre_uchar)(re->first_char);
6704 696 50         if ((re->flags & PCRE_FCH_CASELESS) != 0)
6705             {
6706 696           first_char2 = TABLE_GET(first_char, md->fcc, first_char);
6707             #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6708             if (utf && first_char > 127)
6709             first_char2 = UCD_OTHERCASE(first_char);
6710             #endif
6711             }
6712             }
6713             else
6714 0 0         if (!startline && study != NULL &&
    0          
    0          
6715 0           (study->flags & PCRE_STUDY_MAPPED) != 0)
6716 0           start_bits = study->start_bits;
6717             }
6718              
6719             /* For anchored or unanchored matches, there may be a "last known required
6720             character" set. */
6721              
6722 753 100         if ((re->flags & PCRE_REQCHSET) != 0)
6723             {
6724 752           has_req_char = TRUE;
6725 752           req_char = req_char2 = (pcre_uchar)(re->req_char);
6726 752 50         if ((re->flags & PCRE_RCH_CASELESS) != 0)
6727             {
6728 0           req_char2 = TABLE_GET(req_char, md->fcc, req_char);
6729             #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
6730             if (utf && req_char > 127)
6731             req_char2 = UCD_OTHERCASE(req_char);
6732             #endif
6733             }
6734             }
6735              
6736              
6737             /* ==========================================================================*/
6738              
6739             /* Loop for handling unanchored repeated matching attempts; for anchored regexs
6740             the loop runs just once. */
6741              
6742             for(;;)
6743             {
6744 817           PCRE_PUCHAR save_end_subject = end_subject;
6745             PCRE_PUCHAR new_start_match;
6746              
6747             /* If firstline is TRUE, the start of the match is constrained to the first
6748             line of a multiline string. That is, the match must be before or at the first
6749             newline. Implement this by temporarily adjusting end_subject so that we stop
6750             scanning at a newline. If the match fails at the newline, later code breaks
6751             this loop. */
6752              
6753 817 50         if (firstline)
6754             {
6755 0           PCRE_PUCHAR t = start_match;
6756             #ifdef SUPPORT_UTF
6757             if (utf)
6758             {
6759             while (t < md->end_subject && !IS_NEWLINE(t))
6760             {
6761             t++;
6762             ACROSSCHAR(t < end_subject, *t, t++);
6763             }
6764             }
6765             else
6766             #endif
6767 0 0         while (t < md->end_subject && !IS_NEWLINE(t)) t++;
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
6768 0           end_subject = t;
6769             }
6770              
6771             /* There are some optimizations that avoid running the match if a known
6772             starting point is not found, or if a known later character is not present.
6773             However, there is an option that disables these, for testing and for ensuring
6774             that all callouts do actually occur. The option can be set in the regex by
6775             (*NO_START_OPT) or passed in match-time options. */
6776              
6777 817 50         if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
6778             {
6779             /* Advance to a unique first char if there is one. */
6780              
6781 817 100         if (has_first_char)
6782             {
6783             pcre_uchar smc;
6784              
6785 760 50         if (first_char != first_char2)
6786 0 0         while (start_match < end_subject &&
    0          
6787 0 0         (smc = UCHAR21TEST(start_match)) != first_char && smc != first_char2)
6788 0           start_match++;
6789             else
6790 8713 100         while (start_match < end_subject && UCHAR21TEST(start_match) != first_char)
    100          
6791 7953           start_match++;
6792             }
6793              
6794             /* Or to just after a linebreak for a multiline match */
6795              
6796 57 50         else if (startline)
6797             {
6798 0 0         if (start_match > md->start_subject + start_offset)
6799             {
6800             #ifdef SUPPORT_UTF
6801             if (utf)
6802             {
6803             while (start_match < end_subject && !WAS_NEWLINE(start_match))
6804             {
6805             start_match++;
6806             ACROSSCHAR(start_match < end_subject, *start_match,
6807             start_match++);
6808             }
6809             }
6810             else
6811             #endif
6812 0 0         while (start_match < end_subject && !WAS_NEWLINE(start_match))
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
6813 0           start_match++;
6814              
6815             /* If we have just passed a CR and the newline option is ANY or ANYCRLF,
6816             and we are now at a LF, advance the match position by one more character.
6817             */
6818              
6819 0 0         if (start_match[-1] == CHAR_CR &&
    0          
6820 0 0         (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
    0          
6821 0 0         start_match < end_subject &&
6822 0           UCHAR21TEST(start_match) == CHAR_NL)
6823 0           start_match++;
6824             }
6825             }
6826              
6827             /* Or to a non-unique first byte after study */
6828              
6829 57 50         else if (start_bits != NULL)
6830             {
6831 0 0         while (start_match < end_subject)
6832             {
6833 0           register pcre_uint32 c = UCHAR21TEST(start_match);
6834             #ifndef COMPILE_PCRE8
6835             if (c > 255) c = 255;
6836             #endif
6837 0 0         if ((start_bits[c/8] & (1 << (c&7))) != 0) break;
6838 0           start_match++;
6839             }
6840             }
6841             } /* Starting optimizations */
6842              
6843             /* Restore fudged end_subject */
6844              
6845 817           end_subject = save_end_subject;
6846              
6847             /* The following two optimizations are disabled for partial matching or if
6848             disabling is explicitly requested. */
6849              
6850 817 50         if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial)
    50          
6851             {
6852             /* If the pattern was studied, a minimum subject length may be set. This is
6853             a lower bound; no actual string of that length may actually match the
6854             pattern. Although the value is, strictly, in characters, we treat it as
6855             bytes to avoid spending too much time in this optimization. */
6856              
6857 817 50         if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
    0          
    0          
6858 0           (pcre_uint32)(end_subject - start_match) < study->minlength)
6859             {
6860 0           rc = MATCH_NOMATCH;
6861 0           break;
6862             }
6863              
6864             /* If req_char is set, we know that that character must appear in the
6865             subject for the match to succeed. If the first character is set, req_char
6866             must be later in the subject; otherwise the test starts at the match point.
6867             This optimization can save a huge amount of backtracking in patterns with
6868             nested unlimited repeats that aren't going to match. Writing separate code
6869             for cased/caseless versions makes it go faster, as does using an
6870             autoincrement and backing off on a match.
6871              
6872             HOWEVER: when the subject string is very, very long, searching to its end
6873             can take a long time, and give bad performance on quite ordinary patterns.
6874             This showed up when somebody was matching something like /^\d+C/ on a
6875             32-megabyte string... so we don't do this when the string is sufficiently
6876             long. */
6877              
6878 817 100         if (has_req_char && end_subject - start_match < REQ_BYTE_MAX)
    50          
6879             {
6880 816 100         register PCRE_PUCHAR p = start_match + (has_first_char? 1:0);
6881              
6882             /* We don't need to repeat the search if we haven't yet reached the
6883             place we found it at last time. */
6884              
6885 816 50         if (p > req_char_ptr)
6886             {
6887 816 50         if (req_char != req_char2)
6888             {
6889 0 0         while (p < end_subject)
6890             {
6891 0           register pcre_uint32 pp = UCHAR21INCTEST(p);
6892 0 0         if (pp == req_char || pp == req_char2) { p--; break; }
    0          
6893             }
6894             }
6895             else
6896             {
6897 3826 100         while (p < end_subject)
6898             {
6899 3109 100         if (UCHAR21INCTEST(p) == req_char) { p--; break; }
6900             }
6901             }
6902              
6903             /* If we can't find the required character, break the matching loop,
6904             forcing a match failure. */
6905              
6906 816 100         if (p >= end_subject)
6907             {
6908 717           rc = MATCH_NOMATCH;
6909 717           break;
6910             }
6911              
6912             /* If we have found the required character, save the point where we
6913             found it, so that we don't search again next time round the loop if
6914             the start hasn't passed this character yet. */
6915              
6916 99           req_char_ptr = p;
6917             }
6918             }
6919             }
6920              
6921             #ifdef PCRE_DEBUG /* Sigh. Some compilers never learn. */
6922             printf(">>>> Match against: ");
6923             pchars(start_match, end_subject - start_match, TRUE, md);
6924             printf("\n");
6925             #endif
6926              
6927             /* OK, we can now run the match. If "hitend" is set afterwards, remember the
6928             first starting point for which a partial match was found. */
6929              
6930 100           md->start_match_ptr = start_match;
6931 100           md->start_used_ptr = start_match;
6932 100           md->match_call_count = 0;
6933 100           md->match_function_type = 0;
6934 100           md->end_offset_top = 0;
6935 100           md->skip_arg_count = 0;
6936 100           rc = match(start_match, md->start_code, start_match, 2, md, NULL, 0);
6937 100 50         if (md->hitend && start_partial == NULL)
    0          
6938             {
6939 0           start_partial = md->start_used_ptr;
6940 0           match_partial = start_match;
6941             }
6942              
6943 100           switch(rc)
6944             {
6945             /* If MATCH_SKIP_ARG reaches this level it means that a MARK that matched
6946             the SKIP's arg was not found. In this circumstance, Perl ignores the SKIP
6947             entirely. The only way we can do that is to re-do the match at the same
6948             point, with a flag to force SKIP with an argument to be ignored. Just
6949             treating this case as NOMATCH does not work because it does not check other
6950             alternatives in patterns such as A(*SKIP:A)B|AC when the subject is AC. */
6951              
6952             case MATCH_SKIP_ARG:
6953 0           new_start_match = start_match;
6954 0           md->ignore_skip_arg = md->skip_arg_count;
6955 0           break;
6956              
6957             /* SKIP passes back the next starting point explicitly, but if it is no
6958             greater than the match we have just done, treat it as NOMATCH. */
6959              
6960             case MATCH_SKIP:
6961 0 0         if (md->start_match_ptr > start_match)
6962             {
6963 0           new_start_match = md->start_match_ptr;
6964 0           break;
6965             }
6966             /* Fall through */
6967              
6968             /* NOMATCH and PRUNE advance by one character. THEN at this level acts
6969             exactly like PRUNE. Clear the ignore-SKIP-with-argument setting. */
6970              
6971             case MATCH_NOMATCH:
6972             case MATCH_PRUNE:
6973             case MATCH_THEN:
6974 86           md->ignore_skip_arg = 0;
6975 86           new_start_match = start_match + 1;
6976             #ifdef SUPPORT_UTF
6977             if (utf)
6978             ACROSSCHAR(new_start_match < end_subject, *new_start_match,
6979             new_start_match++);
6980             #endif
6981 86           break;
6982              
6983             /* COMMIT disables the bumpalong, but otherwise behaves as NOMATCH. */
6984              
6985             case MATCH_COMMIT:
6986 0           rc = MATCH_NOMATCH;
6987 0           goto ENDLOOP;
6988              
6989             /* Any other return is either a match, or some kind of error. */
6990              
6991             default:
6992 14           goto ENDLOOP;
6993             }
6994              
6995             /* Control reaches here for the various types of "no match at this point"
6996             result. Reset the code to MATCH_NOMATCH for subsequent checking. */
6997              
6998 86           rc = MATCH_NOMATCH;
6999              
7000             /* If PCRE_FIRSTLINE is set, the match must happen before or at the first
7001             newline in the subject (though it may continue over the newline). Therefore,
7002             if we have just failed to match, starting at a newline, do not continue. */
7003              
7004 86 50         if (firstline && IS_NEWLINE(start_match)) break;
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
7005              
7006             /* Advance to new matching position */
7007              
7008 86           start_match = new_start_match;
7009              
7010             /* Break the loop if the pattern is anchored or if we have passed the end of
7011             the subject. */
7012              
7013 86 100         if (anchored || start_match > end_subject) break;
    50          
7014              
7015             /* If we have just passed a CR and we are now at a LF, and the pattern does
7016             not contain any explicit matches for \r or \n, and the newline option is CRLF
7017             or ANY or ANYCRLF, advance the match position by one more character. In
7018             normal matching start_match will always be greater than the first position at
7019             this stage, but a failed *SKIP can cause a return at the same point, which is
7020             why the first test exists. */
7021              
7022 64 50         if (start_match > (PCRE_PUCHAR)subject + start_offset &&
    50          
7023 0 0         start_match[-1] == CHAR_CR &&
7024 0 0         start_match < end_subject &&
7025 0 0         *start_match == CHAR_NL &&
7026 0 0         (re->flags & PCRE_HASCRORLF) == 0 &&
7027 0 0         (md->nltype == NLTYPE_ANY ||
7028 0 0         md->nltype == NLTYPE_ANYCRLF ||
7029 0           md->nllen == 2))
7030 0           start_match++;
7031              
7032 64           md->mark = NULL; /* Reset for start of next match attempt */
7033 64           } /* End of for(;;) "bumpalong" loop */
7034              
7035             /* ==========================================================================*/
7036              
7037             /* We reach here when rc is not MATCH_NOMATCH, or if one of the stopping
7038             conditions is true:
7039              
7040             (1) The pattern is anchored or the match was failed by (*COMMIT);
7041              
7042             (2) We are past the end of the subject;
7043              
7044             (3) PCRE_FIRSTLINE is set and we have failed to match at a newline, because
7045             this option requests that a match occur at or before the first newline in
7046             the subject.
7047              
7048             When we have a match and the offset vector is big enough to deal with any
7049             backreferences, captured substring offsets will already be set up. In the case
7050             where we had to get some local store to hold offsets for backreference
7051             processing, copy those that we can. In this case there need not be overflow if
7052             certain parts of the pattern were not used, even though there are more
7053             capturing parentheses than vector slots. */
7054              
7055             ENDLOOP:
7056              
7057 753 100         if (rc == MATCH_MATCH || rc == MATCH_ACCEPT)
    50          
7058             {
7059 14 50         if (using_temporary_offsets)
7060             {
7061 0 0         if (arg_offset_max >= 4)
7062             {
7063 0           memcpy(offsets + 2, md->offset_vector + 2,
7064 0           (arg_offset_max - 2) * sizeof(int));
7065             DPRINTF(("Copied offsets from temporary memory\n"));
7066             }
7067 0 0         if (md->end_offset_top > arg_offset_max) md->capture_last |= OVFLBIT;
7068             DPRINTF(("Freeing temporary memory\n"));
7069 0           (PUBL(free))(md->offset_vector);
7070             }
7071              
7072             /* Set the return code to the number of captured strings, or 0 if there were
7073             too many to fit into the vector. */
7074              
7075 3 50         rc = ((md->capture_last & OVFLBIT) != 0 &&
7076 3           md->end_offset_top >= arg_offset_max)?
7077 17 100         0 : md->end_offset_top/2;
7078              
7079             /* If there is space in the offset vector, set any unused pairs at the end of
7080             the pattern to -1 for backwards compatibility. It is documented that this
7081             happens. In earlier versions, the whole set of potential capturing offsets
7082             was set to -1 each time round the loop, but this is handled differently now.
7083             "Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only
7084             those at the end that need unsetting here. We can't just unset them all at
7085             the start of the whole thing because they may get set in one branch that is
7086             not the final matching branch. */
7087              
7088 14 100         if (md->end_offset_top/2 <= re->top_bracket && offsets != NULL)
    50          
7089             {
7090             register int *iptr, *iend;
7091 0           int resetcount = 2 + re->top_bracket * 2;
7092 0 0         if (resetcount > offsetcount) resetcount = offsetcount;
7093 0           iptr = offsets + md->end_offset_top;
7094 0           iend = offsets + resetcount;
7095 0 0         while (iptr < iend) *iptr++ = -1;
7096             }
7097              
7098             /* If there is space, set up the whole thing as substring 0. The value of
7099             md->start_match_ptr might be modified if \K was encountered on the successful
7100             matching path. */
7101              
7102 14 50         if (offsetcount < 2) rc = 0; else
7103             {
7104 0           offsets[0] = (int)(md->start_match_ptr - md->start_subject);
7105 0           offsets[1] = (int)(md->end_match_ptr - md->start_subject);
7106             }
7107              
7108             /* Return MARK data if requested */
7109              
7110 14 50         if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
    0          
7111 0           *(extra_data->mark) = (pcre_uchar *)md->mark;
7112             DPRINTF((">>>> returning %d\n", rc));
7113             #ifdef NO_RECURSE
7114             release_match_heapframes(&frame_zero);
7115             #endif
7116 14           return rc;
7117             }
7118              
7119             /* Control gets here if there has been an error, or if the overall match
7120             attempt has failed at all permitted starting positions. */
7121              
7122 739 50         if (using_temporary_offsets)
7123             {
7124             DPRINTF(("Freeing temporary memory\n"));
7125 0           (PUBL(free))(md->offset_vector);
7126             }
7127              
7128             /* For anything other than nomatch or partial match, just return the code. */
7129              
7130 739 50         if (rc != MATCH_NOMATCH && rc != PCRE_ERROR_PARTIAL)
    0          
7131             {
7132             DPRINTF((">>>> error: returning %d\n", rc));
7133             #ifdef NO_RECURSE
7134             release_match_heapframes(&frame_zero);
7135             #endif
7136 0           return rc;
7137             }
7138              
7139             /* Handle partial matches - disable any mark data */
7140              
7141 739 50         if (match_partial != NULL)
7142             {
7143             DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n"));
7144 0           md->mark = NULL;
7145 0 0         if (offsetcount > 1)
7146             {
7147 0           offsets[0] = (int)(start_partial - (PCRE_PUCHAR)subject);
7148 0           offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
7149 0 0         if (offsetcount > 2)
7150 0           offsets[2] = (int)(match_partial - (PCRE_PUCHAR)subject);
7151             }
7152 0           rc = PCRE_ERROR_PARTIAL;
7153             }
7154              
7155             /* This is the classic nomatch case */
7156              
7157             else
7158             {
7159             DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
7160 739           rc = PCRE_ERROR_NOMATCH;
7161             }
7162              
7163             /* Return the MARK data if it has been requested. */
7164              
7165 739 50         if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_MARK) != 0)
    0          
7166 0           *(extra_data->mark) = (pcre_uchar *)md->nomatch_mark;
7167             #ifdef NO_RECURSE
7168             release_match_heapframes(&frame_zero);
7169             #endif
7170 753           return rc;
7171             }
7172              
7173             /* End of pcre_exec.c */