line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
/* Extended regular expression matching and search library. |
2
|
|
|
|
|
|
|
Copyright (C) 2002-2014 Free Software Foundation, Inc. |
3
|
|
|
|
|
|
|
This file is part of the GNU C Library. |
4
|
|
|
|
|
|
|
Contributed by Isamu Hasegawa . |
5
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
The GNU C Library is free software; you can redistribute it and/or |
7
|
|
|
|
|
|
|
modify it under the terms of the GNU Lesser General Public |
8
|
|
|
|
|
|
|
License as published by the Free Software Foundation; either |
9
|
|
|
|
|
|
|
version 2.1 of the License, or (at your option) any later version. |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
The GNU C Library is distributed in the hope that it will be useful, |
12
|
|
|
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of |
13
|
|
|
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14
|
|
|
|
|
|
|
Lesser General Public License for more details. |
15
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
You should have received a copy of the GNU Lesser General Public |
17
|
|
|
|
|
|
|
License along with the GNU C Library; if not, see |
18
|
|
|
|
|
|
|
<http://www.gnu.org/licenses/>. */ |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
#ifndef _REGEX_INTERNAL_H |
21
|
|
|
|
|
|
|
#define _REGEX_INTERNAL_H 1 |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
/* Pull in the standard C headers that the build configuration reports as
   available.  Each include is guarded by its HAVE_<HEADER>_H macro, so a
   header is only requested when the matching macro is defined.  */
#ifdef HAVE_ASSERT_H
# include <assert.h>
#endif
#ifdef HAVE_CTYPE_H
# include <ctype.h>
#endif
#ifdef HAVE_STDIO_H
# include <stdio.h>
#endif
#ifdef HAVE_STDLIB_H
# include <stdlib.h>
#endif
#ifdef HAVE_STRING_H
# include <string.h>
#endif
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
/* We do not want to include locale stuff: everything */ |
40
|
|
|
|
|
|
|
/* will be done using perl API */ |
41
|
|
|
|
|
|
|
#if 0 |
42
|
|
|
|
|
|
|
# include |
43
|
|
|
|
|
|
|
# include |
44
|
|
|
|
|
|
|
#endif |
45
|
|
|
|
|
|
|
#ifdef HAVE_WCHAR_H |
46
|
|
|
|
|
|
|
# include |
47
|
|
|
|
|
|
|
#endif |
48
|
|
|
|
|
|
|
#ifdef HAVE_WCTYPE_H |
49
|
|
|
|
|
|
|
# include |
50
|
|
|
|
|
|
|
#endif |
51
|
|
|
|
|
|
|
#ifndef HAS_BOOL |
52
|
|
|
|
|
|
|
/* Because perl usually already defined it */ |
53
|
|
|
|
|
|
|
# ifdef HAVE_STDBOOL_H |
54
|
|
|
|
|
|
|
# include |
55
|
|
|
|
|
|
|
# else |
56
|
|
|
|
|
|
|
# ifndef __cplusplus |
57
|
|
|
|
|
|
|
# ifndef bool |
58
|
|
|
|
|
|
|
# ifdef HAVE__BOOL |
59
|
|
|
|
|
|
|
typedef _Bool bool; |
60
|
|
|
|
|
|
|
# else |
61
|
|
|
|
|
|
|
typedef unsigned char bool; |
62
|
|
|
|
|
|
|
# endif |
63
|
|
|
|
|
|
|
# endif |
64
|
|
|
|
|
|
|
# ifndef true |
65
|
|
|
|
|
|
|
# define true 1 |
66
|
|
|
|
|
|
|
# endif |
67
|
|
|
|
|
|
|
# ifndef false |
68
|
|
|
|
|
|
|
# define false 0 |
69
|
|
|
|
|
|
|
# endif |
70
|
|
|
|
|
|
|
# define __bool_true_false_are_defined 1 |
71
|
|
|
|
|
|
|
# endif |
72
|
|
|
|
|
|
|
# endif |
73
|
|
|
|
|
|
|
#else |
74
|
|
|
|
|
|
|
/* This is perl's bool style. Though it usually does not define true or false */ |
75
|
|
|
|
|
|
|
# ifndef __bool_true_false_are_defined |
76
|
|
|
|
|
|
|
# ifndef true |
77
|
|
|
|
|
|
|
# define true 1 |
78
|
|
|
|
|
|
|
# endif |
79
|
|
|
|
|
|
|
# ifndef false |
80
|
|
|
|
|
|
|
# define false 0 |
81
|
|
|
|
|
|
|
# endif |
82
|
|
|
|
|
|
|
# define __bool_true_false_are_defined 1 |
83
|
|
|
|
|
|
|
# endif |
84
|
|
|
|
|
|
|
#endif /* HAS_BOOL */ |
85
|
|
|
|
|
|
|
#ifdef HAVE_STDINT_H |
86
|
|
|
|
|
|
|
#include |
87
|
|
|
|
|
|
|
#endif |
88
|
|
|
|
|
|
|
#ifdef HAVE_SYS_INT_TYPES_H |
89
|
|
|
|
|
|
|
/* for some old solaris */ |
90
|
|
|
|
|
|
|
#include |
91
|
|
|
|
|
|
|
#endif |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
#ifdef _LIBC |
94
|
|
|
|
|
|
|
# include |
95
|
|
|
|
|
|
|
# define lock_define(name) __libc_lock_define (, name) |
96
|
|
|
|
|
|
|
# define lock_init(lock) (__libc_lock_init (lock), 0) |
97
|
|
|
|
|
|
|
# define lock_fini(lock) 0 |
98
|
|
|
|
|
|
|
# define lock_lock(lock) __libc_lock_lock (lock) |
99
|
|
|
|
|
|
|
# define lock_unlock(lock) __libc_lock_unlock (lock) |
100
|
|
|
|
|
|
|
#elif defined GNULIB_LOCK |
101
|
|
|
|
|
|
|
# include "glthread/lock.h" |
102
|
|
|
|
|
|
|
/* Use gl_lock_define if empty macro arguments are known to work. |
103
|
|
|
|
|
|
|
Otherwise, fall back on less-portable substitutes. */ |
104
|
|
|
|
|
|
|
# if ((defined __GNUC__ && !defined __STRICT_ANSI__) \ |
105
|
|
|
|
|
|
|
|| (defined __STDC_VERSION__ && 199901L <= __STDC_VERSION__)) |
106
|
|
|
|
|
|
|
# define lock_define(name) gl_lock_define (, name) |
107
|
|
|
|
|
|
|
# elif USE_POSIX_THREADS |
108
|
|
|
|
|
|
|
# define lock_define(name) pthread_mutex_t name; |
109
|
|
|
|
|
|
|
# elif USE_PTH_THREADS |
110
|
|
|
|
|
|
|
# define lock_define(name) pth_mutex_t name; |
111
|
|
|
|
|
|
|
# elif USE_SOLARIS_THREADS |
112
|
|
|
|
|
|
|
# define lock_define(name) mutex_t name; |
113
|
|
|
|
|
|
|
# elif USE_WINDOWS_THREADS |
114
|
|
|
|
|
|
|
# define lock_define(name) gl_lock_t name; |
115
|
|
|
|
|
|
|
# else |
116
|
|
|
|
|
|
|
# define lock_define(name) |
117
|
|
|
|
|
|
|
# endif |
118
|
|
|
|
|
|
|
# define lock_init(lock) glthread_lock_init (&(lock)) |
119
|
|
|
|
|
|
|
# define lock_fini(lock) glthread_lock_destroy (&(lock)) |
120
|
|
|
|
|
|
|
# define lock_lock(lock) glthread_lock_lock (&(lock)) |
121
|
|
|
|
|
|
|
# define lock_unlock(lock) glthread_lock_unlock (&(lock)) |
122
|
|
|
|
|
|
|
#elif defined GNULIB_PTHREAD |
123
|
|
|
|
|
|
|
# include |
124
|
|
|
|
|
|
|
# define lock_define(name) pthread_mutex_t name; |
125
|
|
|
|
|
|
|
# define lock_init(lock) pthread_mutex_init (&(lock), 0) |
126
|
|
|
|
|
|
|
# define lock_fini(lock) pthread_mutex_destroy (&(lock)) |
127
|
|
|
|
|
|
|
# define lock_lock(lock) pthread_mutex_lock (&(lock)) |
128
|
|
|
|
|
|
|
# define lock_unlock(lock) pthread_mutex_unlock (&(lock)) |
129
|
|
|
|
|
|
|
#else |
130
|
|
|
|
|
|
|
# define lock_define(name) SV *name; |
131
|
|
|
|
|
|
|
/* GNU regex expects lock_init(lock) to return 0 on success */ |
132
|
|
|
|
|
|
|
/* Break on win32 ? */ |
133
|
|
|
|
|
|
|
/* |
134
|
|
|
|
|
|
|
# define lock_init(lock) (SvSHARE(lock), 0) |
135
|
|
|
|
|
|
|
# define lock_fini(lock) |
136
|
|
|
|
|
|
|
# define lock_lock(lock) SvLOCK(lock) |
137
|
|
|
|
|
|
|
# define lock_unlock(lock) SvUNLOCK(lock) |
138
|
|
|
|
|
|
|
*/ |
139
|
|
|
|
|
|
|
# define lock_init(lock) 0 |
140
|
|
|
|
|
|
|
# define lock_fini(lock) |
141
|
|
|
|
|
|
|
# define lock_lock(lock) |
142
|
|
|
|
|
|
|
# define lock_unlock(lock) |
143
|
|
|
|
|
|
|
#endif |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
/* In case that the system doesn't have isblank(). */ |
146
|
|
|
|
|
|
|
#if !defined _LIBC && ! (defined isblank || (HAVE_ISBLANK && HAVE_DECL_ISBLANK)) |
147
|
|
|
|
|
|
|
# define isblank(ch) ((ch) == ' ' || (ch) == '\t') |
148
|
|
|
|
|
|
|
#endif |
149
|
|
|
|
|
|
|
|
150
|
|
|
|
|
|
|
#ifdef _LIBC |
151
|
|
|
|
|
|
|
# ifndef _RE_DEFINE_LOCALE_FUNCTIONS |
152
|
|
|
|
|
|
|
# define _RE_DEFINE_LOCALE_FUNCTIONS 1 |
153
|
|
|
|
|
|
|
# include |
154
|
|
|
|
|
|
|
# include |
155
|
|
|
|
|
|
|
# include |
156
|
|
|
|
|
|
|
# endif |
157
|
|
|
|
|
|
|
#endif |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
/* This is for other GNU distributions with internationalized messages. */ |
160
|
|
|
|
|
|
|
#if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC |
161
|
|
|
|
|
|
|
# include |
162
|
|
|
|
|
|
|
# ifdef _LIBC |
163
|
|
|
|
|
|
|
# undef gettext |
164
|
|
|
|
|
|
|
# define gettext(msgid) \ |
165
|
|
|
|
|
|
|
__dcgettext (_libc_intl_domainname, msgid, LC_MESSAGES) |
166
|
|
|
|
|
|
|
# endif |
167
|
|
|
|
|
|
|
#else |
168
|
|
|
|
|
|
|
# define gettext(msgid) (msgid) |
169
|
|
|
|
|
|
|
#endif |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
#ifndef gettext_noop |
172
|
|
|
|
|
|
|
/* This define is so xgettext can find the internationalizable |
173
|
|
|
|
|
|
|
strings. */ |
174
|
|
|
|
|
|
|
# define gettext_noop(String) String |
175
|
|
|
|
|
|
|
#endif |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
#if __GNUC__ >= 3 |
178
|
|
|
|
|
|
|
# define BE(expr, val) __builtin_expect (expr, val) |
179
|
|
|
|
|
|
|
#else |
180
|
|
|
|
|
|
|
# define BE(expr, val) (expr) |
181
|
|
|
|
|
|
|
#endif |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
/* Number of ASCII characters. */ |
184
|
|
|
|
|
|
|
#define ASCII_CHARS 0x80 |
185
|
|
|
|
|
|
|
|
186
|
|
|
|
|
|
|
/* Number of single byte characters. */ |
187
|
|
|
|
|
|
|
#define SBC_MAX (UCHAR_MAX + 1) |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
#define COLL_ELEM_LEN_MAX 8 |
190
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
/* The character which represents newline. */ |
192
|
|
|
|
|
|
|
#define NEWLINE_CHAR '\n' |
193
|
|
|
|
|
|
|
#define WIDE_NEWLINE_CHAR L'\n' |
194
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
/* Rename to the standard API for use outside of glibc. */ |
196
|
|
|
|
|
|
|
#undef rpl__wint_t |
197
|
|
|
|
|
|
|
#undef rpl__wchar_t |
198
|
|
|
|
|
|
|
#undef rpl__mbsinit |
199
|
|
|
|
|
|
|
#undef rpl__wctype_t |
200
|
|
|
|
|
|
|
#undef rpl__wctype |
201
|
|
|
|
|
|
|
#undef rpl__isascii |
202
|
|
|
|
|
|
|
#undef rpl__isalnum |
203
|
|
|
|
|
|
|
#undef rpl__iswalnum |
204
|
|
|
|
|
|
|
#undef rpl__iscntrl |
205
|
|
|
|
|
|
|
#undef rpl__islower |
206
|
|
|
|
|
|
|
#undef rpl__isspace |
207
|
|
|
|
|
|
|
#undef rpl__isalpha |
208
|
|
|
|
|
|
|
#undef rpl__isdigit |
209
|
|
|
|
|
|
|
#undef rpl__isprint |
210
|
|
|
|
|
|
|
#undef rpl__isupper |
211
|
|
|
|
|
|
|
#undef rpl__isblank |
212
|
|
|
|
|
|
|
#undef rpl__isgraph |
213
|
|
|
|
|
|
|
#undef rpl__ispunct |
214
|
|
|
|
|
|
|
#undef rpl__isxdigit |
215
|
|
|
|
|
|
|
#undef rpl__iswlower |
216
|
|
|
|
|
|
|
#undef rpl__iswctype |
217
|
|
|
|
|
|
|
#undef rpl__btowc |
218
|
|
|
|
|
|
|
#undef rpl__mbrtowc |
219
|
|
|
|
|
|
|
#undef rpl__mbtowc |
220
|
|
|
|
|
|
|
#undef rpl__wcrtomb |
221
|
|
|
|
|
|
|
#undef rpl__towlower |
222
|
|
|
|
|
|
|
#undef rpl__mbstate_t |
223
|
|
|
|
|
|
|
#undef rpl__MB_CUR_MAX |
224
|
|
|
|
|
|
|
#undef rpl__MB_LEN_MAX |
225
|
|
|
|
|
|
|
#undef rpl__WEOF |
226
|
|
|
|
|
|
|
#ifndef _LIBC |
227
|
|
|
|
|
|
|
# ifndef _PERL_I18N |
228
|
|
|
|
|
|
|
# define rpl__wint_t wint_t |
229
|
|
|
|
|
|
|
# define rpl__wchar_t wchar_t |
230
|
|
|
|
|
|
|
# define rpl__mbsinit mbsinit |
231
|
|
|
|
|
|
|
# define rpl__wctype_t wctype_t |
232
|
|
|
|
|
|
|
# define rpl__wctype(c) wctype(c) |
233
|
|
|
|
|
|
|
# define rpl__isascii(c) isascii(c) |
234
|
|
|
|
|
|
|
# define rpl__isalnum(c) isalnum(c) |
235
|
|
|
|
|
|
|
# define rpl__iswalnum(c) iswalnum(c) |
236
|
|
|
|
|
|
|
# define rpl__iscntrl(c) iscntrl(c) |
237
|
|
|
|
|
|
|
# define rpl__islower(c) islower(c) |
238
|
|
|
|
|
|
|
# define rpl__isspace(c) isspace(c) |
239
|
|
|
|
|
|
|
# define rpl__isalpha(c) isalpha(c) |
240
|
|
|
|
|
|
|
# define rpl__isdigit(c) isdigit(c) |
241
|
|
|
|
|
|
|
# define rpl__isprint(c) isprint(c) |
242
|
|
|
|
|
|
|
# define rpl__isupper(c) isupper(c) |
243
|
|
|
|
|
|
|
# define rpl__isblank(c) isblank(c) |
244
|
|
|
|
|
|
|
# define rpl__isgraph(c) isgraph(c) |
245
|
|
|
|
|
|
|
# define rpl__ispunct(c) ispunct(c) |
246
|
|
|
|
|
|
|
# define rpl__isxdigit(c) isxdigit(c) |
247
|
|
|
|
|
|
|
# define rpl__iswlower(c) iswlower(c) |
248
|
|
|
|
|
|
|
/* Test wide character C against class descriptor T (a value obtained from
   rpl__wctype).  Takes two arguments, like the _PERL_I18N variant of this
   macro and like the standard iswctype() it forwards to.  */
# define rpl__iswctype(c, t) iswctype(c, t)
249
|
|
|
|
|
|
|
# define rpl__btowc(c) btowc(c) |
250
|
|
|
|
|
|
|
# define rpl__mbrtowc(pwc, s, n, ps) mbrtowc(pwc, s, n, ps) |
251
|
|
|
|
|
|
|
/* Three-argument multibyte-to-wide conversion.  Forward to mbtowc(), which
   takes exactly (pwc, s, n); mbrtowc() needs a fourth mbstate_t argument
   and is reached through rpl__mbrtowc instead.  */
# define rpl__mbtowc(pwc, s, n) mbtowc(pwc, s, n)
252
|
|
|
|
|
|
|
# define rpl__wcrtomb(s, wc, ps) wcrtomb(s, wc, ps) |
253
|
|
|
|
|
|
|
# define rpl__towlower(wc) towlower(wc) |
254
|
|
|
|
|
|
|
# define rpl__towupper(wc) towupper(wc) |
255
|
|
|
|
|
|
|
# define rpl__tolower(wc) tolower(wc) |
256
|
|
|
|
|
|
|
# define rpl__toupper(wc) toupper(wc) |
257
|
|
|
|
|
|
|
# define rpl__mbstate_t mbstate_t |
258
|
|
|
|
|
|
|
# define rpl__MB_CUR_MAX MB_CUR_MAX |
259
|
|
|
|
|
|
|
# define rpl__MB_LEN_MAX MB_LEN_MAX |
260
|
|
|
|
|
|
|
# define rpl__WEOF WEOF |
261
|
|
|
|
|
|
|
# else |
262
|
|
|
|
|
|
|
# define rpl__wint_t UV |
263
|
|
|
|
|
|
|
# define rpl__wchar_t UV |
264
|
|
|
|
|
|
|
# define rpl__mbsinit rpl_Perl_mbsinit |
265
|
|
|
|
|
|
|
# define rpl__wctype_t rpl_Perl_wctype_t |
266
|
|
|
|
|
|
|
# define rpl__wctype(property) rpl_Perl_wctype(aTHX_ property) |
267
|
|
|
|
|
|
|
# define rpl__isascii(c) rpl_Perl_isascii(aTHX_ c) |
268
|
|
|
|
|
|
|
# define rpl__isalnum(c) rpl_Perl_isalnum(aTHX_ c) |
269
|
|
|
|
|
|
|
# define rpl__iswalnum(c) rpl_Perl_iswalnum(aTHX_ c) |
270
|
|
|
|
|
|
|
# define rpl__iscntrl(c) rpl_Perl_iscntrl(aTHX_ c) |
271
|
|
|
|
|
|
|
# define rpl__islower(c) rpl_Perl_islower(aTHX_ c) |
272
|
|
|
|
|
|
|
# define rpl__isspace(c) rpl_Perl_isspace(aTHX_ c) |
273
|
|
|
|
|
|
|
# define rpl__isalpha(c) rpl_Perl_isalpha(aTHX_ c) |
274
|
|
|
|
|
|
|
# define rpl__isdigit(c) rpl_Perl_isdigit(aTHX_ c) |
275
|
|
|
|
|
|
|
# define rpl__isprint(c) rpl_Perl_isprint(aTHX_ c) |
276
|
|
|
|
|
|
|
# define rpl__isupper(c) rpl_Perl_isupper(aTHX_ c) |
277
|
|
|
|
|
|
|
# define rpl__isblank(c) rpl_Perl_isblank(aTHX_ c) |
278
|
|
|
|
|
|
|
# define rpl__isgraph(c) rpl_Perl_isgraph(aTHX_ c) |
279
|
|
|
|
|
|
|
# define rpl__ispunct(c) rpl_Perl_ispunct(aTHX_ c) |
280
|
|
|
|
|
|
|
# define rpl__isxdigit(c) rpl_Perl_isxdigit(aTHX_ c) |
281
|
|
|
|
|
|
|
# define rpl__iswlower(c) rpl_Perl_iswlower(aTHX_ c) |
282
|
|
|
|
|
|
|
# define rpl__iswctype(c, t) rpl_Perl_iswctype(aTHX_ c, t) |
283
|
|
|
|
|
|
|
# define rpl__btowc(c) rpl_Perl_btowc(aTHX_ c) |
284
|
|
|
|
|
|
|
# define rpl__mbrtowc(pwc, s, n, ps) rpl_Perl_mbrtowc(aTHX_ pwc, s, n, ps) |
285
|
|
|
|
|
|
|
/* NOTE(review): rpl__mbrtowc passes four arguments (pwc, s, n, ps) after
   aTHX_ to rpl_Perl_mbrtowc, but this wrapper forwards only three and omits
   ps -- confirm which function and argument list were intended here.  */
# define rpl__mbtowc(pwc, s, n) rpl_Perl_mbrtowc(aTHX_ pwc, s, n) |
286
|
|
|
|
|
|
|
# define rpl__wcrtomb(s, wc, ps) rpl_Perl_wcrtomb(aTHX_ s, wc, ps) |
287
|
|
|
|
|
|
|
# define rpl__towlower(wc) rpl_Perl_towlower(aTHX_ wc) |
288
|
|
|
|
|
|
|
# define rpl__towupper(wc) rpl_Perl_towupper(aTHX_ wc) |
289
|
|
|
|
|
|
|
# define rpl__tolower(wc) rpl_Perl_tolower(aTHX_ wc) |
290
|
|
|
|
|
|
|
# define rpl__toupper(wc) rpl_Perl_toupper(aTHX_ wc) |
291
|
|
|
|
|
|
|
# define rpl__mbstate_t rpl_Perl_mbstate_t |
292
|
|
|
|
|
|
|
# define rpl__MB_CUR_MAX rpl_Perl_MB_CUR_MAX(aTHX) |
293
|
|
|
|
|
|
|
# define rpl__MB_LEN_MAX UTF8_MAXBYTES |
294
|
|
|
|
|
|
|
# define rpl__WEOF ((UV)-1) |
295
|
|
|
|
|
|
|
typedef enum { |
296
|
|
|
|
|
|
|
PERL_WCTYPE_ALNUM = 1, |
297
|
|
|
|
|
|
|
PERL_WCTYPE_ALPHA, |
298
|
|
|
|
|
|
|
PERL_WCTYPE_CNTRL, |
299
|
|
|
|
|
|
|
PERL_WCTYPE_DIGIT, |
300
|
|
|
|
|
|
|
PERL_WCTYPE_GRAPH, |
301
|
|
|
|
|
|
|
PERL_WCTYPE_LOWER, |
302
|
|
|
|
|
|
|
PERL_WCTYPE_PRINT, |
303
|
|
|
|
|
|
|
PERL_WCTYPE_PUNCT, |
304
|
|
|
|
|
|
|
PERL_WCTYPE_SPACE, |
305
|
|
|
|
|
|
|
PERL_WCTYPE_UPPER, |
306
|
|
|
|
|
|
|
PERL_WCTYPE_XDIGIT |
307
|
|
|
|
|
|
|
} rpl_Perl_wctype_t; |
308
|
|
|
|
|
|
|
typedef struct { |
309
|
|
|
|
|
|
|
union |
310
|
|
|
|
|
|
|
{ |
311
|
|
|
|
|
|
|
unsigned int __wch; |
312
|
|
|
|
|
|
|
char __wchb[4]; |
313
|
|
|
|
|
|
|
} u; |
314
|
|
|
|
|
|
|
} rpl_Perl_mbstate_t; |
315
|
|
|
|
|
|
|
# endif |
316
|
|
|
|
|
|
|
# define __regfree regfree |
317
|
|
|
|
|
|
|
# define attribute_hidden |
318
|
|
|
|
|
|
|
#endif /* not _LIBC */ |
319
|
|
|
|
|
|
|
|
320
|
|
|
|
|
|
|
#if (defined MB_CUR_MAX && HAVE_WCTYPE_H && HAVE_ISWCTYPE) || _LIBC || defined(_PERL_I18N) |
321
|
|
|
|
|
|
|
# define RE_ENABLE_I18N |
322
|
|
|
|
|
|
|
#endif |
323
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
#if __GNUC__ < 3 + (__GNUC_MINOR__ < 1) |
325
|
|
|
|
|
|
|
# define __attribute__(arg) |
326
|
|
|
|
|
|
|
#endif |
327
|
|
|
|
|
|
|
|
328
|
|
|
|
|
|
|
typedef __re_idx_t Idx; |
329
|
|
|
|
|
|
|
#ifdef _REGEX_LARGE_OFFSETS |
330
|
|
|
|
|
|
|
# define IDX_MAX (SIZE_MAX - 2) |
331
|
|
|
|
|
|
|
#else |
332
|
|
|
|
|
|
|
# define IDX_MAX INT_MAX |
333
|
|
|
|
|
|
|
#endif |
334
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
/* Special return value for failure to match. */ |
336
|
|
|
|
|
|
|
#define REG_MISSING ((Idx) -1) |
337
|
|
|
|
|
|
|
|
338
|
|
|
|
|
|
|
/* Special return value for internal error. */ |
339
|
|
|
|
|
|
|
#define REG_ERROR ((Idx) -2) |
340
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
/* Test whether N is a valid index, and is not one of the above. */ |
342
|
|
|
|
|
|
|
#ifdef _REGEX_LARGE_OFFSETS |
343
|
|
|
|
|
|
|
# define REG_VALID_INDEX(n) ((Idx) (n) < REG_ERROR) |
344
|
|
|
|
|
|
|
#else |
345
|
|
|
|
|
|
|
# define REG_VALID_INDEX(n) (0 <= (n)) |
346
|
|
|
|
|
|
|
#endif |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
/* Test whether N is a valid nonzero index. */ |
349
|
|
|
|
|
|
|
#ifdef _REGEX_LARGE_OFFSETS |
350
|
|
|
|
|
|
|
# define REG_VALID_NONZERO_INDEX(n) ((Idx) ((n) - 1) < (Idx) (REG_ERROR - 1)) |
351
|
|
|
|
|
|
|
#else |
352
|
|
|
|
|
|
|
# define REG_VALID_NONZERO_INDEX(n) (0 < (n)) |
353
|
|
|
|
|
|
|
#endif |
354
|
|
|
|
|
|
|
|
355
|
|
|
|
|
|
|
/* A hash value, suitable for computing hash tables. */ |
356
|
|
|
|
|
|
|
typedef __re_size_t re_hashval_t; |
357
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
/* An integer used to represent a set of bits. It must be unsigned, |
359
|
|
|
|
|
|
|
and must be at least as wide as unsigned int. */ |
360
|
|
|
|
|
|
|
typedef unsigned long int bitset_word_t; |
361
|
|
|
|
|
|
|
/* All bits set in a bitset_word_t. */ |
362
|
|
|
|
|
|
|
#define BITSET_WORD_MAX ULONG_MAX |
363
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
/* Number of bits in a bitset_word_t. For portability to hosts with |
365
|
|
|
|
|
|
|
padding bits, do not use '(sizeof (bitset_word_t) * CHAR_BIT)'; |
366
|
|
|
|
|
|
|
instead, deduce it directly from BITSET_WORD_MAX. Avoid |
367
|
|
|
|
|
|
|
greater-than-32-bit integers and unconditional shifts by more than |
368
|
|
|
|
|
|
|
31 bits, as they're not portable. */ |
369
|
|
|
|
|
|
|
#if BITSET_WORD_MAX == 0xffffffffUL |
370
|
|
|
|
|
|
|
# define BITSET_WORD_BITS 32 |
371
|
|
|
|
|
|
|
#elif BITSET_WORD_MAX >> 31 >> 4 == 1 |
372
|
|
|
|
|
|
|
# define BITSET_WORD_BITS 36 |
373
|
|
|
|
|
|
|
#elif BITSET_WORD_MAX >> 31 >> 16 == 1 |
374
|
|
|
|
|
|
|
# define BITSET_WORD_BITS 48 |
375
|
|
|
|
|
|
|
#elif BITSET_WORD_MAX >> 31 >> 28 == 1 |
376
|
|
|
|
|
|
|
# define BITSET_WORD_BITS 60 |
377
|
|
|
|
|
|
|
#elif BITSET_WORD_MAX >> 31 >> 31 >> 1 == 1 |
378
|
|
|
|
|
|
|
# define BITSET_WORD_BITS 64 |
379
|
|
|
|
|
|
|
#elif BITSET_WORD_MAX >> 31 >> 31 >> 9 == 1 |
380
|
|
|
|
|
|
|
# define BITSET_WORD_BITS 72 |
381
|
|
|
|
|
|
|
#elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 3 == 1 |
382
|
|
|
|
|
|
|
# define BITSET_WORD_BITS 128 |
383
|
|
|
|
|
|
|
#elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 7 == 1 |
384
|
|
|
|
|
|
|
# define BITSET_WORD_BITS 256 |
385
|
|
|
|
|
|
|
#elif BITSET_WORD_MAX >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 31 >> 7 > 1 |
386
|
|
|
|
|
|
|
# define BITSET_WORD_BITS 257 /* any value > SBC_MAX will do here */ |
387
|
|
|
|
|
|
|
# if BITSET_WORD_BITS <= SBC_MAX |
388
|
|
|
|
|
|
|
# error "Invalid SBC_MAX" |
389
|
|
|
|
|
|
|
# endif |
390
|
|
|
|
|
|
|
#else |
391
|
|
|
|
|
|
|
# error "Add case for new bitset_word_t size" |
392
|
|
|
|
|
|
|
#endif |
393
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
/* Number of bitset_word_t values in a bitset_t. */ |
395
|
|
|
|
|
|
|
#define BITSET_WORDS ((SBC_MAX + BITSET_WORD_BITS - 1) / BITSET_WORD_BITS) |
396
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
typedef bitset_word_t bitset_t[BITSET_WORDS]; |
398
|
|
|
|
|
|
|
typedef bitset_word_t *re_bitset_ptr_t; |
399
|
|
|
|
|
|
|
typedef const bitset_word_t *re_const_bitset_ptr_t; |
400
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
#define PREV_WORD_CONSTRAINT 0x0001 |
402
|
|
|
|
|
|
|
#define PREV_NOTWORD_CONSTRAINT 0x0002 |
403
|
|
|
|
|
|
|
#define NEXT_WORD_CONSTRAINT 0x0004 |
404
|
|
|
|
|
|
|
#define NEXT_NOTWORD_CONSTRAINT 0x0008 |
405
|
|
|
|
|
|
|
#define PREV_NEWLINE_CONSTRAINT 0x0010 |
406
|
|
|
|
|
|
|
#define NEXT_NEWLINE_CONSTRAINT 0x0020 |
407
|
|
|
|
|
|
|
#define PREV_BEGBUF_CONSTRAINT 0x0040 |
408
|
|
|
|
|
|
|
#define NEXT_ENDBUF_CONSTRAINT 0x0080 |
409
|
|
|
|
|
|
|
#define WORD_DELIM_CONSTRAINT 0x0100 |
410
|
|
|
|
|
|
|
#define NOT_WORD_DELIM_CONSTRAINT 0x0200 |
411
|
|
|
|
|
|
|
|
412
|
|
|
|
|
|
|
typedef enum |
413
|
|
|
|
|
|
|
{ |
414
|
|
|
|
|
|
|
INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT, |
415
|
|
|
|
|
|
|
WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT, |
416
|
|
|
|
|
|
|
WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT, |
417
|
|
|
|
|
|
|
INSIDE_NOTWORD = PREV_NOTWORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT, |
418
|
|
|
|
|
|
|
LINE_FIRST = PREV_NEWLINE_CONSTRAINT, |
419
|
|
|
|
|
|
|
LINE_LAST = NEXT_NEWLINE_CONSTRAINT, |
420
|
|
|
|
|
|
|
BUF_FIRST = PREV_BEGBUF_CONSTRAINT, |
421
|
|
|
|
|
|
|
BUF_LAST = NEXT_ENDBUF_CONSTRAINT, |
422
|
|
|
|
|
|
|
WORD_DELIM = WORD_DELIM_CONSTRAINT, |
423
|
|
|
|
|
|
|
NOT_WORD_DELIM = NOT_WORD_DELIM_CONSTRAINT |
424
|
|
|
|
|
|
|
} re_context_type; |
425
|
|
|
|
|
|
|
|
426
|
|
|
|
|
|
|
typedef struct |
427
|
|
|
|
|
|
|
{ |
428
|
|
|
|
|
|
|
Idx alloc; |
429
|
|
|
|
|
|
|
Idx nelem; |
430
|
|
|
|
|
|
|
Idx *elems; |
431
|
|
|
|
|
|
|
} re_node_set; |
432
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
typedef enum |
434
|
|
|
|
|
|
|
{ |
435
|
|
|
|
|
|
|
NON_TYPE = 0, |
436
|
|
|
|
|
|
|
|
437
|
|
|
|
|
|
|
/* Node type, These are used by token, node, tree. */ |
438
|
|
|
|
|
|
|
CHARACTER = 1, |
439
|
|
|
|
|
|
|
END_OF_RE = 2, |
440
|
|
|
|
|
|
|
SIMPLE_BRACKET = 3, |
441
|
|
|
|
|
|
|
OP_BACK_REF = 4, |
442
|
|
|
|
|
|
|
OP_PERIOD = 5, |
443
|
|
|
|
|
|
|
#ifdef RE_ENABLE_I18N |
444
|
|
|
|
|
|
|
COMPLEX_BRACKET = 6, |
445
|
|
|
|
|
|
|
OP_UTF8_PERIOD = 7, |
446
|
|
|
|
|
|
|
#endif /* RE_ENABLE_I18N */ |
447
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
/* We define EPSILON_BIT as a macro so that OP_OPEN_SUBEXP is used |
449
|
|
|
|
|
|
|
when the debugger shows values of this enum type. */ |
450
|
|
|
|
|
|
|
#define EPSILON_BIT 8 |
451
|
|
|
|
|
|
|
OP_OPEN_SUBEXP = EPSILON_BIT | 0, |
452
|
|
|
|
|
|
|
OP_CLOSE_SUBEXP = EPSILON_BIT | 1, |
453
|
|
|
|
|
|
|
OP_ALT = EPSILON_BIT | 2, |
454
|
|
|
|
|
|
|
OP_DUP_ASTERISK = EPSILON_BIT | 3, |
455
|
|
|
|
|
|
|
ANCHOR = EPSILON_BIT | 4, |
456
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
/* Tree type, these are used only by tree. */ |
458
|
|
|
|
|
|
|
CONCAT = 16, |
459
|
|
|
|
|
|
|
SUBEXP = 17, |
460
|
|
|
|
|
|
|
|
461
|
|
|
|
|
|
|
/* Token type, these are used only by token. */ |
462
|
|
|
|
|
|
|
OP_DUP_PLUS = 18, |
463
|
|
|
|
|
|
|
OP_DUP_QUESTION, |
464
|
|
|
|
|
|
|
OP_OPEN_BRACKET, |
465
|
|
|
|
|
|
|
OP_CLOSE_BRACKET, |
466
|
|
|
|
|
|
|
OP_CHARSET_RANGE, |
467
|
|
|
|
|
|
|
OP_OPEN_DUP_NUM, |
468
|
|
|
|
|
|
|
OP_CLOSE_DUP_NUM, |
469
|
|
|
|
|
|
|
OP_NON_MATCH_LIST, |
470
|
|
|
|
|
|
|
OP_OPEN_COLL_ELEM, |
471
|
|
|
|
|
|
|
OP_CLOSE_COLL_ELEM, |
472
|
|
|
|
|
|
|
OP_OPEN_EQUIV_CLASS, |
473
|
|
|
|
|
|
|
OP_CLOSE_EQUIV_CLASS, |
474
|
|
|
|
|
|
|
OP_OPEN_CHAR_CLASS, |
475
|
|
|
|
|
|
|
OP_CLOSE_CHAR_CLASS, |
476
|
|
|
|
|
|
|
OP_WORD, |
477
|
|
|
|
|
|
|
OP_NOTWORD, |
478
|
|
|
|
|
|
|
OP_SPACE, |
479
|
|
|
|
|
|
|
OP_NOTSPACE, |
480
|
|
|
|
|
|
|
BACK_SLASH |
481
|
|
|
|
|
|
|
|
482
|
|
|
|
|
|
|
} re_token_type_t; |
483
|
|
|
|
|
|
|
|
484
|
|
|
|
|
|
|
#ifdef RE_ENABLE_I18N |
485
|
|
|
|
|
|
|
typedef struct |
486
|
|
|
|
|
|
|
{ |
487
|
|
|
|
|
|
|
/* Multibyte characters. */ |
488
|
|
|
|
|
|
|
rpl__wchar_t *mbchars; |
489
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
/* Collating symbols. */ |
491
|
|
|
|
|
|
|
# if (defined(_LIBC) || defined(_PERL_I18N)) |
492
|
|
|
|
|
|
|
# ifdef _LIBC |
493
|
|
|
|
|
|
|
int32_t *coll_syms; |
494
|
|
|
|
|
|
|
# else |
495
|
|
|
|
|
|
|
UV *coll_syms; |
496
|
|
|
|
|
|
|
# endif |
497
|
|
|
|
|
|
|
# endif |
498
|
|
|
|
|
|
|
|
499
|
|
|
|
|
|
|
/* Equivalence classes. */ |
500
|
|
|
|
|
|
|
# if (defined(_LIBC) || defined(_PERL_I18N)) |
501
|
|
|
|
|
|
|
# ifdef _LIBC |
502
|
|
|
|
|
|
|
int32_t *equiv_classes; |
503
|
|
|
|
|
|
|
# else |
504
|
|
|
|
|
|
|
UV *equiv_classes; |
505
|
|
|
|
|
|
|
# endif |
506
|
|
|
|
|
|
|
# endif |
507
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
/* Range expressions. */ |
509
|
|
|
|
|
|
|
# ifdef _LIBC |
510
|
|
|
|
|
|
|
uint32_t *range_starts; |
511
|
|
|
|
|
|
|
uint32_t *range_ends; |
512
|
|
|
|
|
|
|
# else /* not _LIBC */ |
513
|
|
|
|
|
|
|
rpl__wchar_t *range_starts; |
514
|
|
|
|
|
|
|
rpl__wchar_t *range_ends; |
515
|
|
|
|
|
|
|
# endif /* not _LIBC */ |
516
|
|
|
|
|
|
|
|
517
|
|
|
|
|
|
|
/* Character classes. */ |
518
|
|
|
|
|
|
|
rpl__wctype_t *char_classes; |
519
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
/* If this character set is the non-matching list. */ |
521
|
|
|
|
|
|
|
unsigned int non_match : 1; |
522
|
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
/* # of multibyte characters. */ |
524
|
|
|
|
|
|
|
Idx nmbchars; |
525
|
|
|
|
|
|
|
|
526
|
|
|
|
|
|
|
/* # of collating symbols. */ |
527
|
|
|
|
|
|
|
Idx ncoll_syms; |
528
|
|
|
|
|
|
|
|
529
|
|
|
|
|
|
|
/* # of equivalence classes. */ |
530
|
|
|
|
|
|
|
Idx nequiv_classes; |
531
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
/* # of range expressions. */ |
533
|
|
|
|
|
|
|
Idx nranges; |
534
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
/* # of character classes. */ |
536
|
|
|
|
|
|
|
Idx nchar_classes; |
537
|
|
|
|
|
|
|
} re_charset_t; |
538
|
|
|
|
|
|
|
#endif /* RE_ENABLE_I18N */ |
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
typedef struct |
541
|
|
|
|
|
|
|
{ |
542
|
|
|
|
|
|
|
union |
543
|
|
|
|
|
|
|
{ |
544
|
|
|
|
|
|
|
unsigned char c; /* for CHARACTER */ |
545
|
|
|
|
|
|
|
re_bitset_ptr_t sbcset; /* for SIMPLE_BRACKET */ |
546
|
|
|
|
|
|
|
#ifdef RE_ENABLE_I18N |
547
|
|
|
|
|
|
|
re_charset_t *mbcset; /* for COMPLEX_BRACKET */ |
548
|
|
|
|
|
|
|
#endif /* RE_ENABLE_I18N */ |
549
|
|
|
|
|
|
|
Idx idx; /* for BACK_REF */ |
550
|
|
|
|
|
|
|
re_context_type ctx_type; /* for ANCHOR */ |
551
|
|
|
|
|
|
|
} opr; |
552
|
|
|
|
|
|
|
#if __GNUC__ >= 2 && !defined __STRICT_ANSI__ |
553
|
|
|
|
|
|
|
re_token_type_t type : 8; |
554
|
|
|
|
|
|
|
#else |
555
|
|
|
|
|
|
|
re_token_type_t type; |
556
|
|
|
|
|
|
|
#endif |
557
|
|
|
|
|
|
|
unsigned int constraint : 10; /* context constraint */ |
558
|
|
|
|
|
|
|
unsigned int duplicated : 1; |
559
|
|
|
|
|
|
|
unsigned int opt_subexp : 1; |
560
|
|
|
|
|
|
|
#ifdef RE_ENABLE_I18N |
561
|
|
|
|
|
|
|
unsigned int accept_mb : 1; |
562
|
|
|
|
|
|
|
/* These 2 bits can be moved into the union if needed (e.g. if running out |
563
|
|
|
|
|
|
|
of bits; move opr.c to opr.c.c and move the flags to opr.c.flags). */ |
564
|
|
|
|
|
|
|
unsigned int mb_partial : 1; |
565
|
|
|
|
|
|
|
#endif |
566
|
|
|
|
|
|
|
unsigned int word_char : 1; |
567
|
|
|
|
|
|
|
} re_token_t; |
568
|
|
|
|
|
|
|
|
569
|
|
|
|
|
|
|
#define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT) |
570
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
struct re_string_t |
572
|
|
|
|
|
|
|
{ |
573
|
|
|
|
|
|
|
/* Indicate the raw buffer which is the original string passed as an |
574
|
|
|
|
|
|
|
argument of regexec(), re_search(), etc.. */ |
575
|
|
|
|
|
|
|
const unsigned char *raw_mbs; |
576
|
|
|
|
|
|
|
/* Store the multibyte string. In case of "case insensitive mode" like |
577
|
|
|
|
|
|
|
REG_ICASE, upper cases of the string are stored, otherwise MBS points |
578
|
|
|
|
|
|
|
the same address that RAW_MBS points. */ |
579
|
|
|
|
|
|
|
unsigned char *mbs; |
580
|
|
|
|
|
|
|
#ifdef RE_ENABLE_I18N |
581
|
|
|
|
|
|
|
/* Store the wide character string which is corresponding to MBS. */ |
582
|
|
|
|
|
|
|
rpl__wint_t *wcs; |
583
|
|
|
|
|
|
|
Idx *offsets; |
584
|
|
|
|
|
|
|
rpl__mbstate_t cur_state; |
585
|
|
|
|
|
|
|
#endif |
586
|
|
|
|
|
|
|
/* Index in RAW_MBS. Each character mbs[i] corresponds to |
587
|
|
|
|
|
|
|
raw_mbs[raw_mbs_idx + i]. */ |
588
|
|
|
|
|
|
|
Idx raw_mbs_idx; |
589
|
|
|
|
|
|
|
/* The length of the valid characters in the buffers. */ |
590
|
|
|
|
|
|
|
Idx valid_len; |
591
|
|
|
|
|
|
|
/* The corresponding number of bytes in raw_mbs array. */ |
592
|
|
|
|
|
|
|
Idx valid_raw_len; |
593
|
|
|
|
|
|
|
/* The length of the buffers MBS and WCS. */ |
594
|
|
|
|
|
|
|
Idx bufs_len; |
595
|
|
|
|
|
|
|
/* The index in MBS, which is updated by re_string_fetch_byte. */ |
596
|
|
|
|
|
|
|
Idx cur_idx; |
597
|
|
|
|
|
|
|
/* length of RAW_MBS array. */ |
598
|
|
|
|
|
|
|
Idx raw_len; |
599
|
|
|
|
|
|
|
/* This is RAW_LEN - RAW_MBS_IDX + VALID_LEN - VALID_RAW_LEN. */ |
600
|
|
|
|
|
|
|
Idx len; |
601
|
|
|
|
|
|
|
/* End of the buffer may be shorter than its length in the cases such |
602
|
|
|
|
|
|
|
as re_match_2, re_search_2. Then, we use STOP for end of the buffer |
603
|
|
|
|
|
|
|
instead of LEN. */ |
604
|
|
|
|
|
|
|
Idx raw_stop; |
605
|
|
|
|
|
|
|
/* This is RAW_STOP - RAW_MBS_IDX adjusted through OFFSETS. */ |
606
|
|
|
|
|
|
|
Idx stop; |
607
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
/* The context of mbs[0]. We store the context independently, since |
609
|
|
|
|
|
|
|
the context of mbs[0] may be different from raw_mbs[0], which is |
610
|
|
|
|
|
|
|
the beginning of the input string. */ |
611
|
|
|
|
|
|
|
unsigned int tip_context; |
612
|
|
|
|
|
|
|
/* The translation passed as a part of an argument of re_compile_pattern. */ |
613
|
|
|
|
|
|
|
RE_TRANSLATE_TYPE trans; |
614
|
|
|
|
|
|
|
/* Copy of re_dfa_t's word_char. */ |
615
|
|
|
|
|
|
|
re_const_bitset_ptr_t word_char; |
616
|
|
|
|
|
|
|
/* true if REG_ICASE. */ |
617
|
|
|
|
|
|
|
unsigned char icase; |
618
|
|
|
|
|
|
|
unsigned char is_utf8; |
619
|
|
|
|
|
|
|
unsigned char map_notascii; |
620
|
|
|
|
|
|
|
unsigned char mbs_allocated; |
621
|
|
|
|
|
|
|
unsigned char offsets_needed; |
622
|
|
|
|
|
|
|
unsigned char newline_anchor; |
623
|
|
|
|
|
|
|
unsigned char word_ops_used; |
624
|
|
|
|
|
|
|
int mb_cur_max; |
625
|
|
|
|
|
|
|
}; |
626
|
|
|
|
|
|
|
typedef struct re_string_t re_string_t; |
627
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
struct re_dfa_t; |
630
|
|
|
|
|
|
|
typedef struct re_dfa_t re_dfa_t; |
631
|
|
|
|
|
|
|
|
632
|
|
|
|
|
|
|
#ifndef _LIBC |
633
|
|
|
|
|
|
|
# define internal_function |
634
|
|
|
|
|
|
|
#endif |
635
|
|
|
|
|
|
|
|
636
|
|
|
|
|
|
|
#ifndef NOT_IN_libc |
637
|
|
|
|
|
|
|
static reg_errcode_t re_string_realloc_buffers (pTHX_ re_string_t *pstr, |
638
|
|
|
|
|
|
|
Idx new_buf_len) |
639
|
|
|
|
|
|
|
internal_function; |
640
|
|
|
|
|
|
|
# ifdef RE_ENABLE_I18N |
641
|
|
|
|
|
|
|
static void build_wcs_buffer (pTHX_ re_string_t *pstr) internal_function; |
642
|
|
|
|
|
|
|
static reg_errcode_t build_wcs_upper_buffer (pTHX_ re_string_t *pstr) |
643
|
|
|
|
|
|
|
internal_function; |
644
|
|
|
|
|
|
|
# endif /* RE_ENABLE_I18N */ |
645
|
|
|
|
|
|
|
static void build_upper_buffer (pTHX_ re_string_t *pstr) internal_function; |
646
|
|
|
|
|
|
|
static void re_string_translate_buffer (pTHX_ re_string_t *pstr) internal_function; |
647
|
|
|
|
|
|
|
static unsigned int re_string_context_at (pTHX_ const re_string_t *input, Idx idx, |
648
|
|
|
|
|
|
|
int eflags) |
649
|
|
|
|
|
|
|
internal_function __attribute__ ((pure)); |
650
|
|
|
|
|
|
|
#endif |
651
|
|
|
|
|
|
|
/* Yield the byte stored OFFSET positions past PSTR's current index in its
   MBS buffer.  CUR_IDX is not modified and no bounds check is made.
   OFFSET is parenthesized so that an argument containing a low-precedence
   operator (e.g. `cond ? 1 : 0') is added to CUR_IDX as a whole.  */
#define re_string_peek_byte(pstr, offset) \
  ((pstr)->mbs[(pstr)->cur_idx + (offset)])
653
|
|
|
|
|
|
|
#define re_string_fetch_byte(pstr) \ |
654
|
|
|
|
|
|
|
((pstr)->mbs[(pstr)->cur_idx++]) |
655
|
|
|
|
|
|
|
#define re_string_first_byte(pstr, idx) \ |
656
|
|
|
|
|
|
|
((idx) == (pstr)->valid_len || (pstr)->wcs[idx] != rpl__WEOF) |
657
|
|
|
|
|
|
|
#define re_string_is_single_byte_char(pstr, idx) \ |
658
|
|
|
|
|
|
|
((pstr)->wcs[idx] != rpl__WEOF && ((pstr)->valid_len == (idx) + 1 \ |
659
|
|
|
|
|
|
|
|| (pstr)->wcs[(idx) + 1] != rpl__WEOF)) |
660
|
|
|
|
|
|
|
#define re_string_eoi(pstr) ((pstr)->stop <= (pstr)->cur_idx) |
661
|
|
|
|
|
|
|
#define re_string_cur_idx(pstr) ((pstr)->cur_idx) |
662
|
|
|
|
|
|
|
#define re_string_get_buffer(pstr) ((pstr)->mbs) |
663
|
|
|
|
|
|
|
#define re_string_length(pstr) ((pstr)->len) |
664
|
|
|
|
|
|
|
#define re_string_byte_at(pstr,idx) ((pstr)->mbs[idx]) |
665
|
|
|
|
|
|
|
#define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx)) |
666
|
|
|
|
|
|
|
#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx)) |
667
|
|
|
|
|
|
|
|
668
|
|
|
|
|
|
|
#if defined _LIBC || HAVE_ALLOCA |
669
|
|
|
|
|
|
|
# include |
670
|
|
|
|
|
|
|
#endif |
671
|
|
|
|
|
|
|
|
672
|
|
|
|
|
|
|
#ifndef _LIBC |
673
|
|
|
|
|
|
|
# if HAVE_ALLOCA |
674
|
|
|
|
|
|
|
/* The OS usually guarantees only one guard page at the bottom of the stack, |
675
|
|
|
|
|
|
|
and a page size can be as small as 4096 bytes. So we cannot safely |
676
|
|
|
|
|
|
|
allocate anything larger than 4096 bytes. Also care for the possibility |
677
|
|
|
|
|
|
|
of a few compiler-allocated temporary stack slots. */ |
678
|
|
|
|
|
|
|
# define __libc_use_alloca(n) ((n) < 4032) |
679
|
|
|
|
|
|
|
# else |
680
|
|
|
|
|
|
|
/* alloca is implemented with malloc, so just use malloc. */ |
681
|
|
|
|
|
|
|
# define __libc_use_alloca(n) 0 |
682
|
|
|
|
|
|
|
# undef alloca |
683
|
|
|
|
|
|
|
# define alloca(n) malloc (n) |
684
|
|
|
|
|
|
|
# endif |
685
|
|
|
|
|
|
|
#endif |
686
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
#ifdef _LIBC |
688
|
|
|
|
|
|
|
# define MALLOC_0_IS_NONNULL 1 |
689
|
|
|
|
|
|
|
#elif !defined MALLOC_0_IS_NONNULL |
690
|
|
|
|
|
|
|
# define MALLOC_0_IS_NONNULL 0 |
691
|
|
|
|
|
|
|
#endif |
692
|
|
|
|
|
|
|
|
693
|
|
|
|
|
|
|
#ifndef MAX |
694
|
|
|
|
|
|
|
# define MAX(a,b) ((a) < (b) ? (b) : (a)) |
695
|
|
|
|
|
|
|
#endif |
696
|
|
|
|
|
|
|
#ifndef MIN |
697
|
|
|
|
|
|
|
# define MIN(a,b) ((a) < (b) ? (a) : (b)) |
698
|
|
|
|
|
|
|
#endif |
699
|
|
|
|
|
|
|
|
700
|
|
|
|
|
|
|
/* Memory helpers mapped onto Perl's allocator.

   Newx, Newxz and Renew take an ELEMENT COUNT plus the element type and
   multiply by sizeof (type) themselves:

     p = (t *) malloc (n * sizeof (t))       <==> Newx (p, n, t)
     p = (t *) calloc (n, sizeof (t))        <==> Newxz (p, n, t)
     p = (t *) realloc (p, n * sizeof (t))   <==> Renew (p, n, t)
     free (p)                                <==> Safefree (p)

   The count is therefore passed through unchanged; multiplying it by
   sizeof (t) here as well would request sizeof (t) times more memory than
   the N elements the callers ask for.  */

/* #define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t))) */
/* Allocate room for N objects of type T and store the pointer in DST.  */
#define re_malloc(dst,t,n) Newx(dst, n, t)

/* #define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t))) */
/* Resize P (in place, P is reassigned) to hold N objects of type T.  */
#define re_realloc(p,t,n) Renew(p, n, t)

/* Like re_malloc, but the memory is zero-filled.  */
#define re_calloc(dst,t,n) Newxz(dst, n, t)

/* #define re_free(p) free (p) */
/* Release P if it is non-NULL, then reset it to NULL.  P must be a
   modifiable lvalue and is evaluated more than once.  */
#define re_free(p) do { if ((p) != NULL) { Safefree (p); } (p) = NULL; } while (0)
713
|
|
|
|
|
|
|
|
714
|
|
|
|
|
|
|
/* Node of the binary tree referenced by re_dfa_t.str_tree.  Besides the
   parent/left/right links every node carries two more links, FIRST and
   NEXT (NOTE(review): presumably a precomputed traversal order -- confirm
   against the code that fills them in).  */
struct bin_tree_t
{
  struct bin_tree_t *parent;
  struct bin_tree_t *left;
  struct bin_tree_t *right;
  struct bin_tree_t *first;
  struct bin_tree_t *next;

  re_token_t token;

  /* 'node_idx' is the index in dfa->nodes, if 'type' == 0.
     Otherwise 'type' indicates the type of this node.  */
  Idx node_idx;
};
typedef struct bin_tree_t bin_tree_t;

/* Number of bin_tree_t nodes per storage chunk: as many as fit in 1024
   bytes after setting aside the size of one pointer (the chunk's NEXT
   link below).  */
#define BIN_TREE_STORAGE_SIZE \
  ((1024 - sizeof (void *)) / sizeof (bin_tree_t))

/* One chunk of BIN_TREE_STORAGE_SIZE nodes; chunks are chained through
   NEXT.  */
struct bin_tree_storage_t
{
  struct bin_tree_storage_t *next;
  bin_tree_t data[BIN_TREE_STORAGE_SIZE];
};
typedef struct bin_tree_storage_t bin_tree_storage_t;
739
|
|
|
|
|
|
|
|
740
|
|
|
|
|
|
|
/* Context flags: four distinct bits, each defined as the previous one
   shifted left by one.  */
#define CONTEXT_WORD 1
#define CONTEXT_NEWLINE (CONTEXT_WORD << 1)
#define CONTEXT_BEGBUF (CONTEXT_NEWLINE << 1)
#define CONTEXT_ENDBUF (CONTEXT_BEGBUF << 1)

/* Tests on a context value C.  The first four yield the masked bit
   (non-zero when set); IS_ORDINARY_CONTEXT is true when no bit is set.  */
#define IS_WORD_CONTEXT(c) ((c) & CONTEXT_WORD)
#define IS_NEWLINE_CONTEXT(c) ((c) & CONTEXT_NEWLINE)
#define IS_BEGBUF_CONTEXT(c) ((c) & CONTEXT_BEGBUF)
#define IS_ENDBUF_CONTEXT(c) ((c) & CONTEXT_ENDBUF)
#define IS_ORDINARY_CONTEXT(c) ((c) == 0)

/* Character classification helpers.  CH is evaluated more than once in
   the *_WORD_CHAR forms.  */
#define IS_WORD_CHAR(ch) (rpl__isalnum (ch) || (ch) == '_')
#define IS_NEWLINE(ch) ((ch) == NEWLINE_CHAR)
#define IS_WIDE_WORD_CHAR(ch) (rpl__iswalnum (ch) || (ch) == L'_')
#define IS_WIDE_NEWLINE(ch) ((ch) == WIDE_NEWLINE_CHAR)

/* True when CONSTRAINT contains a PREV_* requirement that CONTEXT does not
   meet.  Every use of CONSTRAINT is parenthesized (as in the NEXT variant
   below) so that a compound argument is masked as a whole rather than
   being re-associated by operator precedence.  */
#define NOT_SATISFY_PREV_CONSTRAINT(constraint,context) \
 ((((constraint) & PREV_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
  || (((constraint) & PREV_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
  || (((constraint) & PREV_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context))\
  || (((constraint) & PREV_BEGBUF_CONSTRAINT) && !IS_BEGBUF_CONTEXT (context)))

/* True when CONSTRAINT contains a NEXT_* requirement that CONTEXT does not
   meet.  */
#define NOT_SATISFY_NEXT_CONSTRAINT(constraint,context) \
 ((((constraint) & NEXT_WORD_CONSTRAINT) && !IS_WORD_CONTEXT (context)) \
  || (((constraint) & NEXT_NOTWORD_CONSTRAINT) && IS_WORD_CONTEXT (context)) \
  || (((constraint) & NEXT_NEWLINE_CONSTRAINT) && !IS_NEWLINE_CONTEXT (context)) \
  || (((constraint) & NEXT_ENDBUF_CONSTRAINT) && !IS_ENDBUF_CONTEXT (context)))
767
|
|
|
|
|
|
|
|
768
|
|
|
|
|
|
|
/* One DFA state.  Instances are referenced from the hash buckets in
   re_state_table_entry, from the init_state* members of re_dfa_t and from
   the matcher's state log.  */
struct re_dfastate_t
{
  re_hashval_t hash;
  re_node_set nodes;
  re_node_set non_eps_nodes;
  re_node_set inveclosure;
  re_node_set *entrance_nodes;
  struct re_dfastate_t **trtable, **word_trtable;
  /* Four bits wide, matching the four CONTEXT_* flags
     (NOTE(review): presumably a combination of them -- confirm).  */
  unsigned int context : 4;
  unsigned int halt : 1;
  /* If this state can accept "multi byte".
     Note that we refer to multibyte characters, and multi character
     collating elements as "multi byte".  */
  unsigned int accept_mb : 1;
  /* If this state has backreference node(s).  */
  unsigned int has_backref : 1;
  unsigned int has_constraint : 1;
};
typedef struct re_dfastate_t re_dfastate_t;
787
|
|
|
|
|
|
|
|
788
|
|
|
|
|
|
|
/* Growable array of DFA state pointers; re_dfa_t.state_table points at
   entries of this type.  NOTE(review): NUM/ALLOC look like the used and
   allocated lengths of ARRAY -- confirm against the code that grows it.  */
struct re_state_table_entry
{
  Idx num;
  Idx alloc;
  re_dfastate_t **array;
};
794
|
|
|
|
|
|
|
|
795
|
|
|
|
|
|
|
/* Array type used in re_sub_match_last_t and re_sub_match_top_t.  */

typedef struct
{
  Idx next_idx;
  Idx alloc;
  re_dfastate_t **array;
} state_array_t;

/* Store information about the node NODE whose type is OP_CLOSE_SUBEXP.  */

typedef struct
{
  Idx node;
  Idx str_idx; /* The position NODE match at.  */
  state_array_t path;  /* Embedded by value here (the top record holds a pointer).  */
} re_sub_match_last_t;

/* Store information about the node NODE whose type is OP_OPEN_SUBEXP.
   And information about the node, whose type is OP_CLOSE_SUBEXP,
   corresponding to NODE is stored in LASTS.  */

typedef struct
{
  Idx str_idx;
  Idx node;
  state_array_t *path;
  Idx alasts; /* Allocation size of LASTS.  */
  Idx nlasts; /* The number of LASTS.  */
  re_sub_match_last_t **lasts;
} re_sub_match_top_t;
826
|
|
|
|
|
|
|
|
827
|
|
|
|
|
|
|
/* Entry of the back-reference cache kept in re_match_context_t.bkref_ents.
   NOTE(review): field meanings are not visible in this part of the file;
   the names suggest a node, the string index it was tried at and the
   [subexp_from, subexp_to) span of the referenced sub-expression --
   confirm against the matcher.  */
struct re_backref_cache_entry
{
  Idx node;
  Idx str_idx;
  Idx subexp_from;
  Idx subexp_to;
  char more;
  char unused;
  unsigned short int eps_reachable_subexps_map;
};
837
|
|
|
|
|
|
|
|
838
|
|
|
|
|
|
|
/* Working data used while matching: the input string, the DFA, the
   regexec flags, the state log, the back-reference cache and the
   sub-match bookkeeping arrays.  */
typedef struct
{
  /* The string object corresponding to the input string.  */
  re_string_t input;
  /* The pointer itself is additionally const when building inside libc or
     with a C99-or-later compiler.  */
#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
  const re_dfa_t *const dfa;
#else
  const re_dfa_t *dfa;
#endif
  /* EFLAGS of the argument of regexec.  */
  int eflags;
  /* Where the matching ends.  */
  Idx match_last;
  Idx last_node;
  /* The state log used by the matcher.  */
  re_dfastate_t **state_log;
  Idx state_log_top;
  /* Back reference cache.  */
  Idx nbkref_ents;
  Idx abkref_ents;
  struct re_backref_cache_entry *bkref_ents;
  int max_mb_elem_len;
  Idx nsub_tops;
  Idx asub_tops;
  re_sub_match_top_t **sub_tops;
  /* Only present in the Perl I18N build.  */
#ifdef _PERL_I18N
  SV *sv;
#endif
} re_match_context_t;
867
|
|
|
|
|
|
|
|
868
|
|
|
|
|
|
|
/* Two arrays of DFA state pointers plus the last node / last string index
   and a node set of limits.  NOTE(review): how the sifting code uses
   these fields is not visible in this part of the file.  */
typedef struct
{
  re_dfastate_t **sifted_states;
  re_dfastate_t **limited_states;
  Idx last_node;
  Idx last_str_idx;
  re_node_set limits;
} re_sift_context_t;
876
|
|
|
|
|
|
|
|
877
|
|
|
|
|
|
|
/* One saved entry of the fail stack below: a string index, a node, a
   pointer to match registers and a node set.  NOTE(review): presumably a
   backtracking point -- confirm against the code that pushes/pops it.  */
struct re_fail_stack_ent_t
{
  Idx idx;
  Idx node;
  regmatch_t *regs;
  re_node_set eps_via_nodes;
};

/* Growable stack of re_fail_stack_ent_t.  NOTE(review): NUM/ALLOC look
   like the used and allocated lengths of STACK -- confirm.  */
struct re_fail_stack_t
{
  Idx num;
  Idx alloc;
  struct re_fail_stack_ent_t *stack;
};
891
|
|
|
|
|
|
|
|
892
|
|
|
|
|
|
|
/* The compiled pattern: node arrays, per-node sets, the DFA state table
   with its initial states, the parse tree with its chunked storage, and
   assorted flags.  */
struct re_dfa_t
{
  re_token_t *nodes;
  size_t nodes_alloc;
  size_t nodes_len;
  Idx *nexts;
  Idx *org_indices;
  re_node_set *edests;
  re_node_set *eclosures;
  re_node_set *inveclosures;
  struct re_state_table_entry *state_table;
  re_dfastate_t *init_state;
  re_dfastate_t *init_state_word;
  re_dfastate_t *init_state_nl;
  re_dfastate_t *init_state_begbuf;
  bin_tree_t *str_tree;
  bin_tree_storage_t *str_tree_storage;
  re_bitset_ptr_t sb_char;
  int str_tree_storage_idx;

  /* number of subexpressions 're_nsub' is in regex_t.  */
  re_hashval_t state_hash_mask;
  Idx init_node;
  Idx nbackref; /* The number of backreference in this dfa.  */

  /* Bitmap expressing which backreference is used.  */
  bitset_word_t used_bkref_map;
  bitset_word_t completed_bkref_map;

  unsigned int has_plural_match : 1;
  /* If this dfa has "multibyte node", which is a backreference or
     a node which can accept multibyte character or multi character
     collating element.  */
  unsigned int has_mb_node : 1;
  unsigned int is_utf8 : 1;
  unsigned int map_notascii : 1;
  unsigned int word_ops_used : 1;
  int mb_cur_max;
  bitset_t word_char;
  reg_syntax_t syntax;
  Idx *subexp_map;
  /* Only present in DEBUG builds.  */
#ifdef DEBUG
  char* re_str;
#endif
  /* Expands through the lock_define macro, which is defined outside this
     part of the file.  */
  lock_define (lock)
};
938
|
|
|
|
|
|
|
|
939
|
|
|
|
|
|
|
/* memset(dst, 0, n * sizeof(t)) <==> Zero(dst, n, t) */
/* #define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set)) */
/* Zero-fill the single re_node_set SET points to.  */
#define re_node_set_init_empty(set) Zero(set, 1, re_node_set)

/* Remove ID from SET by handing (result of re_node_set_contains) - 1 to
   re_node_set_remove_at.  NOTE(review): this relies on
   re_node_set_contains returning a 1-based position and on
   re_node_set_remove_at coping with the "not found" result -- both are
   defined outside this part of the file; confirm.  */
#define re_node_set_remove(set,id) \
  (re_node_set_remove_at (set, re_node_set_contains (set, id) - 1))
/* Reset the element count to zero; the ELEMS storage is kept.  */
#define re_node_set_empty(p) ((p)->nelem = 0)
/* Release the ELEMS storage through re_free.  */
#define re_node_set_free(set) re_free ((set)->elems)
947
|
|
|
|
|
|
|
|
948
|
|
|
|
|
|
|
|
949
|
|
|
|
|
|
|
/* Kind tag stored in bracket_elem_t.type.  */
typedef enum
{
  SB_CHAR,
  MB_CHAR,
  EQUIV_CLASS,
  COLL_SYM,
  CHAR_CLASS
} bracket_elem_type;

/* A tagged element: TYPE plus a union payload.
   NOTE(review): which OPR member goes with which TYPE is not shown here;
   presumably ch for SB_CHAR, wch for MB_CHAR and name for the rest --
   confirm against the bracket parser.  */
typedef struct
{
  bracket_elem_type type;
  union
  {
    unsigned char ch;
    unsigned char *name;
    rpl__wchar_t wch;
  } opr;
} bracket_elem_t;
968
|
|
|
|
|
|
|
|
969
|
|
|
|
|
|
|
|
970
|
|
|
|
|
|
|
/* Functions for bitset_t operation. */ |
971
|
|
|
|
|
|
|
|
972
|
|
|
|
|
|
|
static void |
973
|
17
|
|
|
|
|
|
bitset_set (pTHX_ bitset_t set, Idx i) |
974
|
|
|
|
|
|
|
{ |
975
|
17
|
|
|
|
|
|
set[i / BITSET_WORD_BITS] |= (bitset_word_t) 1 << i % BITSET_WORD_BITS; |
976
|
17
|
|
|
|
|
|
} |
977
|
|
|
|
|
|
|
|
978
|
|
|
|
|
|
|
static void |
979
|
0
|
|
|
|
|
|
bitset_clear (bitset_t set, Idx i) |
980
|
|
|
|
|
|
|
{ |
981
|
0
|
|
|
|
|
|
set[i / BITSET_WORD_BITS] &= ~ ((bitset_word_t) 1 << i % BITSET_WORD_BITS); |
982
|
0
|
|
|
|
|
|
} |
983
|
|
|
|
|
|
|
|
984
|
|
|
|
|
|
|
static bool |
985
|
28
|
|
|
|
|
|
bitset_contain (pTHX_ const bitset_t set, Idx i) |
986
|
|
|
|
|
|
|
{ |
987
|
28
|
|
|
|
|
|
return ((set[i / BITSET_WORD_BITS] >> i % BITSET_WORD_BITS) & 1) ? true : false; |
988
|
|
|
|
|
|
|
} |
989
|
|
|
|
|
|
|
|
990
|
|
|
|
|
|
|
static void |
991
|
53
|
|
|
|
|
|
bitset_empty (pTHX_ bitset_t set) |
992
|
|
|
|
|
|
|
{ |
993
|
53
|
|
|
|
|
|
Zero(set, 1, bitset_t); |
994
|
53
|
|
|
|
|
|
} |
995
|
|
|
|
|
|
|
|
996
|
|
|
|
|
|
|
static void |
997
|
0
|
|
|
|
|
|
bitset_set_all (pTHX_ bitset_t set) |
998
|
|
|
|
|
|
|
{ |
999
|
0
|
|
|
|
|
|
memset (set, -1, sizeof (bitset_word_t) * (SBC_MAX / BITSET_WORD_BITS)); |
1000
|
|
|
|
|
|
|
if (SBC_MAX % BITSET_WORD_BITS != 0) |
1001
|
|
|
|
|
|
|
set[BITSET_WORDS - 1] = |
1002
|
|
|
|
|
|
|
((bitset_word_t) 1 << SBC_MAX % BITSET_WORD_BITS) - 1; |
1003
|
0
|
|
|
|
|
|
} |
1004
|
|
|
|
|
|
|
|
1005
|
|
|
|
|
|
|
static void |
1006
|
17
|
|
|
|
|
|
bitset_copy (pTHX_ bitset_t dest, const bitset_t src) |
1007
|
|
|
|
|
|
|
{ |
1008
|
17
|
|
|
|
|
|
Copy (src, dest, 1, bitset_t); |
1009
|
17
|
|
|
|
|
|
} |
1010
|
|
|
|
|
|
|
|
1011
|
|
|
|
|
|
|
static void __attribute__ ((unused)) |
1012
|
2
|
|
|
|
|
|
bitset_not (pTHX_ bitset_t set) |
1013
|
|
|
|
|
|
|
{ |
1014
|
|
|
|
|
|
|
int bitset_i; |
1015
|
10
|
100
|
|
|
|
|
for (bitset_i = 0; bitset_i < SBC_MAX / BITSET_WORD_BITS; ++bitset_i) |
1016
|
8
|
|
|
|
|
|
set[bitset_i] = ~set[bitset_i]; |
1017
|
|
|
|
|
|
|
if (SBC_MAX % BITSET_WORD_BITS != 0) |
1018
|
|
|
|
|
|
|
set[BITSET_WORDS - 1] = |
1019
|
|
|
|
|
|
|
((((bitset_word_t) 1 << SBC_MAX % BITSET_WORD_BITS) - 1) |
1020
|
|
|
|
|
|
|
& ~set[BITSET_WORDS - 1]); |
1021
|
2
|
|
|
|
|
|
} |
1022
|
|
|
|
|
|
|
|
1023
|
|
|
|
|
|
|
static void __attribute__ ((unused)) |
1024
|
19
|
|
|
|
|
|
bitset_merge (pTHX_ bitset_t dest, const bitset_t src) |
1025
|
|
|
|
|
|
|
{ |
1026
|
|
|
|
|
|
|
int bitset_i; |
1027
|
95
|
100
|
|
|
|
|
for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i) |
1028
|
76
|
|
|
|
|
|
dest[bitset_i] |= src[bitset_i]; |
1029
|
19
|
|
|
|
|
|
} |
1030
|
|
|
|
|
|
|
|
1031
|
|
|
|
|
|
|
static void __attribute__ ((unused)) |
1032
|
2
|
|
|
|
|
|
bitset_mask (pTHX_ bitset_t dest, const bitset_t src) |
1033
|
|
|
|
|
|
|
{ |
1034
|
|
|
|
|
|
|
int bitset_i; |
1035
|
10
|
100
|
|
|
|
|
for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i) |
1036
|
8
|
|
|
|
|
|
dest[bitset_i] &= src[bitset_i]; |
1037
|
2
|
|
|
|
|
|
} |
1038
|
|
|
|
|
|
|
|
1039
|
|
|
|
|
|
|
#ifdef RE_ENABLE_I18N |
1040
|
|
|
|
|
|
|
/* Functions for re_string. */ |
1041
|
|
|
|
|
|
|
static int |
1042
|
|
|
|
|
|
|
internal_function __attribute__ ((pure, unused)) |
1043
|
37
|
|
|
|
|
|
re_string_char_size_at (pTHX_ const re_string_t *pstr, Idx idx) |
1044
|
|
|
|
|
|
|
{ |
1045
|
|
|
|
|
|
|
int byte_idx; |
1046
|
37
|
50
|
|
|
|
|
if (pstr->mb_cur_max == 1) |
1047
|
0
|
|
|
|
|
|
return 1; |
1048
|
105
|
100
|
|
|
|
|
for (byte_idx = 1; idx + byte_idx < pstr->valid_len; ++byte_idx) |
1049
|
104
|
100
|
|
|
|
|
if (pstr->wcs[idx + byte_idx] != rpl__WEOF) |
1050
|
36
|
|
|
|
|
|
break; |
1051
|
37
|
|
|
|
|
|
return byte_idx; |
1052
|
|
|
|
|
|
|
} |
1053
|
|
|
|
|
|
|
|
1054
|
|
|
|
|
|
|
static rpl__wint_t |
1055
|
|
|
|
|
|
|
internal_function __attribute__ ((pure, unused)) |
1056
|
38
|
|
|
|
|
|
re_string_wchar_at (pTHX_ const re_string_t *pstr, Idx idx) |
1057
|
|
|
|
|
|
|
{ |
1058
|
38
|
50
|
|
|
|
|
if (pstr->mb_cur_max == 1) |
1059
|
0
|
|
|
|
|
|
return (rpl__wint_t) pstr->mbs[idx]; |
1060
|
38
|
|
|
|
|
|
return (rpl__wint_t) pstr->wcs[idx]; |
1061
|
|
|
|
|
|
|
} |
1062
|
|
|
|
|
|
|
|
1063
|
|
|
|
|
|
|
#ifndef _LIBC |
1064
|
|
|
|
|
|
|
#ifdef _PERL_I18N |
1065
|
6
|
|
|
|
|
|
size_t rpl_Perl_MB_CUR_MAX(pTHX) { |
1066
|
|
|
|
|
|
|
size_t rc; |
1067
|
|
|
|
|
|
|
|
1068
|
6
|
|
|
|
|
|
rc = rpl__MB_LEN_MAX; |
1069
|
|
|
|
|
|
|
#ifndef NDEBUG |
1070
|
|
|
|
|
|
|
fprintf(stderr, "rpl_Perl_MB_CUR_MAX() ==> %d\n", (int) rc); |
1071
|
|
|
|
|
|
|
#endif |
1072
|
6
|
|
|
|
|
|
return rc; |
1073
|
|
|
|
|
|
|
} |
1074
|
|
|
|
|
|
|
/* Initialize only the first element */ |
1075
|
|
|
|
|
|
|
static rpl_Perl_mbstate_t Perl_internal_state = { 0 }; |
1076
|
|
|
|
|
|
|
|
1077
|
1560
|
|
|
|
|
|
/* Fallback chain for older Perl headers: isASCII_LC_uvchr -> isASCII_LC
   -> isASCII.  */
#ifndef isASCII_LC_uvchr
#  define isASCII_LC_uvchr isASCII_LC
#  ifndef isASCII_LC
#    define isASCII_LC isASCII
#  endif
#endif

/* Return the result of isASCII_LC_uvchr (C) as an int.  Unless NDEBUG is
   defined the call is traced to stderr.  */
int rpl_Perl_isascii(pTHX_ UV c) {
  const int verdict = isASCII_LC_uvchr(c);

#ifndef NDEBUG
  fprintf(stderr, "rpl_Perl_isascii(%d) ==> %d\n", (int) c, verdict);
#endif
  return verdict;
}
1092
|
|
|
|
|
|
|
|
1093
|
0
|
|
|
|
|
|
/* Fallback chain for older Perl headers: isALNUM_LC_uvchr -> isALNUM_LC
   -> isALNUM.  */
#ifndef isALNUM_LC_uvchr
#  define isALNUM_LC_uvchr isALNUM_LC
#  ifndef isALNUM_LC
#    define isALNUM_LC isALNUM
#  endif
#endif

/* Return the result of isALNUM_LC_uvchr (C) as an int.  Unless NDEBUG is
   defined the call is traced to stderr.  */
int rpl_Perl_isalnum(pTHX_ UV c) {
  const int verdict = isALNUM_LC_uvchr(c);

#ifndef NDEBUG
  fprintf(stderr, "rpl_Perl_isalnum(%d) ==> %d\n", (int) c, verdict);
#endif

  return verdict;
}
1109
|
|
|
|
|
|
|
|
1110
|
38
|
|
|
|
|
|
/* Fallback chain for older Perl headers: isALNUM_LC_uvchr -> isALNUM_LC
   -> isALNUM.  */
#ifndef isALNUM_LC_uvchr
#  define isALNUM_LC_uvchr isALNUM_LC
#  ifndef isALNUM_LC
#    define isALNUM_LC isALNUM
#  endif
#endif

/* Wide-character counterpart of rpl_Perl_isalnum; it uses the very same
   classifier, isALNUM_LC_uvchr (C).  Unless NDEBUG is defined the call is
   traced to stderr.  */
int rpl_Perl_iswalnum(pTHX_ UV c) {
  const int verdict = isALNUM_LC_uvchr(c);

#ifndef NDEBUG
  fprintf(stderr, "rpl_Perl_iswalnum(%d) ==> %d\n", (int) c, verdict);
#endif

  return verdict;
}
1126
|
|
|
|
|
|
|
|
1127
|
0
|
|
|
|
|
|
/* Fallback chain for older Perl headers: isCNTRL_LC_uvchr -> isCNTRL_LC
   -> isCNTRL.  */
#ifndef isCNTRL_LC_uvchr
#  define isCNTRL_LC_uvchr isCNTRL_LC
#  ifndef isCNTRL_LC
#    define isCNTRL_LC isCNTRL
#  endif
#endif

/* Return the result of isCNTRL_LC_uvchr (C) as an int.  Unless NDEBUG is
   defined the call is traced to stderr.  */
int rpl_Perl_iscntrl(pTHX_ UV c) {
  const int verdict = isCNTRL_LC_uvchr(c);

#ifndef NDEBUG
  fprintf(stderr, "rpl_Perl_iscntrl(%d) ==> %d\n", (int) c, verdict);
#endif

  return verdict;
}
1143
|
|
|
|
|
|
|
|
1144
|
12
|
|
|
|
|
|
/* Fallback chain for older Perl headers: isLOWER_LC_uvchr -> isLOWER_LC
   -> isLOWER.  */
#ifndef isLOWER_LC_uvchr
#  define isLOWER_LC_uvchr isLOWER_LC
#  ifndef isLOWER_LC
#    define isLOWER_LC isLOWER
#  endif
#endif

/* Return the result of isLOWER_LC_uvchr (C) as an int.  Unless NDEBUG is
   defined the call is traced to stderr.  */
int rpl_Perl_islower(pTHX_ UV c) {
  const int verdict = isLOWER_LC_uvchr(c);

#ifndef NDEBUG
  fprintf(stderr, "rpl_Perl_islower(%d) ==> %d\n", (int) c, verdict);
#endif

  return verdict;
}
1160
|
|
|
|
|
|
|
|
1161
|
0
|
|
|
|
|
|
/* Fallback chain for older Perl headers: isSPACE_LC_uvchr -> isSPACE_LC
   -> isSPACE.  */
#ifndef isSPACE_LC_uvchr
#  define isSPACE_LC_uvchr isSPACE_LC
#  ifndef isSPACE_LC
#    define isSPACE_LC isSPACE
#  endif
#endif

/* Return the result of isSPACE_LC_uvchr (C) as an int.  Unless NDEBUG is
   defined the call is traced to stderr.  */
int rpl_Perl_isspace(pTHX_ UV c) {
  const int verdict = isSPACE_LC_uvchr(c);

#ifndef NDEBUG
  fprintf(stderr, "rpl_Perl_isspace(%d) ==> %d\n", (int) c, verdict);
#endif

  return verdict;
}
1177
|
|
|
|
|
|
|
|
1178
|
0
|
|
|
|
|
|
/* Fallback chain for older Perl headers: isALPHA_LC_uvchr -> isALPHA_LC
   -> isALPHA.  */
#ifndef isALPHA_LC_uvchr
#  define isALPHA_LC_uvchr isALPHA_LC
#  ifndef isALPHA_LC
#    define isALPHA_LC isALPHA
#  endif
#endif

/* Return the result of isALPHA_LC_uvchr (C) as an int.  Unless NDEBUG is
   defined the call is traced to stderr.  */
int rpl_Perl_isalpha(pTHX_ UV c) {
  const int verdict = isALPHA_LC_uvchr(c);

#ifndef NDEBUG
  fprintf(stderr, "rpl_Perl_isalpha(%d) ==> %d\n", (int) c, verdict);
#endif

  return verdict;
}
1194
|
|
|
|
|
|
|
|
1195
|
0
|
|
|
|
|
|
/* Fallback chain for older Perl headers: isDIGIT_LC_uvchr -> isDIGIT_LC
   -> isDIGIT.  */
#ifndef isDIGIT_LC_uvchr
#  define isDIGIT_LC_uvchr isDIGIT_LC
#  ifndef isDIGIT_LC
#    define isDIGIT_LC isDIGIT
#  endif
#endif

/* Return the result of isDIGIT_LC_uvchr (C) as an int.  Unless NDEBUG is
   defined the call is traced to stderr.  */
int rpl_Perl_isdigit(pTHX_ UV c) {
  const int verdict = isDIGIT_LC_uvchr(c);

#ifndef NDEBUG
  fprintf(stderr, "rpl_Perl_isdigit(%d) ==> %d\n", (int) c, verdict);
#endif

  return verdict;
}
1211
|
|
|
|
|
|
|
|
1212
|
0
|
|
|
|
|
|
/* Fallback chain for older Perl headers: isPRINT_LC_uvchr -> isPRINT_LC
   -> isPRINT.  */
#ifndef isPRINT_LC_uvchr
#  define isPRINT_LC_uvchr isPRINT_LC
#  ifndef isPRINT_LC
#    define isPRINT_LC isPRINT
#  endif
#endif

/* Return the result of isPRINT_LC_uvchr (C) as an int.  Unless NDEBUG is
   defined the call is traced to stderr.  */
int rpl_Perl_isprint(pTHX_ UV c) {
  const int verdict = isPRINT_LC_uvchr(c);

#ifndef NDEBUG
  fprintf(stderr, "rpl_Perl_isprint(%d) ==> %d\n", (int) c, verdict);
#endif

  return verdict;
}
1228
|
|
|
|
|
|
|
|
1229
|
0
|
|
|
|
|
|
/* Fallback chain for older Perl headers: isUPPER_LC_uvchr -> isUPPER_LC
   -> isUPPER.  */
#ifndef isUPPER_LC_uvchr
#  define isUPPER_LC_uvchr isUPPER_LC
#  ifndef isUPPER_LC
#    define isUPPER_LC isUPPER
#  endif
#endif

/* Return the result of isUPPER_LC_uvchr (C) as an int.  Unless NDEBUG is
   defined the call is traced to stderr.  */
int rpl_Perl_isupper(pTHX_ UV c) {
  const int verdict = isUPPER_LC_uvchr(c);

#ifndef NDEBUG
  fprintf(stderr, "rpl_Perl_isupper(%d) ==> %d\n", (int) c, verdict);
#endif

  return verdict;
}
1245
|
|
|
|
|
|
|
|
1246
|
0
|
|
|
|
|
|
/* Fallback chain for older Perl headers: isBLANK_LC_uvchr -> isBLANK_LC
   -> isBLANK.  */
#ifndef isBLANK_LC_uvchr
#  define isBLANK_LC_uvchr isBLANK_LC
#  ifndef isBLANK_LC
#    define isBLANK_LC isBLANK
#  endif
#endif

/* Return the result of isBLANK_LC_uvchr (C) as an int.  Unless NDEBUG is
   defined the call is traced to stderr.  */
int rpl_Perl_isblank(pTHX_ UV c) {
  const int verdict = isBLANK_LC_uvchr(c);

#ifndef NDEBUG
  fprintf(stderr, "rpl_Perl_isblank(%d) ==> %d\n", (int) c, verdict);
#endif

  return verdict;
}
1262
|
|
|
|
|
|
|
|
1263
|
0
|
|
|
|
|
|
/* Return the result of isGRAPH_LC_uvchr (C) as an int.  Older Perl
   headers may lack that macro, hence the fallback chain
   isGRAPH_LC_uvchr -> isGRAPH_LC -> isGRAPH.

   The stderr trace is guarded by NDEBUG like every other rpl_Perl_is*
   wrapper in this file; a guard on DEBUG being undefined would compile
   the trace into ordinary non-debug builds.  */
int rpl_Perl_isgraph(pTHX_ UV c) {
  int rc;
#ifndef isGRAPH_LC_uvchr
#define isGRAPH_LC_uvchr isGRAPH_LC
#ifndef isGRAPH_LC
#define isGRAPH_LC isGRAPH
#endif
#endif
  rc = isGRAPH_LC_uvchr(c);

#ifndef NDEBUG
  fprintf(stderr, "rpl_Perl_isgraph(%d) ==> %d\n", (int) c, rc);
#endif

  return rc;
}
1279
|
|
|
|
|
|
|
|
1280
|
0
|
|
|
|
|
|
/* Fallback chain for older Perl headers: isPUNCT_LC_uvchr -> isPUNCT_LC
   -> isPUNCT.  */
#ifndef isPUNCT_LC_uvchr
#  define isPUNCT_LC_uvchr isPUNCT_LC
#  ifndef isPUNCT_LC
#    define isPUNCT_LC isPUNCT
#  endif
#endif

/* Return the result of isPUNCT_LC_uvchr (C) as an int.  Unless NDEBUG is
   defined the call is traced to stderr.  */
int rpl_Perl_ispunct(pTHX_ UV c) {
  const int verdict = isPUNCT_LC_uvchr(c);

#ifndef NDEBUG
  fprintf(stderr, "rpl_Perl_ispunct(%d) ==> %d\n", (int) c, verdict);
#endif

  return verdict;
}
1296
|
|
|
|
|
|
|
|
1297
|
0
|
|
|
|
|
|
/* Fallback chain for older Perl headers: isXDIGIT_LC_uvchr -> isXDIGIT_LC
   -> isXDIGIT.  */
#ifndef isXDIGIT_LC_uvchr
#  define isXDIGIT_LC_uvchr isXDIGIT_LC
#  ifndef isXDIGIT_LC
#    define isXDIGIT_LC isXDIGIT
#  endif
#endif

/* Return the result of isXDIGIT_LC_uvchr (C) as an int.  Unless NDEBUG is
   defined the call is traced to stderr.  */
int rpl_Perl_isxdigit(pTHX_ UV c) {
  const int verdict = isXDIGIT_LC_uvchr(c);

#ifndef NDEBUG
  fprintf(stderr, "rpl_Perl_isxdigit(%d) ==> %d\n", (int) c, verdict);
#endif

  return verdict;
}
1313
|
|
|
|
|
|
|
|
1314
|
12
|
|
|
|
|
|
/* Wide-character counterpart of rpl_Perl_islower: simply forwards WC to
   it and returns its result.  Unless NDEBUG is defined the call is traced
   to stderr; the argument is cast to unsigned long and therefore printed
   with %lu.  */
int rpl_Perl_iswlower(pTHX_ UV wc) {
  int rc = rpl_Perl_islower(aTHX_ wc);

#ifndef NDEBUG
  fprintf(stderr, "rpl_Perl_iswlower(%lu) ==> %d\n", (unsigned long) wc, rc);
#endif

  return rc;
}
1323
|
|
|
|
|
|
|
|
1324
|
|
|
|
|
|
|
/* Our mb implementations are all stateless */ |
1325
|
1696
|
|
|
|
|
|
/* mbrtowc replacement that decodes through Perl's utf8n_to_uvchr.

   PWC  receives the decoded code point on success (may be NULL).
   S    bytes to decode; NULL is treated as the empty string "" with
        PWC = NULL and N = 1.
   N    number of bytes available at S.
   PS   conversion state; never read or written here.

   Returns 0 when the decoded code point is 0, the length reported by
   utf8n_to_uvchr for any other decoded code point, (size_t) -2 when N is
   0, and (size_t) -1 with errno set to EILSEQ when decoding fails.  */
size_t rpl_Perl_mbrtowc(pTHX_ UV *restrict pwc, const char *restrict s, size_t n, void *restrict ps) {
  STRLEN char_len;
  UV code_point;
  size_t result;
#ifndef NDEBUG
  void octdump(pTHX_ const void *mem, unsigned int len);
#endif

  if (s == NULL) {
    pwc = NULL;
    s = "";
    n = 1;
  }

#ifndef NDEBUG
  octdump(aTHX_ s, n);
#endif

  if (n != 0) {
    /* Per the original author's note: the buffers handed to this function
       are allocated at full size from the very beginning, because Perl
       must not emit a warning for a too-short buffer and the only way to
       silence it is UTF8_CHECK_ONLY.  For the same reason (size_t) -2 is
       never returned from this branch.  */
    code_point = utf8n_to_uvchr((U8 *) s, n, &char_len, UTF8_CHECK_ONLY);
    if (code_point == 0 && *s != 0) {
      /* Invalid */
      errno = EILSEQ;
      /* The conversion state is undefined, says POSIX.  */
      result = (size_t)(-1);
    } else {
      if (pwc != NULL) {
        *pwc = code_point;
      }
      result = (code_point == 0) ? 0 : char_len;
    }
  } else {
    result = (size_t)(-2);
  }

#ifndef NDEBUG
  fprintf(stderr, "rpl_Perl_mbrtowc ==> %d\n", (int) result);
#endif

  return result;
}
1375
|
|
|
|
|
|
|
|
1376
|
1536
|
|
|
|
|
|
/* mbtowc replacement built on rpl_Perl_mbrtowc.

   PWC  receives the decoded code point (may be NULL).
   S    bytes to decode, at most N of them; need not be NUL-terminated.
   N    number of bytes available at S.

   Returns 0 when S is NULL (no state-dependent encodings) or when S
   starts with a NUL byte (in which case *PWC is set to 0), -1 when
   rpl_Perl_mbrtowc reports (size_t) -1 or (size_t) -2, and the length
   reported by rpl_Perl_mbrtowc otherwise.  */
int rpl_Perl_mbtowc(pTHX_ UV *restrict pwc, const char *restrict s, size_t n) {
  int rc;

  static rpl_Perl_mbstate_t state;
  /* If s is NULL the function has to return null or not null
     depending on the encoding having a state depending encoding or
     not.  */
  if (s == NULL) {
    /* No support for state dependent encodings.  */
    rc = 0;
  }
  else if (*s == '\0') {
    if (pwc != NULL) {
      *pwc = L'\0';
    }
    rc = 0;
  } else {
    /* Map the two size_t error codes explicitly instead of relying on the
       implementation-defined conversion of a huge size_t to int.  */
    const size_t len = rpl_Perl_mbrtowc(aTHX_ pwc, s, n, &state);

    rc = (len == (size_t)(-1) || len == (size_t)(-2)) ? -1 : (int) len;
  }

#ifndef NDEBUG
  /* S may be NULL, and callers such as rpl_Perl_btowc pass a buffer that
     is not NUL-terminated, so print at most N bytes of it.  */
  fprintf(stderr, "rpl_Perl_mbtowc(pwc, s=\"%.*s\", n=%d) ==> %d\n",
          (s != NULL) ? (int) n : 0, (s != NULL) ? s : "", (int) n, (int) rc);
#endif

  return rc;
}
1405
|
|
|
|
|
|
|
|
1406
|
1536
|
|
|
|
|
|
/*
 * btowc() replacement: widen a single byte.
 *
 * Returns rpl__WEOF when c is EOF or when rpl_Perl_mbtowc() rejects the
 * one-byte sequence; otherwise returns the character it decoded.
 */
rpl__wint_t rpl_Perl_btowc (pTHX_ int c) {
  rpl__wint_t result = rpl__WEOF;

  if (c != EOF) {
    rpl__wchar_t wide;
    char byte[1];

    /* Exactly one byte, deliberately not NUL-terminated. */
    byte[0] = (U8)c;
    if (rpl_Perl_mbtowc(aTHX_ &wide, byte, 1) >= 0) {
      result = (rpl__wint_t) wide;
    }
  }

#ifndef NDEBUG
  fprintf(stderr, "rpl_Perl_btowc(c=%d) ==> %d\n", (int) c, (int) result);
#endif

  return result;
}
1425
|
|
|
|
|
|
|
|
1426
|
0
|
|
|
|
|
|
/*
 * iswctype() replacement that dispatches to Perl's is*_uni() macros.
 *
 *   wi  character to classify
 *   wt  class selector, one of the PERL_WCTYPE_* values
 *
 * Returns the value of the matching is*_uni() macro, or 0 when wi is
 * rpl__WEOF or wt is not one of the handled selectors.
 */
int rpl_Perl_iswctype(pTHX_ rpl__wint_t wi, rpl__wctype_t wt) {
  int rc = 0;

  if (wi != rpl__WEOF) {
    const UV cp = (UV) wi;

    switch (wt) {
    case PERL_WCTYPE_ALNUM:
      rc = isALNUM_uni(cp);
      break;
    case PERL_WCTYPE_ALPHA:
      rc = isALPHA_uni(cp);
      break;
    case PERL_WCTYPE_CNTRL:
      rc = isCNTRL_uni(cp);
      break;
    case PERL_WCTYPE_DIGIT:
      rc = isDIGIT_uni(cp);
      break;
    case PERL_WCTYPE_GRAPH:
      rc = isGRAPH_uni(cp);
      break;
    case PERL_WCTYPE_LOWER:
      rc = isLOWER_uni(cp);
      break;
    case PERL_WCTYPE_PRINT:
      rc = isPRINT_uni(cp);
      break;
    case PERL_WCTYPE_PUNCT:
      rc = isPUNCT_uni(cp);
      break;
    case PERL_WCTYPE_SPACE:
      rc = isSPACE_uni(cp);
      break;
    case PERL_WCTYPE_UPPER:
      rc = isUPPER_uni(cp);
      break;
    case PERL_WCTYPE_XDIGIT:
      rc = isXDIGIT_uni(cp);
      break;
    default:
      rc = 0;
      break;
    }
  }

#ifndef NDEBUG
  /* The argument is cast to unsigned long, so the conversion must be %lu. */
  fprintf(stderr, "rpl_Perl_iswctype(wi=%lu, wt=%d) ==> %d\n", (unsigned long) wi, (int) wt, (int) rc);
#endif

  return rc;
}
1477
|
|
|
|
|
|
|
|
1478
|
0
|
|
|
|
|
|
/*
 * wctype() replacement: map a character-class name to a PERL_WCTYPE_*
 * selector.
 *
 * Only the leading characters of property are compared (strncmp over the
 * length of each known name), so any string that merely starts with a
 * known class name is accepted.  Returns 0 when nothing matches.
 */
rpl_Perl_wctype_t rpl_Perl_wctype(pTHX_ const char * property) {
  /* Probed in this order; the first prefix match wins. */
  const struct {
    const char        *name;
    rpl_Perl_wctype_t  type;
  } classes[] = {
    { "alnum",  PERL_WCTYPE_ALNUM  },
    { "alpha",  PERL_WCTYPE_ALPHA  },
    { "cntrl",  PERL_WCTYPE_CNTRL  },
    { "digit",  PERL_WCTYPE_DIGIT  },
    { "graph",  PERL_WCTYPE_GRAPH  },
    { "lower",  PERL_WCTYPE_LOWER  },
    { "print",  PERL_WCTYPE_PRINT  },
    { "punct",  PERL_WCTYPE_PUNCT  },
    { "space",  PERL_WCTYPE_SPACE  },
    { "upper",  PERL_WCTYPE_UPPER  },
    { "xdigit", PERL_WCTYPE_XDIGIT }
  };
  rpl_Perl_wctype_t found = 0;
  size_t k;

  for (k = 0; k < sizeof(classes) / sizeof(classes[0]); k++) {
    if (strncmp(property, classes[k].name, strlen(classes[k].name)) == 0) {
      found = classes[k].type;
      break;
    }
  }

#ifndef NDEBUG
  fprintf(stderr, "rpl_Perl_wctype(property=%s) ==> %d\n", property, (int) found);
#endif

  return found;
}
1524
|
|
|
|
|
|
|
|
1525
|
23
|
|
|
|
|
|
/*
 * mbsinit() replacement.
 *
 * Returns 1 when ps is NULL or when the first byte of *ps is zero,
 * and 0 otherwise.
 */
int rpl_Perl_mbsinit(rpl__mbstate_t *ps) {
  if (ps == NULL) {
    return 1;
  }

  /* Only the first byte of the state object is inspected. */
  return *(const char *) ps == 0;
}
1529
|
|
|
|
|
|
|
|
1530
|
5
|
|
|
|
|
|
/*
 * wctomb() replacement built on Perl's UTF-8 helpers.
 *
 *   s   destination buffer (may be NULL)
 *   wc  character to convert
 *
 * Returns 0 when s is NULL, 1 after storing a single NUL byte when wc is
 * 0, and otherwise the number of bytes copied into s.
 *
 * wc is first encoded with uvchr_to_utf8() into a scratch buffer, which
 * is then run through bytes_from_utf8(); whatever that call returns is
 * what gets copied to s.  Per perlapi, bytes_from_utf8() hands back the
 * original pointer when no conversion takes place and a freshly allocated
 * buffer otherwise, which is why the result is freed only when it differs
 * from the scratch buffer.
 */
int Perl_wctomb(pTHX_ char *restrict s, rpl__wchar_t wc) {
  U8 scratch[UTF8_MAXBYTES+1];
  bool is_utf8 = 1;
  U8 *bytes;
  STRLEN len;

  if (s == NULL) {
    return 0;
  }

  if (wc == 0) {
    *s = '\0';
    return 1;
  }

  len = uvchr_to_utf8(scratch, (UV) wc) - scratch;
  bytes = bytes_from_utf8(scratch, &len, &is_utf8);
  memcpy(s, bytes, len);

  if (bytes != scratch) {
    Safefree(bytes);
  }

#ifndef NDEBUG
  /* The argument is cast to unsigned long, so the conversion must be %lu. */
  fprintf(stderr, "Perl_wctomb(%lu) ==> %d\n", (unsigned long) wc, (int) len);
#endif

  return len;
}
1559
|
|
|
|
|
|
|
|
1560
|
5
|
|
|
|
|
|
/*
 * wcrtomb() replacement on top of Perl_wctomb().
 *
 * Only stateless encodings are supported, so ps must be NULL or in the
 * initial state; otherwise errno is set to EINVAL and (size_t)-1 is
 * returned.  With s == NULL the function returns 1.  A negative result
 * from Perl_wctomb() is reported as (size_t)-1 with errno set to EILSEQ.
 */
size_t rpl_Perl_wcrtomb (pTHX_ char *s, rpl__wchar_t wc, rpl__mbstate_t *ps) {
  int written;

  if (ps != NULL && !rpl_Perl_mbsinit ( ps)) {
    errno = EINVAL;
    return (size_t)(-1);
  }

  if (s == NULL) {
    /* We know the NUL wide character corresponds to the NUL character. */
    return 1;
  }

  written = Perl_wctomb (aTHX_ s, wc);
  if (written < 0) {
    errno = EILSEQ;
    return (size_t)(-1);
  }

  return written;
}
1585
|
|
|
|
|
|
|
|
1586
|
0
|
|
|
|
|
|
/*
 * towlower() replacement: returns the value produced by Perl's
 * toLOWER_uni() for wc.  The UTF-8 bytes and length that the macro also
 * writes out are discarded.
 */
rpl__wint_t rpl_Perl_towlower(pTHX_ UV wc) {
  STRLEN encoded_len;
  U8 encoded[UTF8_MAXBYTES_CASE+1];
  rpl__wint_t lowered;

  lowered = toLOWER_uni(wc, encoded, &encoded_len);

#ifndef NDEBUG
  fprintf(stderr, "rpl_Perl_towlower(%d) ==> %d\n", (int) wc, (int) lowered);
#endif

  return lowered;
}
1600
|
|
|
|
|
|
|
|
1601
|
5
|
|
|
|
|
|
/*
 * towupper() replacement: returns the value produced by Perl's
 * toUPPER_uni() for wc.  The UTF-8 bytes and length that the macro also
 * writes out are discarded.
 */
rpl__wint_t rpl_Perl_towupper(pTHX_ UV wc) {
  U8 s[UTF8_MAXBYTES_CASE+1];
  rpl__wint_t rc;
  STRLEN len;

  rc = toUPPER_uni(wc, s, &len);

#ifndef NDEBUG
  /* Trace under this function's own name (previously said "towlower"). */
  fprintf(stderr, "rpl_Perl_towupper(%d) ==> %d\n", (int) wc, (int) rc);
#endif

  return rc;
}
1615
|
|
|
|
|
|
|
|
1616
|
0
|
|
|
|
|
|
/*
 * tolower() replacement for single-byte characters, via Perl's toLOWER().
 *
 * The argument is truncated to U8 before the lookup; the caller is
 * expected to pass a value that fits in 8 bits.
 */
rpl__wint_t rpl_Perl_tolower(pTHX_ UV c) {
  const U8 byte = (U8) c;
  rpl__wint_t lowered;

  lowered = toLOWER(byte);

#ifndef NDEBUG
  fprintf(stderr, "rpl_Perl_tolower(%d) ==> %d\n", (int) c, (int) lowered);
#endif

  return lowered;
}
1629
|
|
|
|
|
|
|
|
1630
|
18
|
|
|
|
|
|
/*
 * toupper() replacement for single-byte characters, via Perl's toUPPER().
 *
 * The argument is truncated to U8 before the lookup; the caller is
 * expected to pass a value that fits in 8 bits.
 */
rpl__wint_t rpl_Perl_toupper(pTHX_ UV c) {
  const U8 byte = (U8) c;
  rpl__wint_t raised;

  raised = toUPPER(byte);

#ifndef NDEBUG
  fprintf(stderr, "rpl_Perl_toupper(%d) ==> %d\n", (int) c, (int) raised);
#endif

  return raised;
}
1643
|
|
|
|
|
|
|
|
1644
|
|
|
|
|
|
|
#endif /* _PERL_I18N */ |
1645
|
|
|
|
|
|
|
#endif /* _LIBC */ |
1646
|
|
|
|
|
|
|
|
1647
|
|
|
|
|
|
|
# ifndef NOT_IN_libc |
1648
|
|
|
|
|
|
|
static int |
1649
|
|
|
|
|
|
|
internal_function __attribute__ ((pure, unused)) |
1650
|
35
|
|
|
|
|
|
re_string_elem_size_at (pTHX_ const re_string_t *pstr, SV *sv, Idx idx) |
1651
|
|
|
|
|
|
|
{ |
1652
|
|
|
|
|
|
|
# ifdef _LIBC |
1653
|
|
|
|
|
|
|
const unsigned char *p, *extra; |
1654
|
|
|
|
|
|
|
const int32_t *table, *indirect; |
1655
|
|
|
|
|
|
|
# include |
1656
|
|
|
|
|
|
|
uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); |
1657
|
|
|
|
|
|
|
|
1658
|
|
|
|
|
|
|
if (nrules != 0) |
1659
|
|
|
|
|
|
|
{ |
1660
|
|
|
|
|
|
|
table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); |
1661
|
|
|
|
|
|
|
extra = (const unsigned char *) |
1662
|
|
|
|
|
|
|
_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); |
1663
|
|
|
|
|
|
|
indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, |
1664
|
|
|
|
|
|
|
_NL_COLLATE_INDIRECTMB); |
1665
|
|
|
|
|
|
|
p = pstr->mbs + idx; |
1666
|
|
|
|
|
|
|
findidx (&p, pstr->len - idx); |
1667
|
|
|
|
|
|
|
return p - pstr->mbs - idx; |
1668
|
|
|
|
|
|
|
} |
1669
|
|
|
|
|
|
|
else |
1670
|
|
|
|
|
|
|
# else |
1671
|
|
|
|
|
|
|
# ifdef _PERL_I18N |
1672
|
|
|
|
|
|
|
{ |
1673
|
35
|
50
|
|
|
|
|
if (! DO_UTF8(sv)) { |
|
|
50
|
|
|
|
|
|
1674
|
|
|
|
|
|
|
/* Per def perl's non UTF-8 is one byte */ |
1675
|
|
|
|
|
|
|
#ifndef NDEBUG |
1676
|
|
|
|
|
|
|
fprintf(stderr, "re_string_elem_size_at(.., Idx=%d) => 1\n", (int) idx); |
1677
|
|
|
|
|
|
|
#endif |
1678
|
0
|
|
|
|
|
|
return 1; |
1679
|
|
|
|
|
|
|
} else { |
1680
|
|
|
|
|
|
|
/* We know that pstr->mbs is at offset raw_mbs_idx v.s. original string */ |
1681
|
35
|
|
|
|
|
|
I32 offset = pstr->raw_mbs_idx + idx; |
1682
|
35
|
|
|
|
|
|
I32 len = 1; |
1683
|
|
|
|
|
|
|
|
1684
|
35
|
|
|
|
|
|
sv_pos_b2u(sv, &offset); |
1685
|
35
|
|
|
|
|
|
sv_pos_u2b(sv, &offset, &len); |
1686
|
|
|
|
|
|
|
#ifndef NDEBUG |
1687
|
|
|
|
|
|
|
fprintf(stderr, "re_string_elem_size_at(.., Idx=%d) => %d\n", (int) idx, (int) len); |
1688
|
|
|
|
|
|
|
#endif |
1689
|
35
|
|
|
|
|
|
return (int) len; |
1690
|
|
|
|
|
|
|
} |
1691
|
|
|
|
|
|
|
|
1692
|
|
|
|
|
|
|
} |
1693
|
|
|
|
|
|
|
# else |
1694
|
|
|
|
|
|
|
return 1; |
1695
|
|
|
|
|
|
|
# endif |
1696
|
|
|
|
|
|
|
# endif /* _LIBC */ |
1697
|
|
|
|
|
|
|
} |
1698
|
|
|
|
|
|
|
# endif |
1699
|
|
|
|
|
|
|
#endif /* RE_ENABLE_I18N */ |
1700
|
|
|
|
|
|
|
|
1701
|
|
|
|
|
|
|
/* __GNUC_PREREQ(maj, min): non-zero when compiling with GCC (or a
   compiler that defines __GNUC__ and __GNUC_MINOR__) at version maj.min
   or later; always 0 otherwise.  Left alone if already defined.  */
#ifndef __GNUC_PREREQ
# if defined(__GNUC__) && defined(__GNUC_MINOR__)
#  define __GNUC_PREREQ(maj, min) \
	((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min))
# else
#  define __GNUC_PREREQ(maj, min) 0
# endif
#endif

/* __attribute_warn_unused_result__: expands to the warn_unused_result
   attribute from GCC 3.4 on, and to nothing elsewhere.  */
#if __GNUC_PREREQ (3,4)
# undef __attribute_warn_unused_result__
# define __attribute_warn_unused_result__ \
   __attribute__ ((__warn_unused_result__))
#else
# define __attribute_warn_unused_result__ /* empty */
#endif
1717
|
|
|
|
|
|
|
|
1718
|
|
|
|
|
|
|
#ifndef NDEBUG |
1719
|
|
|
|
|
|
|
#include |
1720
|
|
|
|
|
|
|
#include |
1721
|
|
|
|
|
|
|
|
1722
|
|
|
|
|
|
|
#ifndef OCTDUMP_COLS |
1723
|
|
|
|
|
|
|
#define OCTDUMP_COLS 8 |
1724
|
|
|
|
|
|
|
#endif |
1725
|
|
|
|
|
|
|
|
1726
|
|
|
|
|
|
|
/*
 * Debug helper: dump LEN bytes starting at MEM to stderr.
 *
 * Output is laid out in rows of OCTDUMP_COLS bytes.  Each row starts with
 * the offset of its first byte in hex, followed by every byte as three
 * octal digits, followed by the same bytes as characters ('.' stands in
 * for anything isprint() rejects).  A short final row is padded with
 * blanks so that the character column stays aligned.
 */
void octdump(pTHX_ const void *mem, unsigned int len)
{
  const unsigned char *bytes = (const unsigned char *) mem;
  const unsigned int tail = len % OCTDUMP_COLS;
  /* Round the cell count up to a whole number of rows. */
  const unsigned int cells = tail ? len + (OCTDUMP_COLS - tail) : len;
  unsigned int i, j;

  for (i = 0; i < cells; i++)
    {
      /* print offset */
      if (i % OCTDUMP_COLS == 0)
        {
          fprintf(stderr, "0x%06x: ", i);
        }

      /* print oct data */
      if (i < len)
        {
          fprintf(stderr, "%03o ", (unsigned int) bytes[i]);
        }
      else
        {
          /* Past the data: blank cell as wide as "%03o " (4 columns). */
          fprintf(stderr, "%4s", "");
        }

      /* print ASCII dump once the row is complete */
      if (i % OCTDUMP_COLS == (OCTDUMP_COLS - 1))
        {
          for (j = i - (OCTDUMP_COLS - 1); j <= i; j++)
            {
              if (j >= len)
                {
                  fputc(' ', stderr);
                }
              else if (isprint(bytes[j]))
                {
                  /* bytes[] is unsigned char, as the <ctype.h> functions
                     require; a negative plain char would be undefined. */
                  fputc(bytes[j], stderr);
                }
              else
                {
                  fputc('.', stderr);
                }
            }
          fputc('\n', stderr);
        }
    }
}
1770
|
|
|
|
|
|
|
#endif /* NDEBUG */ |
1771
|
|
|
|
|
|
|
#endif /* _REGEX_INTERNAL_H */ |