File Coverage

ext/re/dquote_static.c

Criterion	Covered	Total	%
statement	84	116	72.4
branch			n/a
condition			n/a
subroutine			n/a
total	84	116	72.4

line	stmt	code
1		/* dquote_static.c
2		*
3		* This file contains static functions that are related to
4		* parsing double-quotish expressions, but are used in more than
5		* one file.
6		*
7		* It is currently #included by regcomp.c and toke.c.
8		*/
9
10		#define PERL_IN_DQUOTE_STATIC_C
11		#include "embed.h"
12
13		/*
14		- regcurly - a little FSA that accepts {\d+,?\d*}
15		Pulled from regcomp.c.
16		*/
17		PERL_STATIC_INLINE I32
18	8044	S_regcurly(pTHX_ const char *s,
19		const bool rbrace_must_be_escaped /* Should the terminating '} be
20		preceded by a backslash? This
21		is an abnormal case */
22		)
23		{
24	8044	PERL_ARGS_ASSERT_REGCURLY;
25
26	8044	if (*s++ != '{')
27		return FALSE;
28	6053	if (!isDIGIT(*s))
29		return FALSE;
30	3027	while (isDIGIT(*s))
31	3026	s++;
32	3018	if (*s == ',') {
33	6038	s++;
34	2	while (isDIGIT(*s))
35	3026	s++;
36		}
37
38	8	return (rbrace_must_be_escaped)
39	6052	? s == '\\' && (s+1) == '}'
40	114812	: *s == '}';
41		}
42
43		/* XXX Add documentation after final interface and behavior is decided */
44		/* May want to show context for error, so would pass Perl_bslash_c(pTHX_ const char* current, const char* start, const bool output_warning)
45		U8 source = *current;
46		*/
47
48		STATIC char
49	114812	S_grok_bslash_c(pTHX_ const char source, const bool utf8, const bool output_warning)
50		{
51
52		U8 result;
53
54	35	if (utf8) {
55		/* Trying to deprecate non-ASCII usages. This construct has never
56		* worked for a utf8 variant. So, even though are accepting non-ASCII
57		* Latin1 in 5.14, no need to make them work under utf8 */
58	0	if (! isASCII(source)) {
59	114812	Perl_croak(aTHX_ "Character following \"\\c\" must be ASCII");
60		}
61		}
62
63	114812	result = toCTRL(source);
64	0	if (! isASCII(source)) {
65	114812	Perl_ck_warner_d(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
66		"Character following \"\\c\" must be ASCII");
67		}
68	10	else if (! isCNTRL(result) && output_warning) {
69	4	if (source == '{') {
70	6	Perl_ck_warner_d(aTHX_ packWARN2(WARN_DEPRECATED, WARN_SYNTAX),
71		"\"\\c{\" is deprecated and is more clearly written as \";\"");
72		}
73		else {
74		U8 clearer[3];
75		U8 i = 0;
76	4	if (! isWORDCHAR(result)) {
77	6	clearer[i++] = '\\';
78		}
79	6	clearer[i++] = result;
80	6	clearer[i++] = '\0';
81
82	114808	Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
83		"\"\\c%c\" is more clearly written simply as \"%s\"",
84		source,
85		clearer);
86		}
87		}
88
89	32	return result;
90		}
91
92		STATIC bool
93	32	S_grok_bslash_o(pTHX_ char *s, UV uv, const char** error_msg,
94		const bool output_warning, const bool strict,
95		const bool silence_non_portable,
96		const bool UTF)
97		{
98
99		/* Documentation to be supplied when interface nailed down finally
100		* This returns FALSE if there is an error which the caller need not recover
101		* from; , otherwise TRUE. In either case the caller should look at *len
102		* On input:
103		* s is the address of a pointer to a NULL terminated string that begins
104		* with 'o', and the previous character was a backslash. At exit, *s
105		* will be advanced to the byte just after those absorbed by this
106		* function. Hence the caller can continue parsing from there. In
107		* the case of an error, this routine has generally positioned *s to
108		* point just to the right of the first bad spot, so that a message
109		* that has a "<--" to mark the spot will be correctly positioned.
110		* uv points to a UV that will hold the output value, valid only if the
111		* return from the function is TRUE
112		* error_msg is a pointer that will be set to an internal buffer giving an
113		* error message upon failure (the return is FALSE). Untouched if
114		* function succeeds
115		* output_warning says whether to output any warning messages, or suppress
116		* them
117		* strict is true if this should fail instead of warn if there are
118		* non-octal digits within the braces
119		* silence_non_portable is true if to suppress warnings about the code
120		* point returned being too large to fit on all platforms.
121		* UTF is true iff the string *s is encoded in UTF-8.
122		*/
123		char* e;
124		STRLEN numbers_len;
125	32	I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
126		\| PERL_SCAN_DISALLOW_PREFIX
127		/* XXX Until the message is improved in grok_oct, handle errors
128		* ourselves */
129		\| PERL_SCAN_SILENT_ILLDIGIT;
130
131	32	PERL_ARGS_ASSERT_GROK_BSLASH_O;
132
133
134	22	assert(**s == 'o');
135	22	(*s)++;
136
137	10	if (**s != '{') {
138	10	*error_msg = "Missing braces on \\o{}";
139	2	return FALSE;
140		}
141
142	4	e = strchr(*s, '}');
143	0	if (!e) {
144	2	(s)++; / Move past the '{' */
145	2	while (isOCTAL(*s)) { / Position beyond the legal digits */
146	8	(*s)++;
147		}
148	8	*error_msg = "Missing right brace on \\o{";
149	8	return FALSE;
150		}
151
152	1	(s)++; / Point to expected first digit (could be first byte of utf8
153		sequence if not a digit) */
154	1	numbers_len = e - *s;
155	1	if (numbers_len == 0) {
156	7	(s)++; / Move past the } */
157	7	*error_msg = "Number with no digits";
158	7	return FALSE;
159		}
160
161	7	if (silence_non_portable) {
162	4	flags \|= PERL_SCAN_SILENT_NON_PORTABLE;
163		}
164
165	0	uv = grok_oct(s, &numbers_len, &flags, NULL);
166		/* Note that if has non-octal, will ignore everything starting with that up
167		* to the '}' */
168
169	0	if (numbers_len != (STRLEN) (e - *s)) {
170	0	if (strict) {
171	0	*s += numbers_len;
172	4	s += (UTF) ? UTF8SKIP(s) : (STRLEN) 1;
173	8	*error_msg = "Non-octal character";
174	4	return FALSE;
175		}
176	5	else if (output_warning) {
177	5	Perl_ck_warner(aTHX_ packWARN(WARN_DIGIT),
178		/* diag_listed_as: Non-octal character '%c'. Resolved as "%s" */
179		"Non-octal character '%c'. Resolved as \"\\o{%.*s}\"",
180	15920694	(s + numbers_len),
181		(int) numbers_len,
182		*s);
183		}
184		}
185
186		/* Return past the '}' */
187	15920694	*s = e + 1;
188
189	15920694	return TRUE;
190		}
191
192		PERL_STATIC_INLINE bool
193	15735754	S_grok_bslash_x(pTHX_ char *s, UV uv, const char** error_msg,
194		const bool output_warning, const bool strict,
195		const bool silence_non_portable,
196		const bool UTF)
197		{
198
199		/* Documentation to be supplied when interface nailed down finally
200		* This returns FALSE if there is an error which the caller need not recover
201		* from; , otherwise TRUE. In either case the caller should look at *len
202		* On input:
203		* s is the address of a pointer to a NULL terminated string that begins
204		* with 'x', and the previous character was a backslash. At exit, *s
205		* will be advanced to the byte just after those absorbed by this
206		* function. Hence the caller can continue parsing from there. In
207		* the case of an error, this routine has generally positioned *s to
208		* point just to the right of the first bad spot, so that a message
209		* that has a "<--" to mark the spot will be correctly positioned.
210		* uv points to a UV that will hold the output value, valid only if the
211		* return from the function is TRUE
212		* error_msg is a pointer that will be set to an internal buffer giving an
213		* error message upon failure (the return is FALSE). Untouched if
214		* function succeeds
215		* output_warning says whether to output any warning messages, or suppress
216		* them
217		* strict is true if anything out of the ordinary should cause this to
218		* fail instead of warn or be silent. For example, it requires
219		* exactly 2 digits following the \x (when there are no braces).
220		* 3 digits could be a mistake, so is forbidden in this mode.
221		* silence_non_portable is true if to suppress warnings about the code
222		* point returned being too large to fit on all platforms.
223		* UTF is true iff the string *s is encoded in UTF-8.
224		*/
225		char* e;
226		STRLEN numbers_len;
227	15735754	I32 flags = PERL_SCAN_DISALLOW_PREFIX;
228
229	15735754	PERL_ARGS_ASSERT_GROK_BSLASH_X;
230
231		PERL_UNUSED_ARG(output_warning);
232
233	185060	assert(**s == 'x');
234	185060	(*s)++;
235
236	81	if (strict) {
237	21	flags \|= PERL_SCAN_SILENT_ILLDIGIT;
238		}
239
240	60	if (**s != '{') {
241	184979	STRLEN len = (strict) ? 3 : 2;
242
243	184979	uv = grok_hex(s, &len, &flags, NULL);
244	184979	*s += len;
245	184979	if (strict && len != 2) {
246	184979	if (len < 2) {
247	184979	s += (UTF) ? UTF8SKIP(s) : 1;
248	184979	*error_msg = "Non-hex character";
249		}
250		else {
251	0	*error_msg = "Use \\x{...} for more than two hex characters";
252		}
253		return FALSE;
254		}
255		return TRUE;
256		}
257
258	60	e = strchr(*s, '}');
259	60	if (!e) {
260	0	(s)++; / Move past the '{' */
261	0	while (isXDIGIT(*s)) { / Position beyond the legal digits */
262	0	(*s)++;
263		}
264		/* XXX The corresponding message above for \o is just '\\o{'; other
265		* messages for other constructs include the '}', so are inconsistent.
266		*/
267	0	*error_msg = "Missing right brace on \\x{}";
268	0	return FALSE;
269		}
270
271	60	(s)++; / Point to expected first digit (could be first byte of utf8
272		sequence if not a digit) */
273	60	numbers_len = e - *s;
274	60	if (numbers_len == 0) {
275	0	if (strict) {
276	0	(s)++; / Move past the } */
277	0	*error_msg = "Number with no digits";
278	0	return FALSE;
279		}
280		return TRUE;
281		}
282
283	60	flags \|= PERL_SCAN_ALLOW_UNDERSCORES;
284	60	if (silence_non_portable) {
285	60	flags \|= PERL_SCAN_SILENT_NON_PORTABLE;
286		}
287
288	60	uv = grok_hex(s, &numbers_len, &flags, NULL);
289		/* Note that if has non-hex, will ignore everything starting with that up
290		* to the '}' */
291
292	60	if (strict && numbers_len != (STRLEN) (e - *s)) {
293	0	*s += numbers_len;
294	0	s += (UTF) ? UTF8SKIP(s) : 1;
295	0	*error_msg = "Non-hex character";
296	0	return FALSE;
297		}
298
299		/* Return past the '}' */
300	60	*s = e + 1;
301
302	60	return TRUE;
303		}
304
305		STATIC char*
306	0	S_form_short_octal_warning(pTHX_
307		const char * const s, /* Points to first non-octal */
308		const STRLEN len /* Length of octals string, so
309		(s-len) points to first
310		octal */
311		) {
312		/* Return a character string consisting of a warning message for when a
313		* string constant in octal is weird, like "\078". */
314
315	0	const char * sans_leading_zeros = s - len;
316
317	0	PERL_ARGS_ASSERT_FORM_SHORT_OCTAL_WARNING;
318
319	0	assert(s == '8' \|\| s == '9');
320
321		/* Remove the leading zeros, retaining one zero so won't be zero length */
322	0	while (*sans_leading_zeros == '0') sans_leading_zeros++;
323	0	if (sans_leading_zeros == s) {
324	0	sans_leading_zeros--;
325		}
326
327	0	return Perl_form(aTHX_
328		"'%.s' resolved to '\\o{%.s}%c'",
329	0	(int) (len + 2), s - len - 1,
330	0	(int) (s - sans_leading_zeros), sans_leading_zeros,
331	0	*s);
332		}
333
334		/*
335		* Local variables:
336		* c-indentation-style: bsd
337		* c-basic-offset: 4
338		* indent-tabs-mode: nil
339		* End:
340		*
341		* ex: set ts=8 sts=4 sw=4 et:
342		*/