File Coverage

secret_buffer_parse.c

Criterion	Covered	Total	%
statement	540	692	78.0
branch	391	592	66.0
condition			n/a
subroutine			n/a
pod			n/a
total	931	1284	72.5

line	stmt	bran	code
1
2			/* These local parse functions are independent of the SecretBuffer instance,
3			* needing only the 'data' pointer to which the parse_state refers.
4			* The pos/lim of the parse state must already be checked against the length
5			* of the data before calling these.
6			*/
7			static int sizeof_codepoint_encoding(int codepoint, int encoding);
8			static int sb_parse_prev_codepoint(secret_buffer_parse *parse);
9			static int sb_parse_next_codepoint(secret_buffer_parse *parse);
10			static bool sb_parse_encode_codepoint(secret_buffer_parse *parse, int codepoint);
11			static bool sb_parse_match_charset_bytes(secret_buffer_parse *parse, const secret_buffer_charset *cset, int flags);
12			static bool sb_parse_match_charset_codepoints(secret_buffer_parse *parse, const secret_buffer_charset *cset, int flags);
13			static bool sb_parse_match_str_U8(secret_buffer_parse *parse, const U8 *pattern, size_t pattern_len, int flags);
14			static bool sb_parse_match_str_I32(secret_buffer_parse *parse, const I32 *pattern, size_t pattern_len, int flags);
15
16	66		static bool parse_encoding(pTHX_ SV *sv, int *out) {
17			int enc;
18	66	50	if (looks_like_number(sv)) {
19	0		IV i= SvIV(sv);
20	0	0	if (i < 0 \|\| i > SECRET_BUFFER_ENCODING_MAX)
		0
21	0		return false;
22	0		enc= (int) i;
23			} else {
24			STRLEN len;
25	66		const char *str= SvPV(sv, len);
26	66		switch (len) {
27	6	50	case 3: if (0 == strcmp(str, "HEX")) { enc= SECRET_BUFFER_ENCODING_HEX; break; }
28	1	50	case 4: if (0 == strcmp(str, "UTF8")) { enc= SECRET_BUFFER_ENCODING_UTF8; break; }
29	13	100	case 5: if (0 == strcmp(str, "ASCII")) { enc= SECRET_BUFFER_ENCODING_ASCII; break; }
30	12	50	if (0 == strcmp(str, "UTF-8")) { enc= SECRET_BUFFER_ENCODING_UTF8; break; }
31	26	50	case 6: if (0 == strcmp(str, "BASE64")) { enc= SECRET_BUFFER_ENCODING_BASE64; break; }
32	1	50	case 7: if (0 == strcmp(str, "UTF16LE")) { enc= SECRET_BUFFER_ENCODING_UTF16LE; break; }
33	0	0	if (0 == strcmp(str, "UTF16BE")) { enc= SECRET_BUFFER_ENCODING_UTF16BE; break; }
34	6	100	case 8: if (0 == strcmp(str, "UTF-16LE")) { enc= SECRET_BUFFER_ENCODING_UTF16LE; break; }
35	3	50	if (0 == strcmp(str, "UTF-16BE")) { enc= SECRET_BUFFER_ENCODING_UTF16BE; break; }
36	0	0	case 9: if (0 == strcmp(str, "ISO8859_1")) { enc= SECRET_BUFFER_ENCODING_ISO8859_1; break; }
37	13	50	case 10: if (0 == strcmp(str, "ISO-8859-1")) { enc= SECRET_BUFFER_ENCODING_ISO8859_1; break; }
38			default:
39	0		return false;
40			}
41			}
42	66	50	if (out) *out= enc;
43	66		return true;
44			}
45
46			/* Public API --------------------------------------------------------------*/
47
48			/* initialize a parse struct, but only if it is a valid span of the buffer */
49	2001		bool secret_buffer_parse_init(secret_buffer_parse *parse,
50			secret_buffer *buf, size_t pos, size_t lim, int encoding
51			) {
52	2001		Zero(parse, 1, secret_buffer_parse);
53			// Sanity check this parse state vs. the buffer
54	2001	100	if (lim > buf->len \|\| pos > lim) {
		50
55	1	50	parse->error= pos > lim? "span starts beyond buffer" : "span ends beyond buffer";
56	1		return false;
57			}
58	2000		parse->pos= ((U8*) buf->data) + pos;
59	2000		parse->lim= ((U8*) buf->data) + lim;
60	2000		parse->encoding= encoding;
61	2000		parse->sbuf= buf;
62	2000		return true;
63			}
64
65			/* Initialize a parse struct, either from a Span, or a SecretBuffer, or a plain Scalar.
66			*/
67	1737		bool secret_buffer_parse_init_from_sv(secret_buffer_parse *parse, SV *sv) {
68			dTHX;
69			secret_buffer *sb;
70			secret_buffer_span *span;
71			/* Is the sv a Span object? */
72	1737	100	if ((span= secret_buffer_span_from_magic(sv, 0)) && SvTYPE(SvRV(sv)) == SVt_PVHV) {
		50
73	1284		SV **sb_sv= hv_fetchs((HV*)SvRV(sv), "buf", 1);
74	1284		sb= secret_buffer_from_magic(*sb_sv, SECRET_BUFFER_MAGIC_OR_DIE);
75	1284		return secret_buffer_parse_init(parse, sb, span->pos, span->lim, span->encoding);
76			}
77			/* Is the sv a SecretBuffer? */
78	453	100	else if ((sb= secret_buffer_from_magic(sv, 0))) {
79	2		return secret_buffer_parse_init(parse, sb, 0, sb->len, SECRET_BUFFER_ENCODING_ISO8859_1);
80			}
81			/* It needs to at least be defined */
82	451	50	else if (SvOK(sv)) {
83			STRLEN len;
84	451		char *buf= SvPV(sv, len);
85	451		Zero(parse, 1, secret_buffer_parse);
86	451		parse->pos= (U8*) buf;
87	451		parse->lim= (U8*) buf + len;
88	451		parse->encoding= SvUTF8(sv)? SECRET_BUFFER_ENCODING_UTF8 : SECRET_BUFFER_ENCODING_ISO8859_1;
89	451		return true;
90			}
91			else {
92	0		Zero(parse, 1, secret_buffer_parse);
93	0		parse->error= "Not a Span, SecretBuffer, or defined scalar";
94	0		return false;
95			}
96			}
97
98			/* Scan for a pattern which may be a regex or literal string.
99			* Regexes are currently limited to a single charclass.
100			*/
101	785		bool secret_buffer_match(secret_buffer_parse *parse, SV *pattern, int flags) {
102			dTHX;
103	785		REGEXP *rx= (REGEXP*)SvRX(pattern);
104			secret_buffer_parse pat_parse;
105
106			/* Is the pattern a regexp-ref? */
107	785	100	if (rx) {
108	346		secret_buffer_charset *cset= secret_buffer_charset_from_regexpref(pattern);
109	346		return secret_buffer_match_charset(parse, cset, flags);
110			}
111
112			/* load up a parse struct with the pos, lim, and encoding */
113	439	50	if (!secret_buffer_parse_init_from_sv(&pat_parse, pattern))
114	0		croak("%s", pat_parse.error);
115
116			/* Remove edge case of zero-length pattern (always matches) */
117	439	100	if (pat_parse.pos >= pat_parse.lim) {
118	2	50	if ((flags & SECRET_BUFFER_MATCH_REVERSE))
119	0		parse->pos= parse->lim;
120			else
121	2		parse->lim= parse->pos;
122	2		return !(flags & SECRET_BUFFER_MATCH_NEGATE);
123			}
124			/* Remove edge case of zero-length subject (never matches) */
125	437	100	if (parse->pos >= parse->lim) {
126	4		return (flags & SECRET_BUFFER_MATCH_NEGATE);
127			}
128
129			/* Since unicode iteration of the pattern is a hassle and might happen lots of times,
130			* convert it to either plain bytes or array of U32 codepoints.
131			*/
132	433	100	if (pat_parse.encoding != SECRET_BUFFER_ENCODING_ISO8859_1) {
133	17		int dst_enc=
134			/* these can be transcoded to bytes */
135	17		(pat_parse.encoding == SECRET_BUFFER_ENCODING_ASCII
136	17	100	\|\| pat_parse.encoding == SECRET_BUFFER_ENCODING_HEX
137	16	50	\|\| pat_parse.encoding == SECRET_BUFFER_ENCODING_BASE64)
138			? SECRET_BUFFER_ENCODING_ISO8859_1
139	34	50	: SECRET_BUFFER_ENCODING_I32;
140	17		SSize_t dst_len= secret_buffer_sizeof_transcode(&pat_parse, dst_enc);
141	17	50	if (dst_len < 0)
142	0		croak("transcode of pattern failed: %s", pat_parse.error);
143			/* No need to transcode SECRET_BUFFER_ENCODING_ASCII, but the above size check
144			* verified it is clean 7-bit, which is the whole point of that encoding.
145			*/
146	17	50	if (pat_parse.encoding == SECRET_BUFFER_ENCODING_ASCII
147			/* Likewise, if SECRET_BUFFER_ENCODING_UTF8's I32 len is exactly 4x the number of
148			* original bytes, that means every byte became a character, which means every
149			* character could fit in a byte. */
150	17	100	\|\| (pat_parse.encoding == SECRET_BUFFER_ENCODING_UTF8
151	16	100	&& dst_len == (pat_parse.lim - pat_parse.pos) * 4)
152			) {
153	9		pat_parse.encoding= SECRET_BUFFER_ENCODING_ISO8859_1;
154			} else {
155			/* create a temporary secret buffer to hold the transcode */
156	8		secret_buffer *tmp= secret_buffer_new(0, NULL);
157	8		secret_buffer_parse pat_orig= pat_parse;
158	8		secret_buffer_set_len(tmp, dst_len);
159	8	50	if (!secret_buffer_parse_init(&pat_parse, tmp, 0, dst_len, dst_enc))
160	0		croak("transcode of pattern failed: %s", pat_parse.error);
161			/* Transcode the pattern */
162	8	50	if (!secret_buffer_transcode(&pat_orig, &pat_parse))
163	0	0	croak("transcode of pattern failed: %s", pat_orig.error? pat_orig.error : pat_parse.error);
164			}
165			}
166			/* In some cases it would also be nice to transcode the subject first, but the
167			* final state of the parse struct carries information back to the caller and
168			* needs to refer to original positions of characters. */
169
170			/* Now dispatch to sb_parse_match_str_X */
171	433	100	if (pat_parse.encoding == SECRET_BUFFER_ENCODING_ISO8859_1) {
172	426		size_t pat_len= pat_parse.lim - pat_parse.pos;
173	426		return sb_parse_match_str_U8(parse, pat_parse.pos, pat_len, flags);
174			} else { /* must be _I32 encoding, from above */
175	7		size_t pat_len= (pat_parse.lim - pat_parse.pos) >> 2;
176	7		return sb_parse_match_str_I32(parse, (I32*) pat_parse.pos, pat_len, flags);
177			}
178			}
179
180			/* Scan for a pattern which is a set of characters */
181	346		bool secret_buffer_match_charset(secret_buffer_parse *parse, secret_buffer_charset *cset, int flags) {
182	346	100	if (parse->pos >= parse->lim) // empty range
183	48		return false;
184
185			// byte matching gets to use a more efficient algorithm
186	298		return parse->encoding == SECRET_BUFFER_ENCODING_ISO8859_1
187	289		? sb_parse_match_charset_bytes(parse, cset, flags)
188	587	100	: sb_parse_match_charset_codepoints(parse, cset, flags);
189			}
190
191			/* Scan for a pattern which is a literal string of bytes.
192			*/
193	0		bool secret_buffer_match_bytestr(secret_buffer_parse *parse, char *data, size_t datalen, int flags) {
194	0		return sb_parse_match_str_U8(parse, (U8*) data, datalen, flags);
195			}
196
197			/* Count number of bytes required to transcode the source.
198			* If the source contains an invalid character for its encoding, or that codepoint
199			* can't be encoded as the dst_encoding, this returns -1 and sets src->error
200			* and also sets src->pos pointing at the character that could not be converted.
201			*/
202	118		SSize_t secret_buffer_sizeof_transcode(secret_buffer_parse *src, int dst_encoding) {
203			// If the source and destination encodings are both bytes, return the length
204	118	100	if (dst_encoding == src->encoding && src->encoding == 0)
		100
205	17		return src->lim - src->pos;
206			// Else need to iterate characters (to validate) and re-encode them
207			else {
208	101		size_t dst_size_needed= 0;
209			secret_buffer_parse tmp;
210	101		Zero(&tmp, 1, secret_buffer_parse);
211	101		tmp.pos= src->pos;
212	101		tmp.lim= src->lim;
213	101		tmp.encoding= src->encoding;
214	859	100	while (tmp.pos < tmp.lim) {
215	758		int cp= sb_parse_next_codepoint(&tmp);
216	758	50	if (cp < 0) return -1;
217	758		int ch_size= sizeof_codepoint_encoding(cp, dst_encoding);
218	758	50	if (ch_size < 0) return -1;
219	758		dst_size_needed += ch_size;
220			}
221			// If dest is base64, need special calculation
222	101	100	if (dst_encoding == SECRET_BUFFER_ENCODING_BASE64) {
223	10		dst_size_needed= ((dst_size_needed + 2) / 3) * 4;
224			}
225	101		return dst_size_needed;
226			}
227			}
228
229			static const char base64_alphabet[64]=
230			"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
231			"abcdefghijklmnopqrstuvwxyz"
232			"0123456789+/";
233
234			/*
235			perl -E 'my @tbl= (-1)x256;
236			$tbl[ord]= -ord(A)+ord for A..Z;
237			$tbl[ord]= 26-ord(a)+ord for a..z;
238			$tbl[ord]= 52-ord(0)+ord for 0..9;
239			$tbl[ord "+"]= 62;
240			$tbl[ord "/"]= 63;
241			$tbl[ord "="]= 64;
242			say join ",\n", map join(",", @tbl[$_*16 .. $_*16+15]), 0..0xF'
243			*/
244			static const int8_t base64_decode_table[256]= {
245			-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
246			-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
247			-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,62,-1,-1,-1,63,
248			52,53,54,55,56,57,58,59,60,61,-1,-1,-1,64,-1,-1,
249			-1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,
250			15,16,17,18,19,20,21,22,23,24,25,-1,-1,-1,-1,-1,
251			-1,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,
252			41,42,43,44,45,46,47,48,49,50,51,-1,-1,-1,-1,-1,
253			-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
254			-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
255			-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
256			-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
257			-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
258			-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
259			-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
260			-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
261			};
262
263			/* Transcode characters from one parse state into another.
264			* This works sort of like
265			* $data= decode($src_enc, substr($src, $src_pos, $src_len));
266			* substr($dst, $dst_pos, $dst_lim, encode($dst_enc, $data));
267			* processing only a range of the source, and replacing only a range of the dest,
268			* adjusting the size of dst as needed. Both src->pos and dst->pos
269			* are updated.
270			*/
271	109		bool secret_buffer_transcode(secret_buffer_parse *src, secret_buffer_parse *dst) {
272	109		src->error= NULL;
273	109		dst->error= NULL;
274			// If the source and destination encodings are both bytes, use memcpy
275	109	100	if (dst->encoding == src->encoding && src->encoding == 0) {
		100
276	17		size_t cnt= dst->lim - dst->pos;
277	17	50	if (src->lim - src->pos != cnt) {
278	0		dst->error= "miscalculated buffer length";
279	0		return false;
280			}
281	17		memcpy(dst->pos, src->pos, cnt);
282	17		dst->pos += cnt;
283	17		src->pos += cnt;
284			}
285			// Else need to iterate characters and re-encode them
286			// base64 encoding doesn't work with sb_parse_encode_codepoint, so it gets
287			// special treatment.
288	92	100	else if (dst->encoding == SECRET_BUFFER_ENCODING_BASE64) {
289			// Read 3, write 4
290	10		int accum= 0;
291	10		int shift= 16, cp;
292	88	100	while (src->pos < src->lim) {
293	78		cp= sb_parse_next_codepoint(src);
294	78	50	if (cp > 0xFF) {
295	0		dst->error= "byte out of range";
296	0		return false;
297			}
298	78	100	if (!shift) {
299	24	50	if (dst->pos + 4 > dst->lim) {
300	0		dst->error= "miscalculated buffer length";
301	0		return false;
302			}
303	24		accum \|= cp;
304	24		*dst->pos++ = base64_alphabet[0x3F & (accum >> 18)];
305	24		*dst->pos++ = base64_alphabet[0x3F & (accum >> 12)];
306	24		*dst->pos++ = base64_alphabet[0x3F & (accum >> 6)];
307	24		*dst->pos++ = base64_alphabet[0x3F & accum];
308	24		accum= 0;
309	24		shift= 16;
310			}
311			else {
312	54		accum \|= (cp << shift);
313	54		shift -= 8;
314			}
315			}
316	10	100	if (dst->pos + (shift < 16? 4 : 0) != dst->lim) {
		50
317	0		dst->error= "miscalculated buffer length";
318	0		return false;
319			}
320			// write leftover accumulated bits
321	10	100	if (shift < 16) {
322	5		*dst->pos++ = base64_alphabet[0x3F & (accum >> 18)];
323	5		*dst->pos++ = base64_alphabet[0x3F & (accum >> 12)];
324	5	100	*dst->pos++ = shift? '=' : base64_alphabet[0x3F & (accum >> 6)];
325	5		*dst->pos++ = '=';
326			}
327			}
328			else {
329	526	100	while (src->pos < src->lim) {
330	444		int cp= sb_parse_next_codepoint(src);
331	444	50	if (cp < 0)
332	0		return false; // error is already set
333	444		int len= sb_parse_encode_codepoint(dst, cp);
334	444	50	if (len < 0)
335	0		return false; // error is already set
336			}
337	82	50	if (dst->pos != dst->lim) {
338	0		dst->error= "miscalculated buffer length";
339	0		return false;
340			}
341			}
342	109		return true;
343			}
344
345			bool
346	101		secret_buffer_copy_to(secret_buffer_parse *src, SV *dst_sv, int encoding, bool append) {
347			dTHX;
348			secret_buffer_parse dst;
349	101		secret_buffer *dst_sbuf= NULL;
350			SSize_t need_bytes;
351	101		bool dst_wide= false;
352
353	101		Zero(&dst, 1, secret_buffer_parse);
354			// Encoding may be -1 to indicate the user didn't specify, in which case we use the
355			// same encoding as the source, unless the destination is a perl scalar (handled below)
356	101	100	dst.encoding= encoding >= 0? encoding : src->encoding;
357	101	100	if (sv_isobject(dst_sv)) {
358			// if object, must be a SecretBuffer
359	27		dst_sbuf= secret_buffer_from_magic(dst_sv, SECRET_BUFFER_MAGIC_OR_DIE);
360			}
361			else {
362			// Going to overwrite the scalar, or if it's a scalar-ref, overwrite that.
363	74	50	if (SvROK(dst_sv) && !sv_isobject(dst_sv) && SvTYPE(SvRV(dst_sv)) <= SVt_PVMG)
		0
		0
364	0		dst_sv= SvRV(dst_sv);
365			// Refuse to overwrite any other kind of ref
366	74	50	if (SvTYPE(dst_sv) > SVt_PVMG \|\| SvROK(dst_sv)) {
		50
367	0		src->error= "Can only copy_to scalars or scalar-refs";
368	0		return false;
369			}
370			// If the source encoding is a type of unicode, and the destination encoding is not
371			// specified, then write wide characters (utf-8) to the perl scalar and flag it as utf8
372	74	100	if (encoding < 0 && SECRET_BUFFER_ENCODING_IS_UNICODE(src->encoding)) {
		100
		100
		100
		50
373	66		dst.encoding= SECRET_BUFFER_ENCODING_UTF8;
374	66		dst_wide= true;
375			}
376			}
377			// Determine how many bytes we need
378	101		need_bytes= secret_buffer_sizeof_transcode(src, dst.encoding);
379	101	50	if (need_bytes < 0)
380	0		return false;
381			// Prepare the buffers for that many bytes
382	101	100	if (dst_sbuf) {
383			// For destination SecretBuffer, set length to 0 unless appending, then
384			// ensure enough allocated space for need_bytes, then transcode and update
385			// the length in the block below.
386	27	100	if (!append)
387	20		secret_buffer_set_len(dst_sbuf, 0); /* clears secrets */
388	27		secret_buffer_alloc_at_least(dst_sbuf, dst_sbuf->len + need_bytes);
389	27		dst.pos= (U8*) dst_sbuf->data + dst_sbuf->len;
390	27		dst.lim= dst.pos + need_bytes;
391			}
392			else {
393			// For destination SV, set length to 0 unless appending, then force it to
394			// be bytes or utf-8, then grow it to ensure room for additional `need_bytes`.
395			STRLEN len;
396			// If overwriting, set the length to 0 before forcing to bytes or utf8
397	74	100	if (!append)
398	72		sv_setpvn(dst_sv, "", 0);
399			// force it to the type required
400	74	100	if (dst_wide) SvPVutf8(dst_sv, len);
401	8		else SvPVbyte(dst_sv, len);
402			// grow it to the required length, for writing
403	74	100	sv_grow(dst_sv, (append? len : 0) + need_bytes + 1);
404	74		dst.pos= (U8*) SvPVX_mutable(dst_sv) + len;
405	74		dst.lim= dst.pos + need_bytes;
406			// don't forget the NUL terminator
407	74		*dst.lim= '\0';
408			}
409	101	50	if (!secret_buffer_transcode(src, &dst)) {
410	0	0	if (!src->error) src->error= dst.error;
411	0		return false;
412			}
413			/* update the lengths */
414	101	100	if (dst_sbuf) {
415	27		dst_sbuf->len += need_bytes;
416			}
417			else {
418	74		SvCUR_set(dst_sv, SvCUR(dst_sv) + need_bytes);
419	74	50	SvSETMAGIC(dst_sv);
420			}
421	101		return true;
422			}
423
424			/* Append DER length octets (ASN.1 Length field, definite form only).
425			*
426			* DER rules:
427			* - If len <= 127: single byte [0x00..0x7F]
428			* - Else: first byte is 0x80 \| N, where N is # of following length bytes (big-endian),
429			* and the length must be encoded in the minimal number of bytes (no leading 0x00).
430			*
431			* This function encodes ONLY the length field (not tag/value).
432			*/
433			void
434	384		secret_buffer_append_uv_asn1_der_length(secret_buffer *buf, UV val) {
435			dTHX;
436	384		int enc_len = 1;
437			U8 *pos;
438	384	100	if (val > 127) {
439			/* Determine minimal number of bytes needed to represent len in base-256. */
440	339		UV tmp = val;
441	2001	100	while (tmp) {
442	1662		enc_len++;
443	1662		tmp >>= 8;
444			}
445			}
446			/* In BER/DER, the long-form initial octet has 7 bits of length-of-length.
447			* 0x80 is indefinite length (forbidden in DER), 0xFF would mean 127 length bytes.
448			* With 64-bit UV enc_len will never exceed 9.
449			*/
450	384	50	ASSUME(enc_len < 127);
451	384		secret_buffer_set_len(buf, buf->len + enc_len);
452	384		pos= (U8*) buf->data + buf->len - 1;
453	384	100	if (val <= 127) {
454	45		*pos = (U8) val;
455			} else {
456	339		UV tmp = val;
457			/* Write the length big-endian into enc[1..n]. */
458	2001	100	while (tmp) {
459	1662		*pos-- = (U8)(tmp & 0xFF);
460	1662		tmp >>= 8;
461			}
462	339		*pos= (U8) (0x80 \| (U8)(enc_len-1));
463			}
464	384		}
465
466			/* Parse ASN.1 DER Length (definite form only) */
467			bool
468	384		secret_buffer_parse_uv_asn1_der_length(secret_buffer_parse *parse, UV *out) {
469			/* Work on a local cursor so we can roll back on failure */
470	384		U8 *pos = parse->pos;
471	384		U8 *lim = parse->lim;
472			UV result;
473
474	384	50	if (pos >= lim) {
475	0		parse->error = "unexpected end of buffer";
476	0		return false;
477			}
478
479	384		result = *pos++;
480
481			/* If 0..127, the byte is the length value itself, otherwise it is the number of octets
482			* to read following that byte. */
483	384	100	if ((result & 0x80)) {
484	339		int n = result & 0x7F;
485			/* 0x80 means indefinite length (BER/CER), forbidden in DER */
486	339	50	if (n == 0) {
487	0		parse->error = "ASN.1 DER indefinite length not allowed";
488	0		return false;
489			}
490			/* Number of octets should be the smallest possible encoding, so if it is larger than a UV
491			* don't even bother trying to decode it.
492			*/
493	339	50	if (n > sizeof(UV)) {
494	0		parse->error = "ASN.1 DER length too large for perl UV";
495	0		return false;
496			}
497			/* ensure we have that many bytes */
498	339	50	if ((size_t)(lim - pos) < (size_t)n) {
499	0		parse->error = "unexpected end of buffer";
500	0		return false;
501			}
502			/* DER minimal encoding rules:
503			* - no leading 0x00 in the length octets
504			* - long form must not be used for lengths <= 127
505			*/
506	339		lim= pos + n;
507	339		result= *pos++;
508	339	50	if (!result) {
509	0		parse->error = "ASN.1 DER length has leading zero (non-minimal)";
510	0		return false;
511			}
512			/* Parse remaining bytes of big-endian unsigned integer */
513	1662	100	while (pos < lim)
514	1323		result= (result << 8) \| *pos++;
515			/* DER should not use 1-byte encoding if it would have fit in the initial byte */
516	339	50	if (result < 0x80) {
517	0		parse->error = "ASN.1 DER length should use short form (non-minimal)";
518	0		return false;
519			}
520			}
521	384	50	if (out) *out = result;
522	384		parse->pos = pos;
523	384		parse->error = NULL;
524	384		return true;
525			}
526
527			/* Append canonical unsigned Base128, Little-Endian
528			*
529			* Rules:
530			* - 7 data bits per byte, little-endian (least significant group first)
531			* - High bit 0x80 set on all bytes except the final byte
532			* - Canonical/minimal: stop as soon as remaining value is 0
533			*/
534			void
535	384		secret_buffer_append_uv_base128le(secret_buffer *buf, UV val) {
536			dTHX;
537			U8 *pos;
538	384		int enc_len= 1;
539	384		UV tmp= val >> 7;
540	1923	100	while (tmp) {
541	1539		enc_len++;
542	1539		tmp >>= 7;
543			}
544	384		secret_buffer_set_len(buf, buf->len + enc_len);
545	384		pos= (U8*) buf->data + buf->len - enc_len;
546			/* Encode */
547	384		tmp= val;
548			do {
549	1923		U8 byte = (U8)(tmp & 0x7F);
550	1923		tmp >>= 7;
551	1923	100	if (tmp)
552	1539		byte \|= 0x80;
553	1923		*pos++ = byte;
554	1923	100	} while (tmp);
555	384	50	ASSUME(pos == (U8*)(buf->data + buf->len));
556	384		}
557
558			/* Parse Unsigned LittleEndian Base128 (also requiring canonical / minimal encoding) */
559			bool
560	384		secret_buffer_parse_uv_base128le(secret_buffer_parse *parse, UV *out) {
561	384		U8 *pos = parse->pos;
562	384		U8 *lim = parse->lim;
563	384		UV result= 0, payload;
564	384		int shift= 7;
565
566	384	50	if (pos >= lim) {
567	0		parse->error = "unexpected end of buffer";
568	0		return false;
569			}
570	384		result= payload= *pos & 0x7F;
571			/* Scan forward looking for the first byte without the continuation flag */
572	1923	100	while (*pos++ & 0x80) {
573	1539	50	if (pos >= lim) {
574	0		parse->error = "unexpected end of buffer";
575	0		return false;
576			}
577	1539		payload= *pos & 0x7F;
578	1539	100	if (shift > sizeof(UV)*8 - 7) {
579			/* Do any of the bits overflow? Is the continuation flag set? */
580	3	50	if (shift >= sizeof(UV)*8 \|\| (payload >> (sizeof(UV)*8 - shift))) {
		50
581	0		parse->error = "Base128-LE value overflows perl UV";
582	0		return false;
583			}
584			}
585	1539		result \|= payload << shift;
586	1539		shift += 7;
587			}
588			/* check if the high bits were all zero, meaning an unnecessary byte was encoded */
589	384	100	if (!payload && result != 0) {
		50
590	0		parse->error = "Over-long encoding of Base128-LE";
591	0		return false;
592			}
593	384	50	if (out) *out = result;
594	384		parse->pos = pos;
595	384		parse->error = NULL;
596	384		return true;
597			}
598
599			/* Append canonical unsigned Base128, Big-Endian
600			*
601			* Rules:
602			* - 7 data bits per byte, big-endian (most significant group first)
603			* - High bit 0x80 set on all bytes except the final byte
604			* - Canonical/minimal: stop as soon as remaining value is 0
605			*/
606			void
607	387		secret_buffer_append_uv_base128be(secret_buffer *buf, UV val) {
608			dTHX;
609			U8 *pos;
610	387		int enc_len= 1, shift;
611	387		UV tmp= val >> 7;
612	1926	100	while (tmp) {
613	1539		enc_len++;
614	1539		tmp >>= 7;
615			}
616	387		secret_buffer_set_len(buf, buf->len + enc_len);
617	387		pos= (U8*) buf->data + buf->len - enc_len;
618			/* Encode */
619	2313	100	for (shift= (enc_len-1) * 7; shift >= 0; shift -= 7) {
620	1926		U8 byte = (U8)((val >> shift) & 0x7F);
621	1926	100	if (shift)
622	1539		byte \|= 0x80;
623	1926		*pos++ = byte;
624			}
625	387	50	ASSUME(pos == (U8*)(buf->data + buf->len));
626	387		}
627
628			/* Parse Unsigned BigEndian Base128 (also requiring canonical / minimal encoding) */
629			bool
630	395		secret_buffer_parse_uv_base128be(secret_buffer_parse *parse, UV *out) {
631	395		U8 *pos = parse->pos;
632	395		U8 *lim = parse->lim;
633	395		UV result= 0;
634
635	395	50	if (pos >= lim) {
636	0		parse->error = "unexpected end of buffer";
637	0		return false;
638			}
639			/* high-bit payload == 0 with continue bit set is an error. */
640	395	50	if (*pos == 0x80) {
641	0		parse->error = "Over-long encoding of Base128-BE";
642	0		return false;
643			}
644	395		result= *pos & 0x7F;
645	1934	100	while (*pos++ & 0x80) {
646			/* Will existing bits overflow UV when shifted? */
647	1539	50	if (result >> (sizeof(UV)*8 - 7)) {
648	0		parse->error = "Base128-BE value overflows perl UV";
649	0		return false;
650			}
651	1539	50	if (pos >= lim) {
652	0		parse->error = "unexpected end of buffer";
653	0		return false;
654			}
655	1539		result= (result << 7) \| (*pos & 0x7F);
656			}
657	395	50	if (out) *out = result;
658	395		parse->pos = pos;
659	395		parse->error = NULL;
660	395		return true;
661			}
662
663			/* Private API -------------------------------------------------------------*/
664
665			/* Scan raw bytes using only the bitmap */
666	289		static bool sb_parse_match_charset_bytes(
667			secret_buffer_parse *parse,
668			const secret_buffer_charset *cset,
669			int flags
670			) {
671	289		bool negate= 0 != (flags & SECRET_BUFFER_MATCH_NEGATE);
672	289		bool reverse= 0 != (flags & SECRET_BUFFER_MATCH_REVERSE);
673	289	100	bool multi= 0 != (flags & SECRET_BUFFER_MATCH_MULTI) \|\| cset->match_multi;
		100
674	289		bool anchored= 0 != (flags & SECRET_BUFFER_MATCH_ANCHORED);
675	289		bool consttime=0 != (flags & SECRET_BUFFER_MATCH_CONST_TIME);
676	289	100	int step= reverse? -1 : 1;
677	289	100	U8 *pos= reverse? parse->lim-1 : parse->pos,
678	289	100	*lim= reverse? parse->pos-1 : parse->lim,
679	289		*span_start= NULL;
680			//warn("scan_charset_bytes pos=%p lim=%p len=%d", parse->pos, parse->lim, (int)(parse->lim - parse->pos));
681
682	1102	100	while (pos != lim) {
683	1097	100	if (sbc_bitmap_test(cset->bitmap, *pos) != negate) {
684			// Found. Now are we looking for a span?
685	238	100	if (span_start)
686	105		break;
687	133		span_start= pos;
688	133	100	if (!multi) {
689	27		pos += step;
690	27		break;
691			}
692	106		negate= !negate;
693	859	100	} else if (anchored && !span_start)
		100
694	152		break;
695	813		pos += step;
696			}
697			/* If constant time operation is requested, we need to perform one sbc_bitmap_test
698			* for every character in the span, and make sure the compiler doesn't eliminate it.
699			*/
700	289	50	if (consttime) {
701	0		volatile bool sink= false;
702	0	0	while (pos != lim) {
703	0		sink ^= sbc_bitmap_test(cset->bitmap, *pos);
704	0		pos += step;
705			}
706	0		(void) sink;
707			}
708			// reached end of defined range, and implicitly ends span
709	289	100	if (reverse) {
710	86		parse->pos= pos + 1;
711	86	100	parse->lim= span_start? span_start + 1 : parse->pos;
712			} else {
713	203		parse->lim= pos;
714	203	100	parse->pos= span_start? span_start : parse->lim;
715			}
716	289		return span_start != NULL;
717			}
718
719	9		static bool sb_parse_match_charset_codepoints(
720			secret_buffer_parse *parse,
721			const secret_buffer_charset *cset,
722			int flags
723			) {
724			dTHX;
725	9		bool negate= 0 != (flags & SECRET_BUFFER_MATCH_NEGATE);
726	9		bool reverse= 0 != (flags & SECRET_BUFFER_MATCH_REVERSE);
727	9	50	bool multi= 0 != (flags & SECRET_BUFFER_MATCH_MULTI) \|\| cset->match_multi;
		100
728	9		bool anchored= 0 != (flags & SECRET_BUFFER_MATCH_ANCHORED);
729	9		bool consttime=0 != (flags & SECRET_BUFFER_MATCH_CONST_TIME);
730	9		bool span_started= false;
731	9		bool encoding_error= false;
732	9	100	U8 *span_mark= NULL, *prev_mark= reverse? parse->lim : parse->pos;
733
734	37	50	while (parse->pos < parse->lim) {
735	19		int codepoint= reverse? sb_parse_prev_codepoint(parse)
736	37	100	: sb_parse_next_codepoint(parse);
737			// warn("parse.pos=%p parse.lim=%p parse.enc=%d cp=%d parse.err=%s", parse->pos, parse->lim, parse->encoding, codepoint, parse->error);
738	37	100	if (codepoint < 0) {// encoding error
739	1		encoding_error= true;
740	1		break;
741			}
742	36	100	if (sbc_test_codepoint(aTHX_ cset, codepoint) != negate) {
743			// Found. Mark boundaries of char.
744			// Now are we looking for a span?
745	10	100	if (span_started)
746	2		break;
747	8		span_started= true;
748	8		span_mark= prev_mark;
749	8		negate= !negate;
750	8	100	if (!multi) {
751	6	100	prev_mark= reverse? parse->lim : parse->pos;
752	6		break;
753			}
754	26	50	} else if (anchored && !span_started)
		0
755	0		break;
756	28	100	prev_mark= reverse? parse->lim : parse->pos;
757			}
758			/* If constant time operation is requested, we need to perform one sbc_test_codepoint
759			* for every character in the span, and make sure the compiler doesn't eliminate it.
760			*/
761	9	50	if (consttime) {
762	0		volatile bool sink= false;
763	0	0	while (parse->pos < parse->lim) {
764	0		int codepoint= reverse? sb_parse_prev_codepoint(parse)
765	0	0	: sb_parse_next_codepoint(parse);
766			// warn("parse.pos=%p parse.lim=%p parse.enc=%d cp=%d parse.err=%s", parse->pos, parse->lim, parse->encoding, codepoint, parse->error);
767	0	0	if (codepoint < 0) { // encoding error
768	0		encoding_error= true;
769	0		sink ^= sbc_test_codepoint(aTHX_ cset, 0);
770			}
771			else
772	0		sink ^= sbc_test_codepoint(aTHX_ cset, codepoint);
773			}
774	0		(void) sink;
775			}
776	9	100	if (encoding_error)
777	1		return false;
778			// reached end of defined range
779	8	50	if (span_started) { // and implicitly ends span
780	8	100	if (reverse) {
781	5		parse->pos= prev_mark;
782	5		parse->lim= span_mark;
783			}
784			else {
785	3		parse->pos= span_mark;
786	3		parse->lim= prev_mark;
787			}
788	8		return true;
789			}
790	0		return false;
791			}
792
793	18		int sb_parse_codepointcmp(secret_buffer_parse *lhs, secret_buffer_parse *rhs) {
794			I32 lhs_cp, rhs_cp;
795	18		volatile int ret= 0;
796			/* constant-time iteration per the shorter of the two strings */
797	87	100	while (lhs->pos < lhs->lim && rhs->pos < rhs->lim) {
		50
798	69		lhs_cp= sb_parse_next_codepoint(lhs);
799	69	50	if (lhs_cp < 0)
800	0		croak("Encoding error in left-hand buffer");
801	69		rhs_cp= sb_parse_next_codepoint(rhs);
802	69	50	if (rhs_cp < 0)
803	0		croak("Encoding error in right-hand buffer");
804	69	100	if (lhs_cp != rhs_cp && !ret)
		50
805	2	50	ret= lhs_cp < rhs_cp? -1 : 1;
806			}
807	18		return ret? ret
808	34	100	: (lhs->pos < lhs->lim)? 1 /* right string shorter than left */
809	32	50	: (rhs->pos < rhs->lim)? -1 /* left string shorter than right */
810	16	50	: 0;
811			}
812
813			/* Decode one codepoint from the front of the span [parse->pos, parse->lim)
			* according to parse->encoding (ASCII, ISO-8859-1, UTF-8, UTF-16LE/BE, HEX,
			* BASE64, or I32).
			* On success, stores the advanced position in parse->pos and returns the
			* decoded value.
			* On failure, points parse->error at a static message and returns -1 without
			* writing parse->pos (the BASE64 end-of-span path additionally resets
			* parse->pos_bit to 0).
			* NOTE(review): 'pos' and 'lim' are dereferenced and indexed below, so they
			* are presumably 'U8 *' whose '*' was lost in this listing -- confirm.
			*/
814	3369		static int sb_parse_next_codepoint(secret_buffer_parse *parse) {
815	3369		U8 pos= parse->pos, lim= parse->lim;
816	3369		int cp, encoding= parse->encoding;
			// Helper used throughout this function: record the message and bail out with -1.
817			#define SB_RETURN_ERROR(msg) { parse->error= msg; return -1; }
818
			// --- Single-byte encodings and UTF-8 all start by reading one byte ---
819	3369	100	if (encoding == SECRET_BUFFER_ENCODING_ASCII
820	3368	100	\|\| encoding == SECRET_BUFFER_ENCODING_ISO8859_1
821	577	100	\|\| encoding == SECRET_BUFFER_ENCODING_UTF8
822			) {
823	3127	50	if (lim - pos < 1)
824	0		SB_RETURN_ERROR("end of span")
825	3127		cp= *pos++;
826	3127	100	if (cp >= 0x80 && encoding == SECRET_BUFFER_ENCODING_ASCII)
		100
827	1		SB_RETURN_ERROR("not 7-bit ASCII")
828	3126	100	else if (cp >= 0x80 && encoding == SECRET_BUFFER_ENCODING_UTF8) {
		100
			// min_cp is the smallest codepoint that legitimately needs this sequence
			// length; it is compared after decoding to reject overlong forms.
829	47		int min_cp= 0;
			// Bits 6..3 of the lead byte select the sequence length. The cases are laid
			// out longest-first so each one falls through into the shared
			// continuation-byte steps of the shorter ones. Each 'if (0)' makes a
			// fall-through skip the setup block of the next case, while a direct jump
			// to that case label lands inside the block and runs it.
			// Selector values 0..7 (a continuation byte as lead) and 15 (0b11111yyy)
			// reach 'default' and are reported as invalid.
830	47		switch ((cp >> 3) & 0xF) {
831	13		case 14: // 0b1[1110]yyy
832	13	50	{ if (lim - pos < 3) goto incomplete;
833	13		min_cp= 0x10000;
834	13		cp &= 0x07;
835			}
836	13	50	if ((*pos & 0xC0) != 0x80) goto invalid;
837	13		cp= (cp << 6) \| (*pos++ & 0x3F);
838			if (0)
839			case 12: case 13: // 0b1[110x]yyy
840	14	50	{ if (lim - pos < 2) goto incomplete;
841	14		min_cp= 0x800;
842	14		cp &= 0x0F;
843			}
844	27	50	if ((*pos & 0xC0) != 0x80) goto invalid;
845	27		cp= (cp << 6) \| (*pos++ & 0x3F);
846			if (0)
847			case 8: case 9: case 10: case 11: // 0b1[10xx]yyy
848	20	50	{ if (lim - pos < 1) goto incomplete;
849	20		min_cp= 0x80;
850	20		cp &= 0x1F;
851			}
852	47	50	if ((*pos & 0xC0) != 0x80) goto invalid;
853	47		cp= (cp << 6) \| (*pos++ & 0x3F);
854	47		break;
855			default:
856	0		invalid: SB_RETURN_ERROR("invalid UTF8 character")
857	0		incomplete: SB_RETURN_ERROR("incomplete UTF8 character")
858			}
			// NOTE(review): only overlong forms and values above 0x10FFFF are rejected
			// here; nothing in this branch rejects 0xD800..0xDFFF decoded from UTF-8.
859	47	50	if (cp < min_cp)
860	0		SB_RETURN_ERROR("overlong encoding of UTF8 character")
861	47	50	else if (cp > 0x10FFFF)
862	0		SB_RETURN_ERROR("UTF8 character exceeds max")
863			}
864			// else all ISO-8859-1 bytes are valid codepoints
865			}
			// --- UTF-16: one 16-bit unit, or two when the first is a surrogate ---
866	242	100	else if (encoding == SECRET_BUFFER_ENCODING_UTF16LE
867	221	100	\|\| encoding == SECRET_BUFFER_ENCODING_UTF16BE
868	36		) {
			// 'low' is the index (0 or 1) of the low-order byte within each 2-byte unit
869	36		int low= encoding == SECRET_BUFFER_ENCODING_UTF16LE? 0 : 1;
870	36	50	if (lim - pos < 2)
871	0		SB_RETURN_ERROR("end of span")
872	36		cp= pos[low] \| ((int)pos[low^1] << 8);
873	36		pos += 2;
			// NOTE(review): any unit in 0xD800..0xDFFF takes the pair path, so a first
			// unit in the low-surrogate range 0xDC00..0xDFFF is not rejected before it
			// is combined with the following unit.
874	36	100	if (cp >= 0xD800 && cp <= 0xDFFF) {
		50
875	10	50	if (lim - pos < 2)
876	0		SB_RETURN_ERROR("incomplete UTF16 character")
877	10		int w2= pos[low] \| ((int)pos[low^1] << 8);
878	10		pos += 2;
879	10	50	if (w2 < 0xDC00 \|\| w2 > 0xDFFF)
		50
880	0		SB_RETURN_ERROR("invalid UTF16 low surrogate")
881	10		cp = 0x10000 + (((cp & 0x3FF) << 10) \| (w2 & 0x3FF));
882			}
883			}
			// --- HEX: two hex digits decode to one byte value ---
884	206	100	else if (encoding == SECRET_BUFFER_ENCODING_HEX) {
885			// Skip over whitespace
886	38	50	while (pos < lim && isspace(*pos))
		50
887	0		pos++;
888	38	50	if (lim - pos < 2)
889	0		SB_RETURN_ERROR("end of span")
			// Each digit is rebased to '0', then letters are shifted down so that
			// 'a'/'A' map to 10. Any result with bits above the low nibble is rejected.
			// NOTE(review): the characters ':' through '?' rebase to 10..15 without
			// hitting either letter adjustment, so they pass the nibble check below and
			// are accepted as hex digits.
			// NOTE(review): characters below '0' give a negative value; rejecting them
			// relies on '>>' of a negative int yielding nonzero -- confirm.
890	38		int high= *pos++ - '0';
891	38		int low= *pos++ - '0';
892	38	50	if (low >= ('a'-'0')) low -= ('a'-'0'-10);
893	38	100	else if (low >= ('A'-'0')) low -= ('A'-'0'-10);
894	38	50	if (high >= ('a'-'0')) high -= ('a'-'0'-10);
895	38	100	else if (high >= ('A'-'0')) high -= ('A'-'0'-10);
896	38	50	if ((low >> 4) \| (high >> 4))
897	0		SB_RETURN_ERROR("not a pair of hex digits")
898	38		cp= (high << 4) \| low;
899			// skip over whitespace if it takes us to the end of buffer so that caller
900			// knows it's EOF before trying another decode.
901	38	100	while (pos < lim && isspace(*pos))
		50
902	0		pos++;
903			}
			// --- BASE64: one output byte is assembled from two adjacent base64 chars ---
			// base64_decode_table is defined outside this section; a negative entry is
			// treated here as "not a base64 character".
904	168	50	else if (encoding == SECRET_BUFFER_ENCODING_BASE64) {
905			// Skip over whitespace and control chars
906	168	50	while (pos < lim && *pos <= ' ')
		50
907	0		pos++;
908			// There need to be at least 2 base64 characters left
909	168	50	if (pos < lim) {
910	168	50	if (base64_decode_table[*pos] < 0)
911	0		SB_RETURN_ERROR("invalid base64 character");
912			// ->pos_bit > 0 means pointer is pointing at a sub-bit of the base64
913			// character at *pos (and possible values are 0, 2, or 4)
			// High part of the output byte: the unread bits of the current character,
			// shifted up and truncated to 8 bits.
914	168		cp= (((int)base64_decode_table[*pos++]) << (2 + parse->pos_bit)) & 0xFF;
915	168	50	while (pos < lim && *pos <= ' ')
		50
916	0		pos++;
917			}
918	168	50	if (pos >= lim) {
919	0		parse->pos_bit= 0;
920	0		SB_RETURN_ERROR("end of span")
921			}
922	168	50	if (base64_decode_table[*pos] < 0)
923	0		SB_RETURN_ERROR("invalid base64 character");
			// Low part of the output byte comes from the top bits of the next character,
			// which is not advanced past yet because its remaining bits are still needed.
924	168		cp \|= base64_decode_table[*pos] >> (4-parse->pos_bit);
925	168		parse->pos_bit += 2;
926			// If pos_bit == 6 we've completed a set of 4 b64 chars and fully consumed them.
927	168	100	if (parse->pos_bit >= 6) {
928	51		pos++;
929	51		parse->pos_bit= 0;
930			// consume trailing whitespace
931	55	100	while (pos < lim && *pos <= ' ')
		100
932	4		pos++;
933			}
934			else {
935			// if next char is '=', terminate the decoding
936	117		U8 *next= pos+1;
937	117	50	while (next < lim && *next <= ' ')
		50
938	0		next++;
939	117	50	if (next < lim && *next == '=') {
		100
940	13		pos= lim; // indicate parsing complete
941	13		parse->pos_bit= 0;
942			}
943			}
944			}
			// --- I32: each codepoint occupies 4 bytes ---
945	0	0	else if (encoding == SECRET_BUFFER_ENCODING_I32) {
946	0	0	if (lim - pos < 4)
947	0		SB_RETURN_ERROR("end of span");
			// NOTE(review): as printed this casts the pointer itself; presumably the
			// real source loads an I32 through the pointer and the '*' characters were
			// lost in this listing -- confirm.
948	0		cp= (I32)pos;
949	0		pos+= 4;
950			}
951	0		else SB_RETURN_ERROR("unsupported encoding")
			// Success: commit the advanced position.
952	3368		parse->pos= pos;
953	3368		return cp;
954			#undef SB_RETURN_ERROR
955			}
956
957	850		static int sb_parse_prev_codepoint(secret_buffer_parse *parse) {
			/* Decode one codepoint from the END of the span [parse->pos, parse->lim)
			* according to parse->encoding, i.e. the reverse-direction counterpart of
			* sb_parse_next_codepoint.
			* On success, stores the moved-back limit in parse->lim and returns the
			* decoded value.
			* On failure, points parse->error at a static message and returns -1 without
			* writing parse->lim (the BASE64 end-of-span path additionally resets
			* parse->lim_bit to 0).
			* NOTE(review): 'pos' and 'lim' are dereferenced and indexed below, so they
			* are presumably 'U8 *' whose '*' was lost in this listing -- confirm.
			*/
958	850		U8 pos= parse->pos, lim= parse->lim;
959	850		int encoding= parse->encoding;
960			int cp;
961			#define SB_RETURN_ERROR(msg) { parse->error= msg; return -1; }
962
963	850	50	if (encoding == SECRET_BUFFER_ENCODING_ASCII
964	850	100	\|\| encoding == SECRET_BUFFER_ENCODING_ISO8859_1
965	25	100	\|\| encoding == SECRET_BUFFER_ENCODING_UTF8
966			) {
967	842	50	if (lim <= pos)
968	0		SB_RETURN_ERROR("end of span")
969	842		cp= *--lim;
970			// handle the simple case first
971	842	100	if (cp >= 0x80 && encoding != SECRET_BUFFER_ENCODING_ISO8859_1) {
		50
972			// Strict ASCII can't encode above 0x7F
973	4	50	if (encoding == SECRET_BUFFER_ENCODING_ASCII)
974	0		SB_RETURN_ERROR("not 7-bit ASCII")
975			// else need to backtrack and then call next_codepoint
			// Walk backward over continuation bytes (0b10xxxxxx) to find the lead byte.
			// NOTE(review): the loop tests 'start >= pos', so when every byte down to
			// 'pos' is a continuation byte, 'start' is decremented to pos-1. parse->pos
			// is then set to that address and sb_parse_next_codepoint reads from it,
			// i.e. one byte before the span. A 'start > pos' bound would avoid this --
			// confirm.
976	4		U8 *start= lim;
977	12	50	while (start >= pos && (*start & 0xC0) == 0x80)
		100
978	8		--start;
			// Temporarily point parse->pos at the candidate lead byte and decode forward.
			// parse->lim still holds the original limit, so a well-formed final
			// character must end exactly there.
979	4		parse->pos= start;
980	4		cp= sb_parse_next_codepoint(parse);
981	4	50	if (parse->pos != parse->lim) {// consumed all characters we gave it?
982	0		parse->pos= pos; // restore original pos
983	0	0	if (cp >= 0) // had a valid char, but extra 0x80 bytes
984	0		parse->error= "invalid UTF8 character";
985			// else use the error message from next_codepoint
986	0		return -1;
987			}
988	4		parse->pos= pos; // restore original pos
989	4		lim= start; // new lim is where we started the parse from
990			}
991			}
992	8	50	else if (encoding == SECRET_BUFFER_ENCODING_UTF16LE
993	8	100	\|\| encoding == SECRET_BUFFER_ENCODING_UTF16BE
994	1		) {
995	1	50	if (lim - pos < 2)
996	0		SB_RETURN_ERROR("end of span");
997			// handle the simple case first
998	1		lim -= 2;
			// 'low' is the index (0 or 1) of the low-order byte within each 2-byte unit
999	1		int low= (encoding == SECRET_BUFFER_ENCODING_UTF16LE)? 0 : 1;
1000	1		cp= lim[low] \| ((int)lim[low^1] << 8);
1001	1	50	if (cp >= 0xD800 && cp <= 0xDFFF) {
		50
			// NOTE(review): 'lim' was already moved back by 2 above, yet this requires 4
			// more bytes while only one further 2-byte unit is read. A span holding
			// exactly one surrogate pair would be rejected as "end of span" -- confirm
			// whether '< 2' was intended.
1002	1	50	if (lim - pos < 4)
1003	0		SB_RETURN_ERROR("end of span");
1004	1		lim -= 2;
1005	1		int w1= lim[low] \| ((int)lim[low^1] << 8);
			// NOTE(review): w1 is only required to be some surrogate (0xD800..0xDFFF);
			// a w1 in the low-surrogate range 0xDC00..0xDFFF is not rejected.
1006	1	50	if (w1 < 0xD800 \|\| w1 > 0xDFFF \|\| cp < 0xDC00)
		50
		50
1007	0		SB_RETURN_ERROR("invalid UTF16 surrogate");
1008	1		cp = 0x10000 + (((w1 & 0x3FF) << 10) \| (cp & 0x3FF));
1009			}
1010			}
1011	7	100	else if (encoding == SECRET_BUFFER_ENCODING_HEX) {
1012			// Skip over whitespace
1013	1	50	while (pos < lim && isspace(lim[-1]))
		50
1014	0		lim--;
1015	1	50	if (lim - pos < 2)
1016	0	0	SB_RETURN_ERROR((pos == lim? "end of span" : "incomplete hex pair at end of span"))
			// Digits are read back-to-front: the last character is the low nibble.
			// NOTE(review): as in the forward decoder, ':' through '?' rebase to 10..15
			// and pass the nibble check below, so they are accepted as hex digits.
1017	1		int low= *--lim - '0';
1018	1		int high= *--lim - '0';
1019	1	50	if (low >= ('a'-'0')) low -= ('a'-'0'-10);
1020	0	0	else if (low >= ('A'-'0')) low -= ('A'-'0'-10);
1021	1	50	if (high >= ('a'-'0')) high -= ('a'-'0'-10);
1022	0	0	else if (high >= ('A'-'0')) high -= ('A'-'0'-10);
1023	1	50	if ((low >> 4) \| (high >> 4))
1024	0		SB_RETURN_ERROR("not a pair of hex digits")
1025	1		cp= (high << 4) \| low;
1026			}
1027	6	50	else if (encoding == SECRET_BUFFER_ENCODING_BASE64) {
			// The loop repeats when the character just consumed was '=' padding, since
			// the byte assembled from it is not real data.
			// NOTE(review): reaching the '=' test requires base64_decode_table['='] to be
			// non-negative; the table is defined outside this section -- confirm.
1028			bool again;
1029			do {
1030	9		again= false;
1031			// Skip over non-base64 chars
1032	12	50	while (pos < lim && base64_decode_table[lim[-1]] < 0)
		100
1033	3		lim--;
1034	9	50	if (pos < lim) {
1035			//warn("lim-pos=%d, lim[-1]=%c, lim_bit=%d", (int)(lim-pos), lim[-1], parse->lim_bit);
1036	9	50	if (base64_decode_table[lim[-1]] < 0)
1037	0		SB_RETURN_ERROR("invalid base64 character");
1038			// ->lim_bit > 0 means the character lim[-1] is partially consumed.
1039			// (sequence is 0, 2, 4, 0)
			// Low part of the output byte: the unconsumed upper bits of lim[-1].
1040	9		cp= ((int)base64_decode_table[lim[-1]]) >> parse->lim_bit;
1041			// parsing an equal sign means 'cp' is bogus and need to go again
1042	9	100	if (lim[-1] == '=')
1043	3		again= true;
1044	9		--lim;
1045			// find next base64 char
1046	9	50	while (pos < lim && base64_decode_table[lim[-1]] < 0)
		50
1047	0		lim--;
1048			}
1049	9	50	if (pos >= lim) {
1050	0		parse->lim_bit= 0;
1051	0		SB_RETURN_ERROR("end of span")
1052			}
1053	9	50	if (base64_decode_table[lim[-1]] < 0)
1054	0		SB_RETURN_ERROR("invalid base64 character");
1055			//warn(" lim-pos=%d, lim[-1]=%c, lim_bit=%d", (int)(lim-pos), lim[-1], parse->lim_bit);
			// High part of the output byte: low bits of the preceding character, shifted
			// up and truncated to 8 bits. That character stays at lim[-1] because its
			// remaining bits belong to the next (earlier) output byte.
1056	9		cp \|= (((int)base64_decode_table[lim[-1]]) << (6 - parse->lim_bit)) & 0xFF;
1057	9		parse->lim_bit += 2;
1058	9	100	if (parse->lim_bit >= 6) {
1059	3		parse->lim_bit= 0;
1060			// If completed a set of 4 b64 chars, lim[-1] is consumed, and need to
1061			// walk backward to find next base64 char
1062	3		--lim;
1063	3	50	while (pos < lim && base64_decode_table[lim[-1]] < 0)
		0
1064	0		lim--;
1065			}
1066			//warn(" cp=%d, lim-pos=%d, lim_bit=%d", cp, (int)(lim-pos), parse->lim_bit);
1067	9	100	} while (again);
1068			}
1069	0	0	else if (encoding == SECRET_BUFFER_ENCODING_I32) {
1070	0	0	if (lim - pos < 4)
1071	0		SB_RETURN_ERROR("end of span");
1072	0		lim -= 4;
			// NOTE(review): as printed this casts the pointer itself; presumably the
			// real source loads an I32 through the pointer and the '*' characters were
			// lost in this listing -- confirm.
1073	0		cp= (I32)lim;
1074			}
1075	0		else SB_RETURN_ERROR("unsupported encoding")
			// Success: commit the moved-back limit.
1076	850		parse->lim= lim;
1077	850		return cp;
1078			#undef SB_RETURN_ERROR
1079			}
1080
1081	1202		static int sizeof_codepoint_encoding(int codepoint, int encoding) {
			/* Number of bytes needed to store 'codepoint' in 'encoding', or -1 when the
			* codepoint cannot be represented in that encoding or the encoding is not one
			* of the values handled below.
			* NOTE(review): there is no lower-bound check, so a negative codepoint is
			* reported as size 1 (2 for HEX, 2 for UTF-16, 4 for I32) instead of -1.
			*/
			// ASCII and ISO-8859-1: one byte, limited to 7 and 8 bits respectively
1082	1202	50	if (encoding == SECRET_BUFFER_ENCODING_ASCII)
1083	0	0	return codepoint < 0x80? 1 : -1;
1084	1202	100	if (encoding == SECRET_BUFFER_ENCODING_ISO8859_1)
1085	110	50	return codepoint < 0x100? 1 : -1;
			// UTF-8: 1 to 4 bytes by magnitude. No upper bound or surrogate check is
			// made in this function.
1086	1092	100	else if (encoding == SECRET_BUFFER_ENCODING_UTF8)
1087	736	100	return codepoint < 0x80? 1 : codepoint < 0x800? 2 : codepoint < 0x10000? 3 : 4;
		100
		100
			// UTF-16: the surrogate range itself is not encodable; everything else takes
			// one unit (2 bytes) or a surrogate pair (4 bytes)
1088	356	50	else if (encoding == SECRET_BUFFER_ENCODING_UTF16LE
1089	356	50	\|\| encoding == SECRET_BUFFER_ENCODING_UTF16BE)
1090	0	0	return codepoint >= 0xD800 && codepoint < 0xE000? -1
1091	0	0	: codepoint < 0x10000? 2 : 4;
		0
			// HEX: two digit characters per byte value
1092	356	100	else if (encoding == SECRET_BUFFER_ENCODING_HEX)
1093	6	50	return codepoint < 0x100? 2 : -1;
1094			/* Base64 would need to track an accumulator, so just return 1 and fix it in the caller */
1095	350	100	else if (encoding == SECRET_BUFFER_ENCODING_BASE64)
1096	78	50	return codepoint < 0x100? 1 : -1;
1097	272	50	else if (encoding == SECRET_BUFFER_ENCODING_I32)
1098	272		return 4;
1099			else
1100	0		return -1;
1101			}
1102
1103	444		static bool sb_parse_encode_codepoint(secret_buffer_parse *dst, int codepoint) {
			/* Write 'codepoint' at dst->pos using dst->encoding and advance dst->pos past
			* the bytes written.
			* Returns true on success. Returns false with dst->error pointing at a static
			* message when the codepoint is 0x110000 or above, does not fit the encoding,
			* does not fit in the space up to dst->lim, or the encoding is not handled
			* here (this includes BASE64, see the comment near the end).
			* NOTE(review): dst->pos is advanced before the per-encoding dispatch, so the
			* final "unsupported encoding" return (reached for BASE64, whose size is
			* reported as 1) leaves dst->pos moved even though nothing was written.
			* NOTE(review): negative codepoints are not rejected by the range check below.
			*/
1104			#define SB_RETURN_ERROR(msg) { dst->error= msg; return false; }
1105	444		int encoding= dst->encoding, n;
			// Remember where to write; dst->pos itself is bumped up front.
1106	444		U8 *dst_pos= dst->pos;
1107			// codepoints above 0x10FFFF are illegal
1108	444	50	if (codepoint >= 0x110000)
1109	0		SB_RETURN_ERROR("invalid codepoint");
1110			// not quite as efficient as checking during the code below, but saves a bunch of redundancy
1111	444		n= sizeof_codepoint_encoding(codepoint, encoding);
1112	444	50	if (n < 0)
1113	0		SB_RETURN_ERROR("character too wide for encoding")
1114	444	50	if (dst->lim - dst_pos < n)
1115	0		SB_RETURN_ERROR("buffer too small")
1116	444		dst->pos += n;
1117
			// ASCII and ISO-8859-1 always have n == 1 and so share the UTF-8 writer's
			// single-byte case.
1118	444	50	if (encoding == SECRET_BUFFER_ENCODING_ASCII
1119	444	100	\|\| encoding == SECRET_BUFFER_ENCODING_ISO8859_1
1120	389	100	\|\| encoding == SECRET_BUFFER_ENCODING_UTF8
1121			) {
1122	423		switch ((n-1)&0x3) { // help the compiler understand there are only 4 possible values
1123	401		case 0: *dst_pos++ = (U8) codepoint;
1124	401		break;
1125	10		case 1: *dst_pos++ = (U8)(0xC0 \| (codepoint >> 6));
1126	10		*dst_pos++ = (U8)(0x80 \| (codepoint & 0x3F));
1127	10		break;
1128	4		case 2: *dst_pos++ = (U8)(0xE0 \| (codepoint >> 12));
1129	4		*dst_pos++ = (U8)(0x80 \| ((codepoint >> 6) & 0x3F));
1130	4		*dst_pos++ = (U8)(0x80 \| (codepoint & 0x3F));
1131	4		break;
1132	8		case 3: *dst_pos++ = (U8)(0xF0 \| (codepoint >> 18));
1133	8		*dst_pos++ = (U8)(0x80 \| ((codepoint >> 12) & 0x3F));
1134	8		*dst_pos++ = (U8)(0x80 \| ((codepoint >> 6) & 0x3F));
1135	8		*dst_pos++ = (U8)(0x80 \| (codepoint & 0x3F));
1136	8		break;
1137			}
1138			}
1139	21	50	else if (encoding == SECRET_BUFFER_ENCODING_UTF16LE
1140	21	50	\|\| encoding == SECRET_BUFFER_ENCODING_UTF16BE
1141	0		) {
			// 'low' is the index (0 or 1) of the low-order byte within each 2-byte unit;
			// XOR-ing an index with it swaps the bytes of a unit for big-endian output.
1142	0		int low= (encoding == SECRET_BUFFER_ENCODING_UTF16LE)? 0 : 1;
1143	0	0	if (n == 2) {
1144	0		dst_pos[low] = (U8)(codepoint & 0xFF);
1145	0		dst_pos[low^1] = (U8)(codepoint >> 8);
1146			}
1147			else {
			// Surrogate pair: w0 carries the upper 10 bits, w1 the lower 10 bits of
			// (codepoint - 0x10000).
1148	0		int adjusted = codepoint - 0x10000;
1149	0		int w0 = 0xD800 \| (adjusted >> 10);
1150	0		int w1 = 0xDC00 \| (adjusted & 0x3FF);
1151	0		dst_pos[low] = (U8)(w0 & 0xFF);
1152	0		dst_pos[1^low] = (U8)(w0 >> 8);
1153	0		dst_pos[2^low] = (U8)(w1 & 0xFF);
1154	0		dst_pos[3^low] = (U8)(w1 >> 8);
1155			}
1156			}
1157	21	100	else if (encoding == SECRET_BUFFER_ENCODING_HEX) {
			// Two digits per byte value, always upper-case
1158	3		dst_pos[0] = "0123456789ABCDEF"[(codepoint >> 4) & 0xF];
1159	3		dst_pos[1] = "0123456789ABCDEF"[codepoint & 0xF];
1160			}
1161	18	50	else if (encoding == SECRET_BUFFER_ENCODING_I32) {
			// NOTE(review): as printed this assigns to a cast expression; presumably
			// the real source stores an I32 through the pointer and the '*' characters
			// were lost in this listing -- confirm.
1162	18		(I32)dst_pos = codepoint;
1163			}
1164			/* BASE64 is not handled here because the '=' padding can only be generated in
1165			* a context that knows when we are ending on a non-multiple-of-4. */
1166	0		else SB_RETURN_ERROR("unsupported encoding");
1167	444		return true;
1168			#undef SB_RETURN_ERROR
1169			}
1170
1171			#define SB_PARSE_MATCH_STR_FN sb_parse_match_str_U8
			/* Template-style instantiation: the two macros are set, the shared .c file
			* is included, and the macros are removed again.
			* Presumably the included file defines the function named by
			* SB_PARSE_MATCH_STR_FN over pattern elements of type SB_PATTERN_EL_TYPE
			* (here U8 elements) -- its contents are not visible in this section.
			*/
1172			#define SB_PATTERN_EL_TYPE const U8
1173			#include "secret_buffer_parse_match_str.c"
1174			#undef SB_PARSE_MATCH_STR_FN
1175			#undef SB_PATTERN_EL_TYPE
1176
1177			#define SB_PARSE_MATCH_STR_FN sb_parse_match_str_I32
			/* Second inclusion of the same shared .c file with different macro values,
			* this time naming sb_parse_match_str_I32 and using I32 pattern elements.
			* Presumably the included file defines the function named by
			* SB_PARSE_MATCH_STR_FN -- its contents are not visible in this section.
			*/
1178			#define SB_PATTERN_EL_TYPE const I32
1179			#include "secret_buffer_parse_match_str.c"
1180			#undef SB_PARSE_MATCH_STR_FN
1181			#undef SB_PATTERN_EL_TYPE
1181			#undef SB_PATTERN_EL_TYPE