| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
|
|
2
|
|
|
|
|
|
|
/* These local parse functions are independent of the SecretBuffer instance, |
|
3
|
|
|
|
|
|
|
* needing only the 'data' pointer to which the parse_state refers. |
|
4
|
|
|
|
|
|
|
* The pos/lim of the parse state must already be checked against the length |
|
5
|
|
|
|
|
|
|
* of the data before calling these. |
|
6
|
|
|
|
|
|
|
*/ |
|
7
|
|
|
|
|
|
|
static int sizeof_codepoint_encoding(int codepoint, int encoding); |
|
8
|
|
|
|
|
|
|
static int sb_parse_prev_codepoint(secret_buffer_parse *parse); |
|
9
|
|
|
|
|
|
|
static int sb_parse_next_codepoint(secret_buffer_parse *parse); |
|
10
|
|
|
|
|
|
|
static bool sb_parse_encode_codepoint(secret_buffer_parse *parse, int codepoint); |
|
11
|
|
|
|
|
|
|
static bool sb_parse_match_charset_bytes(secret_buffer_parse *parse, const secret_buffer_charset *cset, int flags); |
|
12
|
|
|
|
|
|
|
static bool sb_parse_match_charset_codepoints(secret_buffer_parse *parse, const secret_buffer_charset *cset, int flags); |
|
13
|
|
|
|
|
|
|
static bool sb_parse_match_str_U8(secret_buffer_parse *parse, const U8 *pattern, size_t pattern_len, int flags); |
|
14
|
|
|
|
|
|
|
static bool sb_parse_match_str_I32(secret_buffer_parse *parse, const I32 *pattern, size_t pattern_len, int flags); |
|
15
|
|
|
|
|
|
|
|
|
16
|
66
|
|
|
|
|
|
/* Resolve an encoding specifier held in a Perl scalar.
 *
 * The scalar is either a number in the range 0..SECRET_BUFFER_ENCODING_MAX,
 * or one of the case-sensitive names listed in the table below.
 * On success the encoding constant is stored in *out (when out is non-NULL)
 * and true is returned.  Returns false for an out-of-range number or an
 * unrecognized name.
 */
static bool parse_encoding(pTHX_ SV *sv, int *out) {
   static const struct {
      STRLEN name_len;
      const char *name;
      int encoding;
   } known_names[]= {
      {  3, "HEX",        SECRET_BUFFER_ENCODING_HEX },
      {  4, "UTF8",       SECRET_BUFFER_ENCODING_UTF8 },
      {  5, "ASCII",      SECRET_BUFFER_ENCODING_ASCII },
      {  5, "UTF-8",      SECRET_BUFFER_ENCODING_UTF8 },
      {  6, "BASE64",     SECRET_BUFFER_ENCODING_BASE64 },
      {  7, "UTF16LE",    SECRET_BUFFER_ENCODING_UTF16LE },
      {  7, "UTF16BE",    SECRET_BUFFER_ENCODING_UTF16BE },
      {  8, "UTF-16LE",   SECRET_BUFFER_ENCODING_UTF16LE },
      {  8, "UTF-16BE",   SECRET_BUFFER_ENCODING_UTF16BE },
      {  9, "ISO8859_1",  SECRET_BUFFER_ENCODING_ISO8859_1 },
      { 10, "ISO-8859-1", SECRET_BUFFER_ENCODING_ISO8859_1 }
   };
   int resolved;

   if (looks_like_number(sv)) {
      IV num= SvIV(sv);
      if (num < 0 || num > SECRET_BUFFER_ENCODING_MAX)
         return false;
      resolved= (int) num;
   }
   else {
      STRLEN len;
      const char *name= SvPV(sv, len);
      size_t i;
      bool found= false;
      /* every known name is between 3 and 10 characters long */
      if (len < 3 || len > 10)
         return false;
      for (i= 0; i < sizeof(known_names)/sizeof(known_names[0]); i++) {
         /* Only names at least as long as the reported length are candidates;
          * shorter names are never compared against a longer input. */
         if (known_names[i].name_len < len)
            continue;
         if (0 == strcmp(name, known_names[i].name)) {
            resolved= known_names[i].encoding;
            found= true;
            break;
         }
      }
      if (!found)
         return false;
   }
   if (out)
      *out= resolved;
   return true;
}
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
/* Public API --------------------------------------------------------------*/ |
|
47
|
|
|
|
|
|
|
|
|
48
|
|
|
|
|
|
|
/* initialize a parse struct, but only if it is valid span of the buffer */ |
|
49
|
2001
|
|
|
|
|
|
bool secret_buffer_parse_init(secret_buffer_parse *parse, |
|
50
|
|
|
|
|
|
|
secret_buffer *buf, size_t pos, size_t lim, int encoding |
|
51
|
|
|
|
|
|
|
) { |
|
52
|
2001
|
|
|
|
|
|
Zero(parse, 1, secret_buffer_parse); |
|
53
|
|
|
|
|
|
|
// Sanity check this parse state vs. the buffer |
|
54
|
2001
|
100
|
|
|
|
|
if (lim > buf->len || pos > lim) { |
|
|
|
50
|
|
|
|
|
|
|
55
|
1
|
50
|
|
|
|
|
parse->error= pos > lim? "span starts beyond buffer" : "span ends beyond buffer"; |
|
56
|
1
|
|
|
|
|
|
return false; |
|
57
|
|
|
|
|
|
|
} |
|
58
|
2000
|
|
|
|
|
|
parse->pos= ((U8*) buf->data) + pos; |
|
59
|
2000
|
|
|
|
|
|
parse->lim= ((U8*) buf->data) + lim; |
|
60
|
2000
|
|
|
|
|
|
parse->encoding= encoding; |
|
61
|
2000
|
|
|
|
|
|
parse->sbuf= buf; |
|
62
|
2000
|
|
|
|
|
|
return true; |
|
63
|
|
|
|
|
|
|
} |
|
64
|
|
|
|
|
|
|
|
|
65
|
|
|
|
|
|
|
/* Initialize a parse struct, either from a Span, or a SecretBuffer, or a plain Scalar. |
|
66
|
|
|
|
|
|
|
*/ |
|
67
|
1737
|
|
|
|
|
|
bool secret_buffer_parse_init_from_sv(secret_buffer_parse *parse, SV *sv) { |
|
68
|
|
|
|
|
|
|
dTHX; |
|
69
|
|
|
|
|
|
|
secret_buffer *sb; |
|
70
|
|
|
|
|
|
|
secret_buffer_span *span; |
|
71
|
|
|
|
|
|
|
/* Is the sv a Span object? */ |
|
72
|
1737
|
100
|
|
|
|
|
if ((span= secret_buffer_span_from_magic(sv, 0)) && SvTYPE(SvRV(sv)) == SVt_PVHV) { |
|
|
|
50
|
|
|
|
|
|
|
73
|
1284
|
|
|
|
|
|
SV **sb_sv= hv_fetchs((HV*)SvRV(sv), "buf", 1); |
|
74
|
1284
|
|
|
|
|
|
sb= secret_buffer_from_magic(*sb_sv, SECRET_BUFFER_MAGIC_OR_DIE); |
|
75
|
1284
|
|
|
|
|
|
return secret_buffer_parse_init(parse, sb, span->pos, span->lim, span->encoding); |
|
76
|
|
|
|
|
|
|
} |
|
77
|
|
|
|
|
|
|
/* Is the sv a SecretBuffer? */ |
|
78
|
453
|
100
|
|
|
|
|
else if ((sb= secret_buffer_from_magic(sv, 0))) { |
|
79
|
2
|
|
|
|
|
|
return secret_buffer_parse_init(parse, sb, 0, sb->len, SECRET_BUFFER_ENCODING_ISO8859_1); |
|
80
|
|
|
|
|
|
|
} |
|
81
|
|
|
|
|
|
|
/* It needs to at least be defined */ |
|
82
|
451
|
50
|
|
|
|
|
else if (SvOK(sv)) { |
|
83
|
|
|
|
|
|
|
STRLEN len; |
|
84
|
451
|
|
|
|
|
|
char *buf= SvPV(sv, len); |
|
85
|
451
|
|
|
|
|
|
Zero(parse, 1, secret_buffer_parse); |
|
86
|
451
|
|
|
|
|
|
parse->pos= (U8*) buf; |
|
87
|
451
|
|
|
|
|
|
parse->lim= (U8*) buf + len; |
|
88
|
451
|
|
|
|
|
|
parse->encoding= SvUTF8(sv)? SECRET_BUFFER_ENCODING_UTF8 : SECRET_BUFFER_ENCODING_ISO8859_1; |
|
89
|
451
|
|
|
|
|
|
return true; |
|
90
|
|
|
|
|
|
|
} |
|
91
|
|
|
|
|
|
|
else { |
|
92
|
0
|
|
|
|
|
|
Zero(parse, 1, secret_buffer_parse); |
|
93
|
0
|
|
|
|
|
|
parse->error= "Not a Span, SecretBuffer, or defined scalar"; |
|
94
|
0
|
|
|
|
|
|
return false; |
|
95
|
|
|
|
|
|
|
} |
|
96
|
|
|
|
|
|
|
} |
|
97
|
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
/* Scan for a pattern which may be a regex or literal string. |
|
99
|
|
|
|
|
|
|
* Regexes are currently limited to a single charclass. |
|
100
|
|
|
|
|
|
|
*/ |
|
101
|
785
|
|
|
|
|
|
bool secret_buffer_match(secret_buffer_parse *parse, SV *pattern, int flags) { |
|
102
|
|
|
|
|
|
|
dTHX; |
|
103
|
785
|
|
|
|
|
|
REGEXP *rx= (REGEXP*)SvRX(pattern); |
|
104
|
|
|
|
|
|
|
secret_buffer_parse pat_parse; |
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
/* Is the pattern a regexp-ref? */ |
|
107
|
785
|
100
|
|
|
|
|
if (rx) { |
|
108
|
346
|
|
|
|
|
|
secret_buffer_charset *cset= secret_buffer_charset_from_regexpref(pattern); |
|
109
|
346
|
|
|
|
|
|
return secret_buffer_match_charset(parse, cset, flags); |
|
110
|
|
|
|
|
|
|
} |
|
111
|
|
|
|
|
|
|
|
|
112
|
|
|
|
|
|
|
/* load up a parse struct with the pos, lim, and encoding */ |
|
113
|
439
|
50
|
|
|
|
|
if (!secret_buffer_parse_init_from_sv(&pat_parse, pattern)) |
|
114
|
0
|
|
|
|
|
|
croak("%s", pat_parse.error); |
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
/* Remove edge case of zero-length pattern (always matches) */ |
|
117
|
439
|
100
|
|
|
|
|
if (pat_parse.pos >= pat_parse.lim) { |
|
118
|
2
|
50
|
|
|
|
|
if ((flags & SECRET_BUFFER_MATCH_REVERSE)) |
|
119
|
0
|
|
|
|
|
|
parse->pos= parse->lim; |
|
120
|
|
|
|
|
|
|
else |
|
121
|
2
|
|
|
|
|
|
parse->lim= parse->pos; |
|
122
|
2
|
|
|
|
|
|
return !(flags & SECRET_BUFFER_MATCH_NEGATE); |
|
123
|
|
|
|
|
|
|
} |
|
124
|
|
|
|
|
|
|
/* Remove edge case of zero-length subject (never matches) */ |
|
125
|
437
|
100
|
|
|
|
|
if (parse->pos >= parse->lim) { |
|
126
|
4
|
|
|
|
|
|
return (flags & SECRET_BUFFER_MATCH_NEGATE); |
|
127
|
|
|
|
|
|
|
} |
|
128
|
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
/* Since unicode iteration of the pattern is a hassle and might happen lots of times, |
|
130
|
|
|
|
|
|
|
* convert it to either plain bytes or array of U32 codepoints. |
|
131
|
|
|
|
|
|
|
*/ |
|
132
|
433
|
100
|
|
|
|
|
if (pat_parse.encoding != SECRET_BUFFER_ENCODING_ISO8859_1) { |
|
133
|
17
|
|
|
|
|
|
int dst_enc= |
|
134
|
|
|
|
|
|
|
/* these can be transcoded to bytes */ |
|
135
|
17
|
|
|
|
|
|
(pat_parse.encoding == SECRET_BUFFER_ENCODING_ASCII |
|
136
|
17
|
100
|
|
|
|
|
|| pat_parse.encoding == SECRET_BUFFER_ENCODING_HEX |
|
137
|
16
|
50
|
|
|
|
|
|| pat_parse.encoding == SECRET_BUFFER_ENCODING_BASE64) |
|
138
|
|
|
|
|
|
|
? SECRET_BUFFER_ENCODING_ISO8859_1 |
|
139
|
34
|
50
|
|
|
|
|
: SECRET_BUFFER_ENCODING_I32; |
|
140
|
17
|
|
|
|
|
|
SSize_t dst_len= secret_buffer_sizeof_transcode(&pat_parse, dst_enc); |
|
141
|
17
|
50
|
|
|
|
|
if (dst_len < 0) |
|
142
|
0
|
|
|
|
|
|
croak("transcode of pattern failed: %s", pat_parse.error); |
|
143
|
|
|
|
|
|
|
/* No need to transcode SECRET_BUFFER_ENCODING_ASCII, but the above size check |
|
144
|
|
|
|
|
|
|
* verified it is clean 7-bit, which is the whole point of that encoding. |
|
145
|
|
|
|
|
|
|
*/ |
|
146
|
17
|
50
|
|
|
|
|
if (pat_parse.encoding == SECRET_BUFFER_ENCODING_ASCII |
|
147
|
|
|
|
|
|
|
/* Likewise, if SECRET_BUFFER_ENCODING_UTF8's I32 len is exactly 4x the number of |
|
148
|
|
|
|
|
|
|
* original bytes, that means every byte became a character, which means every |
|
149
|
|
|
|
|
|
|
* character could fit in a byte. */ |
|
150
|
17
|
100
|
|
|
|
|
|| (pat_parse.encoding == SECRET_BUFFER_ENCODING_UTF8 |
|
151
|
16
|
100
|
|
|
|
|
&& dst_len == (pat_parse.lim - pat_parse.pos) * 4) |
|
152
|
|
|
|
|
|
|
) { |
|
153
|
9
|
|
|
|
|
|
pat_parse.encoding= SECRET_BUFFER_ENCODING_ISO8859_1; |
|
154
|
|
|
|
|
|
|
} else { |
|
155
|
|
|
|
|
|
|
/* create a temporary secret buffer to hold the transcode */ |
|
156
|
8
|
|
|
|
|
|
secret_buffer *tmp= secret_buffer_new(0, NULL); |
|
157
|
8
|
|
|
|
|
|
secret_buffer_parse pat_orig= pat_parse; |
|
158
|
8
|
|
|
|
|
|
secret_buffer_set_len(tmp, dst_len); |
|
159
|
8
|
50
|
|
|
|
|
if (!secret_buffer_parse_init(&pat_parse, tmp, 0, dst_len, dst_enc)) |
|
160
|
0
|
|
|
|
|
|
croak("transcode of pattern failed: %s", pat_parse.error); |
|
161
|
|
|
|
|
|
|
/* Transcode the pattern */ |
|
162
|
8
|
50
|
|
|
|
|
if (!secret_buffer_transcode(&pat_orig, &pat_parse)) |
|
163
|
0
|
0
|
|
|
|
|
croak("transcode of pattern failed: %s", pat_orig.error? pat_orig.error : pat_parse.error); |
|
164
|
|
|
|
|
|
|
} |
|
165
|
|
|
|
|
|
|
} |
|
166
|
|
|
|
|
|
|
/* In some cases it would also be nice to transcode the subject first, but the |
|
167
|
|
|
|
|
|
|
* final state of the parse struct carries information back to the caller and |
|
168
|
|
|
|
|
|
|
* needs to refer to original positions of characters. */ |
|
169
|
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
/* Now dipatch to sb_parse_match_str_X */ |
|
171
|
433
|
100
|
|
|
|
|
if (pat_parse.encoding == SECRET_BUFFER_ENCODING_ISO8859_1) { |
|
172
|
426
|
|
|
|
|
|
size_t pat_len= pat_parse.lim - pat_parse.pos; |
|
173
|
426
|
|
|
|
|
|
return sb_parse_match_str_U8(parse, pat_parse.pos, pat_len, flags); |
|
174
|
|
|
|
|
|
|
} else { /* must be _I32 encoding, from above */ |
|
175
|
7
|
|
|
|
|
|
size_t pat_len= (pat_parse.lim - pat_parse.pos) >> 2; |
|
176
|
7
|
|
|
|
|
|
return sb_parse_match_str_I32(parse, (I32*) pat_parse.pos, pat_len, flags); |
|
177
|
|
|
|
|
|
|
} |
|
178
|
|
|
|
|
|
|
} |
|
179
|
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
/* Scan for a pattern which is a set of characters */ |
|
181
|
346
|
|
|
|
|
|
bool secret_buffer_match_charset(secret_buffer_parse *parse, secret_buffer_charset *cset, int flags) { |
|
182
|
346
|
100
|
|
|
|
|
if (parse->pos >= parse->lim) // empty range |
|
183
|
48
|
|
|
|
|
|
return false; |
|
184
|
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
// byte matching gets to use a more efficient algorithm |
|
186
|
298
|
|
|
|
|
|
return parse->encoding == SECRET_BUFFER_ENCODING_ISO8859_1 |
|
187
|
289
|
|
|
|
|
|
? sb_parse_match_charset_bytes(parse, cset, flags) |
|
188
|
587
|
100
|
|
|
|
|
: sb_parse_match_charset_codepoints(parse, cset, flags); |
|
189
|
|
|
|
|
|
|
} |
|
190
|
|
|
|
|
|
|
|
|
191
|
|
|
|
|
|
|
/* Scan for a pattern which is a literal string of bytes. |
|
192
|
|
|
|
|
|
|
*/ |
|
193
|
0
|
|
|
|
|
|
bool secret_buffer_match_bytestr(secret_buffer_parse *parse, char *data, size_t datalen, int flags) { |
|
194
|
0
|
|
|
|
|
|
return sb_parse_match_str_U8(parse, (U8*) data, datalen, flags); |
|
195
|
|
|
|
|
|
|
} |
|
196
|
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
/* Count number of bytes required to transcode the source. |
|
198
|
|
|
|
|
|
|
* If the source contains an invalid character for its encoding, or that codepoint |
|
199
|
|
|
|
|
|
|
* can't be encoded as the dst_encoding, this returns -1 and sets src->error |
|
200
|
|
|
|
|
|
|
* and also sets src->pos pointing at the character that could not be converted. |
|
201
|
|
|
|
|
|
|
*/ |
|
202
|
118
|
|
|
|
|
|
SSize_t secret_buffer_sizeof_transcode(secret_buffer_parse *src, int dst_encoding) { |
|
203
|
|
|
|
|
|
|
// If the source and destination encodings are both bytes, return the length |
|
204
|
118
|
100
|
|
|
|
|
if (dst_encoding == src->encoding && src->encoding == 0) |
|
|
|
100
|
|
|
|
|
|
|
205
|
17
|
|
|
|
|
|
return src->lim - src->pos; |
|
206
|
|
|
|
|
|
|
// Else need to iterate characters (to validate) and re-encode them |
|
207
|
|
|
|
|
|
|
else { |
|
208
|
101
|
|
|
|
|
|
size_t dst_size_needed= 0; |
|
209
|
|
|
|
|
|
|
secret_buffer_parse tmp; |
|
210
|
101
|
|
|
|
|
|
Zero(&tmp, 1, secret_buffer_parse); |
|
211
|
101
|
|
|
|
|
|
tmp.pos= src->pos; |
|
212
|
101
|
|
|
|
|
|
tmp.lim= src->lim; |
|
213
|
101
|
|
|
|
|
|
tmp.encoding= src->encoding; |
|
214
|
859
|
100
|
|
|
|
|
while (tmp.pos < tmp.lim) { |
|
215
|
758
|
|
|
|
|
|
int cp= sb_parse_next_codepoint(&tmp); |
|
216
|
758
|
50
|
|
|
|
|
if (cp < 0) return -1; |
|
217
|
758
|
|
|
|
|
|
int ch_size= sizeof_codepoint_encoding(cp, dst_encoding); |
|
218
|
758
|
50
|
|
|
|
|
if (ch_size < 0) return -1; |
|
219
|
758
|
|
|
|
|
|
dst_size_needed += ch_size; |
|
220
|
|
|
|
|
|
|
} |
|
221
|
|
|
|
|
|
|
// If dest is base64, need special calculation |
|
222
|
101
|
100
|
|
|
|
|
if (dst_encoding == SECRET_BUFFER_ENCODING_BASE64) { |
|
223
|
10
|
|
|
|
|
|
dst_size_needed= ((dst_size_needed + 2) / 3) * 4; |
|
224
|
|
|
|
|
|
|
} |
|
225
|
101
|
|
|
|
|
|
return dst_size_needed; |
|
226
|
|
|
|
|
|
|
} |
|
227
|
|
|
|
|
|
|
} |
|
228
|
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
/* The 64 output characters of base64, indexed by 6-bit value.
 * The array is exactly 64 bytes, so there is no NUL terminator.
 */
static const char base64_alphabet[64]=
   "ABCDEFGHIJKLMNOP"    /*  0..15 */
   "QRSTUVWXYZabcdef"    /* 16..31 */
   "ghijklmnopqrstuv"    /* 32..47 */
   "wxyz0123456789+/";   /* 48..63 */
|
233
|
|
|
|
|
|
|
|
|
234
|
|
|
|
|
|
|
/* Reverse lookup for base64 input bytes: maps each byte value to its 6-bit
 * value (0..63), to 64 for the '=' padding character, or to -1 for any byte
 * that is not part of the alphabet.
 *
 * Generated with:

perl -E 'my @tbl= (-1)x256;
$tbl[ord]= -ord(A)+ord for A..Z;
$tbl[ord]= 26-ord(a)+ord for a..z;
$tbl[ord]= 52-ord(0)+ord for 0..9;
$tbl[ord "+"]= 62;
$tbl[ord "/"]= 63;
$tbl[ord "="]= 64;
say join ",\n", map join(",", @tbl[$_*16 .. $_*16+15]), 0..0xF'

 */
static const int8_t base64_decode_table[256]= {
   /* 0x00 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
   /* 0x10 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
   /* 0x20 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
   /* 0x30 */ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, 64, -1, -1,
   /* 0x40 */ -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
   /* 0x50 */ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
   /* 0x60 */ -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
   /* 0x70 */ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
   /* 0x80 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
   /* 0x90 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
   /* 0xA0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
   /* 0xB0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
   /* 0xC0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
   /* 0xD0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
   /* 0xE0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
   /* 0xF0 */ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
|
262
|
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
/* Transcode characters from one parse state into another. |
|
264
|
|
|
|
|
|
|
* This works sort of like |
|
265
|
|
|
|
|
|
|
* $data= decode($src_enc, substr($src, $src_pos, $src_len)); |
|
266
|
|
|
|
|
|
|
* substr($dst, $dst_pos, $dst_lim, encode($dst_enc, $data)); |
|
267
|
|
|
|
|
|
|
* processing only a range of the source, and replacing only a range of the dest, |
|
268
|
|
|
|
|
|
|
* adjusting the size of dst as needed. Both src->pos and dst->pos |
|
269
|
|
|
|
|
|
|
* are updated. |
|
270
|
|
|
|
|
|
|
*/ |
|
271
|
109
|
|
|
|
|
|
bool secret_buffer_transcode(secret_buffer_parse *src, secret_buffer_parse *dst) { |
|
272
|
109
|
|
|
|
|
|
src->error= NULL; |
|
273
|
109
|
|
|
|
|
|
dst->error= NULL; |
|
274
|
|
|
|
|
|
|
// If the source and destination encodings are both bytes, use memcpy |
|
275
|
109
|
100
|
|
|
|
|
if (dst->encoding == src->encoding && src->encoding == 0) { |
|
|
|
100
|
|
|
|
|
|
|
276
|
17
|
|
|
|
|
|
size_t cnt= dst->lim - dst->pos; |
|
277
|
17
|
50
|
|
|
|
|
if (src->lim - src->pos != cnt) { |
|
278
|
0
|
|
|
|
|
|
dst->error= "miscalculated buffer length"; |
|
279
|
0
|
|
|
|
|
|
return false; |
|
280
|
|
|
|
|
|
|
} |
|
281
|
17
|
|
|
|
|
|
memcpy(dst->pos, src->pos, cnt); |
|
282
|
17
|
|
|
|
|
|
dst->pos += cnt; |
|
283
|
17
|
|
|
|
|
|
src->pos += cnt; |
|
284
|
|
|
|
|
|
|
} |
|
285
|
|
|
|
|
|
|
// Else need to iterate characters and re-encode them |
|
286
|
|
|
|
|
|
|
// base64 encoding doesn't work with sb_parse_encode_codepoint, so it gets |
|
287
|
|
|
|
|
|
|
// special treatment. |
|
288
|
92
|
100
|
|
|
|
|
else if (dst->encoding == SECRET_BUFFER_ENCODING_BASE64) { |
|
289
|
|
|
|
|
|
|
// Read 3, write 4 |
|
290
|
10
|
|
|
|
|
|
int accum= 0; |
|
291
|
10
|
|
|
|
|
|
int shift= 16, cp; |
|
292
|
88
|
100
|
|
|
|
|
while (src->pos < src->lim) { |
|
293
|
78
|
|
|
|
|
|
cp= sb_parse_next_codepoint(src); |
|
294
|
78
|
50
|
|
|
|
|
if (cp > 0xFF) { |
|
295
|
0
|
|
|
|
|
|
dst->error= "byte out of range"; |
|
296
|
0
|
|
|
|
|
|
return false; |
|
297
|
|
|
|
|
|
|
} |
|
298
|
78
|
100
|
|
|
|
|
if (!shift) { |
|
299
|
24
|
50
|
|
|
|
|
if (dst->pos + 4 > dst->lim) { |
|
300
|
0
|
|
|
|
|
|
dst->error= "miscalculated buffer length"; |
|
301
|
0
|
|
|
|
|
|
return false; |
|
302
|
|
|
|
|
|
|
} |
|
303
|
24
|
|
|
|
|
|
accum |= cp; |
|
304
|
24
|
|
|
|
|
|
*dst->pos++ = base64_alphabet[0x3F & (accum >> 18)]; |
|
305
|
24
|
|
|
|
|
|
*dst->pos++ = base64_alphabet[0x3F & (accum >> 12)]; |
|
306
|
24
|
|
|
|
|
|
*dst->pos++ = base64_alphabet[0x3F & (accum >> 6)]; |
|
307
|
24
|
|
|
|
|
|
*dst->pos++ = base64_alphabet[0x3F & accum]; |
|
308
|
24
|
|
|
|
|
|
accum= 0; |
|
309
|
24
|
|
|
|
|
|
shift= 16; |
|
310
|
|
|
|
|
|
|
} |
|
311
|
|
|
|
|
|
|
else { |
|
312
|
54
|
|
|
|
|
|
accum |= (cp << shift); |
|
313
|
54
|
|
|
|
|
|
shift -= 8; |
|
314
|
|
|
|
|
|
|
} |
|
315
|
|
|
|
|
|
|
} |
|
316
|
10
|
100
|
|
|
|
|
if (dst->pos + (shift < 16? 4 : 0) != dst->lim) { |
|
|
|
50
|
|
|
|
|
|
|
317
|
0
|
|
|
|
|
|
dst->error= "miscalculated buffer length"; |
|
318
|
0
|
|
|
|
|
|
return false; |
|
319
|
|
|
|
|
|
|
} |
|
320
|
|
|
|
|
|
|
// write leftover accumulated bits |
|
321
|
10
|
100
|
|
|
|
|
if (shift < 16) { |
|
322
|
5
|
|
|
|
|
|
*dst->pos++ = base64_alphabet[0x3F & (accum >> 18)]; |
|
323
|
5
|
|
|
|
|
|
*dst->pos++ = base64_alphabet[0x3F & (accum >> 12)]; |
|
324
|
5
|
100
|
|
|
|
|
*dst->pos++ = shift? '=' : base64_alphabet[0x3F & (accum >> 6)]; |
|
325
|
5
|
|
|
|
|
|
*dst->pos++ = '='; |
|
326
|
|
|
|
|
|
|
} |
|
327
|
|
|
|
|
|
|
} |
|
328
|
|
|
|
|
|
|
else { |
|
329
|
526
|
100
|
|
|
|
|
while (src->pos < src->lim) { |
|
330
|
444
|
|
|
|
|
|
int cp= sb_parse_next_codepoint(src); |
|
331
|
444
|
50
|
|
|
|
|
if (cp < 0) |
|
332
|
0
|
|
|
|
|
|
return false; // error is already set |
|
333
|
444
|
|
|
|
|
|
int len= sb_parse_encode_codepoint(dst, cp); |
|
334
|
444
|
50
|
|
|
|
|
if (len < 0) |
|
335
|
0
|
|
|
|
|
|
return false; // error is already set |
|
336
|
|
|
|
|
|
|
} |
|
337
|
82
|
50
|
|
|
|
|
if (dst->pos != dst->lim) { |
|
338
|
0
|
|
|
|
|
|
dst->error= "miscalculated buffer length"; |
|
339
|
0
|
|
|
|
|
|
return false; |
|
340
|
|
|
|
|
|
|
} |
|
341
|
|
|
|
|
|
|
} |
|
342
|
109
|
|
|
|
|
|
return true; |
|
343
|
|
|
|
|
|
|
} |
|
344
|
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
bool |
|
346
|
101
|
|
|
|
|
|
secret_buffer_copy_to(secret_buffer_parse *src, SV *dst_sv, int encoding, bool append) { |
|
347
|
|
|
|
|
|
|
dTHX; |
|
348
|
|
|
|
|
|
|
secret_buffer_parse dst; |
|
349
|
101
|
|
|
|
|
|
secret_buffer *dst_sbuf= NULL; |
|
350
|
|
|
|
|
|
|
SSize_t need_bytes; |
|
351
|
101
|
|
|
|
|
|
bool dst_wide= false; |
|
352
|
|
|
|
|
|
|
|
|
353
|
101
|
|
|
|
|
|
Zero(&dst, 1, secret_buffer_parse); |
|
354
|
|
|
|
|
|
|
// Encoding may be -1 to indicate the user didn't specify, in which case we use the |
|
355
|
|
|
|
|
|
|
// same encoding as the source, unless the destination is a perl scalar (handled below) |
|
356
|
101
|
100
|
|
|
|
|
dst.encoding= encoding >= 0? encoding : src->encoding; |
|
357
|
101
|
100
|
|
|
|
|
if (sv_isobject(dst_sv)) { |
|
358
|
|
|
|
|
|
|
// if object, must be a SecretBuffer |
|
359
|
27
|
|
|
|
|
|
dst_sbuf= secret_buffer_from_magic(dst_sv, SECRET_BUFFER_MAGIC_OR_DIE); |
|
360
|
|
|
|
|
|
|
} |
|
361
|
|
|
|
|
|
|
else { |
|
362
|
|
|
|
|
|
|
// Going to overwrite the scalar, or if its a scalar-ref, overwrite that. |
|
363
|
74
|
50
|
|
|
|
|
if (SvROK(dst_sv) && !sv_isobject(dst_sv) && SvTYPE(SvRV(dst_sv)) <= SVt_PVMG) |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
364
|
0
|
|
|
|
|
|
dst_sv= SvRV(dst_sv); |
|
365
|
|
|
|
|
|
|
// Refuse to overwrite any other kind of ref |
|
366
|
74
|
50
|
|
|
|
|
if (SvTYPE(dst_sv) > SVt_PVMG || SvROK(dst_sv)) { |
|
|
|
50
|
|
|
|
|
|
|
367
|
0
|
|
|
|
|
|
src->error= "Can only copy_to scalars or scalar-refs"; |
|
368
|
0
|
|
|
|
|
|
return false; |
|
369
|
|
|
|
|
|
|
} |
|
370
|
|
|
|
|
|
|
// If the source encoding is a type of unicode, and the destination encoding is not |
|
371
|
|
|
|
|
|
|
// specified, then write wide characters (utf-8) to the perl scalar and flag it as utf8 |
|
372
|
74
|
100
|
|
|
|
|
if (encoding < 0 && SECRET_BUFFER_ENCODING_IS_UNICODE(src->encoding)) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
373
|
66
|
|
|
|
|
|
dst.encoding= SECRET_BUFFER_ENCODING_UTF8; |
|
374
|
66
|
|
|
|
|
|
dst_wide= true; |
|
375
|
|
|
|
|
|
|
} |
|
376
|
|
|
|
|
|
|
} |
|
377
|
|
|
|
|
|
|
// Determine how many bytes we need |
|
378
|
101
|
|
|
|
|
|
need_bytes= secret_buffer_sizeof_transcode(src, dst.encoding); |
|
379
|
101
|
50
|
|
|
|
|
if (need_bytes < 0) |
|
380
|
0
|
|
|
|
|
|
return false; |
|
381
|
|
|
|
|
|
|
// Prepare the buffers for that many bytes |
|
382
|
101
|
100
|
|
|
|
|
if (dst_sbuf) { |
|
383
|
|
|
|
|
|
|
// For destination SecretBuffer, set length to 0 unless appending, then |
|
384
|
|
|
|
|
|
|
// ensure enough allocated space for need_bytes, then transcode and update |
|
385
|
|
|
|
|
|
|
// the length in the block below. |
|
386
|
27
|
100
|
|
|
|
|
if (!append) |
|
387
|
20
|
|
|
|
|
|
secret_buffer_set_len(dst_sbuf, 0); /* clears secrets */ |
|
388
|
27
|
|
|
|
|
|
secret_buffer_alloc_at_least(dst_sbuf, dst_sbuf->len + need_bytes); |
|
389
|
27
|
|
|
|
|
|
dst.pos= (U8*) dst_sbuf->data + dst_sbuf->len; |
|
390
|
27
|
|
|
|
|
|
dst.lim= dst.pos + need_bytes; |
|
391
|
|
|
|
|
|
|
} |
|
392
|
|
|
|
|
|
|
else { |
|
393
|
|
|
|
|
|
|
// For destination SV, set length to 0 unless appending, then force it to |
|
394
|
|
|
|
|
|
|
// be bytes or utf-8, then grow it to ensure room for additional `need_bytes`. |
|
395
|
|
|
|
|
|
|
STRLEN len; |
|
396
|
|
|
|
|
|
|
// If overwriting, set the length to 0 before forcing to bytes or utf8 |
|
397
|
74
|
100
|
|
|
|
|
if (!append) |
|
398
|
72
|
|
|
|
|
|
sv_setpvn(dst_sv, "", 0); |
|
399
|
|
|
|
|
|
|
// force it to the type required |
|
400
|
74
|
100
|
|
|
|
|
if (dst_wide) SvPVutf8(dst_sv, len); |
|
401
|
8
|
|
|
|
|
|
else SvPVbyte(dst_sv, len); |
|
402
|
|
|
|
|
|
|
// grow it to the required length, for writing |
|
403
|
74
|
100
|
|
|
|
|
sv_grow(dst_sv, (append? len : 0) + need_bytes + 1); |
|
404
|
74
|
|
|
|
|
|
dst.pos= (U8*) SvPVX_mutable(dst_sv) + len; |
|
405
|
74
|
|
|
|
|
|
dst.lim= dst.pos + need_bytes; |
|
406
|
|
|
|
|
|
|
// don't forget the NUL terminator |
|
407
|
74
|
|
|
|
|
|
*dst.lim= '\0'; |
|
408
|
|
|
|
|
|
|
} |
|
409
|
101
|
50
|
|
|
|
|
if (!secret_buffer_transcode(src, &dst)) { |
|
410
|
0
|
0
|
|
|
|
|
if (!src->error) src->error= dst.error; |
|
411
|
0
|
|
|
|
|
|
return false; |
|
412
|
|
|
|
|
|
|
} |
|
413
|
|
|
|
|
|
|
/* update the lengths */ |
|
414
|
101
|
100
|
|
|
|
|
if (dst_sbuf) { |
|
415
|
27
|
|
|
|
|
|
dst_sbuf->len += need_bytes; |
|
416
|
|
|
|
|
|
|
} |
|
417
|
|
|
|
|
|
|
else { |
|
418
|
74
|
|
|
|
|
|
SvCUR_set(dst_sv, SvCUR(dst_sv) + need_bytes); |
|
419
|
74
|
50
|
|
|
|
|
SvSETMAGIC(dst_sv); |
|
420
|
|
|
|
|
|
|
} |
|
421
|
101
|
|
|
|
|
|
return true; |
|
422
|
|
|
|
|
|
|
} |
|
423
|
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
/* Append DER length octets (ASN.1 Length field, definite form only). |
|
425
|
|
|
|
|
|
|
* |
|
426
|
|
|
|
|
|
|
* DER rules: |
|
427
|
|
|
|
|
|
|
* - If len <= 127: single byte [0x00..0x7F] |
|
428
|
|
|
|
|
|
|
* - Else: first byte is 0x80 | N, where N is # of following length bytes (big-endian), |
|
429
|
|
|
|
|
|
|
* and the length must be encoded in the minimal number of bytes (no leading 0x00). |
|
430
|
|
|
|
|
|
|
* |
|
431
|
|
|
|
|
|
|
* This function encodes ONLY the length field (not tag/value). |
|
432
|
|
|
|
|
|
|
*/ |
|
433
|
|
|
|
|
|
|
void |
|
434
|
384
|
|
|
|
|
|
secret_buffer_append_uv_asn1_der_length(secret_buffer *buf, UV val) { |
|
435
|
|
|
|
|
|
|
dTHX; |
|
436
|
384
|
|
|
|
|
|
int enc_len = 1; |
|
437
|
|
|
|
|
|
|
U8 *pos; |
|
438
|
384
|
100
|
|
|
|
|
if (val > 127) { |
|
439
|
|
|
|
|
|
|
/* Determine minimal number of bytes needed to represent len in base-256. */ |
|
440
|
339
|
|
|
|
|
|
UV tmp = val; |
|
441
|
2001
|
100
|
|
|
|
|
while (tmp) { |
|
442
|
1662
|
|
|
|
|
|
enc_len++; |
|
443
|
1662
|
|
|
|
|
|
tmp >>= 8; |
|
444
|
|
|
|
|
|
|
} |
|
445
|
|
|
|
|
|
|
} |
|
446
|
|
|
|
|
|
|
/* In BER/DER, the long-form initial octet has 7 bits of length-of-length. |
|
447
|
|
|
|
|
|
|
* 0x80 is indefinite length (forbidden in DER), 0xFF would mean 127 length bytes. |
|
448
|
|
|
|
|
|
|
* With 64-bit UV enc_len will never exceed 9. |
|
449
|
|
|
|
|
|
|
*/ |
|
450
|
384
|
50
|
|
|
|
|
ASSUME(enc_len < 127); |
|
451
|
384
|
|
|
|
|
|
secret_buffer_set_len(buf, buf->len + enc_len); |
|
452
|
384
|
|
|
|
|
|
pos= (U8*) buf->data + buf->len - 1; |
|
453
|
384
|
100
|
|
|
|
|
if (val <= 127) { |
|
454
|
45
|
|
|
|
|
|
*pos = (U8) val; |
|
455
|
|
|
|
|
|
|
} else { |
|
456
|
339
|
|
|
|
|
|
UV tmp = val; |
|
457
|
|
|
|
|
|
|
/* Write the length big-endian into enc[1..n]. */ |
|
458
|
2001
|
100
|
|
|
|
|
while (tmp) { |
|
459
|
1662
|
|
|
|
|
|
*pos-- = (U8)(tmp & 0xFF); |
|
460
|
1662
|
|
|
|
|
|
tmp >>= 8; |
|
461
|
|
|
|
|
|
|
} |
|
462
|
339
|
|
|
|
|
|
*pos= (U8) (0x80 | (U8)(enc_len-1)); |
|
463
|
|
|
|
|
|
|
} |
|
464
|
384
|
|
|
|
|
|
} |
|
465
|
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
/* Parse ASN.1 DER Length (definite form only) */ |
|
467
|
|
|
|
|
|
|
bool |
|
468
|
384
|
|
|
|
|
|
secret_buffer_parse_uv_asn1_der_length(secret_buffer_parse *parse, UV *out) { |
|
469
|
|
|
|
|
|
|
/* Work on a local cursor so we can roll back on failure */ |
|
470
|
384
|
|
|
|
|
|
U8 *pos = parse->pos; |
|
471
|
384
|
|
|
|
|
|
U8 *lim = parse->lim; |
|
472
|
|
|
|
|
|
|
UV result; |
|
473
|
|
|
|
|
|
|
|
|
474
|
384
|
50
|
|
|
|
|
if (pos >= lim) { |
|
475
|
0
|
|
|
|
|
|
parse->error = "unexpected end of buffer"; |
|
476
|
0
|
|
|
|
|
|
return false; |
|
477
|
|
|
|
|
|
|
} |
|
478
|
|
|
|
|
|
|
|
|
479
|
384
|
|
|
|
|
|
result = *pos++; |
|
480
|
|
|
|
|
|
|
|
|
481
|
|
|
|
|
|
|
/* If 0..127, the byte is the length value itself, otherwise it is the number of octets |
|
482
|
|
|
|
|
|
|
* to read following that byte. */ |
|
483
|
384
|
100
|
|
|
|
|
if ((result & 0x80)) { |
|
484
|
339
|
|
|
|
|
|
int n = result & 0x7F; |
|
485
|
|
|
|
|
|
|
/* 0x80 means indefinite length (BER/CER), forbidden in DER */ |
|
486
|
339
|
50
|
|
|
|
|
if (n == 0) { |
|
487
|
0
|
|
|
|
|
|
parse->error = "ASN.1 DER indefinite length not allowed"; |
|
488
|
0
|
|
|
|
|
|
return false; |
|
489
|
|
|
|
|
|
|
} |
|
490
|
|
|
|
|
|
|
/* Number of octets should be smallest possible encoding, so if it is larger than size_t |
|
491
|
|
|
|
|
|
|
* don't even bother trying to decode it. |
|
492
|
|
|
|
|
|
|
*/ |
|
493
|
339
|
50
|
|
|
|
|
if (n > sizeof(UV)) { |
|
494
|
0
|
|
|
|
|
|
parse->error = "ASN.1 DER length too large for perl UV"; |
|
495
|
0
|
|
|
|
|
|
return false; |
|
496
|
|
|
|
|
|
|
} |
|
497
|
|
|
|
|
|
|
/* ensure we have that many bytes */ |
|
498
|
339
|
50
|
|
|
|
|
if ((size_t)(lim - pos) < (size_t)n) { |
|
499
|
0
|
|
|
|
|
|
parse->error = "unexpected end of buffer"; |
|
500
|
0
|
|
|
|
|
|
return false; |
|
501
|
|
|
|
|
|
|
} |
|
502
|
|
|
|
|
|
|
/* DER minimal encoding rules: |
|
503
|
|
|
|
|
|
|
* - no leading 0x00 in the length octets |
|
504
|
|
|
|
|
|
|
* - long form must not be used for lengths <= 127 |
|
505
|
|
|
|
|
|
|
*/ |
|
506
|
339
|
|
|
|
|
|
lim= pos + n; |
|
507
|
339
|
|
|
|
|
|
result= *pos++; |
|
508
|
339
|
50
|
|
|
|
|
if (!result) { |
|
509
|
0
|
|
|
|
|
|
parse->error = "ASN.1 DER length has leading zero (non-minimal)"; |
|
510
|
0
|
|
|
|
|
|
return false; |
|
511
|
|
|
|
|
|
|
} |
|
512
|
|
|
|
|
|
|
/* Parse remaining bytes of big-endian unsigned integer */ |
|
513
|
1662
|
100
|
|
|
|
|
while (pos < lim) |
|
514
|
1323
|
|
|
|
|
|
result= (result << 8) | *pos++; |
|
515
|
|
|
|
|
|
|
/* DER should not use 1-byte encoding if it would have fit in the initial byte */ |
|
516
|
339
|
50
|
|
|
|
|
if (result < 0x80) { |
|
517
|
0
|
|
|
|
|
|
parse->error = "ASN.1 DER length should use short form (non-minimal)"; |
|
518
|
0
|
|
|
|
|
|
return false; |
|
519
|
|
|
|
|
|
|
} |
|
520
|
|
|
|
|
|
|
} |
|
521
|
384
|
50
|
|
|
|
|
if (out) *out = result; |
|
522
|
384
|
|
|
|
|
|
parse->pos = pos; |
|
523
|
384
|
|
|
|
|
|
parse->error = NULL; |
|
524
|
384
|
|
|
|
|
|
return true; |
|
525
|
|
|
|
|
|
|
} |
|
526
|
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
/* Append canonical unsigned Base128, Little-Endian |
|
528
|
|
|
|
|
|
|
* |
|
529
|
|
|
|
|
|
|
* Rules: |
|
530
|
|
|
|
|
|
|
* - 7 data bits per byte, little-endian (least significant group first) |
|
531
|
|
|
|
|
|
|
* - High bit 0x80 set on all bytes except the final byte |
|
532
|
|
|
|
|
|
|
* - Canonical/minimal: stop as soon as remaining value is 0 |
|
533
|
|
|
|
|
|
|
*/ |
|
534
|
|
|
|
|
|
|
void |
|
535
|
384
|
|
|
|
|
|
secret_buffer_append_uv_base128le(secret_buffer *buf, UV val) { |
|
536
|
|
|
|
|
|
|
dTHX; |
|
537
|
|
|
|
|
|
|
U8 *pos; |
|
538
|
384
|
|
|
|
|
|
int enc_len= 1; |
|
539
|
384
|
|
|
|
|
|
UV tmp= val >> 7; |
|
540
|
1923
|
100
|
|
|
|
|
while (tmp) { |
|
541
|
1539
|
|
|
|
|
|
enc_len++; |
|
542
|
1539
|
|
|
|
|
|
tmp >>= 7; |
|
543
|
|
|
|
|
|
|
} |
|
544
|
384
|
|
|
|
|
|
secret_buffer_set_len(buf, buf->len + enc_len); |
|
545
|
384
|
|
|
|
|
|
pos= (U8*) buf->data + buf->len - enc_len; |
|
546
|
|
|
|
|
|
|
/* Encode */ |
|
547
|
384
|
|
|
|
|
|
tmp= val; |
|
548
|
|
|
|
|
|
|
do { |
|
549
|
1923
|
|
|
|
|
|
U8 byte = (U8)(tmp & 0x7F); |
|
550
|
1923
|
|
|
|
|
|
tmp >>= 7; |
|
551
|
1923
|
100
|
|
|
|
|
if (tmp) |
|
552
|
1539
|
|
|
|
|
|
byte |= 0x80; |
|
553
|
1923
|
|
|
|
|
|
*pos++ = byte; |
|
554
|
1923
|
100
|
|
|
|
|
} while (tmp); |
|
555
|
384
|
50
|
|
|
|
|
ASSUME(pos == (U8*)(buf->data + buf->len)); |
|
556
|
384
|
|
|
|
|
|
} |
|
557
|
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
/* Parse Unsigned LittleEndian Base128 (also requiring canonical / minimal encoding) */ |
|
559
|
|
|
|
|
|
|
bool |
|
560
|
384
|
|
|
|
|
|
secret_buffer_parse_uv_base128le(secret_buffer_parse *parse, UV *out) { |
|
561
|
384
|
|
|
|
|
|
U8 *pos = parse->pos; |
|
562
|
384
|
|
|
|
|
|
U8 *lim = parse->lim; |
|
563
|
384
|
|
|
|
|
|
UV result= 0, payload; |
|
564
|
384
|
|
|
|
|
|
int shift= 7; |
|
565
|
|
|
|
|
|
|
|
|
566
|
384
|
50
|
|
|
|
|
if (pos >= lim) { |
|
567
|
0
|
|
|
|
|
|
parse->error = "unexpected end of buffer"; |
|
568
|
0
|
|
|
|
|
|
return false; |
|
569
|
|
|
|
|
|
|
} |
|
570
|
384
|
|
|
|
|
|
result= payload= *pos & 0x7F; |
|
571
|
|
|
|
|
|
|
/* Scan forward looking for the first byte without the continuation flag */ |
|
572
|
1923
|
100
|
|
|
|
|
while (*pos++ & 0x80) { |
|
573
|
1539
|
50
|
|
|
|
|
if (pos >= lim) { |
|
574
|
0
|
|
|
|
|
|
parse->error = "unexpected end of buffer"; |
|
575
|
0
|
|
|
|
|
|
return false; |
|
576
|
|
|
|
|
|
|
} |
|
577
|
1539
|
|
|
|
|
|
payload= *pos & 0x7F; |
|
578
|
1539
|
100
|
|
|
|
|
if (shift > sizeof(UV)*8 - 7) { |
|
579
|
|
|
|
|
|
|
/* Do any of the bits overflow? Is the continuation flag set? */ |
|
580
|
3
|
50
|
|
|
|
|
if (shift >= sizeof(UV)*8 || (payload >> (sizeof(UV)*8 - shift))) { |
|
|
|
50
|
|
|
|
|
|
|
581
|
0
|
|
|
|
|
|
parse->error = "Base128-LE value overflows perl UV"; |
|
582
|
0
|
|
|
|
|
|
return false; |
|
583
|
|
|
|
|
|
|
} |
|
584
|
|
|
|
|
|
|
} |
|
585
|
1539
|
|
|
|
|
|
result |= payload << shift; |
|
586
|
1539
|
|
|
|
|
|
shift += 7; |
|
587
|
|
|
|
|
|
|
} |
|
588
|
|
|
|
|
|
|
/* check if the high bits were all zero, meaning an unnecessary byte was encoded */ |
|
589
|
384
|
100
|
|
|
|
|
if (!payload && result != 0) { |
|
|
|
50
|
|
|
|
|
|
|
590
|
0
|
|
|
|
|
|
parse->error = "Over-long encoding of Base128-LE"; |
|
591
|
0
|
|
|
|
|
|
return false; |
|
592
|
|
|
|
|
|
|
} |
|
593
|
384
|
50
|
|
|
|
|
if (out) *out = result; |
|
594
|
384
|
|
|
|
|
|
parse->pos = pos; |
|
595
|
384
|
|
|
|
|
|
parse->error = NULL; |
|
596
|
384
|
|
|
|
|
|
return true; |
|
597
|
|
|
|
|
|
|
} |
|
598
|
|
|
|
|
|
|
|
|
599
|
|
|
|
|
|
|
/* Append canonical unsigned Base128, Big-Endian |
|
600
|
|
|
|
|
|
|
* |
|
601
|
|
|
|
|
|
|
* Rules: |
|
602
|
|
|
|
|
|
|
* - 7 data bits per byte, big-endian (most significant group first) |
|
603
|
|
|
|
|
|
|
* - High bit 0x80 set on all bytes except the final byte |
|
604
|
|
|
|
|
|
|
* - Canonical/minimal: stop as soon as remaining value is 0 |
|
605
|
|
|
|
|
|
|
*/ |
|
606
|
|
|
|
|
|
|
void |
|
607
|
387
|
|
|
|
|
|
secret_buffer_append_uv_base128be(secret_buffer *buf, UV val) { |
|
608
|
|
|
|
|
|
|
dTHX; |
|
609
|
|
|
|
|
|
|
U8 *pos; |
|
610
|
387
|
|
|
|
|
|
int enc_len= 1, shift; |
|
611
|
387
|
|
|
|
|
|
UV tmp= val >> 7; |
|
612
|
1926
|
100
|
|
|
|
|
while (tmp) { |
|
613
|
1539
|
|
|
|
|
|
enc_len++; |
|
614
|
1539
|
|
|
|
|
|
tmp >>= 7; |
|
615
|
|
|
|
|
|
|
} |
|
616
|
387
|
|
|
|
|
|
secret_buffer_set_len(buf, buf->len + enc_len); |
|
617
|
387
|
|
|
|
|
|
pos= (U8*) buf->data + buf->len - enc_len; |
|
618
|
|
|
|
|
|
|
/* Encode */ |
|
619
|
2313
|
100
|
|
|
|
|
for (shift= (enc_len-1) * 7; shift >= 0; shift -= 7) { |
|
620
|
1926
|
|
|
|
|
|
U8 byte = (U8)((val >> shift) & 0x7F); |
|
621
|
1926
|
100
|
|
|
|
|
if (shift) |
|
622
|
1539
|
|
|
|
|
|
byte |= 0x80; |
|
623
|
1926
|
|
|
|
|
|
*pos++ = byte; |
|
624
|
|
|
|
|
|
|
} |
|
625
|
387
|
50
|
|
|
|
|
ASSUME(pos == (U8*)(buf->data + buf->len)); |
|
626
|
387
|
|
|
|
|
|
} |
|
627
|
|
|
|
|
|
|
|
|
628
|
|
|
|
|
|
|
/* Parse Unsigned BigEndian Base128 (also requiring canonical / minimal encoding) */ |
|
629
|
|
|
|
|
|
|
bool |
|
630
|
395
|
|
|
|
|
|
secret_buffer_parse_uv_base128be(secret_buffer_parse *parse, UV *out) { |
|
631
|
395
|
|
|
|
|
|
U8 *pos = parse->pos; |
|
632
|
395
|
|
|
|
|
|
U8 *lim = parse->lim; |
|
633
|
395
|
|
|
|
|
|
UV result= 0; |
|
634
|
|
|
|
|
|
|
|
|
635
|
395
|
50
|
|
|
|
|
if (pos >= lim) { |
|
636
|
0
|
|
|
|
|
|
parse->error = "unexpected end of buffer"; |
|
637
|
0
|
|
|
|
|
|
return false; |
|
638
|
|
|
|
|
|
|
} |
|
639
|
|
|
|
|
|
|
/* high-bit payload == 0 with continue bit set is an error. */ |
|
640
|
395
|
50
|
|
|
|
|
if (*pos == 0x80) { |
|
641
|
0
|
|
|
|
|
|
parse->error = "Over-long encoding of Base128-BE"; |
|
642
|
0
|
|
|
|
|
|
return false; |
|
643
|
|
|
|
|
|
|
} |
|
644
|
395
|
|
|
|
|
|
result= *pos & 0x7F; |
|
645
|
1934
|
100
|
|
|
|
|
while (*pos++ & 0x80) { |
|
646
|
|
|
|
|
|
|
/* Will existing bits overflow UV when shifted? */ |
|
647
|
1539
|
50
|
|
|
|
|
if (result >> (sizeof(UV)*8 - 7)) { |
|
648
|
0
|
|
|
|
|
|
parse->error = "Base128-BE value overflows perl UV"; |
|
649
|
0
|
|
|
|
|
|
return false; |
|
650
|
|
|
|
|
|
|
} |
|
651
|
1539
|
50
|
|
|
|
|
if (pos >= lim) { |
|
652
|
0
|
|
|
|
|
|
parse->error = "unexpected end of buffer"; |
|
653
|
0
|
|
|
|
|
|
return false; |
|
654
|
|
|
|
|
|
|
} |
|
655
|
1539
|
|
|
|
|
|
result= (result << 7) | (*pos & 0x7F); |
|
656
|
|
|
|
|
|
|
} |
|
657
|
395
|
50
|
|
|
|
|
if (out) *out = result; |
|
658
|
395
|
|
|
|
|
|
parse->pos = pos; |
|
659
|
395
|
|
|
|
|
|
parse->error = NULL; |
|
660
|
395
|
|
|
|
|
|
return true; |
|
661
|
|
|
|
|
|
|
} |
|
662
|
|
|
|
|
|
|
|
|
663
|
|
|
|
|
|
|
/* Private API -------------------------------------------------------------*/ |
|
664
|
|
|
|
|
|
|
|
|
665
|
|
|
|
|
|
|
/* Scan raw bytes using only the bitmap */ |
|
666
|
289
|
|
|
|
|
|
static bool sb_parse_match_charset_bytes( |
|
667
|
|
|
|
|
|
|
secret_buffer_parse *parse, |
|
668
|
|
|
|
|
|
|
const secret_buffer_charset *cset, |
|
669
|
|
|
|
|
|
|
int flags |
|
670
|
|
|
|
|
|
|
) { |
|
671
|
289
|
|
|
|
|
|
bool negate= 0 != (flags & SECRET_BUFFER_MATCH_NEGATE); |
|
672
|
289
|
|
|
|
|
|
bool reverse= 0 != (flags & SECRET_BUFFER_MATCH_REVERSE); |
|
673
|
289
|
100
|
|
|
|
|
bool multi= 0 != (flags & SECRET_BUFFER_MATCH_MULTI) || cset->match_multi; |
|
|
|
100
|
|
|
|
|
|
|
674
|
289
|
|
|
|
|
|
bool anchored= 0 != (flags & SECRET_BUFFER_MATCH_ANCHORED); |
|
675
|
289
|
|
|
|
|
|
bool consttime=0 != (flags & SECRET_BUFFER_MATCH_CONST_TIME); |
|
676
|
289
|
100
|
|
|
|
|
int step= reverse? -1 : 1; |
|
677
|
289
|
100
|
|
|
|
|
U8 *pos= reverse? parse->lim-1 : parse->pos, |
|
678
|
289
|
100
|
|
|
|
|
*lim= reverse? parse->pos-1 : parse->lim, |
|
679
|
289
|
|
|
|
|
|
*span_start= NULL; |
|
680
|
|
|
|
|
|
|
//warn("scan_charset_bytes pos=%p lim=%p len=%d", parse->pos, parse->lim, (int)(parse->lim - parse->pos)); |
|
681
|
|
|
|
|
|
|
|
|
682
|
1102
|
100
|
|
|
|
|
while (pos != lim) { |
|
683
|
1097
|
100
|
|
|
|
|
if (sbc_bitmap_test(cset->bitmap, *pos) != negate) { |
|
684
|
|
|
|
|
|
|
// Found. Now are we looking for a span? |
|
685
|
238
|
100
|
|
|
|
|
if (span_start) |
|
686
|
105
|
|
|
|
|
|
break; |
|
687
|
133
|
|
|
|
|
|
span_start= pos; |
|
688
|
133
|
100
|
|
|
|
|
if (!multi) { |
|
689
|
27
|
|
|
|
|
|
pos += step; |
|
690
|
27
|
|
|
|
|
|
break; |
|
691
|
|
|
|
|
|
|
} |
|
692
|
106
|
|
|
|
|
|
negate= !negate; |
|
693
|
859
|
100
|
|
|
|
|
} else if (anchored && !span_start) |
|
|
|
100
|
|
|
|
|
|
|
694
|
152
|
|
|
|
|
|
break; |
|
695
|
813
|
|
|
|
|
|
pos += step; |
|
696
|
|
|
|
|
|
|
} |
|
697
|
|
|
|
|
|
|
/* If constant time operation is requested, we need to perform one sbc_bitmap_test |
|
698
|
|
|
|
|
|
|
* for every character in the span, and make sure the compiler doesn't eliminate it. |
|
699
|
|
|
|
|
|
|
*/ |
|
700
|
289
|
50
|
|
|
|
|
if (consttime) { |
|
701
|
0
|
|
|
|
|
|
volatile bool sink= false; |
|
702
|
0
|
0
|
|
|
|
|
while (pos != lim) { |
|
703
|
0
|
|
|
|
|
|
sink ^= sbc_bitmap_test(cset->bitmap, *pos); |
|
704
|
0
|
|
|
|
|
|
pos += step; |
|
705
|
|
|
|
|
|
|
} |
|
706
|
0
|
|
|
|
|
|
(void) sink; |
|
707
|
|
|
|
|
|
|
} |
|
708
|
|
|
|
|
|
|
// reached end of defined range, and implicitly ends span |
|
709
|
289
|
100
|
|
|
|
|
if (reverse) { |
|
710
|
86
|
|
|
|
|
|
parse->pos= pos + 1; |
|
711
|
86
|
100
|
|
|
|
|
parse->lim= span_start? span_start + 1 : parse->pos; |
|
712
|
|
|
|
|
|
|
} else { |
|
713
|
203
|
|
|
|
|
|
parse->lim= pos; |
|
714
|
203
|
100
|
|
|
|
|
parse->pos= span_start? span_start : parse->lim; |
|
715
|
|
|
|
|
|
|
} |
|
716
|
289
|
|
|
|
|
|
return span_start != NULL; |
|
717
|
|
|
|
|
|
|
} |
|
718
|
|
|
|
|
|
|
|
|
719
|
9
|
|
|
|
|
|
static bool sb_parse_match_charset_codepoints( |
|
720
|
|
|
|
|
|
|
secret_buffer_parse *parse, |
|
721
|
|
|
|
|
|
|
const secret_buffer_charset *cset, |
|
722
|
|
|
|
|
|
|
int flags |
|
723
|
|
|
|
|
|
|
) { |
|
724
|
|
|
|
|
|
|
dTHX; |
|
725
|
9
|
|
|
|
|
|
bool negate= 0 != (flags & SECRET_BUFFER_MATCH_NEGATE); |
|
726
|
9
|
|
|
|
|
|
bool reverse= 0 != (flags & SECRET_BUFFER_MATCH_REVERSE); |
|
727
|
9
|
50
|
|
|
|
|
bool multi= 0 != (flags & SECRET_BUFFER_MATCH_MULTI) || cset->match_multi; |
|
|
|
100
|
|
|
|
|
|
|
728
|
9
|
|
|
|
|
|
bool anchored= 0 != (flags & SECRET_BUFFER_MATCH_ANCHORED); |
|
729
|
9
|
|
|
|
|
|
bool consttime=0 != (flags & SECRET_BUFFER_MATCH_CONST_TIME); |
|
730
|
9
|
|
|
|
|
|
bool span_started= false; |
|
731
|
9
|
|
|
|
|
|
bool encoding_error= false; |
|
732
|
9
|
100
|
|
|
|
|
U8 *span_mark= NULL, *prev_mark= reverse? parse->lim : parse->pos; |
|
733
|
|
|
|
|
|
|
|
|
734
|
37
|
50
|
|
|
|
|
while (parse->pos < parse->lim) { |
|
735
|
19
|
|
|
|
|
|
int codepoint= reverse? sb_parse_prev_codepoint(parse) |
|
736
|
37
|
100
|
|
|
|
|
: sb_parse_next_codepoint(parse); |
|
737
|
|
|
|
|
|
|
// warn("parse.pos=%p parse.lim=%p parse.enc=%d cp=%d parse.err=%s", parse->pos, parse->lim, parse->encoding, codepoint, parse->error); |
|
738
|
37
|
100
|
|
|
|
|
if (codepoint < 0) {// encoding error |
|
739
|
1
|
|
|
|
|
|
encoding_error= true; |
|
740
|
1
|
|
|
|
|
|
break; |
|
741
|
|
|
|
|
|
|
} |
|
742
|
36
|
100
|
|
|
|
|
if (sbc_test_codepoint(aTHX_ cset, codepoint) != negate) { |
|
743
|
|
|
|
|
|
|
// Found. Mark boundaries of char. |
|
744
|
|
|
|
|
|
|
// Now are we looking for a span? |
|
745
|
10
|
100
|
|
|
|
|
if (span_started) |
|
746
|
2
|
|
|
|
|
|
break; |
|
747
|
8
|
|
|
|
|
|
span_started= true; |
|
748
|
8
|
|
|
|
|
|
span_mark= prev_mark; |
|
749
|
8
|
|
|
|
|
|
negate= !negate; |
|
750
|
8
|
100
|
|
|
|
|
if (!multi) { |
|
751
|
6
|
100
|
|
|
|
|
prev_mark= reverse? parse->lim : parse->pos; |
|
752
|
6
|
|
|
|
|
|
break; |
|
753
|
|
|
|
|
|
|
} |
|
754
|
26
|
50
|
|
|
|
|
} else if (anchored && !span_started) |
|
|
|
0
|
|
|
|
|
|
|
755
|
0
|
|
|
|
|
|
break; |
|
756
|
28
|
100
|
|
|
|
|
prev_mark= reverse? parse->lim : parse->pos; |
|
757
|
|
|
|
|
|
|
} |
|
758
|
|
|
|
|
|
|
/* If constant time operation is requested, we need to perform one sbc_bitmap_test |
|
759
|
|
|
|
|
|
|
* for every character in the span, and make sure the compiler doesn't eliminate it. |
|
760
|
|
|
|
|
|
|
*/ |
|
761
|
9
|
50
|
|
|
|
|
if (consttime) { |
|
762
|
0
|
|
|
|
|
|
volatile bool sink= false; |
|
763
|
0
|
0
|
|
|
|
|
while (parse->pos < parse->lim) { |
|
764
|
0
|
|
|
|
|
|
int codepoint= reverse? sb_parse_prev_codepoint(parse) |
|
765
|
0
|
0
|
|
|
|
|
: sb_parse_next_codepoint(parse); |
|
766
|
|
|
|
|
|
|
// warn("parse.pos=%p parse.lim=%p parse.enc=%d cp=%d parse.err=%s", parse->pos, parse->lim, parse->encoding, codepoint, parse->error); |
|
767
|
0
|
0
|
|
|
|
|
if (codepoint < 0) { // encoding error |
|
768
|
0
|
|
|
|
|
|
encoding_error= true; |
|
769
|
0
|
|
|
|
|
|
sink ^= sbc_test_codepoint(aTHX_ cset, 0); |
|
770
|
|
|
|
|
|
|
} |
|
771
|
|
|
|
|
|
|
else |
|
772
|
0
|
|
|
|
|
|
sink ^= sbc_test_codepoint(aTHX_ cset, codepoint); |
|
773
|
|
|
|
|
|
|
} |
|
774
|
0
|
|
|
|
|
|
(void) sink; |
|
775
|
|
|
|
|
|
|
} |
|
776
|
9
|
100
|
|
|
|
|
if (encoding_error) |
|
777
|
1
|
|
|
|
|
|
return false; |
|
778
|
|
|
|
|
|
|
// reached end of defined range |
|
779
|
8
|
50
|
|
|
|
|
if (span_started) { // and implicitly ends span |
|
780
|
8
|
100
|
|
|
|
|
if (reverse) { |
|
781
|
5
|
|
|
|
|
|
parse->pos= prev_mark; |
|
782
|
5
|
|
|
|
|
|
parse->lim= span_mark; |
|
783
|
|
|
|
|
|
|
} |
|
784
|
|
|
|
|
|
|
else { |
|
785
|
3
|
|
|
|
|
|
parse->pos= span_mark; |
|
786
|
3
|
|
|
|
|
|
parse->lim= prev_mark; |
|
787
|
|
|
|
|
|
|
} |
|
788
|
8
|
|
|
|
|
|
return true; |
|
789
|
|
|
|
|
|
|
} |
|
790
|
0
|
|
|
|
|
|
return false; |
|
791
|
|
|
|
|
|
|
} |
|
792
|
|
|
|
|
|
|
|
|
793
|
18
|
|
|
|
|
|
/* Compare two spans codepoint by codepoint, each decoded per its own encoding.
 *
 * Returns -1, 0 or 1.  The first differing pair of codepoints decides the
 * result; if no pair differs, the span with codepoints left over is greater.
 * Both parse states are consumed as they are decoded.  Croaks if either span
 * contains a sequence that cannot be decoded.
 *
 * The loop does not exit at the first difference: it keeps decoding until
 * one of the spans is exhausted.
 */
int sb_parse_codepointcmp(secret_buffer_parse *lhs, secret_buffer_parse *rhs) {
   volatile int order= 0;

   for (;;) {
      I32 left_cp, right_cp;
      if (!(lhs->pos < lhs->lim))
         break;
      if (!(rhs->pos < rhs->lim))
         break;
      left_cp= sb_parse_next_codepoint(lhs);
      if (left_cp < 0)
         croak("Encoding error in left-hand buffer");
      right_cp= sb_parse_next_codepoint(rhs);
      if (right_cp < 0)
         croak("Encoding error in right-hand buffer");
      /* Only the first difference is recorded */
      if (left_cp != right_cp) {
         if (!order)
            order= left_cp < right_cp? -1 : 1;
      }
   }
   if (order)
      return order;
   if (lhs->pos < lhs->lim)
      return 1;   /* right string shorter than left */
   if (rhs->pos < rhs->lim)
      return -1;  /* left string shorter than right */
   return 0;
}
|
812
|
|
|
|
|
|
|
|
|
813
|
|
|
|
|
|
|
/* UTF-8 decoding helper */ |
|
814
|
3369
|
|
|
|
|
|
static int sb_parse_next_codepoint(secret_buffer_parse *parse) { |
|
815
|
3369
|
|
|
|
|
|
U8 *pos= parse->pos, *lim= parse->lim; |
|
816
|
3369
|
|
|
|
|
|
int cp, encoding= parse->encoding; |
|
817
|
|
|
|
|
|
|
#define SB_RETURN_ERROR(msg) { parse->error= msg; return -1; } |
|
818
|
|
|
|
|
|
|
|
|
819
|
3369
|
100
|
|
|
|
|
if (encoding == SECRET_BUFFER_ENCODING_ASCII |
|
820
|
3368
|
100
|
|
|
|
|
|| encoding == SECRET_BUFFER_ENCODING_ISO8859_1 |
|
821
|
577
|
100
|
|
|
|
|
|| encoding == SECRET_BUFFER_ENCODING_UTF8 |
|
822
|
|
|
|
|
|
|
) { |
|
823
|
3127
|
50
|
|
|
|
|
if (lim - pos < 1) |
|
824
|
0
|
|
|
|
|
|
SB_RETURN_ERROR("end of span") |
|
825
|
3127
|
|
|
|
|
|
cp= *pos++; |
|
826
|
3127
|
100
|
|
|
|
|
if (cp >= 0x80 && encoding == SECRET_BUFFER_ENCODING_ASCII) |
|
|
|
100
|
|
|
|
|
|
|
827
|
1
|
|
|
|
|
|
SB_RETURN_ERROR("not 7-bit ASCII") |
|
828
|
3126
|
100
|
|
|
|
|
else if (cp >= 0x80 && encoding == SECRET_BUFFER_ENCODING_UTF8) { |
|
|
|
100
|
|
|
|
|
|
|
829
|
47
|
|
|
|
|
|
int min_cp= 0; |
|
830
|
47
|
|
|
|
|
|
switch ((cp >> 3) & 0xF) { |
|
831
|
13
|
|
|
|
|
|
case 14: // 0b1[1110]yyy |
|
832
|
13
|
50
|
|
|
|
|
{ if (lim - pos < 3) goto incomplete; |
|
833
|
13
|
|
|
|
|
|
min_cp= 0x10000; |
|
834
|
13
|
|
|
|
|
|
cp &= 0x07; |
|
835
|
|
|
|
|
|
|
} |
|
836
|
13
|
50
|
|
|
|
|
if ((*pos & 0xC0) != 0x80) goto invalid; |
|
837
|
13
|
|
|
|
|
|
cp= (cp << 6) | (*pos++ & 0x3F); |
|
838
|
|
|
|
|
|
|
if (0) |
|
839
|
|
|
|
|
|
|
case 12: case 13: // 0b1[110x]yyy |
|
840
|
14
|
50
|
|
|
|
|
{ if (lim - pos < 2) goto incomplete; |
|
841
|
14
|
|
|
|
|
|
min_cp= 0x800; |
|
842
|
14
|
|
|
|
|
|
cp &= 0x0F; |
|
843
|
|
|
|
|
|
|
} |
|
844
|
27
|
50
|
|
|
|
|
if ((*pos & 0xC0) != 0x80) goto invalid; |
|
845
|
27
|
|
|
|
|
|
cp= (cp << 6) | (*pos++ & 0x3F); |
|
846
|
|
|
|
|
|
|
if (0) |
|
847
|
|
|
|
|
|
|
case 8: case 9: case 10: case 11: // 0b1[10xx]yyy |
|
848
|
20
|
50
|
|
|
|
|
{ if (lim - pos < 1) goto incomplete; |
|
849
|
20
|
|
|
|
|
|
min_cp= 0x80; |
|
850
|
20
|
|
|
|
|
|
cp &= 0x1F; |
|
851
|
|
|
|
|
|
|
} |
|
852
|
47
|
50
|
|
|
|
|
if ((*pos & 0xC0) != 0x80) goto invalid; |
|
853
|
47
|
|
|
|
|
|
cp= (cp << 6) | (*pos++ & 0x3F); |
|
854
|
47
|
|
|
|
|
|
break; |
|
855
|
|
|
|
|
|
|
default: |
|
856
|
0
|
|
|
|
|
|
invalid: SB_RETURN_ERROR("invalid UTF8 character") |
|
857
|
0
|
|
|
|
|
|
incomplete: SB_RETURN_ERROR("incomplete UTF8 character") |
|
858
|
|
|
|
|
|
|
} |
|
859
|
47
|
50
|
|
|
|
|
if (cp < min_cp) |
|
860
|
0
|
|
|
|
|
|
SB_RETURN_ERROR("overlong encoding of UTF8 character") |
|
861
|
47
|
50
|
|
|
|
|
else if (cp > 0x10FFFF) |
|
862
|
0
|
|
|
|
|
|
SB_RETURN_ERROR("UTF8 character exceeds max") |
|
863
|
|
|
|
|
|
|
} |
|
864
|
|
|
|
|
|
|
// else all ISO-8859-1 bytes are valid codepoints |
|
865
|
|
|
|
|
|
|
} |
|
866
|
242
|
100
|
|
|
|
|
else if (encoding == SECRET_BUFFER_ENCODING_UTF16LE |
|
867
|
221
|
100
|
|
|
|
|
|| encoding == SECRET_BUFFER_ENCODING_UTF16BE |
|
868
|
36
|
|
|
|
|
|
) { |
|
869
|
36
|
|
|
|
|
|
int low= encoding == SECRET_BUFFER_ENCODING_UTF16LE? 0 : 1; |
|
870
|
36
|
50
|
|
|
|
|
if (lim - pos < 2) |
|
871
|
0
|
|
|
|
|
|
SB_RETURN_ERROR("end of span") |
|
872
|
36
|
|
|
|
|
|
cp= pos[low] | ((int)pos[low^1] << 8); |
|
873
|
36
|
|
|
|
|
|
pos += 2; |
|
874
|
36
|
100
|
|
|
|
|
if (cp >= 0xD800 && cp <= 0xDFFF) { |
|
|
|
50
|
|
|
|
|
|
|
875
|
10
|
50
|
|
|
|
|
if (lim - pos < 2) |
|
876
|
0
|
|
|
|
|
|
SB_RETURN_ERROR("incomplete UTF16 character") |
|
877
|
10
|
|
|
|
|
|
int w2= pos[low] | ((int)pos[low^1] << 8); |
|
878
|
10
|
|
|
|
|
|
pos += 2; |
|
879
|
10
|
50
|
|
|
|
|
if (w2 < 0xDC00 || w2 > 0xDFFF) |
|
|
|
50
|
|
|
|
|
|
|
880
|
0
|
|
|
|
|
|
SB_RETURN_ERROR("invalid UTF16 low surrogate") |
|
881
|
10
|
|
|
|
|
|
cp = 0x10000 + (((cp & 0x3FF) << 10) | (w2 & 0x3FF)); |
|
882
|
|
|
|
|
|
|
} |
|
883
|
|
|
|
|
|
|
} |
|
884
|
206
|
100
|
|
|
|
|
else if (encoding == SECRET_BUFFER_ENCODING_HEX) { |
|
885
|
|
|
|
|
|
|
// Skip over whitespace |
|
886
|
38
|
50
|
|
|
|
|
while (pos < lim && isspace(*pos)) |
|
|
|
50
|
|
|
|
|
|
|
887
|
0
|
|
|
|
|
|
pos++; |
|
888
|
38
|
50
|
|
|
|
|
if (lim - pos < 2) |
|
889
|
0
|
|
|
|
|
|
SB_RETURN_ERROR("end of span") |
|
890
|
38
|
|
|
|
|
|
int high= *pos++ - '0'; |
|
891
|
38
|
|
|
|
|
|
int low= *pos++ - '0'; |
|
892
|
38
|
50
|
|
|
|
|
if (low >= ('a'-'0')) low -= ('a'-'0'-10); |
|
893
|
38
|
100
|
|
|
|
|
else if (low >= ('A'-'0')) low -= ('A'-'0'-10); |
|
894
|
38
|
50
|
|
|
|
|
if (high >= ('a'-'0')) high -= ('a'-'0'-10); |
|
895
|
38
|
100
|
|
|
|
|
else if (high >= ('A'-'0')) high -= ('A'-'0'-10); |
|
896
|
38
|
50
|
|
|
|
|
if ((low >> 4) | (high >> 4)) |
|
897
|
0
|
|
|
|
|
|
SB_RETURN_ERROR("not a pair of hex digits") |
|
898
|
38
|
|
|
|
|
|
cp= (high << 4) | low; |
|
899
|
|
|
|
|
|
|
// skip over whitespace if it takes us to the end of buffer so that caller |
|
900
|
|
|
|
|
|
|
// knows it's EOF before trying another decode. |
|
901
|
38
|
100
|
|
|
|
|
while (pos < lim && isspace(*pos)) |
|
|
|
50
|
|
|
|
|
|
|
902
|
0
|
|
|
|
|
|
pos++; |
|
903
|
|
|
|
|
|
|
} |
|
904
|
168
|
50
|
|
|
|
|
else if (encoding == SECRET_BUFFER_ENCODING_BASE64) { |
|
905
|
|
|
|
|
|
|
// Skip over whitespace and control chars |
|
906
|
168
|
50
|
|
|
|
|
while (pos < lim && *pos <= ' ') |
|
|
|
50
|
|
|
|
|
|
|
907
|
0
|
|
|
|
|
|
pos++; |
|
908
|
|
|
|
|
|
|
// There need to be at least 2 base64 characters left |
|
909
|
168
|
50
|
|
|
|
|
if (pos < lim) { |
|
910
|
168
|
50
|
|
|
|
|
if (base64_decode_table[*pos] < 0) |
|
911
|
0
|
|
|
|
|
|
SB_RETURN_ERROR("invalid base64 character"); |
|
912
|
|
|
|
|
|
|
// ->pos_bit > 0 means pointer is pointing at a sub-bit of the base64 |
|
913
|
|
|
|
|
|
|
// character at *pos (and possible values are 0, 2, or 4) |
|
914
|
168
|
|
|
|
|
|
cp= (((int)base64_decode_table[*pos++]) << (2 + parse->pos_bit)) & 0xFF; |
|
915
|
168
|
50
|
|
|
|
|
while (pos < lim && *pos <= ' ') |
|
|
|
50
|
|
|
|
|
|
|
916
|
0
|
|
|
|
|
|
pos++; |
|
917
|
|
|
|
|
|
|
} |
|
918
|
168
|
50
|
|
|
|
|
if (pos >= lim) { |
|
919
|
0
|
|
|
|
|
|
parse->pos_bit= 0; |
|
920
|
0
|
|
|
|
|
|
SB_RETURN_ERROR("end of span") |
|
921
|
|
|
|
|
|
|
} |
|
922
|
168
|
50
|
|
|
|
|
if (base64_decode_table[*pos] < 0) |
|
923
|
0
|
|
|
|
|
|
SB_RETURN_ERROR("invalid base64 character"); |
|
924
|
168
|
|
|
|
|
|
cp |= base64_decode_table[*pos] >> (4-parse->pos_bit); |
|
925
|
168
|
|
|
|
|
|
parse->pos_bit += 2; |
|
926
|
|
|
|
|
|
|
// If pos_bit == 6 we've completed a set of 4 b64 chars and fully consumed them. |
|
927
|
168
|
100
|
|
|
|
|
if (parse->pos_bit >= 6) { |
|
928
|
51
|
|
|
|
|
|
pos++; |
|
929
|
51
|
|
|
|
|
|
parse->pos_bit= 0; |
|
930
|
|
|
|
|
|
|
// consume trailing whitespace |
|
931
|
55
|
100
|
|
|
|
|
while (pos < lim && *pos <= ' ') |
|
|
|
100
|
|
|
|
|
|
|
932
|
4
|
|
|
|
|
|
pos++; |
|
933
|
|
|
|
|
|
|
} |
|
934
|
|
|
|
|
|
|
else { |
|
935
|
|
|
|
|
|
|
// if next char is '=', terminate the decoding |
|
936
|
117
|
|
|
|
|
|
U8 *next= pos+1; |
|
937
|
117
|
50
|
|
|
|
|
while (next < lim && *next <= ' ') |
|
|
|
50
|
|
|
|
|
|
|
938
|
0
|
|
|
|
|
|
next++; |
|
939
|
117
|
50
|
|
|
|
|
if (next < lim && *next == '=') { |
|
|
|
100
|
|
|
|
|
|
|
940
|
13
|
|
|
|
|
|
pos= lim; // indicate parsing complete |
|
941
|
13
|
|
|
|
|
|
parse->pos_bit= 0; |
|
942
|
|
|
|
|
|
|
} |
|
943
|
|
|
|
|
|
|
} |
|
944
|
|
|
|
|
|
|
} |
|
945
|
0
|
0
|
|
|
|
|
else if (encoding == SECRET_BUFFER_ENCODING_I32) { |
|
946
|
0
|
0
|
|
|
|
|
if (lim - pos < 4) |
|
947
|
0
|
|
|
|
|
|
SB_RETURN_ERROR("end of span"); |
|
948
|
0
|
|
|
|
|
|
cp= *(I32*)pos; |
|
949
|
0
|
|
|
|
|
|
pos+= 4; |
|
950
|
|
|
|
|
|
|
} |
|
951
|
0
|
|
|
|
|
|
else SB_RETURN_ERROR("unsupported encoding") |
|
952
|
3368
|
|
|
|
|
|
parse->pos= pos; |
|
953
|
3368
|
|
|
|
|
|
return cp; |
|
954
|
|
|
|
|
|
|
#undef SB_RETURN_ERROR |
|
955
|
|
|
|
|
|
|
} |
|
956
|
|
|
|
|
|
|
|
|
957
|
850
|
|
|
|
|
|
/* Decode one codepoint from the END of the span [parse->pos, parse->lim),
 * according to parse->encoding.
 *
 * Returns the decoded value and moves parse->lim back to just before it.
 * On failure returns -1 and sets parse->error to a static message;
 * parse->pos and parse->lim are left unchanged in that case (for BASE64,
 * parse->lim_bit may be reset to 0).
 *
 * This is the mirror image of sb_parse_next_codepoint.  For multi-byte UTF8
 * it backs up to the lead byte and re-uses the forward decoder.
 */
static int sb_parse_prev_codepoint(secret_buffer_parse *parse) {
   U8 *pos= parse->pos, *lim= parse->lim;
   int encoding= parse->encoding;
   int cp;
   #define SB_RETURN_ERROR(msg) { parse->error= msg; return -1; }

   if (encoding == SECRET_BUFFER_ENCODING_ASCII
    || encoding == SECRET_BUFFER_ENCODING_ISO8859_1
    || encoding == SECRET_BUFFER_ENCODING_UTF8
   ) {
      if (lim <= pos)
         SB_RETURN_ERROR("end of span")
      cp= *--lim;
      // handle the simple case first
      if (cp >= 0x80 && encoding != SECRET_BUFFER_ENCODING_ISO8859_1) {
         U8 *start;
         // Strict ASCII can't encode above 0x7F
         if (encoding == SECRET_BUFFER_ENCODING_ASCII)
            SB_RETURN_ERROR("not 7-bit ASCII")
         // else need to backtrack and then call next_codepoint.
         // Never step below 'pos': if only continuation bytes remain, the
         // forward decoder rejects the one at 'pos' as an invalid lead byte.
         start= lim;
         while (start > pos && (*start & 0xC0) == 0x80)
            --start;
         parse->pos= start;
         cp= sb_parse_next_codepoint(parse);
         if (parse->pos != parse->lim) {// consumed all characters we gave it?
            parse->pos= pos; // restore original pos
            if (cp >= 0) // had a valid char, but extra 0x80 bytes
               parse->error= "invalid UTF8 character";
            // else use the error message from next_codepoint
            return -1;
         }
         parse->pos= pos; // restore original pos
         lim= start;      // new lim is where we started the parse from
      }
   }
   else if (encoding == SECRET_BUFFER_ENCODING_UTF16LE
         || encoding == SECRET_BUFFER_ENCODING_UTF16BE
   ) {
      // index of the low-order byte within each 16-bit unit
      int low= (encoding == SECRET_BUFFER_ENCODING_UTF16LE)? 0 : 1;
      if (lim - pos < 2)
         SB_RETURN_ERROR("end of span");
      // handle the simple case first
      lim -= 2;
      cp= lim[low] | ((int)lim[low^1] << 8);
      if (cp >= 0xD800 && cp <= 0xDFFF) {
         int w1;
         // 'lim' has already moved back over the trailing unit, so only the
         // 2 bytes of the leading unit need to remain in front of it.
         if (lim - pos < 2)
            SB_RETURN_ERROR("end of span");
         lim -= 2;
         w1= lim[low] | ((int)lim[low^1] << 8);
         // leading unit must be a high surrogate, trailing unit a low one
         if (w1 < 0xD800 || w1 > 0xDBFF || cp < 0xDC00)
            SB_RETURN_ERROR("invalid UTF16 surrogate");
         cp = 0x10000 + (((w1 & 0x3FF) << 10) | (cp & 0x3FF));
      }
   }
   else if (encoding == SECRET_BUFFER_ENCODING_HEX) {
      int high, low;
      // Skip over whitespace
      while (pos < lim && isspace(lim[-1]))
         lim--;
      if (lim - pos < 2)
         SB_RETURN_ERROR((pos == lim? "end of span" : "incomplete hex pair at end of span"))
      low= *--lim - '0';
      high= *--lim - '0';
      // Map letters onto 10..15.  Anything still above 9 that was not a
      // letter came from ':' .. '@', which are not hex digits.
      if (low >= ('a'-'0')) low -= ('a'-'0'-10);
      else if (low >= ('A'-'0')) low -= ('A'-'0'-10);
      else if (low > 9) low= -1;
      if (high >= ('a'-'0')) high -= ('a'-'0'-10);
      else if (high >= ('A'-'0')) high -= ('A'-'0'-10);
      else if (high > 9) high= -1;
      if (low < 0 || low > 15 || high < 0 || high > 15)
         SB_RETURN_ERROR("not a pair of hex digits")
      cp= (high << 4) | low;
   }
   else if (encoding == SECRET_BUFFER_ENCODING_BASE64) {
      bool again;
      do {
         again= false;
         // Skip over non-base64 chars
         while (pos < lim && base64_decode_table[lim[-1]] < 0)
            lim--;
         if (pos < lim) {
            if (base64_decode_table[lim[-1]] < 0)
               SB_RETURN_ERROR("invalid base64 character");
            // ->lim_bit > 0 means the character lim[-1] is partially consumed.
            // (sequence is 0, 2, 4, 0)
            cp= ((int)base64_decode_table[lim[-1]]) >> parse->lim_bit;
            // parsing an equal sign means 'cp' is bogus and need to go again
            if (lim[-1] == '=')
               again= true;
            --lim;
            // find next base64 char
            while (pos < lim && base64_decode_table[lim[-1]] < 0)
               lim--;
         }
         if (pos >= lim) {
            parse->lim_bit= 0;
            SB_RETURN_ERROR("end of span")
         }
         if (base64_decode_table[lim[-1]] < 0)
            SB_RETURN_ERROR("invalid base64 character");
         // the high bits of this byte come from the preceding character
         cp |= (((int)base64_decode_table[lim[-1]]) << (6 - parse->lim_bit)) & 0xFF;
         parse->lim_bit += 2;
         if (parse->lim_bit >= 6) {
            parse->lim_bit= 0;
            // If completed a set of 4 b64 chars, lim[-1] is consumed, and need to
            // walk backward to find next base64 char
            --lim;
            while (pos < lim && base64_decode_table[lim[-1]] < 0)
               lim--;
         }
      } while (again);
   }
   else if (encoding == SECRET_BUFFER_ENCODING_I32) {
      I32 word;
      if (lim - pos < 4)
         SB_RETURN_ERROR("end of span");
      lim -= 4;
      // 'lim' is a byte pointer with no alignment guarantee, so copy the
      // value out instead of dereferencing it as an I32.
      memcpy(&word, lim, sizeof word);
      cp= word;
   }
   else SB_RETURN_ERROR("unsupported encoding")
   parse->lim= lim;
   return cp;
   #undef SB_RETURN_ERROR
}
|
1080
|
|
|
|
|
|
|
|
|
1081
|
1202
|
|
|
|
|
|
static int sizeof_codepoint_encoding(int codepoint, int encoding) { |
|
1082
|
1202
|
50
|
|
|
|
|
if (encoding == SECRET_BUFFER_ENCODING_ASCII) |
|
1083
|
0
|
0
|
|
|
|
|
return codepoint < 0x80? 1 : -1; |
|
1084
|
1202
|
100
|
|
|
|
|
if (encoding == SECRET_BUFFER_ENCODING_ISO8859_1) |
|
1085
|
110
|
50
|
|
|
|
|
return codepoint < 0x100? 1 : -1; |
|
1086
|
1092
|
100
|
|
|
|
|
else if (encoding == SECRET_BUFFER_ENCODING_UTF8) |
|
1087
|
736
|
100
|
|
|
|
|
return codepoint < 0x80? 1 : codepoint < 0x800? 2 : codepoint < 0x10000? 3 : 4; |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
1088
|
356
|
50
|
|
|
|
|
else if (encoding == SECRET_BUFFER_ENCODING_UTF16LE |
|
1089
|
356
|
50
|
|
|
|
|
|| encoding == SECRET_BUFFER_ENCODING_UTF16BE) |
|
1090
|
0
|
0
|
|
|
|
|
return codepoint >= 0xD800 && codepoint < 0xE000? -1 |
|
1091
|
0
|
0
|
|
|
|
|
: codepoint < 0x10000? 2 : 4; |
|
|
|
0
|
|
|
|
|
|
|
1092
|
356
|
100
|
|
|
|
|
else if (encoding == SECRET_BUFFER_ENCODING_HEX) |
|
1093
|
6
|
50
|
|
|
|
|
return codepoint < 0x100? 2 : -1; |
|
1094
|
|
|
|
|
|
|
/* Base64 would need to track an accumulator, so just return 1 and fix it in the caller */ |
|
1095
|
350
|
100
|
|
|
|
|
else if (encoding == SECRET_BUFFER_ENCODING_BASE64) |
|
1096
|
78
|
50
|
|
|
|
|
return codepoint < 0x100? 1 : -1; |
|
1097
|
272
|
50
|
|
|
|
|
else if (encoding == SECRET_BUFFER_ENCODING_I32) |
|
1098
|
272
|
|
|
|
|
|
return 4; |
|
1099
|
|
|
|
|
|
|
else |
|
1100
|
0
|
|
|
|
|
|
return -1; |
|
1101
|
|
|
|
|
|
|
} |
|
1102
|
|
|
|
|
|
|
|
|
1103
|
444
|
|
|
|
|
|
/* Encode a single codepoint at dst->pos using dst->encoding.
 *
 * dst        parse state; bytes are written in the range [dst->pos, dst->lim)
 * codepoint  value to encode; must be in the range 0 .. 0x10FFFF
 *
 * Returns true on success, after advancing dst->pos past the bytes written.
 * Returns false on failure, with dst->error pointing at a static message; in
 * that case neither dst->pos nor the destination bytes have been modified.
 *
 * Failure cases:
 *   "invalid codepoint"                negative, or above 0x10FFFF
 *   "character too wide for encoding"  sizeof_codepoint_encoding() gave -1
 *   "buffer too small"                 fewer than the needed bytes before lim
 *   "unsupported encoding"             an encoding with no writer here
 */
static bool sb_parse_encode_codepoint(secret_buffer_parse *dst, int codepoint) {
   #define SB_RETURN_ERROR(msg) do { dst->error= (msg); return false; } while (0)
   static const char hex_digits[]= "0123456789ABCDEF";
   int encoding= dst->encoding, n;
   U8 *dst_pos= dst->pos;

   // Negative values would otherwise pass the "< 0x80" / "< 0x100" size tests
   // and be silently truncated; codepoints above 0x10FFFF are illegal.
   if (codepoint < 0 || codepoint >= 0x110000)
      SB_RETURN_ERROR("invalid codepoint");
   // not quite as efficient as checking during the code below, but saves a bunch of redundancy
   n= sizeof_codepoint_encoding(codepoint, encoding);
   if (n < 0)
      SB_RETURN_ERROR("character too wide for encoding");
   if (dst->lim - dst_pos < n)
      SB_RETURN_ERROR("buffer too small");

   if (encoding == SECRET_BUFFER_ENCODING_ASCII
    || encoding == SECRET_BUFFER_ENCODING_ISO8859_1
    || encoding == SECRET_BUFFER_ENCODING_UTF8
   ) {
      // ASCII and ISO-8859-1 always have n == 1 here, so they share the
      // single-byte case with UTF-8.
      switch ((n-1)&0x3) { // help the compiler understand there are only 4 possible values
      case 0:
         dst_pos[0]= (U8) codepoint;
         break;
      case 1:
         dst_pos[0]= (U8)(0xC0 | (codepoint >> 6));
         dst_pos[1]= (U8)(0x80 | (codepoint & 0x3F));
         break;
      case 2:
         dst_pos[0]= (U8)(0xE0 | (codepoint >> 12));
         dst_pos[1]= (U8)(0x80 | ((codepoint >> 6) & 0x3F));
         dst_pos[2]= (U8)(0x80 | (codepoint & 0x3F));
         break;
      case 3:
         dst_pos[0]= (U8)(0xF0 | (codepoint >> 18));
         dst_pos[1]= (U8)(0x80 | ((codepoint >> 12) & 0x3F));
         dst_pos[2]= (U8)(0x80 | ((codepoint >> 6) & 0x3F));
         dst_pos[3]= (U8)(0x80 | (codepoint & 0x3F));
         break;
      }
   }
   else if (encoding == SECRET_BUFFER_ENCODING_UTF16LE
         || encoding == SECRET_BUFFER_ENCODING_UTF16BE
   ) {
      // 'low' is the offset of the low-order byte inside each 16-bit unit;
      // XOR with 1 selects the other byte of the same unit.
      int low= (encoding == SECRET_BUFFER_ENCODING_UTF16LE)? 0 : 1;
      if (n == 2) {
         dst_pos[low]=   (U8)(codepoint & 0xFF);
         dst_pos[low^1]= (U8)(codepoint >> 8);
      }
      else {
         // Split into a high (w0) and low (w1) surrogate pair.
         int adjusted= codepoint - 0x10000;
         int w0= 0xD800 | (adjusted >> 10);
         int w1= 0xDC00 | (adjusted & 0x3FF);
         dst_pos[low]=   (U8)(w0 & 0xFF);
         dst_pos[1^low]= (U8)(w0 >> 8);
         dst_pos[2^low]= (U8)(w1 & 0xFF);
         dst_pos[3^low]= (U8)(w1 >> 8);
      }
   }
   else if (encoding == SECRET_BUFFER_ENCODING_HEX) {
      dst_pos[0]= hex_digits[(codepoint >> 4) & 0xF];
      dst_pos[1]= hex_digits[codepoint & 0xF];
   }
   else if (encoding == SECRET_BUFFER_ENCODING_I32) {
      // dst_pos is a byte pointer with no alignment guarantee, so copy the
      // value instead of storing through a cast I32 pointer.  Byte order is
      // the host's, same as a direct store.
      I32 value= codepoint;
      memcpy(dst_pos, &value, sizeof value);
   }
   /* BASE64 is not handled here because the '=' padding can only be generated in
    * a context that knows when we are ending on a non-multiple-of-4. */
   else
      SB_RETURN_ERROR("unsupported encoding");

   // Commit the cursor only after the bytes were actually written, so every
   // failure path above leaves dst->pos where the caller had it.
   dst->pos += n;
   return true;
   #undef SB_RETURN_ERROR
}
|
1170
|
|
|
|
|
|
|
|
|
1171
|
|
|
|
|
|
|
/* Pull in secret_buffer_parse_match_str.c with the function name set to
 * sb_parse_match_str_U8 and the pattern element type set to 'const U8'.
 * (Presumably the included file is a template that defines the function
 * prototyped earlier in this file -- confirm against that file.)
 * Both macros are removed again afterward so the file can be re-included
 * with different settings.
 */
#define SB_PATTERN_EL_TYPE const U8
#define SB_PARSE_MATCH_STR_FN sb_parse_match_str_U8
#include "secret_buffer_parse_match_str.c"
#undef SB_PATTERN_EL_TYPE
#undef SB_PARSE_MATCH_STR_FN
|
1176
|
|
|
|
|
|
|
|
|
1177
|
|
|
|
|
|
|
/* Pull in secret_buffer_parse_match_str.c with the function name set to
 * sb_parse_match_str_I32 and the pattern element type set to 'const I32'.
 * (Presumably the included file is a template that defines the function
 * prototyped earlier in this file -- confirm against that file.)
 * Both macros are removed again afterward so they do not leak into the
 * rest of this translation unit.
 */
#define SB_PATTERN_EL_TYPE const I32
#define SB_PARSE_MATCH_STR_FN sb_parse_match_str_I32
#include "secret_buffer_parse_match_str.c"
#undef SB_PATTERN_EL_TYPE
#undef SB_PARSE_MATCH_STR_FN