File Coverage

decode.c
Criterion Covered Total %
statement 620 673 92.1
branch 329 462 71.2
condition n/a
subroutine n/a
pod n/a
total 949 1135 83.6


line stmt bran cond sub pod time code
1             #define PERL_NO_GET_CONTEXT
2             #include "EXTERN.h"
3             #include "perl.h"
4             #include "XSUB.h"
5              
6             #include
7             #include
8              
9             #include "types.h"
10             #include "decimal.h"
11             #include "datetime.h"
12             #include "json_kind.h"
13             #include "decode.h"
14              
15              
16             /* ===== DECODER ============================================================
17             * Symmetric counterpart to encode_column. Reads raw Native bytes through
18             * a (cursor, end) pair, recursively building SVs. parse_type() returns
19             * the same TypeInfo* used on the encode side, so the type tree is shared. */
20              
/* DEC_NEED(needed): croak unless at least `needed` bytes remain between
 * the cursor `*p` and `end` (both must be in scope at the call site).
 *
 * The comparison is done on the remaining-byte count rather than on
 * `*p + needed`: `needed` can come straight from a wire varint, and
 * adding a huge value to the cursor would overflow the pointer. Call
 * sites keep `*p <= end`, so `end - *p` is a non-negative difference
 * that can be compared against `needed` as a UV. */
#define DEC_NEED(needed) \
    do { \
        if ((UV)(end - (*p)) < (UV)(needed)) \
            croak("decode: buffer truncated (need %lu more bytes)", \
                  (unsigned long)(needed)); \
    } while (0)
33              
34             /* Read a little-endian multi-byte unsigned integer from a byte buffer.
35             * Endianness-portable replacement for `memcpy(&v, ptr, N)`, which would
36             * read big-endian values on a BE host and silently misdecode the wire
37             * (CH Native is LE everywhere). For signed and floating-point reads,
38             * the caller bit-casts via memcpy from the unsigned result. */
/* Assemble a 16-bit unsigned value from the two little-endian bytes at `b`. */
static inline uint16_t dec_le16(const unsigned char *b) {
    const unsigned lo = b[0];
    const unsigned hi = b[1];
    return (uint16_t)(lo | (hi << 8));
}
/* Assemble a 32-bit unsigned value from the four little-endian bytes at
 * `b`, folding from the most significant byte (b[3]) down to b[0]. */
static inline uint32_t dec_le32(const unsigned char *b) {
    uint32_t acc = 0;
    int idx;
    for (idx = 3; idx >= 0; idx--)
        acc = (acc << 8) | (uint32_t)b[idx];
    return acc;
}
/* Assemble a 64-bit unsigned value from the eight little-endian bytes at
 * `b`, folding from the most significant byte (b[7]) down to b[0]. */
static inline uint64_t dec_le64(const unsigned char *b) {
    uint64_t acc = 0;
    int idx;
    for (idx = 7; idx >= 0; idx--)
        acc = (acc << 8) | (uint64_t)b[idx];
    return acc;
}
58              
59 26298           UV dec_varint(pTHX_ const unsigned char **p, const unsigned char *end) {
60 26298           UV v = 0;
61 26298           int shift = 0;
62 32           while (1) {
63 26330 100         DEC_NEED(1);
64 26324           unsigned char b = *(*p)++;
65 26324           v |= ((UV)(b & 0x7f)) << shift;
66 26324 100         if (!(b & 0x80)) break;
67 32           shift += 7;
68 32 50         if (shift >= 64) croak("decode: varint exceeds 64 bits");
69             }
70 26292           return v;
71             }
72              
73 24416           void dec_lenpfx_string(pTHX_ const unsigned char **p,
74             const unsigned char *end,
75             const char **out_s, STRLEN *out_len) {
76 24416           UV len = dec_varint(aTHX_ p, end);
77 24413 100         DEC_NEED(len);
78 24391           *out_s = (const char *)(*p);
79 24391           *out_len = (STRLEN)len;
80 24391           *p += len;
81 24391           }
82              
83             /* Shared prologue for decode_block / decode_block_rows: validate the
84             * input SV, position the cursor at the requested offset, read the
85             * block header (ncols + nrows), and run bounds checks. `fname` is
86             * embedded in croak messages so each XSUB reports its own name.
87             * Returns ncols/nrows by out-param; the cursor pair (p, end) is set
88             * up so the caller can resume column-by-column decoding. */
89 230           void decode_block_prologue(pTHX_ SV *bytes, UV start_offset,
90             const char *fname,
91             const unsigned char **out_start,
92             const unsigned char **out_p,
93             const unsigned char **out_end,
94             UV *out_ncols, UV *out_nrows) {
95             /* Materialize lvalue / magical SVs (e.g. the PVLV returned by 2-arg
96             * substr) before inspecting. SvOK on a fresh substr-LV returns
97             * false until SvGETMAGIC has run; SvPVbyte itself triggers the
98             * magic, so we just go straight to it and let buf_len = 0 cover
99             * the genuine empty-bytes case. Reject only true undef. */
100 230           SvGETMAGIC(bytes);
101 230 50         if (!SvOK(bytes)) croak("%s: bytes argument is undef", fname);
102             STRLEN buf_len;
103 230           const char *buf = SvPVbyte(bytes, buf_len);
104 230 100         if (start_offset > buf_len)
105 1           croak("%s: offset %lu past end of buffer (%lu bytes)",
106             fname, (unsigned long)start_offset, (unsigned long)buf_len);
107 229           const unsigned char *p = (const unsigned char *)buf + start_offset;
108 229           const unsigned char *start = p;
109 229           const unsigned char *end = (const unsigned char *)buf + buf_len;
110              
111 229           UV ncols = dec_varint(aTHX_ &p, end);
112 227           UV nrows = dec_varint(aTHX_ &p, end);
113              
114             /* Defensive bounds: ncols and nrows from the wire could be
115             * arbitrarily large in a malicious or corrupted block. Each column
116             * needs at least 2 bytes of name+type header; each row of the
117             * smallest type takes at least 1 byte. Reject obviously-impossible
118             * counts up front so we never allocate gigabytes for fuzz input. */
119 226 100         if (ncols > (UV)(end - p))
120 1           croak("%s: ncols=%lu exceeds remaining buffer (%lu bytes)",
121             fname, (unsigned long)ncols, (unsigned long)(end - p));
122 225           UV remaining_after_headers = (UV)(end - p);
123 225 50         if (nrows > remaining_after_headers && nrows > 0)
    0          
124 0           croak("%s: nrows=%lu exceeds remaining buffer (%lu bytes)",
125             fname, (unsigned long)nrows,
126             (unsigned long)remaining_after_headers);
127              
128 225           *out_start = start;
129 225           *out_p = p;
130 225           *out_end = end;
131 225           *out_ncols = ncols;
132 225           *out_nrows = nrows;
133 225           }
134              
135             /* Build a JSON-style boolean SV: bless(\(b ? 1 : 0), 'JSON::PP::Boolean').
136             * Used when decoding JSON/Dynamic Bool variant slots so that re-encoding
137             * the round-tripped value picks the same Bool wire variant instead of
138             * widening it to Int64 (which a naked newSViv(0|1) would trigger). The
139             * blessed package matches what json_pkg_is_bool() recognizes on the
140             * encode side. */
141 243           static SV *make_json_bool_sv(pTHX_ int b) {
142 243           SV *inner = newSViv(b ? 1 : 0);
143 243           SV *rv = newRV_noinc(inner);
144 243           sv_bless(rv, gv_stashpv("JSON::PP::Boolean", GV_ADD));
145 243           return rv;
146             }
147              
148             /* Decode one Dynamic/JSON variant wire slot into `sub`. `kind` is a
149             * JsonValueKind (or -1 for SharedVariant, which we read as String).
150             * Shared by both T_JSON and T_DYNAMIC decode paths; `ctx` is "JSON" or
151             * "Dynamic" for diagnostic context. The caller has already extended
152             * `sub` and mortalized it. */
153 1307           static void decode_dynamic_variant_slot(pTHX_ const unsigned char **p,
154             const unsigned char *end,
155             AV *sub, int kind, SSize_t nv_rows,
156             const char *ctx) {
157             SSize_t k;
158 1307 100         if (kind < 0 || kind == JV_STRING) {
    100          
159             /* SharedVariant (-1) is a String column on the wire; same path. */
160 822 100         for (k = 0; k < nv_rows; k++) {
161             const char *vs; STRLEN vl;
162 90           dec_lenpfx_string(aTHX_ p, end, &vs, &vl);
163 90           av_store(sub, k, newSVpvn(vs, vl));
164             }
165 732           return;
166             }
167 575 100         if (kind == JV_INT64) {
168             /* Division-form bounds check avoids overflow when nv_rows is
169             * attacker-controlled (variant disc bytes determine it). */
170 128 50         if ((UV)nv_rows > (UV)(end - *p) / 8)
171 0           croak("decode: buffer truncated (need %lu more bytes)",
172             (unsigned long)((UV)nv_rows * 8));
173 260 100         for (k = 0; k < nv_rows; k++) {
174 132           uint64_t u = dec_le64(*p); *p += 8;
175 132           int64_t v; memcpy(&v, &u, 8);
176 132           av_store(sub, k, newSViv((IV)v));
177             }
178 128           return;
179             }
180 447 100         if (kind == JV_FLOAT64) {
181 69 50         if ((UV)nv_rows > (UV)(end - *p) / 8)
182 0           croak("decode: buffer truncated (need %lu more bytes)",
183             (unsigned long)((UV)nv_rows * 8));
184 138 100         for (k = 0; k < nv_rows; k++) {
185 69           uint64_t u = dec_le64(*p); *p += 8;
186 69           double v; memcpy(&v, &u, 8);
187 69           av_store(sub, k, newSVnv(v));
188             }
189 69           return;
190             }
191 378 100         if (kind == JV_BOOL) {
192 88 50         if ((UV)nv_rows > (UV)(end - *p))
193 0           croak("decode: buffer truncated (need %lu more bytes)",
194             (unsigned long)nv_rows);
195 178 100         for (k = 0; k < nv_rows; k++) {
196 90           unsigned char b = *(*p)++;
197 90           av_store(sub, k, make_json_bool_sv(aTHX_ b));
198             }
199 88           return;
200             }
201 290 100         if (kind == JV_ARRAY_BOOL || kind == JV_ARRAY_FLOAT64
    100          
202 171 100         || kind == JV_ARRAY_INT64 || kind == JV_ARRAY_STRING) {
    50          
203             /* Array variant column: N UInt64 offsets, then offsets[N-1]
204             * inner-type elements concatenated. */
205 290 50         if (nv_rows == 0) return;
206 290 50         if ((UV)nv_rows > (UV)(end - *p) / 8)
207 0           croak("decode: buffer truncated (need %lu more bytes)",
208             (unsigned long)((UV)nv_rows * 8));
209             uint64_t *offs;
210 290 50         Newx(offs, nv_rows, uint64_t);
211 290           SAVEFREEPV(offs);
212 290           uint64_t prev_o = 0;
213 581 100         for (k = 0; k < nv_rows; k++) {
214 291           offs[k] = dec_le64(*p); *p += 8;
215             /* Per-offset overflow + monotonicity; protects later casts to
216             * SSize_t (e.g. av_extend) from negative-wrap. */
217 291 50         if (offs[k] > (uint64_t)SSize_t_MAX || offs[k] < prev_o)
    50          
218 0           croak("decode JSON: Array variant offset[%ld]=%lu invalid "
219             "(prev=%lu)",
220             (long)k, (unsigned long)offs[k], (unsigned long)prev_o);
221 291           prev_o = offs[k];
222             }
223 290           uint64_t total = offs[nv_rows - 1];
224             /* Defensive: total elements must fit into the remaining buffer
225             * (1+ bytes per element minimum). Catches corrupted offset
226             * lists before they trigger huge AV allocations. */
227 290 50         if (total > (uint64_t)(end - *p))
228 0           croak("decode JSON: Array variant total=%lu exceeds remaining "
229             "buffer (%lu bytes)",
230             (unsigned long)total, (unsigned long)(end - *p));
231 581 100         for (k = 0; k < nv_rows; k++)
232 291           av_store(sub, k, newRV_noinc((SV*)newAV()));
233              
234             /* Inner cursor walks through elements while row_idx advances
235             * each time we hit the cumulative offset boundary. */
236 290           uint64_t prev = 0;
237 290           SSize_t row_idx = 0;
238 290           AV *inner = (AV*)SvRV(*av_fetch(sub, 0, 0));
239 290 100         if (offs[0] > 0) av_extend(inner, (SSize_t)offs[0] - 1);
240 290           SSize_t inner_cursor = 0;
241              
242 290 100         STRLEN per_elem = (kind == JV_ARRAY_BOOL) ? 1 : 8;
243             /* `total` is attacker-controlled (sum of wire offsets); use the
244             * division-form check to avoid the multiplication overflowing. */
245 290 100         if (kind != JV_ARRAY_STRING
246 237 50         && total > (uint64_t)(end - *p) / per_elem)
247 0           croak("decode: buffer truncated (need %lu more bytes)",
248             (unsigned long)(per_elem * total));
249              
250             uint64_t i;
251 894 100         for (i = 0; i < total; i++) {
252 605 100         while (inner_cursor >= (SSize_t)(offs[row_idx] - prev)) {
253 1           prev = offs[row_idx];
254 1           row_idx++;
255             /* If a corrupted offset list has trailing zero-length
256             * rows that the outer total didn't cover, row_idx
257             * could walk past the populated entries. Bail before
258             * av_fetch returns NULL and we deref it. */
259 1 50         if (row_idx >= nv_rows)
260 0           croak("decode: array variant offsets advanced past "
261             "nv_rows=%ld (corrupted block)", (long)nv_rows);
262 1           inner_cursor = 0;
263 1           inner = (AV*)SvRV(*av_fetch(sub, row_idx, 0));
264 1           uint64_t n2 = offs[row_idx] - prev;
265 1 50         if (n2 > 0) av_extend(inner, (SSize_t)n2 - 1);
266             }
267             SV *ev;
268 604           switch (kind) {
269 153           case JV_ARRAY_BOOL: {
270 153           unsigned char b = *(*p)++;
271 153           ev = make_json_bool_sv(aTHX_ b);
272 153           break;
273             }
274 159           case JV_ARRAY_INT64: {
275 159           uint64_t u = dec_le64(*p); *p += 8;
276 159           int64_t v; memcpy(&v, &u, 8);
277 159           ev = newSViv((IV)v);
278 159           break;
279             }
280 156           case JV_ARRAY_FLOAT64: {
281 156           uint64_t u = dec_le64(*p); *p += 8;
282 156           double v; memcpy(&v, &u, 8);
283 156           ev = newSVnv(v);
284 156           break;
285             }
286 136           case JV_ARRAY_STRING: {
287             const char *vs; STRLEN vl;
288 136           dec_lenpfx_string(aTHX_ p, end, &vs, &vl);
289 136           ev = newSVpvn(vs, vl);
290 136           break;
291             }
292 0           default: ev = newSV(0); /* unreachable */
293             }
294 604           av_store(inner, inner_cursor++, ev);
295             }
296 290           return;
297             }
298 0           croak("decode %s: internal: unknown kind %d", ctx, kind);
299             }
300              
/* DEC_SCALAR_LOOP(av, nrows, sv_expr): store `sv_expr` into slots
 * 0 .. nrows-1 of `av`, evaluating the expression once per row (it is
 * expected to advance the read cursor itself). Used for runs of
 * same-size scalars so the per-row work stays a single av_store. */
#define DEC_SCALAR_LOOP(av, nrows, sv_expr) do { \
    SSize_t r = 0; \
    while (r < (nrows)) { \
        av_store(av, r, (sv_expr)); \
        r++; \
    } \
} while (0)
307              
308 1416           SV *decode_column(pTHX_ const unsigned char **p,
309             const unsigned char *end,
310             TypeInfo *t, SSize_t nrows) {
311 1416           AV *av = newAV();
312 1416 100         if (nrows > 0) av_extend(av, nrows - 1);
313             SSize_t r;
314              
315 1416           switch (t->code) {
316 4           case T_INT8: {
317 4 50         DEC_NEED((STRLEN)nrows);
318 10 100         DEC_SCALAR_LOOP(av, nrows, newSViv((IV)(int8_t)*(*p)++));
319 4           break;
320             }
321 205           case T_UINT8: case T_BOOL: case T_ENUM8: {
322 205 50         DEC_NEED((STRLEN)nrows);
323 20213 100         DEC_SCALAR_LOOP(av, nrows, newSVuv((UV)*(*p)++));
324 205           break;
325             }
326 3           case T_INT16: {
327 3 50         DEC_NEED((STRLEN)(2 * nrows));
328 6 100         for (r = 0; r < nrows; r++) {
329             /* Use dec_le16 + memcpy bit-cast (same pattern as INT32
330             * and INT64) to keep the signed conversion well-defined
331             * across compilers; the inline (int16_t)(...) cast on
332             * a promoted-int high-bit value is implementation-
333             * defined in C99/C11. */
334 3           uint16_t u = dec_le16(*p);
335 3           int16_t v; memcpy(&v, &u, 2);
336 3           av_store(av, r, newSViv((IV)v));
337 3           *p += 2;
338             }
339 3           break;
340             }
341 7           case T_UINT16: case T_DATE: case T_ENUM16: {
342 7 50         DEC_NEED((STRLEN)(2 * nrows));
343 19 100         for (r = 0; r < nrows; r++) {
344 12           uint16_t v = dec_le16(*p);
345 12           av_store(av, r, newSVuv((UV)v));
346 12           *p += 2;
347             }
348 7           break;
349             }
350 293           case T_INT32: case T_DATE32: case T_DECIMAL32: {
351 293 100         DEC_NEED((STRLEN)(4 * nrows));
352 20452 100         for (r = 0; r < nrows; r++) {
353 20163           uint32_t u = dec_le32(*p);
354 20163           int32_t v; memcpy(&v, &u, 4);
355 20163           av_store(av, r, newSViv((IV)v));
356 20163           *p += 4;
357             }
358 289           break;
359             }
360 236           case T_UINT32: case T_DATETIME: {
361 236 100         DEC_NEED((STRLEN)(4 * nrows));
362 20297 100         for (r = 0; r < nrows; r++) {
363 20062           uint32_t v = dec_le32(*p);
364 20062           av_store(av, r, newSVuv((UV)v));
365 20062           *p += 4;
366             }
367 235           break;
368             }
369 13           case T_INT64: case T_DATETIME64: case T_DECIMAL64: {
370 13 50         DEC_NEED((STRLEN)(8 * nrows));
371 35 100         for (r = 0; r < nrows; r++) {
372 22           uint64_t u = dec_le64(*p);
373 22           int64_t v; memcpy(&v, &u, 8);
374 22           av_store(av, r, newSViv((IV)v));
375 22           *p += 8;
376             }
377 13           break;
378             }
379 11           case T_UINT64: {
380 11 50         DEC_NEED((STRLEN)(8 * nrows));
381 33 100         for (r = 0; r < nrows; r++) {
382 22           uint64_t v = dec_le64(*p);
383 22           av_store(av, r, newSVuv((UV)v));
384 22           *p += 8;
385             }
386 11           break;
387             }
388 1           case T_FLOAT32: {
389 1 50         DEC_NEED((STRLEN)(4 * nrows));
390 3 100         for (r = 0; r < nrows; r++) {
391 2           uint32_t u = dec_le32(*p);
392 2           float v; memcpy(&v, &u, 4);
393 2           av_store(av, r, newSVnv((NV)v));
394 2           *p += 4;
395             }
396 1           break;
397             }
398 206           case T_FLOAT64: {
399 206 50         DEC_NEED((STRLEN)(8 * nrows));
400 20213 100         for (r = 0; r < nrows; r++) {
401 20007           uint64_t u = dec_le64(*p);
402 20007           double v; memcpy(&v, &u, 8);
403 20007           av_store(av, r, newSVnv((NV)v));
404 20007           *p += 8;
405             }
406 206           break;
407             }
408 1           case T_BFLOAT16: {
409             /* Reconstruct a Float32 by shifting 16 wire bits into the high half. */
410 1 50         DEC_NEED((STRLEN)(2 * nrows));
411 3 100         for (r = 0; r < nrows; r++) {
412 2           uint32_t bits = (uint32_t)((*p)[0] | ((*p)[1] << 8)) << 16;
413             float fv;
414 2           memcpy(&fv, &bits, 4);
415 2           av_store(av, r, newSVnv((NV)fv));
416 2           *p += 2;
417             }
418 1           break;
419             }
420 299           case T_STRING: {
421 20468 100         for (r = 0; r < nrows; r++) {
422             const char *s; STRLEN l;
423 20183           dec_lenpfx_string(aTHX_ p, end, &s, &l);
424 20169           av_store(av, r, newSVpvn(s, l));
425             }
426 285           break;
427             }
428 5           case T_FIXEDSTRING: {
429 5           STRLEN n = (STRLEN)t->param;
430             /* `n * nrows` is the only multiplicative bound here whose
431             * multiplier is fully user-controlled (FixedString(N) for
432             * any N up to ~2^31). Defend against overflow explicitly
433             * rather than relying on prologue's per-row bound. */
434 5 50         if (n > 0 && (UV)nrows > ((UV)(end - *p)) / n)
    50          
435 0           croak("decode: FixedString(%lu) x %ld rows exceeds "
436             "remaining buffer (%lu bytes)",
437             (unsigned long)n, (long)nrows,
438             (unsigned long)(end - *p));
439 5 50         DEC_NEED(n * (STRLEN)nrows);
440 14 100         for (r = 0; r < nrows; r++) {
441 9           av_store(av, r, newSVpvn((const char *)*p, n));
442 9           *p += n;
443             }
444 5           break;
445             }
446 3           case T_DECIMAL128: {
447             /* Division-form guard against `16 * nrows` overflow when
448             * nrows is attacker-controlled. */
449 3 50         if ((UV)nrows > (UV)(end - *p) / 16)
450 0           croak("decode: Decimal128 x %ld rows exceeds buffer",
451             (long)nrows);
452 3 50         DEC_NEED((STRLEN)(16 * nrows));
453 11 100         for (r = 0; r < nrows; r++) {
454 8           uint64_t lo = dec_le64(*p);
455 8           uint64_t hu = dec_le64(*p + 8);
456 8           int64_t hi; memcpy(&hi, &hu, 8);
457 8           AV *pair = newAV();
458 8           av_extend(pair, 1);
459 8           av_store(pair, 0, newSVuv((UV)lo));
460 8           av_store(pair, 1, newSViv((IV)hi));
461 8           av_store(av, r, newRV_noinc((SV*)pair));
462 8           *p += 16;
463             }
464 3           break;
465             }
466 2           case T_DECIMAL256: {
467 2 50         if ((UV)nrows > (UV)(end - *p) / 32)
468 0           croak("decode: Decimal256 x %ld rows exceeds buffer",
469             (long)nrows);
470 2 50         DEC_NEED((STRLEN)(32 * nrows));
471 7 100         for (r = 0; r < nrows; r++) {
472 5           AV *limbs = newAV();
473 5           av_extend(limbs, 3);
474             int i;
475 25 100         for (i = 0; i < 4; i++) {
476 20           uint64_t l = dec_le64(*p + 8 * i);
477 20           av_store(limbs, i, newSVuv((UV)l));
478             }
479 5           av_store(av, r, newRV_noinc((SV*)limbs));
480 5           *p += 32;
481             }
482 2           break;
483             }
484 4           case T_UUID: {
485             /* Wire: two LE UInt64 halves with bytes reversed within each
486             * half. Reassemble to standard 8-4-4-4-12 hex form. */
487 4 50         if ((UV)nrows > (UV)(end - *p) / 16)
488 0           croak("decode: UUID x %ld rows exceeds buffer",
489             (long)nrows);
490 4 50         DEC_NEED((STRLEN)(16 * nrows));
491 10 100         for (r = 0; r < nrows; r++) {
492             unsigned char b[16];
493             int i;
494 54 100         for (i = 0; i < 8; i++) b[i] = (*p)[7 - i];
495 54 100         for (i = 0; i < 8; i++) b[8+i] = (*p)[15 - i];
496 6           *p += 16;
497             char hex[37];
498             static const char H[] = "0123456789abcdef";
499 6           int j = 0, k;
500 102 100         for (k = 0; k < 16; k++) {
501 96           hex[j++] = H[(b[k] >> 4) & 0xf];
502 96           hex[j++] = H[b[k] & 0xf];
503 96 100         if (k == 3 || k == 5 || k == 7 || k == 9) hex[j++] = '-';
    100          
    100          
    100          
504             }
505 6           hex[36] = '\0';
506 6           av_store(av, r, newSVpvn(hex, 36));
507             }
508 4           break;
509             }
510 5           case T_IPV4: {
511 5 50         DEC_NEED((STRLEN)(4 * nrows));
512 11 100         for (r = 0; r < nrows; r++) {
513             /* Wire is LE uint32 = [oct4][oct3][oct2][oct1]. Read the
514             * bytes directly so the output order is endianness-
515             * independent. */
516 6           const unsigned char *b = *p;
517             char buf[16];
518 6           int n = my_snprintf(buf, sizeof buf, "%u.%u.%u.%u",
519 6           b[3], b[2], b[1], b[0]);
520 6           av_store(av, r, newSVpvn(buf, n));
521 6           *p += 4;
522             }
523 5           break;
524             }
525 1           case T_IPV6: {
526 1 50         if ((UV)nrows > (UV)(end - *p) / 16)
527 0           croak("decode: IPv6 x %ld rows exceeds buffer",
528             (long)nrows);
529 1 50         DEC_NEED((STRLEN)(16 * nrows));
530 2 100         for (r = 0; r < nrows; r++) {
531 1           av_store(av, r, newSVpvn((const char *)*p, 16));
532 1           *p += 16;
533             }
534 1           break;
535             }
536 13           case T_ARRAY: {
537             /* Read nrows UInt64 offsets, then decode flat inner array
538             * of total = offsets[nrows-1] elements, then slice. */
539 13 50         if ((UV)nrows > (UV)(end - *p) / 8)
540 0           croak("decode: Array offsets x %ld rows exceeds buffer",
541             (long)nrows);
542 13 50         DEC_NEED((STRLEN)(8 * nrows));
543             SSize_t *offsets;
544 13 50         Newx(offsets, nrows + 1, SSize_t);
545 13           SAVEFREEPV(offsets);
546 13           offsets[0] = 0;
547 45 100         for (r = 0; r < nrows; r++) {
548 32           uint64_t o = dec_le64(*p);
549             /* Per-offset overflow + monotonicity check; the final-only
550             * check below is insufficient because an intermediate
551             * offset with bit 63 set narrows to a negative SSize_t and
552             * later produces an enormous stop-start span in av_extend. */
553 32 50         if (o > (uint64_t)SSize_t_MAX || (SSize_t)o < offsets[r])
    50          
554 0           croak("decode: Array offset[%ld]=%lu invalid (prev=%ld)",
555             (long)r, (unsigned long)o, (long)offsets[r]);
556 32           offsets[r + 1] = (SSize_t)o;
557 32           *p += 8;
558             }
559 13           SSize_t total = offsets[nrows];
560             /* Defensive: a corrupted offset must not allocate gigabytes
561             * for the inner array. */
562 13 50         if ((UV)total > (UV)(end - *p))
563 0           croak("decode: Array total=%ld exceeds remaining buffer "
564             "(%lu bytes)",
565             (long)total, (unsigned long)(end - *p));
566 13           SV *flat_rv = decode_column(aTHX_ p, end, t->inner, total);
567 13           AV *flat = (AV *)SvRV(flat_rv);
568 45 100         for (r = 0; r < nrows; r++) {
569 32           AV *slice = newAV();
570 32           SSize_t start = offsets[r], stop = offsets[r + 1];
571 32 100         if (stop > start) av_extend(slice, stop - start - 1);
572             SSize_t i;
573 68 100         for (i = start; i < stop; i++) {
574 36           SV **elem = av_fetch(flat, i, 0);
575 36 50         av_store(slice, i - start,
576             elem ? SvREFCNT_inc(*elem) : newSV(0));
577             }
578 32           av_store(av, r, newRV_noinc((SV*)slice));
579             }
580 13           SvREFCNT_dec(flat_rv);
581 13           break;
582             }
583 5           case T_TUPLE: {
584             /* Decode each element type as a column of `nrows`, then
585             * transpose into per-row tuples. */
586             int i;
587             SV **cols;
588 5           Newx(cols, t->tuple_len, SV*);
589 5           SAVEFREEPV(cols);
590 15 100         for (i = 0; i < t->tuple_len; i++)
591 10           cols[i] = decode_column(aTHX_ p, end, t->tuple[i], nrows);
592 14 100         for (r = 0; r < nrows; r++) {
593 9           AV *row = newAV();
594 9 50         if (t->tuple_len > 0) av_extend(row, t->tuple_len - 1);
595 27 100         for (i = 0; i < t->tuple_len; i++) {
596 18           SV **elem = av_fetch((AV *)SvRV(cols[i]), r, 0);
597 18 50         av_store(row, i, elem ? SvREFCNT_inc(*elem) : newSV(0));
598             }
599 9           av_store(av, r, newRV_noinc((SV*)row));
600             }
601 15 100         for (i = 0; i < t->tuple_len; i++) SvREFCNT_dec(cols[i]);
602 5           break;
603             }
604 13           case T_NULLABLE: {
605 13 50         DEC_NEED((STRLEN)nrows);
606             unsigned char *nulls;
607 13           Newx(nulls, nrows, unsigned char);
608 13           SAVEFREEPV(nulls);
609 39 100         for (r = 0; r < nrows; r++) nulls[r] = *(*p)++;
610 13           SV *inner_rv = decode_column(aTHX_ p, end, t->inner, nrows);
611 13           AV *inner = (AV *)SvRV(inner_rv);
612 39 100         for (r = 0; r < nrows; r++) {
613 26 100         if (nulls[r]) {
614 12           av_store(av, r, newSV(0));
615             } else {
616 14           SV **elem = av_fetch(inner, r, 0);
617 14 50         av_store(av, r, elem ? SvREFCNT_inc(*elem) : newSV(0));
618             }
619             }
620 13           SvREFCNT_dec(inner_rv);
621 13           break;
622             }
623 4           case T_MAP: {
624             /* Map(K, V) on the wire is Array(Tuple(K, V)). Re-dispatch
625             * through a synthetic Array(Tuple) type. */
626             TypeInfo array_t, tuple_t;
627 4           memset(&array_t, 0, sizeof array_t);
628 4           memset(&tuple_t, 0, sizeof tuple_t);
629 4           tuple_t.code = T_TUPLE;
630 4           tuple_t.tuple = t->tuple;
631 4           tuple_t.tuple_len = t->tuple_len;
632 4           array_t.code = T_ARRAY;
633 4           array_t.inner = &tuple_t;
634 4           SV *rv = decode_column(aTHX_ p, end, &array_t, nrows);
635 4           SvREFCNT_dec((SV *)av);
636 4           return rv;
637             }
638 5           case T_LOWCARDINALITY: {
639 5 50         DEC_NEED(24);
640 5           uint64_t version = dec_le64(*p);
641 5           uint64_t flags = dec_le64(*p + 8);
642 5           uint64_t dict_n = dec_le64(*p + 16);
643 5           *p += 24;
644 5 50         if (version != 1) croak("decode: LowCardinality version != 1 (got %lu)", (unsigned long)version);
645             /* Defensive: a corrupted dict_n must not allocate gigabytes. */
646 5 50         if (dict_n > (uint64_t)(end - *p))
647 0           croak("decode: LowCardinality dict_n=%lu exceeds remaining "
648             "buffer (%lu bytes)",
649             (unsigned long)dict_n, (unsigned long)(end - *p));
650 5           int idx_type = (int)(flags & 0xff);
651             /* Only TUInt8=0..TUInt64=3 are defined; reject the rest
652             * loudly instead of silently aliasing to UInt64. */
653 5 50         if (idx_type > 3)
654 0           croak("decode: LowCardinality: unknown index type %d in flags",
655             idx_type);
656 5           TypeInfo *inner = t->inner;
657 5           int is_null = (inner->code == T_NULLABLE);
658 5 100         TypeInfo *leaf = is_null ? inner->inner : inner;
659 5           SV *dict_rv = decode_column(aTHX_ p, end, leaf, (SSize_t)dict_n);
660 5           AV *dict = (AV *)SvRV(dict_rv);
661 5 50         DEC_NEED(8);
662 5           uint64_t idx_n = dec_le64(*p);
663 5           *p += 8;
664             /* Surface the meaningful error before DEC_NEED would croak on
665             * a truncated buffer for an absurdly large idx_n. */
666 5 50         if (idx_n > (uint64_t)SSize_t_MAX || (SSize_t)idx_n != nrows)
    50          
667 0           croak("decode: LowCardinality index count (%lu) != nrows (%ld)",
668             (unsigned long)idx_n, (long)nrows);
669 5 50         size_t idx_bytes = (idx_type == 0) ? 1 :
    0          
    0          
670             (idx_type == 1) ? 2 :
671             (idx_type == 2) ? 4 : 8;
672 5 50         DEC_NEED((STRLEN)(idx_bytes * idx_n));
673 20 100         for (r = 0; r < nrows; r++) {
674 15           uint64_t i = 0;
675 15           switch (idx_bytes) {
676 15           case 1: i = (uint64_t)(*p)[0]; break;
677 0           case 2: i = (uint64_t)dec_le16(*p); break;
678 0           case 4: i = (uint64_t)dec_le32(*p); break;
679 0           case 8: i = dec_le64(*p); break;
680             }
681 15           *p += idx_bytes;
682 15 100         if (is_null && i == 0) {
    100          
683 2           av_store(av, r, newSV(0));
684             } else {
685 13 50         if (i >= dict_n)
686 0           croak("decode: LowCardinality index %lu out of range "
687             "(dict_n=%lu) at row %ld",
688             (unsigned long)i, (unsigned long)dict_n, (long)r);
689 13           SV **elem = av_fetch(dict, (SSize_t)i, 0);
690 13 50         av_store(av, r, elem ? SvREFCNT_inc(*elem) : newSV(0));
691             }
692             }
693 5           SvREFCNT_dec(dict_rv);
694 5           break;
695             }
696 5           case T_VARIANT: {
697 5 50         DEC_NEED(8);
698 5           uint64_t mode = dec_le64(*p);
699 5           *p += 8;
700 5 50         if (mode != 0) croak("decode: Variant mode != 0 (got %lu)", (unsigned long)mode);
701 5 50         DEC_NEED((STRLEN)nrows);
702             unsigned char *wire_disc;
703 5           Newx(wire_disc, nrows, unsigned char);
704 5           SAVEFREEPV(wire_disc);
705 17 100         for (r = 0; r < nrows; r++) wire_disc[r] = *(*p)++;
706 5           int nvar = t->tuple_len;
707             SSize_t *counts;
708 5           Newxz(counts, nvar, SSize_t);
709 5           SAVEFREEPV(counts);
710 17 100         for (r = 0; r < nrows; r++) {
711 12           unsigned char w = wire_disc[r];
712 12 100         if (w != 255) {
713 4 50         if (w >= nvar) croak("decode: Variant wire idx %u out of range", w);
714 4           counts[w]++;
715             }
716             }
717             /* Decode each sub-column in wire (alphabetical) order; the
718             * decl index of wire position w is t->variant_wire_to_decl[w]. */
719             SV **subcols;
720 5           Newx(subcols, nvar, SV*);
721 5           SAVEFREEPV(subcols);
722             int w;
723 15 100         for (w = 0; w < nvar; w++) {
724 10           int decl = t->variant_wire_to_decl[w];
725 10           subcols[w] = decode_column(aTHX_ p, end, t->tuple[decl], counts[w]);
726             }
727             SSize_t *cursors;
728 5           Newxz(cursors, nvar, SSize_t);
729 5           SAVEFREEPV(cursors);
730 17 100         for (r = 0; r < nrows; r++) {
731 12           unsigned char wd = wire_disc[r];
732 12 100         if (wd == 255) {
733 8           av_store(av, r, newSV(0));
734             } else {
735 4           int decl = t->variant_wire_to_decl[wd];
736 4           SV **elem = av_fetch((AV *)SvRV(subcols[wd]), cursors[wd]++, 0);
737 4           AV *pair = newAV();
738 4           av_extend(pair, 1);
739 4           av_store(pair, 0, newSViv(decl));
740 4 50         av_store(pair, 1, elem ? SvREFCNT_inc(*elem) : newSV(0));
741 4           av_store(av, r, newRV_noinc((SV*)pair));
742             }
743             }
744 15 100         for (w = 0; w < nvar; w++) SvREFCNT_dec(subcols[w]);
745 5           break;
746             }
747              
748 67           case T_JSON: {
749             /* Object structure prefix. Versions: V1=0, V2=2, V3=4. */
750 67 50         DEC_NEED(8);
751 67           uint64_t obj_ver = dec_le64(*p); *p += 8;
752 67 50         if (obj_ver != 0 && obj_ver != 2 && obj_ver != 4)
    0          
    0          
753 0           croak("decode JSON: unsupported Object version %lu "
754             "(known: 0, 2, 4); upgrade ClickHouse::Encoder",
755             (unsigned long)obj_ver);
756 67 50         if (obj_ver == 0) {
757             /* V1: extra max_dynamic_paths varint before count. */
758 67           (void)dec_varint(aTHX_ p, end);
759             }
760 67           UV num_paths = dec_varint(aTHX_ p, end);
761             /* Defensive: each path takes at least 2 bytes (1-byte varint
762             * length prefix + 1-byte name). Reject obviously-impossible
763             * counts before allocating arrays sized by num_paths. */
764 67 50         if (num_paths > (UV)(end - *p))
765 0           croak("decode JSON: num_paths=%lu exceeds remaining buffer "
766             "(%lu bytes)",
767             (unsigned long)num_paths, (unsigned long)(end - *p));
768              
769 67           char **paths = NULL;
770 67           STRLEN *path_lens = NULL;
771 67 100         if (num_paths > 0) {
772 53 50         Newx(paths, num_paths, char*);
773 53           SAVEFREEPV(paths);
774 53 50         Newx(path_lens, num_paths, STRLEN);
775 53           SAVEFREEPV(path_lens);
776             }
777             UV pi;
778 708 100         for (pi = 0; pi < num_paths; pi++) {
779             const char *ps;
780             STRLEN pl;
781 642           dec_lenpfx_string(aTHX_ p, end, &ps, &pl);
782 641           paths[pi] = (char*)ps; /* aliases input buffer */
783 641           path_lens[pi] = pl;
784             }
785 66 50         if (obj_ver == 4) {
786             /* V3 adds shared_data_serialization_version, and a
787             * buckets count when that version is MAP_WITH_BUCKETS
788             * (=1) or ADVANCED (=2). Native format with statistics
789             * disabled (the default) skips the stats afterwards. */
790 0           UV shared_ver = dec_varint(aTHX_ p, end);
791 0 0         if (shared_ver == 1 || shared_ver == 2)
    0          
792 0           (void)dec_varint(aTHX_ p, end);
793             }
794              
795             /* Per-path Dynamic prefix: collect type-name lists. */
796 66           int **path_kind_list = NULL; /* path_kind_list[p][i] = JsonValueKind */
797 66           int *path_kind_count = NULL;
798 66 100         if (num_paths > 0) {
799 52 50         Newxz(path_kind_list, num_paths, int*);
800 52           SAVEFREEPV(path_kind_list);
801 52 50         Newxz(path_kind_count, num_paths, int);
802 52           SAVEFREEPV(path_kind_count);
803             }
804 707 100         for (pi = 0; pi < num_paths; pi++) {
805 641 50         DEC_NEED(8);
806 641           uint64_t dyn_ver = dec_le64(*p); *p += 8;
807 641 50         if (dyn_ver != 1 && dyn_ver != 2 && dyn_ver != 4)
    0          
    0          
808 0           croak("decode JSON: unsupported Dynamic version %lu "
809             "(known: 1, 2, 4); upgrade ClickHouse::Encoder",
810             (unsigned long)dyn_ver);
811 641 50         if (dyn_ver == 1)
812 641           (void)dec_varint(aTHX_ p, end);
813 641           UV ntypes = dec_varint(aTHX_ p, end);
814             /* Each type name needs at least 2 bytes (length varint
815             * + name byte). Reject implausibly large counts. */
816 641 50         if (ntypes > (UV)(end - *p))
817 0           croak("decode JSON: ntypes=%lu exceeds remaining "
818             "buffer (%lu bytes)",
819             (unsigned long)ntypes, (unsigned long)(end - *p));
820              
821 641           int *kinds_in_order = NULL;
822 641 50         if (ntypes > 0) {
823 641 50         Newx(kinds_in_order, ntypes, int);
824 641           SAVEFREEPV(kinds_in_order);
825             }
826             UV ti;
827 1290 100         for (ti = 0; ti < ntypes; ti++) {
828             const char *ts;
829             STRLEN tl;
830 649           dec_lenpfx_string(aTHX_ p, end, &ts, &tl);
831 649           int k = json_kind_from_type_name(ts, tl);
832 649 50         if (k < 0)
833 0           croak("decode JSON: unsupported variant type '%.*s' "
834             "for path '%.*s' (supported: Bool, Float64, "
835             "Int64, String, Array(...) of those)",
836             (int)tl, ts, (int)path_lens[pi], paths[pi]);
837 649           kinds_in_order[ti] = k;
838             }
839 641           path_kind_list[pi] = kinds_in_order;
840 641           path_kind_count[pi] = (int)ntypes;
841              
842             /* Variant prefix: 8-byte mode. */
843 641 50         DEC_NEED(8);
844 641           uint64_t var_mode = dec_le64(*p); *p += 8;
845 641 50         if (var_mode != 0)
846 0           croak("decode JSON: only BASIC variant mode supported "
847             "(got %lu)", (unsigned long)var_mode);
848             }
849              
850             /* Build result: AV of HV refs, one per row. */
851 370 100         for (r = 0; r < nrows; r++)
852 304           av_store(av, r, newRV_noinc((SV*)newHV()));
853              
854             /* Typed path data (when t was declared as JSON(name Type, ...)):
855             * the inner column data comes on the wire right after all
856             * Dynamic prefixes and before any dynamic Variant data, in
857             * name-sorted order. Decode each typed path's column,
858             * distribute into per-row hashes by path name. */
859             {
860             int tp;
861 85 100         for (tp = 0; tp < t->tuple_len; tp++) {
862 19           SV *col_rv = decode_column(aTHX_ p, end, t->tuple[tp],
863             nrows);
864 19           AV *col_av = (AV*)SvRV(col_rv);
865 19           STRLEN nlen = strlen(t->tuple_names[tp]);
866 54 100         for (r = 0; r < nrows; r++) {
867 35           SV **e = av_fetch(col_av, r, 0);
868 35 50         if (!e) continue;
869 35           SV *row_rv = *av_fetch(av, r, 0);
870 35           HV *row_hv = (HV*)SvRV(row_rv);
871 35           hv_store(row_hv, t->tuple_names[tp], (I32)nlen,
872             SvREFCNT_inc(*e), 0);
873             }
874 19           SvREFCNT_dec(col_rv);
875             }
876             }
877              
878             /* Per-path Variant data: discs + per-variant values. */
879 707 100         for (pi = 0; pi < num_paths; pi++) {
880             /* Read N disc bytes. */
881 641 50         DEC_NEED((STRLEN)nrows);
882             unsigned char *discs;
883 641           Newx(discs, nrows, unsigned char);
884 641           SAVEFREEPV(discs);
885 8179 100         for (r = 0; r < nrows; r++) discs[r] = *(*p)++;
886              
887             /* Compute per-variant row counts and lex-position table.
888             * The wire variant list has (kind_count + 1) entries (the
889             * +1 is SharedVariant inserted at its lex position 7).
890             * Rebuild the kind mask from the type-name list we just
891             * parsed and reuse the same lex-table helper as encode. */
892 641           int nv = path_kind_count[pi];
893 641           int wire_slots = nv + 1;
894             SSize_t *var_counts;
895 641           Newxz(var_counts, wire_slots, SSize_t);
896 641           SAVEFREEPV(var_counts);
897              
898             int slot_to_kind_or_shared[JSON_LEX_SLOTS];
899             {
900 641           unsigned mask = 0;
901             int i;
902 1290 100         for (i = 0; i < nv; i++)
903 649           mask |= 1u << path_kind_list[pi][i];
904 641           (void)json_build_lex_table(mask, slot_to_kind_or_shared);
905             }
906              
907 8179 100         for (r = 0; r < nrows; r++) {
908 7538           unsigned char d = discs[r];
909 7538 100         if (d == 0xff) continue;
910 660 50         if (d >= wire_slots)
911 0           croak("decode JSON: path '%.*s' disc %u out of range "
912             "(wire_slots=%d)",
913             (int)path_lens[pi], paths[pi], d, wire_slots);
914 660           var_counts[d]++;
915             }
916              
917             /* Decode each wire-slot's column data. SharedVariant
918             * (kind=-1) is a String column on the wire, which our
919             * encoder never populates (0 rows here in practice). */
920             AV **var_avs;
921 641           Newxz(var_avs, wire_slots, AV*);
922 641           SAVEFREEPV(var_avs);
923             int slot;
924 1931 100         for (slot = 0; slot < wire_slots; slot++) {
925 1290           SSize_t nv_rows = var_counts[slot];
926 1290           AV *sub = newAV();
927 1290           var_avs[slot] = sub;
928 1290           sv_2mortal((SV*)sub);
929 1290 100         if (nv_rows > 0) av_extend(sub, nv_rows - 1);
930 1290           decode_dynamic_variant_slot(aTHX_ p, end, sub,
931             slot_to_kind_or_shared[slot], nv_rows, "JSON");
932             }
933              
934             /* Distribute values into per-row hashes. */
935             SSize_t *cursors;
936 641           Newxz(cursors, wire_slots, SSize_t);
937 641           SAVEFREEPV(cursors);
938 8179 100         for (r = 0; r < nrows; r++) {
939 7538           unsigned char d = discs[r];
940 7538 100         if (d == 0xff) continue;
941 660           SV **e = av_fetch(var_avs[d], cursors[d]++, 0);
942 660 50         if (!e) continue;
943 660           SV *row_rv = *av_fetch(av, r, 0);
944 660           HV *row_hv = (HV*)SvRV(row_rv);
945 660           SV *val = SvREFCNT_inc(*e);
946 660           hv_store(row_hv, paths[pi], (I32)path_lens[pi], val, 0);
947             }
948             }
949              
950             /* Trailing shared data: N UInt64 offsets, then if final
951             * offset > 0, offsets[N-1] key strings + value strings.
952             * Only the last offset determines downstream parsing; skip
953             * the rest with a single pointer bump. */
954 66           uint64_t last_offset = 0;
955 66 100         if (nrows > 0) {
956 64 50         DEC_NEED((STRLEN)(8 * nrows));
957 64           last_offset = dec_le64(*p + 8 * (nrows - 1));
958 64           *p += 8 * nrows;
959             }
960 66 50         if (last_offset > 0) {
961             /* Each string is a length varint + bytes (>= 1 byte
962             * total). A corrupted last_offset must not let us spin
963             * calling dec_lenpfx_string 2^32 times before each one
964             * croaks on the truncated buffer. */
965 0 0         if (last_offset > (uint64_t)(end - *p))
966 0           croak("decode JSON: shared-data last_offset=%lu "
967             "exceeds remaining buffer (%lu bytes)",
968             (unsigned long)last_offset,
969             (unsigned long)(end - *p));
970             uint64_t i;
971 0 0         for (i = 0; i < last_offset; i++) {
972             const char *s; STRLEN l;
973 0           dec_lenpfx_string(aTHX_ p, end, &s, &l);
974             }
975 0 0         for (i = 0; i < last_offset; i++) {
976             const char *s; STRLEN l;
977 0           dec_lenpfx_string(aTHX_ p, end, &s, &l);
978             }
979             }
980              
981             /* Unflatten dotted-path keys into nested hashes. Symmetric to the
982             * encoder which flattens nested hashrefs into dotted paths on the
983             * wire. Collision-safe: if an intermediate hop is already a
984             * non-HV (some path emitted a leaf at "a" while another emitted
985             * "a.b"), the dotted form is left intact at the top level. */
986 370 100         for (r = 0; r < nrows; r++) {
987 304           SV *row_rv = *av_fetch(av, r, 0);
988 304           HV *row_hv = (HV*)SvRV(row_rv);
989             /* Snapshot keys: we may mutate row_hv during iteration. */
990 304           AV *keys = (AV*)sv_2mortal((SV*)newAV());
991 304           hv_iterinit(row_hv);
992             HE *he;
993 999 100         while ((he = hv_iternext(row_hv))) {
994             I32 klen;
995 695           char *kstr = hv_iterkey(he, &klen);
996 695 100         if (memchr(kstr, '.', klen))
997 242           av_push(keys, newSVpvn(kstr, klen));
998             }
999 304           SSize_t nk = av_len(keys) + 1;
1000             SSize_t ki;
1001 546 100         for (ki = 0; ki < nk; ki++) {
1002 242           SV *ksv = *av_fetch(keys, ki, 0);
1003             STRLEN klen;
1004 242           const char *kstr = SvPV(ksv, klen);
1005             /* Walk dotted segments. */
1006 242           HV *cur = row_hv;
1007 242           STRLEN seg_start = 0, off;
1008 242           int conflict = 0;
1009 2861 100         for (off = 0; off <= klen; off++) {
1010 2619 100         if (off == klen || kstr[off] == '.') {
    100          
1011 563           const char *seg = kstr + seg_start;
1012 563           STRLEN slen = off - seg_start;
1013 563 100         if (off == klen) {
1014             /* Final segment: move value here. */
1015 242           SV **leaf = hv_fetch(row_hv, kstr,
1016             (I32)klen, 0);
1017 242 50         if (!leaf) { conflict = 1; break; }
1018 242           SV *val = SvREFCNT_inc(*leaf);
1019 242 50         if (!hv_store(cur, seg, (I32)slen,
1020             val, 0)) {
1021 0           SvREFCNT_dec(val);
1022 0           conflict = 1;
1023             }
1024             } else {
1025 321           SV **next = hv_fetch(cur, seg, (I32)slen,
1026             0);
1027             HV *next_hv;
1028 321 100         if (next && SvROK(*next)
    50          
1029 176 50         && SvTYPE(SvRV(*next)) == SVt_PVHV) {
1030 176           next_hv = (HV*)SvRV(*next);
1031 145 50         } else if (next) {
1032 0           conflict = 1;
1033 0           break;
1034             } else {
1035 145           next_hv = newHV();
1036 145           hv_store(cur, seg, (I32)slen,
1037             newRV_noinc((SV*)next_hv), 0);
1038             }
1039 321           cur = next_hv;
1040             }
1041 563           seg_start = off + 1;
1042             }
1043             }
1044 242 50         if (!conflict)
1045 242           hv_delete(row_hv, kstr, (I32)klen, G_DISCARD);
1046             }
1047             }
1048 66           break;
1049             }
1050              
1051 5           case T_DYNAMIC: {
1052             /* Dynamic V1/V2/V3 prefix + Variant data, no Object wrapper. */
1053 5 50         DEC_NEED(8);
1054 5           uint64_t dyn_ver = dec_le64(*p); *p += 8;
1055 5 50         if (dyn_ver != 1 && dyn_ver != 2 && dyn_ver != 4)
    0          
    0          
1056 0           croak("decode Dynamic: unsupported version %lu "
1057             "(known: 1, 2, 4); upgrade ClickHouse::Encoder",
1058             (unsigned long)dyn_ver);
1059 5 50         if (dyn_ver == 1)
1060 5           (void)dec_varint(aTHX_ p, end);
1061 5           UV ntypes = dec_varint(aTHX_ p, end);
1062             /* Same bound as the JSON path's per-path Dynamic prefix:
1063             * each type name takes at least 2 bytes on the wire. */
1064 5 50         if (ntypes > (UV)(end - *p))
1065 0           croak("decode Dynamic: ntypes=%lu exceeds remaining "
1066             "buffer (%lu bytes)",
1067             (unsigned long)ntypes, (unsigned long)(end - *p));
1068              
1069 5           int *kinds_in_order = NULL;
1070 5 100         if (ntypes > 0) {
1071 4 50         Newx(kinds_in_order, ntypes, int);
1072 4           SAVEFREEPV(kinds_in_order);
1073             }
1074             UV ti;
1075 17 100         for (ti = 0; ti < ntypes; ti++) {
1076             const char *ts; STRLEN tl;
1077 12           dec_lenpfx_string(aTHX_ p, end, &ts, &tl);
1078 12           int k = json_kind_from_type_name(ts, tl);
1079 12 50         if (k < 0)
1080 0           croak("decode Dynamic: unsupported variant type '%.*s' "
1081             "(supported: Bool, Float64, Int64, String, "
1082             "Array(...) of those)",
1083             (int)tl, ts);
1084 12           kinds_in_order[ti] = k;
1085             }
1086              
1087 5 50         DEC_NEED(8);
1088 5           uint64_t var_mode = dec_le64(*p); *p += 8;
1089 5 50         if (var_mode != 0)
1090 0           croak("decode Dynamic: only BASIC variant mode supported "
1091             "(got %lu)", (unsigned long)var_mode);
1092              
1093 5           int nv = (int)ntypes;
1094 5           int wire_slots = nv + 1;
1095 5           unsigned mask = 0;
1096             int i;
1097 17 100         for (i = 0; i < nv; i++) mask |= 1u << kinds_in_order[i];
1098              
1099             int slot_to_kind[JSON_LEX_SLOTS];
1100 5           (void)json_build_lex_table(mask, slot_to_kind);
1101              
1102 5 50         DEC_NEED((STRLEN)nrows);
1103             unsigned char *discs;
1104 5           Newx(discs, nrows, unsigned char);
1105 5           SAVEFREEPV(discs);
1106 22 100         for (r = 0; r < nrows; r++) discs[r] = *(*p)++;
1107              
1108             SSize_t *var_counts;
1109 5           Newxz(var_counts, wire_slots, SSize_t);
1110 5           SAVEFREEPV(var_counts);
1111 22 100         for (r = 0; r < nrows; r++) {
1112 17 100         if (discs[r] == 0xff) continue;
1113 12 50         if (discs[r] >= wire_slots)
1114 0           croak("decode Dynamic: disc %u out of range "
1115             "(wire_slots=%d)", discs[r], wire_slots);
1116 12           var_counts[discs[r]]++;
1117             }
1118              
1119             AV **var_avs;
1120 5           Newxz(var_avs, wire_slots, AV*);
1121 5           SAVEFREEPV(var_avs);
1122             int slot;
1123 22 100         for (slot = 0; slot < wire_slots; slot++) {
1124 17           SSize_t nv_rows = var_counts[slot];
1125 17           AV *sub = newAV();
1126 17           var_avs[slot] = sub;
1127 17           sv_2mortal((SV*)sub);
1128 17 100         if (nv_rows > 0) av_extend(sub, nv_rows - 1);
1129 17           decode_dynamic_variant_slot(aTHX_ p, end, sub,
1130             slot_to_kind[slot], nv_rows, "Dynamic");
1131             }
1132              
1133             SSize_t *cursors;
1134 5           Newxz(cursors, wire_slots, SSize_t);
1135 5           SAVEFREEPV(cursors);
1136 22 100         for (r = 0; r < nrows; r++) {
1137 17           unsigned char d = discs[r];
1138 17 100         if (d == 0xff) { av_store(av, r, newSV(0)); continue; }
1139 12           SV **e = av_fetch(var_avs[d], cursors[d]++, 0);
1140 12 50         av_store(av, r, e ? SvREFCNT_inc(*e) : newSV(0));
1141             }
1142 5           break;
1143             }
1144              
1145 0           default:
1146 0           croak("decode: unhandled type code %d", t->code);
1147             }
1148 1392           return newRV_noinc((SV *)av);
1149             }
1150              
1151             #undef DEC_NEED
1152             #undef DEC_SCALAR_LOOP