| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
#define PERL_NO_GET_CONTEXT |
|
2
|
|
|
|
|
|
|
#include "EXTERN.h" |
|
3
|
|
|
|
|
|
|
#include "perl.h" |
|
4
|
|
|
|
|
|
|
#include "XSUB.h" |
|
5
|
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
#include |
|
7
|
|
|
|
|
|
|
#include |
|
8
|
|
|
|
|
|
|
#include |
|
9
|
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
#include "types.h" |
|
11
|
|
|
|
|
|
|
|
|
12
|
9871
|
|
|
|
|
|
/* Recursively release a TypeInfo together with everything it owns: the
 * inner type, the tuple element types and their optional names, the enum
 * entry table with its lookup hash, and the two variant index maps.
 * A NULL pointer is accepted and ignored. */
void free_typeinfo(pTHX_ TypeInfo *t) {
    int k;

    if (t == NULL)
        return;

    /* The recursive call ignores NULL, so no guard is needed here. */
    free_typeinfo(aTHX_ t->inner);

    if (t->tuple != NULL) {
        for (k = 0; k < t->tuple_len; ++k)
            free_typeinfo(aTHX_ t->tuple[k]);
        Safefree(t->tuple);
    }

    /* tuple_names is sized by tuple_len; individual slots may be NULL. */
    if (t->tuple_names != NULL) {
        for (k = 0; k < t->tuple_len; ++k) {
            if (t->tuple_names[k] != NULL)
                Safefree(t->tuple_names[k]);
        }
        Safefree(t->tuple_names);
    }

    if (t->enum_entries != NULL) {
        for (k = 0; k < t->enum_count; ++k)
            Safefree(t->enum_entries[k].name);
        Safefree(t->enum_entries);
    }
    if (t->enum_lookup != NULL)
        SvREFCNT_dec((SV *)t->enum_lookup);

    if (t->variant_decl_to_wire != NULL)
        Safefree(t->variant_decl_to_wire);
    if (t->variant_wire_to_decl != NULL)
        Safefree(t->variant_wire_to_decl);

    Safefree(t);
}
|
36
|
|
|
|
|
|
|
|
|
37
|
1020
|
|
|
|
|
|
/* Parse the body of an Enum8(...)/Enum16(...) declaration -- a list of
 * 'name' = value pairs separated by commas and/or spaces -- into
 * t->enum_entries / t->enum_count and the t->enum_lookup name->value hash.
 *
 *   s, len  text between the parentheses (not required to be NUL-terminated)
 *   code    T_ENUM8 selects the range -128..127; any other value is treated
 *           as Enum16 (-32768..32767)
 *
 * Croaks on malformed input or on a value outside the range. Entries stored
 * before a croak stay attached to t and are released when t is passed to
 * free_typeinfo. */
static void parse_enum_entries(pTHX_ TypeInfo *t, const char *s, STRLEN len, int code) {
    /* Largest accumulator value that can still absorb one more decimal digit
     * without exceeding 2147483647, the smallest LONG_MAX the C standard
     * permits. Anything larger is far outside both enum ranges anyway. */
    const long accum_limit = 214748363L;
    int cap = 0;
    long min_val = (code == T_ENUM8) ? -128 : -32768;
    long max_val = (code == T_ENUM8) ? 127 : 32767;
    t->enum_entries = NULL;
    t->enum_count = 0;
    t->enum_lookup = newHV();

    STRLEN i = 0;
    while (i < len) {
        /* Skip separators between entries. */
        while (i < len && (s[i] == ' ' || s[i] == ',')) i++;
        if (i >= len) break;

        if (s[i] != '\'')
            croak("Invalid enum format: expected single quote at position %d", (int)i);
        i++;

        /* Scan the name and unescape backslash-escapes (\\, \') so the
         * stored name matches the unescaped form ClickHouse emits in
         * describe table output and that the user passes to encode(). */
        STRLEN name_start = i;
        STRLEN name_raw_len = 0;  /* raw bytes consumed (incl. backslashes) */
        while (i < len && s[i] != '\'') {
            if (s[i] == '\\' && i + 1 < len) { i++; name_raw_len++; }
            i++; name_raw_len++;
        }
        if (i >= len)
            croak("Invalid enum format: unterminated quote");

        /* Build the unescaped name into a freshly-allocated buffer. The
         * unescaped length is at most name_raw_len; allocate that bound. */
        char *name_buf;
        Newx(name_buf, name_raw_len + 1, char);
        STRLEN name_len = 0;
        STRLEN j = name_start;
        while (j < i) {
            if (s[j] == '\\' && j + 1 < i) j++;
            name_buf[name_len++] = s[j++];
        }
        name_buf[name_len] = 0;
        if (name_len == 0) {
            Safefree(name_buf);
            croak("Invalid enum format: empty name at position %d", (int)name_start);
        }
        i++;  /* closing quote */

        while (i < len && (s[i] == ' ' || s[i] == '=')) i++;

        int neg = 0;
        if (i < len && s[i] == '-') { neg = 1; i++; }
        if (i >= len || s[i] < '0' || s[i] > '9') {
            Safefree(name_buf);
            croak("Invalid enum format: expected digit at position %d", (int)i);
        }

        /* Accumulate the magnitude. Once it can no longer take another digit
         * safely, keep consuming digits but stop multiplying: signed overflow
         * is undefined behaviour and the value is out of range regardless. */
        long val = 0;
        int too_long = 0;
        STRLEN digits_start = i;
        while (i < len && s[i] >= '0' && s[i] <= '9') {
            if (val > accum_limit)
                too_long = 1;
            else
                val = val * 10 + (s[i] - '0');
            i++;
        }
        if (too_long) {
            /* The number may not fit in a long, so report its literal text.
             * Leading zeros are dropped to match what %ld would print, and
             * the printed length is capped to keep the message bounded. */
            STRLEN digits_len;
            while (digits_start + 1 < i && s[digits_start] == '0') digits_start++;
            digits_len = i - digits_start;
            if (digits_len > 64) digits_len = 64;
            Safefree(name_buf);
            croak("Enum value %s%.*s out of range for %s",
                  neg ? "-" : "", (int)digits_len, s + digits_start,
                  code == T_ENUM8 ? "Enum8" : "Enum16");
        }
        if (neg) val = -val;
        if (val < min_val || val > max_val) {
            Safefree(name_buf);
            croak("Enum value %ld out of range for %s",
                  val, code == T_ENUM8 ? "Enum8" : "Enum16");
        }

        /* Grow the entry table geometrically, starting at 8 slots. */
        if (t->enum_count >= cap) {
            cap = cap ? cap * 2 : 8;
            Renew(t->enum_entries, cap, EnumEntry);
        }
        t->enum_entries[t->enum_count].name = name_buf;
        t->enum_entries[t->enum_count].name_len = name_len;
        t->enum_entries[t->enum_count].value = (int16_t)val;
        t->enum_count++;

        /* hv_store takes ownership of the SV only when it succeeds. */
        SV *val_sv = newSViv(val);
        if (!hv_store(t->enum_lookup, name_buf, name_len, val_sv, 0))
            SvREFCNT_dec(val_sv);
    }
}
|
115
|
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
/* Heap-allocated cleanup slot for a TypeInfo*. Disarmed by setting *slot=NULL. */ |
|
117
|
9892
|
|
|
|
|
|
static void cleanup_typeinfo_slot(pTHX_ void *p) { |
|
118
|
9892
|
|
|
|
|
|
TypeInfo **slot = (TypeInfo **)p; |
|
119
|
9892
|
100
|
|
|
|
|
if (*slot) free_typeinfo(aTHX_ *slot); |
|
120
|
9892
|
|
|
|
|
|
Safefree(slot); |
|
121
|
9892
|
|
|
|
|
|
} |
|
122
|
|
|
|
|
|
|
|
|
123
|
|
|
|
|
|
|
/* Cleanup for a partially-built Tuple types array. The struct owns the array |
|
124
|
|
|
|
|
|
|
* directly so the destructor never dereferences stack memory after a longjmp |
|
125
|
|
|
|
|
|
|
* out of parse_tuple_types. Disarm by setting slot->types = NULL. */ |
|
126
|
|
|
|
|
|
|
typedef struct { |
|
127
|
|
|
|
|
|
|
TypeInfo **types; |
|
128
|
|
|
|
|
|
|
int count; |
|
129
|
|
|
|
|
|
|
} TupleSlot; |
|
130
|
|
|
|
|
|
|
|
|
131
|
1097
|
|
|
|
|
|
/* Destructor for a TupleSlot. While the slot is still armed (types != NULL)
 * it frees the first `count` entries that are non-NULL and then the array;
 * the slot record itself is always freed. */
static void cleanup_tuple_slot(pTHX_ void *p) {
    TupleSlot *slot = (TupleSlot *)p;
    TypeInfo **pending = slot->types;

    if (pending != NULL) {
        int k = 0;
        while (k < slot->count) {
            if (pending[k] != NULL)
                free_typeinfo(aTHX_ pending[k]);
            k++;
        }
        Safefree(pending);
    }
    Safefree(slot);
}
|
141
|
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
/* Bound of one entry in a comma-separated type list, after outer-WS strip |
|
143
|
|
|
|
|
|
|
* and optional "field-name" prefix removal. Both Tuple/Map/Variant |
|
144
|
|
|
|
|
|
|
* parsing and Variant alphabetical sorting need these post-strip bounds. */ |
|
145
|
|
|
|
|
|
|
typedef struct { |
|
146
|
|
|
|
|
|
|
STRLEN start; /* offset of the trimmed type expression */ |
|
147
|
|
|
|
|
|
|
STRLEN len; /* length of the trimmed type expression */ |
|
148
|
|
|
|
|
|
|
STRLEN name_start; /* offset of the field-name prefix (or 0 if absent) */ |
|
149
|
|
|
|
|
|
|
STRLEN name_len; /* length of the field-name prefix (0 = no name) */ |
|
150
|
|
|
|
|
|
|
} TypeBound; |
|
151
|
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
/* Split a comma-separated type list at depth-0 commas, trim outer |
|
153
|
|
|
|
|
|
|
* whitespace, strip any leading "name" field-name prefix (Tuple |
|
154
|
|
|
|
|
|
|
* named-element form). When a field-name is found, name_start/name_len |
|
155
|
|
|
|
|
|
|
* record it so callers (e.g. T_TUPLE) can keep the names; when absent, |
|
156
|
|
|
|
|
|
|
* name_len is 0. bounds must have at least len+1 slots. Returns the |
|
157
|
|
|
|
|
|
|
* count of non-empty entries. */ |
|
158
|
1063
|
|
|
|
|
|
static int split_type_list(const char *s, STRLEN len, TypeBound *bounds) { |
|
159
|
1063
|
|
|
|
|
|
int count = 0; |
|
160
|
1063
|
|
|
|
|
|
int depth = 0; |
|
161
|
1063
|
|
|
|
|
|
STRLEN start = 0, i; |
|
162
|
|
|
|
|
|
|
#define IS_WS(c) ((c)==' '||(c)=='\t'||(c)=='\n'||(c)=='\r') |
|
163
|
26119
|
100
|
|
|
|
|
for (i = 0; i <= len; i++) { |
|
164
|
25056
|
100
|
|
|
|
|
char c = (i < len) ? s[i] : ','; |
|
165
|
25056
|
100
|
|
|
|
|
if (c == '(') depth++; |
|
166
|
24046
|
100
|
|
|
|
|
else if (c == ')') depth--; |
|
167
|
23036
|
100
|
|
|
|
|
else if ((c == ',' && depth == 0) || i == len) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
168
|
2127
|
|
|
|
|
|
STRLEN tstart = start, tend = i; |
|
169
|
2127
|
|
|
|
|
|
STRLEN nstart = 0, nlen = 0; |
|
170
|
3191
|
100
|
|
|
|
|
while (tstart < tend && IS_WS(s[tstart])) tstart++; |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
171
|
2127
|
100
|
|
|
|
|
while (tend > tstart && IS_WS(s[tend-1])) tend--; |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
172
|
2127
|
100
|
|
|
|
|
if (tend > tstart) { |
|
173
|
2126
|
|
|
|
|
|
STRLEN id_end = tstart; |
|
174
|
2126
|
50
|
|
|
|
|
if (id_end < tend |
|
175
|
2126
|
50
|
|
|
|
|
&& ((s[id_end] >= 'A' && s[id_end] <= 'Z') |
|
|
|
100
|
|
|
|
|
|
|
176
|
12
|
50
|
|
|
|
|
|| (s[id_end] >= 'a' && s[id_end] <= 'z') |
|
|
|
50
|
|
|
|
|
|
|
177
|
0
|
0
|
|
|
|
|
|| s[id_end] == '_')) { |
|
178
|
2126
|
|
|
|
|
|
id_end++; |
|
179
|
2126
|
|
|
|
|
|
while (id_end < tend |
|
180
|
13698
|
100
|
|
|
|
|
&& ((s[id_end] >= 'A' && s[id_end] <= 'Z') |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
181
|
12561
|
100
|
|
|
|
|
|| (s[id_end] >= 'a' && s[id_end] <= 'z') |
|
|
|
50
|
|
|
|
|
|
|
182
|
3147
|
100
|
|
|
|
|
|| (s[id_end] >= '0' && s[id_end] <= '9') |
|
|
|
50
|
|
|
|
|
|
|
183
|
1019
|
50
|
|
|
|
|
|| s[id_end] == '_')) |
|
184
|
11572
|
|
|
|
|
|
id_end++; |
|
185
|
2126
|
100
|
|
|
|
|
if (id_end < tend && IS_WS(s[id_end])) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
186
|
12
|
|
|
|
|
|
nstart = tstart; |
|
187
|
12
|
|
|
|
|
|
nlen = id_end - tstart; |
|
188
|
12
|
|
|
|
|
|
STRLEN ts = id_end; |
|
189
|
24
|
50
|
|
|
|
|
while (ts < tend && IS_WS(s[ts])) ts++; |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
190
|
12
|
50
|
|
|
|
|
if (ts < tend) tstart = ts; |
|
191
|
|
|
|
|
|
|
} |
|
192
|
|
|
|
|
|
|
} |
|
193
|
2126
|
|
|
|
|
|
bounds[count].start = tstart; |
|
194
|
2126
|
|
|
|
|
|
bounds[count].len = tend - tstart; |
|
195
|
2126
|
|
|
|
|
|
bounds[count].name_start = nstart; |
|
196
|
2126
|
|
|
|
|
|
bounds[count].name_len = nlen; |
|
197
|
2126
|
|
|
|
|
|
count++; |
|
198
|
|
|
|
|
|
|
} |
|
199
|
2127
|
|
|
|
|
|
start = i + 1; |
|
200
|
|
|
|
|
|
|
} |
|
201
|
|
|
|
|
|
|
} |
|
202
|
|
|
|
|
|
|
#undef IS_WS |
|
203
|
1063
|
|
|
|
|
|
return count; |
|
204
|
|
|
|
|
|
|
} |
|
205
|
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
/* parse_tuple_types_with_bounds: caller already split the list and wants |
|
207
|
|
|
|
|
|
|
* to reuse the bounds (e.g. Variant alphabetical sort). For convenience, |
|
208
|
|
|
|
|
|
|
* parse_tuple_types is a thin wrapper that splits internally. */ |
|
209
|
1097
|
|
|
|
|
|
static TypeInfo** parse_tuple_types_with_bounds(pTHX_ const char *s, |
|
210
|
|
|
|
|
|
|
TypeBound *bounds, |
|
211
|
|
|
|
|
|
|
int n) { |
|
212
|
|
|
|
|
|
|
TupleSlot *slot; |
|
213
|
1097
|
|
|
|
|
|
Newxz(slot, 1, TupleSlot); |
|
214
|
1097
|
|
|
|
|
|
SAVEDESTRUCTOR_X(cleanup_tuple_slot, slot); |
|
215
|
1097
|
50
|
|
|
|
|
if (n > 0) Newxz(slot->types, n, TypeInfo*); |
|
216
|
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
int i; |
|
218
|
3268
|
100
|
|
|
|
|
for (i = 0; i < n; i++) { |
|
219
|
2171
|
|
|
|
|
|
slot->types[i] = parse_type(aTHX_ s + bounds[i].start, bounds[i].len); |
|
220
|
2171
|
|
|
|
|
|
slot->count = i + 1; |
|
221
|
|
|
|
|
|
|
} |
|
222
|
|
|
|
|
|
|
{ |
|
223
|
1097
|
|
|
|
|
|
TypeInfo **result = slot->types; |
|
224
|
1097
|
|
|
|
|
|
slot->types = NULL; /* Disarm: caller now owns the array. */ |
|
225
|
1097
|
|
|
|
|
|
return result; |
|
226
|
|
|
|
|
|
|
} |
|
227
|
|
|
|
|
|
|
} |
|
228
|
|
|
|
|
|
|
|
|
229
|
24
|
|
|
|
|
|
/* Split the comma-separated type list s[0..len) and parse every entry.
 * The entry count is written to *count and the array produced by
 * parse_tuple_types_with_bounds is returned. The scratch bounds buffer
 * (len+1 slots, as split_type_list requires) is handed to SAVEFREEPV. */
static TypeInfo** parse_tuple_types(pTHX_ const char *s, STRLEN len, int *count) {
    TypeBound *scratch;
    int n;

    Newx(scratch, len + 1, TypeBound);
    SAVEFREEPV(scratch);

    n = split_type_list(s, len, scratch);
    *count = n;
    return parse_tuple_types_with_bounds(aTHX_ s, scratch, n);
}
|
236
|
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
/* Return 1 if this type can be used as a JSON typed path. CH writes |
|
238
|
|
|
|
|
|
|
* typed paths as a regular column; types whose serialization has a |
|
239
|
|
|
|
|
|
|
* non-empty state-prefix stream (Variant: mode byte; LC: version + |
|
240
|
|
|
|
|
|
|
* flags + dict; JSON/Dynamic: their own prefix) would interleave |
|
241
|
|
|
|
|
|
|
* incorrectly with other paths' prefixes in the Object prefix |
|
242
|
|
|
|
|
|
|
* section. Composites recursively check. */ |
|
243
|
63
|
|
|
|
|
|
static int type_can_be_typed_path(TypeInfo *t) { |
|
244
|
63
|
|
|
|
|
|
switch (t->code) { |
|
245
|
6
|
|
|
|
|
|
case T_VARIANT: |
|
246
|
|
|
|
|
|
|
case T_LOWCARDINALITY: |
|
247
|
|
|
|
|
|
|
case T_JSON: |
|
248
|
|
|
|
|
|
|
case T_DYNAMIC: |
|
249
|
6
|
|
|
|
|
|
return 0; |
|
250
|
6
|
|
|
|
|
|
case T_ARRAY: |
|
251
|
|
|
|
|
|
|
case T_NULLABLE: |
|
252
|
6
|
|
|
|
|
|
return type_can_be_typed_path(t->inner); |
|
253
|
6
|
|
|
|
|
|
case T_TUPLE: |
|
254
|
|
|
|
|
|
|
case T_MAP: { |
|
255
|
|
|
|
|
|
|
int i; |
|
256
|
18
|
100
|
|
|
|
|
for (i = 0; i < t->tuple_len; i++) |
|
257
|
12
|
50
|
|
|
|
|
if (!type_can_be_typed_path(t->tuple[i])) |
|
258
|
0
|
|
|
|
|
|
return 0; |
|
259
|
6
|
|
|
|
|
|
return 1; |
|
260
|
|
|
|
|
|
|
} |
|
261
|
45
|
|
|
|
|
|
default: |
|
262
|
45
|
|
|
|
|
|
return 1; |
|
263
|
|
|
|
|
|
|
} |
|
264
|
|
|
|
|
|
|
} |
|
265
|
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
/* Parse "name Type, name Type, ..." inside JSON(...). Names may include |
|
267
|
|
|
|
|
|
|
* dots (CH typed paths are dotted, like JSON(user.id UInt64)); type is |
|
268
|
|
|
|
|
|
|
* a full type expression. Stores parsed entries on t in name-sorted |
|
269
|
|
|
|
|
|
|
* order via tuple_names + tuple. Empty body (JSON()) is a no-op. */ |
|
270
|
41
|
|
|
|
|
|
static void parse_json_typed_paths(pTHX_ TypeInfo *t, |
|
271
|
|
|
|
|
|
|
const char *body, STRLEN body_len) { |
|
272
|
|
|
|
|
|
|
TypeBound *bounds; |
|
273
|
41
|
50
|
|
|
|
|
Newxz(bounds, body_len + 1, TypeBound); |
|
274
|
41
|
|
|
|
|
|
SAVEFREEPV(bounds); |
|
275
|
|
|
|
|
|
|
|
|
276
|
41
|
|
|
|
|
|
int idx = 0; |
|
277
|
41
|
|
|
|
|
|
int depth = 0; |
|
278
|
41
|
|
|
|
|
|
STRLEN start = 0, i; |
|
279
|
811
|
100
|
|
|
|
|
for (i = 0; i <= body_len; i++) { |
|
280
|
773
|
100
|
|
|
|
|
char c = (i < body_len) ? body[i] : ','; |
|
281
|
773
|
100
|
|
|
|
|
if (c == '(') depth++; |
|
282
|
757
|
100
|
|
|
|
|
else if (c == ')') depth--; |
|
283
|
741
|
100
|
|
|
|
|
else if ((c == ',' && depth == 0) || i == body_len) { |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
284
|
52
|
|
|
|
|
|
STRLEN ts = start, te = i; |
|
285
|
|
|
|
|
|
|
#define J_WS(c2) ((c2)==' '||(c2)=='\t'||(c2)=='\n'||(c2)=='\r') |
|
286
|
63
|
100
|
|
|
|
|
while (ts < te && J_WS(body[ts])) ts++; |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
287
|
52
|
100
|
|
|
|
|
while (te > ts && J_WS(body[te-1])) te--; |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
288
|
52
|
100
|
|
|
|
|
if (te > ts) { |
|
289
|
50
|
|
|
|
|
|
STRLEN id = ts; |
|
290
|
50
|
50
|
|
|
|
|
if (body[id] == '_' |
|
291
|
50
|
50
|
|
|
|
|
|| (body[id] >= 'A' && body[id] <= 'Z') |
|
|
|
50
|
|
|
|
|
|
|
292
|
50
|
50
|
|
|
|
|
|| (body[id] >= 'a' && body[id] <= 'z')) { |
|
|
|
50
|
|
|
|
|
|
|
293
|
50
|
|
|
|
|
|
id++; |
|
294
|
50
|
|
|
|
|
|
while (id < te |
|
295
|
186
|
100
|
|
|
|
|
&& (body[id] == '_' || body[id] == '.' |
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
296
|
178
|
100
|
|
|
|
|
|| (body[id] >= 'A' && body[id] <= 'Z') |
|
|
|
50
|
|
|
|
|
|
|
297
|
178
|
100
|
|
|
|
|
|| (body[id] >= 'a' && body[id] <= 'z') |
|
|
|
50
|
|
|
|
|
|
|
298
|
49
|
50
|
|
|
|
|
|| (body[id] >= '0' && body[id] <= '9'))) |
|
|
|
0
|
|
|
|
|
|
|
299
|
136
|
|
|
|
|
|
id++; |
|
300
|
|
|
|
|
|
|
} |
|
301
|
50
|
50
|
|
|
|
|
if (id == ts) |
|
302
|
0
|
|
|
|
|
|
croak("JSON(...): missing path name in '%.*s'", |
|
303
|
|
|
|
|
|
|
(int)(te - ts), body + ts); |
|
304
|
|
|
|
|
|
|
/* Reject trailing dot and consecutive dots in path names: |
|
305
|
|
|
|
|
|
|
* "a.", "a..b", ".a" (the leading dot is already caught |
|
306
|
|
|
|
|
|
|
* by the start-char rule). CH itself allows only well- |
|
307
|
|
|
|
|
|
|
* formed dotted identifiers; mirror that. */ |
|
308
|
50
|
100
|
|
|
|
|
if (body[id - 1] == '.') |
|
309
|
1
|
|
|
|
|
|
croak("JSON(...): path name must not end with '.' " |
|
310
|
|
|
|
|
|
|
"in '%.*s'", |
|
311
|
|
|
|
|
|
|
(int)(id - ts), body + ts); |
|
312
|
|
|
|
|
|
|
STRLEN dk; |
|
313
|
182
|
100
|
|
|
|
|
for (dk = ts + 1; dk < id; dk++) { |
|
314
|
134
|
100
|
|
|
|
|
if (body[dk] == '.' && body[dk - 1] == '.') |
|
|
|
100
|
|
|
|
|
|
|
315
|
1
|
|
|
|
|
|
croak("JSON(...): path name must not contain " |
|
316
|
|
|
|
|
|
|
"consecutive dots in '%.*s'", |
|
317
|
|
|
|
|
|
|
(int)(id - ts), body + ts); |
|
318
|
|
|
|
|
|
|
} |
|
319
|
48
|
|
|
|
|
|
STRLEN ws = id; |
|
320
|
95
|
100
|
|
|
|
|
while (ws < te && J_WS(body[ws])) ws++; |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
321
|
48
|
100
|
|
|
|
|
if (ws == id || ws == te) |
|
|
|
50
|
|
|
|
|
|
|
322
|
1
|
|
|
|
|
|
croak("JSON(...): expected 'name Type' but got '%.*s'", |
|
323
|
|
|
|
|
|
|
(int)(te - ts), body + ts); |
|
324
|
47
|
|
|
|
|
|
bounds[idx].name_start = ts; |
|
325
|
47
|
|
|
|
|
|
bounds[idx].name_len = id - ts; |
|
326
|
47
|
|
|
|
|
|
bounds[idx].start = ws; |
|
327
|
47
|
|
|
|
|
|
bounds[idx].len = te - ws; |
|
328
|
47
|
|
|
|
|
|
idx++; |
|
329
|
|
|
|
|
|
|
} |
|
330
|
|
|
|
|
|
|
#undef J_WS |
|
331
|
49
|
|
|
|
|
|
start = i + 1; |
|
332
|
|
|
|
|
|
|
} |
|
333
|
|
|
|
|
|
|
} |
|
334
|
38
|
100
|
|
|
|
|
if (idx == 0) return; |
|
335
|
36
|
|
|
|
|
|
int n = idx; |
|
336
|
|
|
|
|
|
|
|
|
337
|
|
|
|
|
|
|
int j, ii; |
|
338
|
47
|
100
|
|
|
|
|
for (ii = 1; ii < n; ii++) { |
|
339
|
11
|
|
|
|
|
|
TypeBound key = bounds[ii]; |
|
340
|
11
|
|
|
|
|
|
j = ii - 1; |
|
341
|
19
|
100
|
|
|
|
|
while (j >= 0) { |
|
342
|
11
|
|
|
|
|
|
STRLEN m = bounds[j].name_len < key.name_len |
|
343
|
|
|
|
|
|
|
? bounds[j].name_len : key.name_len; |
|
344
|
11
|
|
|
|
|
|
int cmp = memcmp(body + bounds[j].name_start, |
|
345
|
11
|
|
|
|
|
|
body + key.name_start, m); |
|
346
|
11
|
100
|
|
|
|
|
if (cmp == 0) |
|
347
|
1
|
|
|
|
|
|
cmp = (int)bounds[j].name_len - (int)key.name_len; |
|
348
|
11
|
100
|
|
|
|
|
if (cmp <= 0) break; |
|
349
|
8
|
|
|
|
|
|
bounds[j+1] = bounds[j]; |
|
350
|
8
|
|
|
|
|
|
j--; |
|
351
|
|
|
|
|
|
|
} |
|
352
|
11
|
|
|
|
|
|
bounds[j+1] = key; |
|
353
|
|
|
|
|
|
|
} |
|
354
|
|
|
|
|
|
|
|
|
355
|
46
|
100
|
|
|
|
|
for (ii = 1; ii < n; ii++) { |
|
356
|
11
|
100
|
|
|
|
|
if (bounds[ii].name_len == bounds[ii-1].name_len |
|
357
|
5
|
|
|
|
|
|
&& memcmp(body + bounds[ii].name_start, |
|
358
|
5
|
|
|
|
|
|
body + bounds[ii-1].name_start, |
|
359
|
5
|
100
|
|
|
|
|
bounds[ii].name_len) == 0) |
|
360
|
1
|
|
|
|
|
|
croak("JSON(...): duplicate typed path name '%.*s'", |
|
361
|
|
|
|
|
|
|
(int)bounds[ii].name_len, body + bounds[ii].name_start); |
|
362
|
|
|
|
|
|
|
} |
|
363
|
|
|
|
|
|
|
|
|
364
|
35
|
|
|
|
|
|
t->tuple_len = n; |
|
365
|
35
|
|
|
|
|
|
Newxz(t->tuple_names, n, char*); |
|
366
|
80
|
100
|
|
|
|
|
for (ii = 0; ii < n; ii++) { |
|
367
|
45
|
|
|
|
|
|
Newx(t->tuple_names[ii], bounds[ii].name_len + 1, char); |
|
368
|
45
|
|
|
|
|
|
memcpy(t->tuple_names[ii], body + bounds[ii].name_start, |
|
369
|
45
|
|
|
|
|
|
bounds[ii].name_len); |
|
370
|
45
|
|
|
|
|
|
t->tuple_names[ii][bounds[ii].name_len] = '\0'; |
|
371
|
|
|
|
|
|
|
} |
|
372
|
35
|
|
|
|
|
|
t->tuple = parse_tuple_types_with_bounds(aTHX_ body, bounds, n); |
|
373
|
|
|
|
|
|
|
|
|
374
|
74
|
100
|
|
|
|
|
for (ii = 0; ii < n; ii++) { |
|
375
|
45
|
100
|
|
|
|
|
if (!type_can_be_typed_path(t->tuple[ii])) |
|
376
|
6
|
|
|
|
|
|
croak("JSON(%s ...): typed path inner type cannot include " |
|
377
|
|
|
|
|
|
|
"Variant, LowCardinality, JSON, or Dynamic (those have " |
|
378
|
|
|
|
|
|
|
"wire prefixes that would interleave incorrectly)", |
|
379
|
|
|
|
|
|
|
t->tuple_names[ii]); |
|
380
|
|
|
|
|
|
|
} |
|
381
|
|
|
|
|
|
|
} |
|
382
|
|
|
|
|
|
|
|
|
383
|
9892
|
|
|
|
|
|
TypeInfo* parse_type(pTHX_ const char *type, STRLEN len) { |
|
384
|
|
|
|
|
|
|
TypeInfo *t; |
|
385
|
|
|
|
|
|
|
/* Slot lives on the heap so its address is stable across the XSUB lifetime. */ |
|
386
|
|
|
|
|
|
|
TypeInfo **slot; |
|
387
|
9892
|
|
|
|
|
|
Newx(slot, 1, TypeInfo*); |
|
388
|
9892
|
|
|
|
|
|
*slot = NULL; |
|
389
|
9892
|
|
|
|
|
|
SAVEDESTRUCTOR_X(cleanup_typeinfo_slot, slot); |
|
390
|
9892
|
|
|
|
|
|
Newxz(t, 1, TypeInfo); |
|
391
|
9892
|
|
|
|
|
|
*slot = t; |
|
392
|
|
|
|
|
|
|
|
|
393
|
9892
|
100
|
|
|
|
|
if (len == 4 && strncmp(type, "Int8", 4) == 0) { |
|
|
|
100
|
|
|
|
|
|
|
394
|
12
|
|
|
|
|
|
t->code = T_INT8; |
|
395
|
9880
|
100
|
|
|
|
|
} else if (len == 5 && strncmp(type, "Int16", 5) == 0) { |
|
|
|
100
|
|
|
|
|
|
|
396
|
10
|
|
|
|
|
|
t->code = T_INT16; |
|
397
|
9870
|
100
|
|
|
|
|
} else if (len == 5 && strncmp(type, "Int32", 5) == 0) { |
|
|
|
100
|
|
|
|
|
|
|
398
|
1484
|
|
|
|
|
|
t->code = T_INT32; |
|
399
|
8386
|
100
|
|
|
|
|
} else if (len == 5 && strncmp(type, "Int64", 5) == 0) { |
|
|
|
100
|
|
|
|
|
|
|
400
|
13
|
|
|
|
|
|
t->code = T_INT64; |
|
401
|
8373
|
100
|
|
|
|
|
} else if (len == 5 && strncmp(type, "UInt8", 5) == 0) { |
|
|
|
100
|
|
|
|
|
|
|
402
|
27
|
|
|
|
|
|
t->code = T_UINT8; |
|
403
|
8346
|
100
|
|
|
|
|
} else if (len == 6 && strncmp(type, "UInt16", 6) == 0) { |
|
|
|
100
|
|
|
|
|
|
|
404
|
4
|
|
|
|
|
|
t->code = T_UINT16; |
|
405
|
8342
|
100
|
|
|
|
|
} else if (len == 6 && strncmp(type, "UInt32", 6) == 0) { |
|
|
|
100
|
|
|
|
|
|
|
406
|
129
|
|
|
|
|
|
t->code = T_UINT32; |
|
407
|
8213
|
100
|
|
|
|
|
} else if (len == 6 && strncmp(type, "UInt64", 6) == 0) { |
|
|
|
100
|
|
|
|
|
|
|
408
|
40
|
|
|
|
|
|
t->code = T_UINT64; |
|
409
|
8173
|
100
|
|
|
|
|
} else if (len == 7 && strncmp(type, "Float32", 7) == 0) { |
|
|
|
100
|
|
|
|
|
|
|
410
|
11
|
|
|
|
|
|
t->code = T_FLOAT32; |
|
411
|
8162
|
100
|
|
|
|
|
} else if (len == 7 && strncmp(type, "Float64", 7) == 0) { |
|
|
|
100
|
|
|
|
|
|
|
412
|
354
|
|
|
|
|
|
t->code = T_FLOAT64; |
|
413
|
7808
|
100
|
|
|
|
|
} else if (len == 8 && strncmp(type, "BFloat16", 8) == 0) { |
|
|
|
100
|
|
|
|
|
|
|
414
|
9
|
|
|
|
|
|
t->code = T_BFLOAT16; |
|
415
|
7799
|
100
|
|
|
|
|
} else if (len == 6 && strncmp(type, "String", 6) == 0) { |
|
|
|
100
|
|
|
|
|
|
|
416
|
1593
|
|
|
|
|
|
t->code = T_STRING; |
|
417
|
6206
|
100
|
|
|
|
|
} else if (len > 12 && strncmp(type, "FixedString(", 12) == 0) { |
|
|
|
100
|
|
|
|
|
|
|
418
|
22
|
|
|
|
|
|
t->code = T_FIXEDSTRING; |
|
419
|
22
|
|
|
|
|
|
t->param = atoi(type + 12); |
|
420
|
22
|
100
|
|
|
|
|
if (t->param <= 0) croak("FixedString needs positive length"); |
|
421
|
6184
|
100
|
|
|
|
|
} else if (len > 6 && strncmp(type, "Array(", 6) == 0) { |
|
|
|
100
|
|
|
|
|
|
|
422
|
1069
|
|
|
|
|
|
t->code = T_ARRAY; |
|
423
|
1069
|
|
|
|
|
|
t->inner = parse_type(aTHX_ type + 6, len - 7); |
|
424
|
6136
|
100
|
|
|
|
|
} else if (len > 6 && strncmp(type, "Tuple(", 6) == 0) { |
|
|
|
100
|
|
|
|
|
|
|
425
|
1021
|
|
|
|
|
|
t->code = T_TUPLE; |
|
426
|
1021
|
|
|
|
|
|
const char *body = type + 6; |
|
427
|
1021
|
|
|
|
|
|
STRLEN body_len = len - 7; |
|
428
|
|
|
|
|
|
|
TypeBound *bounds; |
|
429
|
1021
|
50
|
|
|
|
|
Newx(bounds, body_len + 1, TypeBound); |
|
430
|
1021
|
|
|
|
|
|
SAVEFREEPV(bounds); |
|
431
|
1021
|
|
|
|
|
|
t->tuple_len = split_type_list(body, body_len, bounds); |
|
432
|
1021
|
|
|
|
|
|
t->tuple = parse_tuple_types_with_bounds(aTHX_ body, bounds, t->tuple_len); |
|
433
|
|
|
|
|
|
|
/* If at least one element carries a field-name, capture all of |
|
434
|
|
|
|
|
|
|
* them so encode_column can accept hashrefs for this tuple. A |
|
435
|
|
|
|
|
|
|
* mix of named and unnamed elements isn't legal in ClickHouse; |
|
436
|
|
|
|
|
|
|
* we accept any element having a name as "named tuple". */ |
|
437
|
1021
|
|
|
|
|
|
int has_names = 0; |
|
438
|
|
|
|
|
|
|
int j; |
|
439
|
3051
|
100
|
|
|
|
|
for (j = 0; j < t->tuple_len; j++) { |
|
440
|
2036
|
100
|
|
|
|
|
if (bounds[j].name_len > 0) { has_names = 1; break; } |
|
441
|
|
|
|
|
|
|
} |
|
442
|
1021
|
100
|
|
|
|
|
if (has_names) { |
|
443
|
6
|
|
|
|
|
|
Newxz(t->tuple_names, t->tuple_len, char *); |
|
444
|
18
|
100
|
|
|
|
|
for (j = 0; j < t->tuple_len; j++) { |
|
445
|
12
|
50
|
|
|
|
|
if (bounds[j].name_len > 0) { |
|
446
|
12
|
|
|
|
|
|
Newx(t->tuple_names[j], bounds[j].name_len + 1, char); |
|
447
|
12
|
|
|
|
|
|
memcpy(t->tuple_names[j], |
|
448
|
12
|
|
|
|
|
|
body + bounds[j].name_start, bounds[j].name_len); |
|
449
|
12
|
|
|
|
|
|
t->tuple_names[j][bounds[j].name_len] = '\0'; |
|
450
|
|
|
|
|
|
|
} |
|
451
|
|
|
|
|
|
|
/* else: leave NULL -- mixed named/unnamed not really |
|
452
|
|
|
|
|
|
|
* supported; encode will croak if hashref is used. */ |
|
453
|
|
|
|
|
|
|
} |
|
454
|
|
|
|
|
|
|
} |
|
455
|
4094
|
100
|
|
|
|
|
} else if (len > 9 && strncmp(type, "Nullable(", 9) == 0) { |
|
|
|
100
|
|
|
|
|
|
|
456
|
1068
|
100
|
|
|
|
|
if (len > 18 && strncmp(type + 9, "Nullable(", 9) == 0) |
|
|
|
100
|
|
|
|
|
|
|
457
|
2
|
|
|
|
|
|
croak("Nullable(Nullable(...)) is not allowed"); |
|
458
|
1066
|
|
|
|
|
|
t->code = T_NULLABLE; |
|
459
|
1066
|
|
|
|
|
|
t->inner = parse_type(aTHX_ type + 9, len - 10); |
|
460
|
3026
|
100
|
|
|
|
|
} else if (len > 6 && strncmp(type, "Enum8(", 6) == 0) { |
|
|
|
100
|
|
|
|
|
|
|
461
|
1015
|
|
|
|
|
|
t->code = T_ENUM8; |
|
462
|
1015
|
|
|
|
|
|
parse_enum_entries(aTHX_ t, type + 6, len - 7, T_ENUM8); |
|
463
|
2011
|
100
|
|
|
|
|
} else if (len > 7 && strncmp(type, "Enum16(", 7) == 0) { |
|
|
|
100
|
|
|
|
|
|
|
464
|
5
|
|
|
|
|
|
t->code = T_ENUM16; |
|
465
|
5
|
|
|
|
|
|
parse_enum_entries(aTHX_ t, type + 7, len - 8, T_ENUM16); |
|
466
|
2006
|
100
|
|
|
|
|
} else if (len > 10 && strncmp(type, "Decimal32(", 10) == 0) { |
|
|
|
100
|
|
|
|
|
|
|
467
|
9
|
|
|
|
|
|
t->code = T_DECIMAL32; |
|
468
|
9
|
|
|
|
|
|
t->param = atoi(type + 10); |
|
469
|
9
|
50
|
|
|
|
|
if (t->param < 0 || t->param > 9) |
|
|
|
100
|
|
|
|
|
|
|
470
|
1
|
|
|
|
|
|
croak("Decimal32 scale must be 0..9, got %d", t->param); |
|
471
|
1997
|
100
|
|
|
|
|
} else if (len > 10 && strncmp(type, "Decimal64(", 10) == 0) { |
|
|
|
100
|
|
|
|
|
|
|
472
|
1012
|
|
|
|
|
|
t->code = T_DECIMAL64; |
|
473
|
1012
|
|
|
|
|
|
t->param = atoi(type + 10); |
|
474
|
1012
|
50
|
|
|
|
|
if (t->param < 0 || t->param > 18) |
|
|
|
50
|
|
|
|
|
|
|
475
|
0
|
|
|
|
|
|
croak("Decimal64 scale must be 0..18, got %d", t->param); |
|
476
|
985
|
100
|
|
|
|
|
} else if (len > 11 && strncmp(type, "Decimal128(", 11) == 0) { |
|
|
|
100
|
|
|
|
|
|
|
477
|
16
|
|
|
|
|
|
t->code = T_DECIMAL128; |
|
478
|
16
|
|
|
|
|
|
t->param = atoi(type + 11); |
|
479
|
16
|
50
|
|
|
|
|
if (t->param < 0 || t->param > 38) |
|
|
|
50
|
|
|
|
|
|
|
480
|
0
|
|
|
|
|
|
croak("Decimal128 scale must be 0..38, got %d", t->param); |
|
481
|
969
|
100
|
|
|
|
|
} else if (len > 11 && strncmp(type, "Decimal256(", 11) == 0) { |
|
|
|
100
|
|
|
|
|
|
|
482
|
11
|
|
|
|
|
|
t->code = T_DECIMAL256; |
|
483
|
11
|
|
|
|
|
|
t->param = atoi(type + 11); |
|
484
|
11
|
50
|
|
|
|
|
if (t->param < 0 || t->param > 76) |
|
|
|
50
|
|
|
|
|
|
|
485
|
0
|
|
|
|
|
|
croak("Decimal256 scale must be 0..76, got %d", t->param); |
|
486
|
959
|
100
|
|
|
|
|
} else if (len > 8 && strncmp(type, "Decimal(", 8) == 0) { |
|
|
|
100
|
|
|
|
|
|
|
487
|
2
|
|
|
|
|
|
int precision = atoi(type + 8); |
|
488
|
2
|
|
|
|
|
|
const char *comma = memchr(type + 8, ',', len - 8); |
|
489
|
2
|
50
|
|
|
|
|
if (!comma) croak("Decimal(P, S) requires precision and scale"); |
|
490
|
2
|
|
|
|
|
|
int scale = atoi(comma + 1); |
|
491
|
2
|
100
|
|
|
|
|
if (precision < 1 || precision > 38) |
|
|
|
50
|
|
|
|
|
|
|
492
|
1
|
|
|
|
|
|
croak("Decimal(P, S) precision must be 1..38, got %d (use Decimal256(S) explicitly for P > 38)", precision); |
|
493
|
1
|
50
|
|
|
|
|
if (scale < 0 || scale > precision) |
|
|
|
50
|
|
|
|
|
|
|
494
|
0
|
|
|
|
|
|
croak("Decimal scale must be 0..precision, got %d", scale); |
|
495
|
1
|
|
|
|
|
|
t->param = scale; |
|
496
|
1
|
50
|
|
|
|
|
if (precision <= 9) t->code = T_DECIMAL32; |
|
497
|
0
|
0
|
|
|
|
|
else if (precision <= 18) t->code = T_DECIMAL64; |
|
498
|
0
|
|
|
|
|
|
else t->code = T_DECIMAL128; |
|
499
|
956
|
100
|
|
|
|
|
} else if (len == 4 && strncmp(type, "Date", 4) == 0) { |
|
|
|
100
|
|
|
|
|
|
|
500
|
20
|
|
|
|
|
|
t->code = T_DATE; |
|
501
|
936
|
100
|
|
|
|
|
} else if (len == 6 && strncmp(type, "Date32", 6) == 0) { |
|
|
|
100
|
|
|
|
|
|
|
502
|
7
|
|
|
|
|
|
t->code = T_DATE32; |
|
503
|
929
|
100
|
|
|
|
|
} else if (len == 8 && strncmp(type, "DateTime", 8) == 0) { |
|
|
|
50
|
|
|
|
|
|
|
504
|
353
|
|
|
|
|
|
t->code = T_DATETIME; |
|
505
|
576
|
100
|
|
|
|
|
} else if (len > 9 && strncmp(type, "DateTime(", 9) == 0) { |
|
|
|
100
|
|
|
|
|
|
|
506
|
1
|
|
|
|
|
|
t->code = T_DATETIME; |
|
507
|
575
|
100
|
|
|
|
|
} else if (len > 11 && strncmp(type, "DateTime64(", 11) == 0) { |
|
|
|
100
|
|
|
|
|
|
|
508
|
21
|
|
|
|
|
|
t->code = T_DATETIME64; |
|
509
|
21
|
|
|
|
|
|
t->param = atoi(type + 11); |
|
510
|
21
|
50
|
|
|
|
|
if (t->param < 0 || t->param > 9) |
|
|
|
100
|
|
|
|
|
|
|
511
|
1
|
|
|
|
|
|
croak("DateTime64 precision must be 0..9, got %d", t->param); |
|
512
|
554
|
100
|
|
|
|
|
} else if (len == 4 && strncmp(type, "Bool", 4) == 0) { |
|
|
|
100
|
|
|
|
|
|
|
513
|
313
|
|
|
|
|
|
t->code = T_BOOL; |
|
514
|
241
|
100
|
|
|
|
|
} else if (len == 7 && strncmp(type, "Boolean", 7) == 0) { |
|
|
|
50
|
|
|
|
|
|
|
515
|
0
|
|
|
|
|
|
t->code = T_BOOL; |
|
516
|
241
|
100
|
|
|
|
|
} else if (len == 4 && strncmp(type, "UUID", 4) == 0) { |
|
|
|
100
|
|
|
|
|
|
|
517
|
12
|
|
|
|
|
|
t->code = T_UUID; |
|
518
|
229
|
100
|
|
|
|
|
} else if (len == 4 && strncmp(type, "IPv4", 4) == 0) { |
|
|
|
100
|
|
|
|
|
|
|
519
|
13
|
|
|
|
|
|
t->code = T_IPV4; |
|
520
|
216
|
100
|
|
|
|
|
} else if (len == 4 && strncmp(type, "IPv6", 4) == 0) { |
|
|
|
100
|
|
|
|
|
|
|
521
|
6
|
|
|
|
|
|
t->code = T_IPV6; |
|
522
|
211
|
100
|
|
|
|
|
} else if (len > 24 && strncmp(type, "SimpleAggregateFunction(", 24) == 0) { |
|
|
|
100
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
/* SimpleAggregateFunction(func, T) is wire-equivalent to T -- the |
|
524
|
|
|
|
|
|
|
* func name only affects how readers aggregate on read, not how |
|
525
|
|
|
|
|
|
|
* values are stored. Strip it and parse the rest as the inner type. */ |
|
526
|
1
|
|
|
|
|
|
const char *body = type + 24; |
|
527
|
1
|
|
|
|
|
|
STRLEN body_len = len - 25; |
|
528
|
1
|
|
|
|
|
|
const char *comma = memchr(body, ',', body_len); |
|
529
|
1
|
50
|
|
|
|
|
if (!comma) croak("SimpleAggregateFunction requires (func, T)"); |
|
530
|
1
|
|
|
|
|
|
STRLEN inner_off = (comma - body) + 1; |
|
531
|
2
|
50
|
|
|
|
|
while (inner_off < body_len && body[inner_off] == ' ') inner_off++; |
|
|
|
100
|
|
|
|
|
|
|
532
|
1
|
|
|
|
|
|
TypeInfo *inner = parse_type(aTHX_ body + inner_off, body_len - inner_off); |
|
533
|
|
|
|
|
|
|
/* Steal inner's contents in one shot. The outer slot still owns t; the |
|
534
|
|
|
|
|
|
|
* inner's slot was already disarmed before parse_type returned, so we |
|
535
|
|
|
|
|
|
|
* can free the now-redundant inner struct directly. */ |
|
536
|
1
|
|
|
|
|
|
*t = *inner; |
|
537
|
1
|
|
|
|
|
|
Safefree(inner); |
|
538
|
226
|
100
|
|
|
|
|
} else if (len > 8 && strncmp(type, "Variant(", 8) == 0) { |
|
|
|
100
|
|
|
|
|
|
|
539
|
|
|
|
|
|
|
/* Variant(T1, T2, ...) - tagged union. Each input row is either |
|
540
|
|
|
|
|
|
|
* undef (NULL) or [$variant_idx, $value]. ClickHouse stores |
|
541
|
|
|
|
|
|
|
* Variant sub-columns and per-row discriminators in alphabetical |
|
542
|
|
|
|
|
|
|
* order of variant type names, not declaration order, so build |
|
543
|
|
|
|
|
|
|
* a permutation that maps the user's declaration index to the |
|
544
|
|
|
|
|
|
|
* wire (alphabetical) position. */ |
|
545
|
18
|
|
|
|
|
|
t->code = T_VARIANT; |
|
546
|
18
|
|
|
|
|
|
const char *body = type + 8; |
|
547
|
18
|
|
|
|
|
|
STRLEN body_len = len - 9; |
|
548
|
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
/* Split once, then share the bounds with parse_tuple_types_with_bounds |
|
550
|
|
|
|
|
|
|
* (the alphabetical sort and the parsed TypeInfo entries reference |
|
551
|
|
|
|
|
|
|
* the same ranges). */ |
|
552
|
|
|
|
|
|
|
TypeBound *bounds; |
|
553
|
18
|
50
|
|
|
|
|
Newx(bounds, body_len + 1, TypeBound); |
|
554
|
18
|
|
|
|
|
|
SAVEFREEPV(bounds); |
|
555
|
18
|
|
|
|
|
|
t->tuple_len = split_type_list(body, body_len, bounds); |
|
556
|
18
|
100
|
|
|
|
|
if (t->tuple_len < 1) |
|
557
|
1
|
|
|
|
|
|
croak("Variant requires at least one type argument"); |
|
558
|
17
|
50
|
|
|
|
|
if (t->tuple_len > 254) |
|
559
|
0
|
|
|
|
|
|
croak("Variant supports at most 254 types (got %d)", t->tuple_len); |
|
560
|
17
|
|
|
|
|
|
t->tuple = parse_tuple_types_with_bounds(aTHX_ body, bounds, t->tuple_len); |
|
561
|
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
/* Sort declaration indices alphabetically by their type bytes. |
|
563
|
|
|
|
|
|
|
* Selection sort -- nvar is at most 254. */ |
|
564
|
17
|
|
|
|
|
|
Newx(t->variant_wire_to_decl, t->tuple_len, int); |
|
565
|
17
|
|
|
|
|
|
Newx(t->variant_decl_to_wire, t->tuple_len, int); |
|
566
|
|
|
|
|
|
|
int j, k; |
|
567
|
53
|
100
|
|
|
|
|
for (j = 0; j < t->tuple_len; j++) t->variant_wire_to_decl[j] = j; |
|
568
|
36
|
100
|
|
|
|
|
for (j = 0; j < t->tuple_len - 1; j++) { |
|
569
|
19
|
|
|
|
|
|
int min_idx = j; |
|
570
|
40
|
100
|
|
|
|
|
for (k = j + 1; k < t->tuple_len; k++) { |
|
571
|
21
|
|
|
|
|
|
int a = t->variant_wire_to_decl[min_idx]; |
|
572
|
21
|
|
|
|
|
|
int b = t->variant_wire_to_decl[k]; |
|
573
|
21
|
|
|
|
|
|
STRLEN la = bounds[a].len, lb = bounds[b].len; |
|
574
|
21
|
|
|
|
|
|
STRLEN cmp_len = la < lb ? la : lb; |
|
575
|
21
|
|
|
|
|
|
int cmp = memcmp(body + bounds[a].start, |
|
576
|
21
|
|
|
|
|
|
body + bounds[b].start, cmp_len); |
|
577
|
21
|
100
|
|
|
|
|
if (cmp > 0 || (cmp == 0 && la > lb)) min_idx = k; |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
578
|
|
|
|
|
|
|
} |
|
579
|
19
|
100
|
|
|
|
|
if (min_idx != j) { |
|
580
|
4
|
|
|
|
|
|
int tmp = t->variant_wire_to_decl[j]; |
|
581
|
4
|
|
|
|
|
|
t->variant_wire_to_decl[j] = t->variant_wire_to_decl[min_idx]; |
|
582
|
4
|
|
|
|
|
|
t->variant_wire_to_decl[min_idx] = tmp; |
|
583
|
|
|
|
|
|
|
} |
|
584
|
|
|
|
|
|
|
} |
|
585
|
53
|
100
|
|
|
|
|
for (j = 0; j < t->tuple_len; j++) |
|
586
|
36
|
|
|
|
|
|
t->variant_decl_to_wire[t->variant_wire_to_decl[j]] = j; |
|
587
|
191
|
100
|
|
|
|
|
} else if (len > 4 && strncmp(type, "Map(", 4) == 0) { |
|
|
|
100
|
|
|
|
|
|
|
588
|
|
|
|
|
|
|
/* Map(K, V) is wire-equivalent to Array(Tuple(K, V)). Build the |
|
589
|
|
|
|
|
|
|
* synthetic structure so encode_column can reuse Array+Tuple paths. */ |
|
590
|
14
|
|
|
|
|
|
t->code = T_MAP; |
|
591
|
14
|
|
|
|
|
|
t->tuple = parse_tuple_types(aTHX_ type + 4, len - 5, &t->tuple_len); |
|
592
|
14
|
50
|
|
|
|
|
if (t->tuple_len != 2) |
|
593
|
0
|
|
|
|
|
|
croak("Map type requires exactly 2 type arguments, got %d", t->tuple_len); |
|
594
|
177
|
100
|
|
|
|
|
} else if (len > 7 && strncmp(type, "Nested(", 7) == 0) { |
|
|
|
100
|
|
|
|
|
|
|
595
|
|
|
|
|
|
|
/* On the wire, ClickHouse splits a Nested(a T1, b T2) column into |
|
596
|
|
|
|
|
|
|
* flat columns "<name>.a Array(T1)" and "<name>.b Array(T2)" -- this |
|
597
|
|
|
|
|
|
|
* encoder does not perform that expansion. Use the flat form |
|
598
|
|
|
|
|
|
|
* directly in your column spec. */ |
|
599
|
2
|
|
|
|
|
|
croak("Nested(...) is not supported directly; declare flat columns " |
|
600
|
|
|
|
|
|
|
"like 'name.field' Array(T) instead (CH stores Nested that way " |
|
601
|
|
|
|
|
|
|
"on the wire). describe table / for_table() returns the flat form."); |
|
602
|
175
|
100
|
|
|
|
|
} else if (len == 7 && strncmp(type, "Dynamic", 7) == 0) { |
|
|
|
100
|
|
|
|
|
|
|
603
|
|
|
|
|
|
|
/* Standalone Dynamic column: same wire machinery as a single |
|
604
|
|
|
|
|
|
|
* JSON path's Dynamic sub-column. Each row is a scalar leaf |
|
605
|
|
|
|
|
|
|
* (Bool/Float64/Int64/String), an Array(T) of those, or |
|
606
|
|
|
|
|
|
|
* undef (NULL). Hashrefs aren't accepted here - use JSON for |
|
607
|
|
|
|
|
|
|
* object-shaped values. */ |
|
608
|
12
|
|
|
|
|
|
t->code = T_DYNAMIC; |
|
609
|
163
|
100
|
|
|
|
|
} else if ((len == 4 && strncmp(type, "JSON", 4) == 0) |
|
|
|
100
|
|
|
|
|
|
|
610
|
93
|
100
|
|
|
|
|
|| (len > 5 && strncmp(type, "JSON(", 5) == 0 |
|
|
|
100
|
|
|
|
|
|
|
611
|
41
|
50
|
|
|
|
|
&& type[len-1] == ')') |
|
612
|
52
|
100
|
|
|
|
|
|| (len > 7 && strncmp(type, "Object(", 7) == 0)) { |
|
|
|
50
|
|
|
|
|
|
|
613
|
|
|
|
|
|
|
/* ClickHouse's stable JSON type (24.8+). Wire layout (V1 over |
|
614
|
|
|
|
|
|
|
* Native, validated byte-for-byte against the server in |
|
615
|
|
|
|
|
|
|
* doc/json-research/): Object structure prefix, then for each |
|
616
|
|
|
|
|
|
|
* path a Dynamic prefix + Variant mode byte, then per-path |
|
617
|
|
|
|
|
|
|
* Variant data, then a shared-data Array(Tuple(String,String)) |
|
618
|
|
|
|
|
|
|
* trailer. The per-row schema is determined at encode time by |
|
619
|
|
|
|
|
|
|
* inspecting each value's Perl type. The JSON(name Type, ...) |
|
620
|
|
|
|
|
|
|
* form pins specific paths to concrete inner types; those |
|
621
|
|
|
|
|
|
|
* paths skip the Dynamic+Variant wrapping. */ |
|
622
|
111
|
|
|
|
|
|
t->code = T_JSON; |
|
623
|
111
|
100
|
|
|
|
|
if (len > 5 && type[4] == '(') { |
|
|
|
50
|
|
|
|
|
|
|
624
|
41
|
|
|
|
|
|
parse_json_typed_paths(aTHX_ t, type + 5, len - 6); |
|
625
|
|
|
|
|
|
|
} |
|
626
|
52
|
100
|
|
|
|
|
} else if (len == 5 && strncmp(type, "Point", 5) == 0) { |
|
|
|
50
|
|
|
|
|
|
|
627
|
|
|
|
|
|
|
/* Point = Tuple(Float64, Float64) */ |
|
628
|
10
|
|
|
|
|
|
t->code = T_TUPLE; |
|
629
|
10
|
|
|
|
|
|
t->tuple = parse_tuple_types(aTHX_ "Float64, Float64", 16, &t->tuple_len); |
|
630
|
42
|
100
|
|
|
|
|
} else if (len == 4 && strncmp(type, "Ring", 4) == 0) { |
|
|
|
50
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
/* Ring = Array(Point) */ |
|
632
|
3
|
|
|
|
|
|
t->code = T_ARRAY; |
|
633
|
3
|
|
|
|
|
|
t->inner = parse_type(aTHX_ "Point", 5); |
|
634
|
39
|
100
|
|
|
|
|
} else if (len == 10 && strncmp(type, "LineString", 10) == 0) { |
|
|
|
100
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
/* LineString = Array(Point) */ |
|
636
|
2
|
|
|
|
|
|
t->code = T_ARRAY; |
|
637
|
2
|
|
|
|
|
|
t->inner = parse_type(aTHX_ "Point", 5); |
|
638
|
37
|
50
|
|
|
|
|
} else if (len == 15 && strncmp(type, "MultiLineString", 15) == 0) { |
|
|
|
0
|
|
|
|
|
|
|
639
|
|
|
|
|
|
|
/* MultiLineString = Array(Array(Point)) */ |
|
640
|
0
|
|
|
|
|
|
t->code = T_ARRAY; |
|
641
|
0
|
|
|
|
|
|
t->inner = parse_type(aTHX_ "Array(Point)", 12); |
|
642
|
37
|
100
|
|
|
|
|
} else if (len == 7 && strncmp(type, "Polygon", 7) == 0) { |
|
|
|
50
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
/* Polygon = Array(Ring) */ |
|
644
|
3
|
|
|
|
|
|
t->code = T_ARRAY; |
|
645
|
3
|
|
|
|
|
|
t->inner = parse_type(aTHX_ "Ring", 4); |
|
646
|
34
|
100
|
|
|
|
|
} else if (len == 12 && strncmp(type, "MultiPolygon", 12) == 0) { |
|
|
|
50
|
|
|
|
|
|
|
647
|
|
|
|
|
|
|
/* MultiPolygon = Array(Polygon) */ |
|
648
|
2
|
|
|
|
|
|
t->code = T_ARRAY; |
|
649
|
2
|
|
|
|
|
|
t->inner = parse_type(aTHX_ "Polygon", 7); |
|
650
|
32
|
100
|
|
|
|
|
} else if (len > 15 && strncmp(type, "LowCardinality(", 15) == 0) { |
|
|
|
50
|
|
|
|
|
|
|
651
|
30
|
|
|
|
|
|
t->code = T_LOWCARDINALITY; |
|
652
|
30
|
|
|
|
|
|
t->inner = parse_type(aTHX_ type + 15, len - 16); |
|
653
|
30
|
100
|
|
|
|
|
if (t->inner->code != T_STRING && t->inner->code != T_FIXEDSTRING |
|
|
|
100
|
|
|
|
|
|
|
654
|
10
|
100
|
|
|
|
|
&& (t->inner->code != T_NULLABLE |
|
655
|
9
|
50
|
|
|
|
|
|| (t->inner->inner->code != T_STRING |
|
656
|
0
|
0
|
|
|
|
|
&& t->inner->inner->code != T_FIXEDSTRING))) |
|
657
|
1
|
|
|
|
|
|
croak("LowCardinality(T) currently supports T = String / FixedString / Nullable(String) / Nullable(FixedString)"); |
|
658
|
|
|
|
|
|
|
} else { |
|
659
|
2
|
|
|
|
|
|
croak("Unknown type: %.*s", (int)len, type); |
|
660
|
|
|
|
|
|
|
} |
|
661
|
|
|
|
|
|
|
|
|
662
|
|
|
|
|
|
|
/* Disarm the slot: caller now owns t. */ |
|
663
|
9866
|
|
|
|
|
|
*slot = NULL; |
|
664
|
9866
|
|
|
|
|
|
return t; |
|
665
|
|
|
|
|
|
|
} |