| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
/* |
|
2
|
|
|
|
|
|
|
* Separated.xs - Perl XS bindings for the File::Raw::Separated parser core. |
|
3
|
|
|
|
|
|
|
* |
|
4
|
|
|
|
|
|
|
* Surface: |
|
5
|
|
|
|
|
|
|
* In-memory primitives: |
|
6
|
|
|
|
|
|
|
* parse_buf($scalar [, \%opts]) -> \@rows |
|
7
|
|
|
|
|
|
|
* parse_buf_each($scalar, $code [, \%opts]) ;callback per row |
|
8
|
|
|
|
|
|
|
* parse_stream($path, $code [, \%opts]) ;chunked file streamer |
|
9
|
|
|
|
|
|
|
* plus dialect-pinning aliases csv_* and tsv_* (Perl-side, .pm) |
|
10
|
|
|
|
|
|
|
* |
|
11
|
|
|
|
|
|
|
* File::Raw plugin integration: |
|
12
|
|
|
|
|
|
|
* At BOOT we register two plugins ("csv", "tsv") via |
|
13
|
|
|
|
|
|
|
* include/file_plugin.h. They expose a READ phase that turns |
|
14
|
|
|
|
|
|
|
* File::Raw::slurp($p, plugin => 'csv', ...) into AoA. Per-call |
|
15
|
|
|
|
|
|
|
* options arrive through ctx->options (a per-call HV) and merge |
|
16
|
|
|
|
|
|
|
* on top of the dialect's defaults held in ctx->plugin_state. |
|
17
|
|
|
|
|
|
|
* There is no more global hook state, no enable/disable, no |
|
18
|
|
|
|
|
|
|
* get/set/with_options scaffolding - all of that lived to back the |
|
19
|
|
|
|
|
|
|
* old hook system; the plugin model passes options inline. |
|
20
|
|
|
|
|
|
|
*/ |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
#define PERL_NO_GET_CONTEXT |
|
23
|
|
|
|
|
|
|
#include "EXTERN.h" |
|
24
|
|
|
|
|
|
|
#include "perl.h" |
|
25
|
|
|
|
|
|
|
#include "XSUB.h" |
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
#include "include/separated_parser.h" |
|
28
|
|
|
|
|
|
|
/* file_plugin.h comes from File::Raw via ExtUtils::Depends -- the |
|
29
|
|
|
|
|
|
|
consumer Makefile.PL adds the right -I to find it. */ |
|
30
|
|
|
|
|
|
|
#include "file_plugin.h" |
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
#include |
|
33
|
|
|
|
|
|
|
#include |
|
34
|
|
|
|
|
|
|
#include |
|
35
|
|
|
|
|
|
|
#include |
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
/* XS_EXTERNAL was added in 5.16; older perls (5.10/5.14) need this |
|
38
|
|
|
|
|
|
|
fallback or our `import` XSUB forward-decl + definition won't |
|
39
|
|
|
|
|
|
|
expand and BOOT can't take its address. */ |
|
40
|
|
|
|
|
|
|
#ifndef XS_EXTERNAL |
|
41
|
|
|
|
|
|
|
# define XS_EXTERNAL(name) XS(name) |
|
42
|
|
|
|
|
|
|
#endif |
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
/* ============================================================ |
|
45
|
|
|
|
|
|
|
* Option decoding |
|
46
|
|
|
|
|
|
|
* |
|
47
|
|
|
|
|
|
|
* Reads a Perl hashref of options into a separated_options_t. |
|
48
|
|
|
|
|
|
|
* Unknown keys croak (catches typos like 'seperator'). |
|
49
|
|
|
|
|
|
|
* Caller is expected to have already seeded sensible defaults |
|
50
|
|
|
|
|
|
|
* before calling this (so the merge is "user opts on top of defaults"). |
|
51
|
|
|
|
|
|
|
* ============================================================ */ |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
/* Every option name decode_opts() accepts, closed by a NULL sentinel so
 * known_opt() can walk the table without a separate length.
 *
 * Two entries are accepted but never copied into separated_options_t:
 *   - "dialect" selects the seeded defaults (csv | tsv). It is consumed by
 *     seed_opts_for_dialect() before decode_opts() runs and is listed here
 *     only so known_opt() does not reject it during the merge sweep.
 *   - "plugin" is present in the HV that File::Raw builds for its dispatch
 *     call (e.g. slurp($p, plugin => 'csv', sep => ';')). The plugin
 *     machinery uses it to look us up; for the merge sweep it is a
 *     known-and-ignored key. */
static const char *VALID_OPT_KEYS[] = {
    "sep",
    "quote",
    "escape",
    "strict",
    "eol",
    "trim",
    "empty_is_undef",
    "binary",
    "header",
    "max_field_len",
    "dialect",
    "plugin",
    NULL
};
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
static int |
|
69
|
166
|
|
|
|
|
|
known_opt(const char *key, STRLEN klen) |
|
70
|
|
|
|
|
|
|
{ |
|
71
|
|
|
|
|
|
|
const char *const *p; |
|
72
|
1462
|
100
|
|
|
|
|
for (p = VALID_OPT_KEYS; *p; p++) { |
|
73
|
1460
|
100
|
|
|
|
|
if (strlen(*p) == klen && memcmp(*p, key, klen) == 0) return 1; |
|
|
|
100
|
|
|
|
|
|
|
74
|
|
|
|
|
|
|
} |
|
75
|
2
|
|
|
|
|
|
return 0; |
|
76
|
|
|
|
|
|
|
} |
|
77
|
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
/* Take the first byte of a string SV, croaking if empty. |
|
79
|
|
|
|
|
|
|
* Used for sep / quote / escape (each must be exactly one byte; |
|
80
|
|
|
|
|
|
|
* single-byte ASCII for v0.01 — multi-byte separators are a future stretch). */ |
|
81
|
|
|
|
|
|
|
static int |
|
82
|
18
|
|
|
|
|
|
sv_first_byte(pTHX_ SV *sv, const char *what) |
|
83
|
|
|
|
|
|
|
{ |
|
84
|
|
|
|
|
|
|
STRLEN len; |
|
85
|
18
|
|
|
|
|
|
const char *p = SvPV(sv, len); |
|
86
|
18
|
50
|
|
|
|
|
if (len < 1) croak("File::Raw::Separated: %s must be a non-empty string", what); |
|
87
|
18
|
|
|
|
|
|
return (unsigned char)p[0]; |
|
88
|
|
|
|
|
|
|
} |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
static separated_eol_t |
|
91
|
10
|
|
|
|
|
|
sv_to_eol(pTHX_ SV *sv) |
|
92
|
|
|
|
|
|
|
{ |
|
93
|
|
|
|
|
|
|
STRLEN len; |
|
94
|
10
|
|
|
|
|
|
const char *p = SvPV(sv, len); |
|
95
|
10
|
100
|
|
|
|
|
if (len == 4 && memcmp(p, "auto", 4) == 0) return SEPARATED_EOL_AUTO; |
|
|
|
100
|
|
|
|
|
|
|
96
|
9
|
100
|
|
|
|
|
if (len == 2 && memcmp(p, "lf", 2) == 0) return SEPARATED_EOL_LF; |
|
|
|
100
|
|
|
|
|
|
|
97
|
5
|
100
|
|
|
|
|
if (len == 4 && memcmp(p, "crlf", 4) == 0) return SEPARATED_EOL_CRLF; |
|
|
|
50
|
|
|
|
|
|
|
98
|
3
|
100
|
|
|
|
|
if (len == 2 && memcmp(p, "cr", 2) == 0) return SEPARATED_EOL_CR; |
|
|
|
50
|
|
|
|
|
|
|
99
|
2
|
|
|
|
|
|
croak("File::Raw::Separated: eol must be one of auto|lf|crlf|cr (got '%.*s')", |
|
100
|
|
|
|
|
|
|
(int)len, p); |
|
101
|
|
|
|
|
|
|
} |
|
102
|
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
/* How seed_opts_for_dialect() chooses the dialect defaults.
 *
 * With AUTO the `dialect` key of the user's hash decides (CSV when it is
 * absent). CSV and TSV force that dialect no matter what the hash says.
 * The dialect-prefixed XSUBs (csv_parse_buf, tsv_parse_stream, ...) all
 * forward with a non-AUTO pin; they used to be Perl-side wrappers that
 * mutated the opts hash via _pin_dialect, and the pin now happens here. */
typedef enum {
    DIALECT_AUTO = 0,   /* consult the hash, default to CSV */
    DIALECT_CSV  = 1,   /* always CSV */
    DIALECT_TSV  = 2    /* always TSV */
} dialect_pin_t;
|
113
|
|
|
|
|
|
|
|
|
114
|
|
|
|
|
|
|
/* Read the optional `dialect` key from a user-supplied options hashref |
|
115
|
|
|
|
|
|
|
* (may be NULL) and seed *opts with the corresponding defaults. |
|
116
|
|
|
|
|
|
|
* If `pin` is CSV/TSV the hash's dialect key is ignored entirely. |
|
117
|
|
|
|
|
|
|
* Defaults to CSV. Croaks on an unknown dialect string. |
|
118
|
|
|
|
|
|
|
* This MUST be called before decode_opts() so user-supplied keys layer |
|
119
|
|
|
|
|
|
|
* cleanly on top of the dialect's defaults. */ |
|
120
|
|
|
|
|
|
|
static void |
|
121
|
114
|
|
|
|
|
|
seed_opts_for_dialect(pTHX_ HV *hv, separated_options_t *opts, dialect_pin_t pin) |
|
122
|
|
|
|
|
|
|
{ |
|
123
|
|
|
|
|
|
|
SV **slot; |
|
124
|
114
|
100
|
|
|
|
|
if (pin == DIALECT_CSV) { separated_options_init_csv(opts); return; } |
|
125
|
18
|
100
|
|
|
|
|
if (pin == DIALECT_TSV) { separated_options_init_tsv(opts); return; } |
|
126
|
2
|
50
|
|
|
|
|
if (hv && (slot = hv_fetchs(hv, "dialect", 0)) && *slot && SvOK(*slot)) { |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
127
|
|
|
|
|
|
|
STRLEN dlen; |
|
128
|
0
|
|
|
|
|
|
const char *dpv = SvPV(*slot, dlen); |
|
129
|
0
|
0
|
|
|
|
|
if (dlen == 3 && memcmp(dpv, "csv", 3) == 0) { |
|
|
|
0
|
|
|
|
|
|
|
130
|
0
|
|
|
|
|
|
separated_options_init_csv(opts); |
|
131
|
0
|
|
|
|
|
|
return; |
|
132
|
|
|
|
|
|
|
} |
|
133
|
0
|
0
|
|
|
|
|
if (dlen == 3 && memcmp(dpv, "tsv", 3) == 0) { |
|
|
|
0
|
|
|
|
|
|
|
134
|
0
|
|
|
|
|
|
separated_options_init_tsv(opts); |
|
135
|
0
|
|
|
|
|
|
return; |
|
136
|
|
|
|
|
|
|
} |
|
137
|
0
|
|
|
|
|
|
croak("File::Raw::Separated: dialect must be 'csv' or 'tsv' (got '%.*s')", |
|
138
|
|
|
|
|
|
|
(int)dlen, dpv); |
|
139
|
|
|
|
|
|
|
} |
|
140
|
|
|
|
|
|
|
/* default: CSV */ |
|
141
|
2
|
|
|
|
|
|
separated_options_init_csv(opts); |
|
142
|
|
|
|
|
|
|
} |
|
143
|
|
|
|
|
|
|
|
|
144
|
|
|
|
|
|
|
/* Same as above, but takes a plain SV instead of looking up a hash key. |
|
145
|
|
|
|
|
|
|
* Used by the class-method state setters (set_options('csv'|'tsv', ...)). */ |
|
146
|
|
|
|
|
|
|
static int |
|
147
|
0
|
|
|
|
|
|
parse_dialect_sv(pTHX_ SV *sv, const char *fn) |
|
148
|
|
|
|
|
|
|
{ |
|
149
|
|
|
|
|
|
|
STRLEN dlen; |
|
150
|
|
|
|
|
|
|
const char *dpv; |
|
151
|
0
|
0
|
|
|
|
|
if (!sv || !SvOK(sv)) |
|
|
|
0
|
|
|
|
|
|
|
152
|
0
|
|
|
|
|
|
croak("%s: dialect (first arg) must be 'csv' or 'tsv'", fn); |
|
153
|
0
|
|
|
|
|
|
dpv = SvPV(sv, dlen); |
|
154
|
0
|
0
|
|
|
|
|
if (dlen == 3 && memcmp(dpv, "csv", 3) == 0) return 0; /* csv slot */ |
|
|
|
0
|
|
|
|
|
|
|
155
|
0
|
0
|
|
|
|
|
if (dlen == 3 && memcmp(dpv, "tsv", 3) == 0) return 1; /* tsv slot */ |
|
|
|
0
|
|
|
|
|
|
|
156
|
0
|
|
|
|
|
|
croak("%s: dialect must be 'csv' or 'tsv' (got '%.*s')", |
|
157
|
|
|
|
|
|
|
fn, (int)dlen, dpv); |
|
158
|
|
|
|
|
|
|
} |
|
159
|
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
/* Merge an options hashref (may be NULL or undef) into *opts. |
|
161
|
|
|
|
|
|
|
* Croaks on unknown key or wrong-shape value. */ |
|
162
|
|
|
|
|
|
|
static void |
|
163
|
194
|
|
|
|
|
|
decode_opts(pTHX_ HV *hv, separated_options_t *opts) |
|
164
|
|
|
|
|
|
|
{ |
|
165
|
194
|
100
|
|
|
|
|
if (!hv) return; |
|
166
|
|
|
|
|
|
|
|
|
167
|
129
|
|
|
|
|
|
hv_iterinit(hv); |
|
168
|
|
|
|
|
|
|
HE *he; |
|
169
|
291
|
100
|
|
|
|
|
while ((he = hv_iternext(hv))) { |
|
170
|
|
|
|
|
|
|
I32 klen_i; |
|
171
|
166
|
|
|
|
|
|
const char *key = hv_iterkey(he, &klen_i); |
|
172
|
166
|
|
|
|
|
|
STRLEN klen = (STRLEN)klen_i; |
|
173
|
166
|
|
|
|
|
|
SV *val = hv_iterval(hv, he); |
|
174
|
|
|
|
|
|
|
|
|
175
|
166
|
100
|
|
|
|
|
if (!known_opt(key, klen)) { |
|
176
|
2
|
|
|
|
|
|
croak("File::Raw::Separated: unknown option '%.*s'", |
|
177
|
|
|
|
|
|
|
(int)klen, key); |
|
178
|
|
|
|
|
|
|
} |
|
179
|
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
/* Treat undef value as "use default" — i.e. skip; gives callers |
|
181
|
|
|
|
|
|
|
* a way to express "I don't care, use the seeded default". */ |
|
182
|
164
|
100
|
|
|
|
|
if (!SvOK(val)) continue; |
|
183
|
|
|
|
|
|
|
|
|
184
|
157
|
100
|
|
|
|
|
if (klen == 3 && memcmp(key, "sep", 3) == 0) opts->sep = sv_first_byte(aTHX_ val, "sep"); |
|
|
|
100
|
|
|
|
|
|
|
185
|
145
|
100
|
|
|
|
|
else if (klen == 5 && memcmp(key, "quote", 5) == 0) opts->quote = sv_first_byte(aTHX_ val, "quote"); |
|
|
|
50
|
|
|
|
|
|
|
186
|
142
|
100
|
|
|
|
|
else if (klen == 6 && memcmp(key, "escape",6) == 0) opts->escape= sv_first_byte(aTHX_ val, "escape"); |
|
|
|
100
|
|
|
|
|
|
|
187
|
139
|
100
|
|
|
|
|
else if (klen == 6 && memcmp(key, "strict",6) == 0) opts->strict= SvTRUE(val) ? 1 : 0; |
|
|
|
100
|
|
|
|
|
|
|
188
|
132
|
100
|
|
|
|
|
else if (klen == 3 && memcmp(key, "eol", 3) == 0) opts->eol_mode = sv_to_eol(aTHX_ val); |
|
|
|
50
|
|
|
|
|
|
|
189
|
122
|
100
|
|
|
|
|
else if (klen == 4 && memcmp(key, "trim", 4) == 0) opts->trim = SvTRUE(val) ? 1 : 0; |
|
|
|
50
|
|
|
|
|
|
|
190
|
120
|
100
|
|
|
|
|
else if (klen == 14 && memcmp(key, "empty_is_undef", 14) == 0) opts->empty_is_undef = SvTRUE(val) ? 1 : 0; |
|
|
|
50
|
|
|
|
|
|
|
191
|
118
|
100
|
|
|
|
|
else if (klen == 6 && memcmp(key, "binary",6) == 0) opts->binary= SvTRUE(val) ? 1 : 0; |
|
|
|
100
|
|
|
|
|
|
|
192
|
115
|
100
|
|
|
|
|
else if (klen == 6 && memcmp(key, "header",6) == 0) opts->header= SvTRUE(val) ? 1 : 0; |
|
|
|
100
|
|
|
|
|
|
|
193
|
78
|
100
|
|
|
|
|
else if (klen == 13 && memcmp(key, "max_field_len", 13) == 0) { |
|
|
|
50
|
|
|
|
|
|
|
194
|
1
|
|
|
|
|
|
IV n = SvIV(val); |
|
195
|
1
|
50
|
|
|
|
|
if (n < 0) croak("File::Raw::Separated: max_field_len must be >= 0"); |
|
196
|
1
|
|
|
|
|
|
opts->max_field_len = (size_t)n; |
|
197
|
|
|
|
|
|
|
} |
|
198
|
|
|
|
|
|
|
} |
|
199
|
|
|
|
|
|
|
} |
|
200
|
|
|
|
|
|
|
|
|
201
|
|
|
|
|
|
|
/* ============================================================ |
|
202
|
|
|
|
|
|
|
* Dispatcher state — passed through the C parser as user-data |
|
203
|
|
|
|
|
|
|
* ============================================================ */ |
|
204
|
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
typedef struct { |
|
206
|
|
|
|
|
|
|
#ifdef PERL_IMPLICIT_CONTEXT |
|
207
|
|
|
|
|
|
|
PerlInterpreter *my_perl; /* used by dTHXa(c->my_perl) in callbacks */ |
|
208
|
|
|
|
|
|
|
#endif |
|
209
|
|
|
|
|
|
|
AV *result; /* used in collect mode (as_callback == 0) */ |
|
210
|
|
|
|
|
|
|
AV *current_row; /* AV reused across rows in collect mode */ |
|
211
|
|
|
|
|
|
|
SV *cb; /* user callback in callback mode (1) */ |
|
212
|
|
|
|
|
|
|
AV *row_av; /* one reusable AV for callback mode */ |
|
213
|
|
|
|
|
|
|
int as_callback; |
|
214
|
|
|
|
|
|
|
int empty_is_undef; |
|
215
|
|
|
|
|
|
|
int binary; |
|
216
|
|
|
|
|
|
|
/* Header mode: when 1, first emitted row is consumed as keys and |
|
217
|
|
|
|
|
|
|
* subsequent rows are emitted as hashrefs keyed by those names. */ |
|
218
|
|
|
|
|
|
|
int header_mode; |
|
219
|
|
|
|
|
|
|
AV *headers; /* NULL until first row consumed in header mode */ |
|
220
|
|
|
|
|
|
|
/* When the user callback dies, we propagate via a stash. */ |
|
221
|
|
|
|
|
|
|
SV *die_msg; |
|
222
|
|
|
|
|
|
|
} dispatch_ctx_t; |
|
223
|
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
/* Build a single field SV from the parser's borrowed pointer. */ |
|
225
|
|
|
|
|
|
|
static SV * |
|
226
|
921389
|
|
|
|
|
|
make_field_sv(pTHX_ const char *field, STRLEN len, int is_null, |
|
227
|
|
|
|
|
|
|
int empty_is_undef, int binary) |
|
228
|
|
|
|
|
|
|
{ |
|
229
|
|
|
|
|
|
|
PERL_UNUSED_VAR(empty_is_undef); |
|
230
|
921389
|
100
|
|
|
|
|
if (is_null) return newSV(0); /* PL_sv_undef would be SVREADONLY */ |
|
231
|
921387
|
50
|
|
|
|
|
SV *sv = newSVpvn(field ? field : "", len); |
|
232
|
921387
|
100
|
|
|
|
|
if (!binary) sv_utf8_decode(sv); |
|
233
|
921387
|
|
|
|
|
|
return sv; |
|
234
|
|
|
|
|
|
|
} |
|
235
|
|
|
|
|
|
|
|
|
236
|
|
|
|
|
|
|
/* Header-mode helpers (used by both collect_cb and each_cb). |
|
237
|
|
|
|
|
|
|
* |
|
238
|
|
|
|
|
|
|
* Contract: |
|
239
|
|
|
|
|
|
|
* - First row in header mode is taken as the header. Duplicate keys |
|
240
|
|
|
|
|
|
|
* croak. Subsequent calls see ctx->headers != NULL. |
|
241
|
|
|
|
|
|
|
* - Subsequent rows are zipped against the header into a fresh HV. |
|
242
|
|
|
|
|
|
|
* Row arity > header arity croaks. Row arity < header arity pads |
|
243
|
|
|
|
|
|
|
* trailing keys with undef. |
|
244
|
|
|
|
|
|
|
* - Field SVs are copied into the HV via newSVsv (the source AV gets |
|
245
|
|
|
|
|
|
|
* av_clear'd or freed afterwards). */ |
|
246
|
|
|
|
|
|
|
|
|
247
|
|
|
|
|
|
|
static void |
|
248
|
34
|
|
|
|
|
|
check_no_duplicate_headers(pTHX_ AV *headers) |
|
249
|
|
|
|
|
|
|
{ |
|
250
|
34
|
|
|
|
|
|
HV *seen = newHV(); |
|
251
|
34
|
|
|
|
|
|
SSize_t n = av_len(headers) + 1; |
|
252
|
|
|
|
|
|
|
SSize_t i; |
|
253
|
115
|
100
|
|
|
|
|
for (i = 0; i < n; i++) { |
|
254
|
83
|
|
|
|
|
|
SV **kp = av_fetch(headers, i, 0); |
|
255
|
|
|
|
|
|
|
STRLEN klen; |
|
256
|
|
|
|
|
|
|
const char *kpv; |
|
257
|
83
|
50
|
|
|
|
|
if (!kp || !*kp) continue; |
|
|
|
50
|
|
|
|
|
|
|
258
|
83
|
|
|
|
|
|
kpv = SvPV(*kp, klen); |
|
259
|
83
|
100
|
|
|
|
|
if (hv_exists(seen, kpv, klen)) { |
|
260
|
2
|
|
|
|
|
|
SvREFCNT_dec((SV *)seen); |
|
261
|
2
|
|
|
|
|
|
croak("File::Raw::Separated: duplicate header key '%.*s'", |
|
262
|
|
|
|
|
|
|
(int)klen, kpv); |
|
263
|
|
|
|
|
|
|
} |
|
264
|
81
|
|
|
|
|
|
(void)hv_store(seen, kpv, klen, &PL_sv_yes, 0); |
|
265
|
|
|
|
|
|
|
} |
|
266
|
32
|
|
|
|
|
|
SvREFCNT_dec((SV *)seen); |
|
267
|
32
|
|
|
|
|
|
} |
|
268
|
|
|
|
|
|
|
|
|
269
|
|
|
|
|
|
|
static HV * |
|
270
|
69
|
|
|
|
|
|
build_header_row_hv(pTHX_ AV *headers, AV *row) |
|
271
|
|
|
|
|
|
|
{ |
|
272
|
69
|
|
|
|
|
|
HV *hv = newHV(); |
|
273
|
69
|
|
|
|
|
|
SSize_t hcount = av_len(headers) + 1; |
|
274
|
69
|
|
|
|
|
|
SSize_t rcount = av_len(row) + 1; |
|
275
|
|
|
|
|
|
|
SSize_t i; |
|
276
|
69
|
100
|
|
|
|
|
if (rcount > hcount) { |
|
277
|
2
|
|
|
|
|
|
SvREFCNT_dec((SV *)hv); |
|
278
|
2
|
|
|
|
|
|
croak("File::Raw::Separated: row has %ld field(s), header has %ld", |
|
279
|
|
|
|
|
|
|
(long)rcount, (long)hcount); |
|
280
|
|
|
|
|
|
|
} |
|
281
|
235
|
100
|
|
|
|
|
for (i = 0; i < hcount; i++) { |
|
282
|
168
|
|
|
|
|
|
SV **kp = av_fetch(headers, i, 0); |
|
283
|
|
|
|
|
|
|
STRLEN klen; |
|
284
|
|
|
|
|
|
|
const char *kpv; |
|
285
|
|
|
|
|
|
|
SV *val; |
|
286
|
168
|
50
|
|
|
|
|
if (!kp || !*kp) continue; |
|
|
|
50
|
|
|
|
|
|
|
287
|
168
|
|
|
|
|
|
kpv = SvPV(*kp, klen); |
|
288
|
168
|
100
|
|
|
|
|
if (i < rcount) { |
|
289
|
164
|
|
|
|
|
|
SV **vp = av_fetch(row, i, 0); |
|
290
|
164
|
50
|
|
|
|
|
val = (vp && *vp) ? newSVsv(*vp) : newSV(0); |
|
|
|
50
|
|
|
|
|
|
|
291
|
|
|
|
|
|
|
} else { |
|
292
|
4
|
|
|
|
|
|
val = newSV(0); |
|
293
|
|
|
|
|
|
|
} |
|
294
|
168
|
|
|
|
|
|
(void)hv_store(hv, kpv, klen, val, 0); |
|
295
|
|
|
|
|
|
|
} |
|
296
|
67
|
|
|
|
|
|
return hv; |
|
297
|
|
|
|
|
|
|
} |
|
298
|
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
/* Extract caller-supplied header names from an options HV. Returns: |
|
300
|
|
|
|
|
|
|
* - NULL if `header` is missing, false-ish, or `header => 1` (the |
|
301
|
|
|
|
|
|
|
* "consume the file's first row as headers" mode); |
|
302
|
|
|
|
|
|
|
* - a fresh AV (refcount 1, owned by caller) of header-name SVs when |
|
303
|
|
|
|
|
|
|
* `header => [name, name, ...]` was supplied. Validates: arrayref, |
|
304
|
|
|
|
|
|
|
* non-empty, no undef entries, no duplicates. |
|
305
|
|
|
|
|
|
|
* |
|
306
|
|
|
|
|
|
|
* The caller installs the returned AV directly into dispatch_ctx_t:: |
|
307
|
|
|
|
|
|
|
* headers BEFORE the parser starts emitting fields, which short- |
|
308
|
|
|
|
|
|
|
* circuits the each_cb / collect_cb "first row becomes headers" branch |
|
309
|
|
|
|
|
|
|
* so row 0 is treated as data and emitted as a hashref. */ |
|
310
|
|
|
|
|
|
|
static AV * |
|
311
|
178
|
|
|
|
|
|
extract_explicit_headers(pTHX_ HV *opts) |
|
312
|
|
|
|
|
|
|
{ |
|
313
|
|
|
|
|
|
|
SV **slot; |
|
314
|
|
|
|
|
|
|
SV *val; |
|
315
|
|
|
|
|
|
|
AV *user_av; |
|
316
|
|
|
|
|
|
|
AV *out; |
|
317
|
|
|
|
|
|
|
SSize_t i, n; |
|
318
|
|
|
|
|
|
|
|
|
319
|
178
|
100
|
|
|
|
|
if (!opts) return NULL; |
|
320
|
113
|
|
|
|
|
|
slot = hv_fetchs(opts, "header", 0); |
|
321
|
113
|
100
|
|
|
|
|
if (!slot || !*slot || !SvOK(*slot)) return NULL; |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
322
|
37
|
|
|
|
|
|
val = *slot; |
|
323
|
|
|
|
|
|
|
|
|
324
|
|
|
|
|
|
|
/* `header => 1` (or any non-arrayref truthy) keeps legacy behaviour. */ |
|
325
|
37
|
100
|
|
|
|
|
if (!SvROK(val) || SvTYPE(SvRV(val)) != SVt_PVAV) return NULL; |
|
|
|
50
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
|
|
327
|
20
|
|
|
|
|
|
user_av = (AV *)SvRV(val); |
|
328
|
20
|
|
|
|
|
|
n = av_len(user_av) + 1; |
|
329
|
20
|
100
|
|
|
|
|
if (n <= 0) |
|
330
|
1
|
|
|
|
|
|
croak("File::Raw::Separated: header => [] is empty; " |
|
331
|
|
|
|
|
|
|
"use header => 1 to consume the file's first row, " |
|
332
|
|
|
|
|
|
|
"or supply at least one name"); |
|
333
|
|
|
|
|
|
|
|
|
334
|
19
|
|
|
|
|
|
out = newAV(); |
|
335
|
19
|
|
|
|
|
|
av_extend(out, n - 1); |
|
336
|
68
|
100
|
|
|
|
|
for (i = 0; i < n; i++) { |
|
337
|
50
|
|
|
|
|
|
SV **kp = av_fetch(user_av, i, 0); |
|
338
|
50
|
50
|
|
|
|
|
if (!kp || !*kp || !SvOK(*kp)) { |
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
339
|
1
|
|
|
|
|
|
SvREFCNT_dec((SV *)out); |
|
340
|
1
|
|
|
|
|
|
croak("File::Raw::Separated: header => [...] entry %ld is undef", |
|
341
|
|
|
|
|
|
|
(long)i); |
|
342
|
|
|
|
|
|
|
} |
|
343
|
|
|
|
|
|
|
/* Copy to detach from the caller's arrayref. */ |
|
344
|
49
|
|
|
|
|
|
av_push(out, newSVsv(*kp)); |
|
345
|
|
|
|
|
|
|
} |
|
346
|
|
|
|
|
|
|
/* Reuses the same dup-check the implicit path uses for symmetry. */ |
|
347
|
18
|
|
|
|
|
|
check_no_duplicate_headers(aTHX_ out); |
|
348
|
17
|
|
|
|
|
|
return out; |
|
349
|
|
|
|
|
|
|
} |
|
350
|
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
/* Field callback for as_callback == 0: accumulate into AoA (or |
|
352
|
|
|
|
|
|
|
* arrayref-of-hashref if header_mode). */ |
|
353
|
|
|
|
|
|
|
static int |
|
354
|
860
|
|
|
|
|
|
collect_cb(const char *field, size_t len, int eor, void *ud) |
|
355
|
|
|
|
|
|
|
{ |
|
356
|
860
|
|
|
|
|
|
dispatch_ctx_t *c = (dispatch_ctx_t *)ud; |
|
357
|
|
|
|
|
|
|
dTHXa(c->my_perl); |
|
358
|
860
|
|
|
|
|
|
int is_null = (len == SEPARATED_FIELD_NULL_LEN); |
|
359
|
860
|
100
|
|
|
|
|
SV *sv = make_field_sv(aTHX_ field, is_null ? 0 : (STRLEN)len, is_null, |
|
360
|
|
|
|
|
|
|
c->empty_is_undef, c->binary); |
|
361
|
860
|
|
|
|
|
|
av_push(c->current_row, sv); |
|
362
|
860
|
100
|
|
|
|
|
if (eor) { |
|
363
|
403
|
100
|
|
|
|
|
if (c->header_mode && !c->headers) { |
|
|
|
100
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
/* First row is the header. Validate duplicates, then steal. */ |
|
365
|
11
|
|
|
|
|
|
check_no_duplicate_headers(aTHX_ c->current_row); |
|
366
|
10
|
|
|
|
|
|
c->headers = c->current_row; |
|
367
|
10
|
|
|
|
|
|
c->current_row = newAV(); |
|
368
|
|
|
|
|
|
|
/* Do NOT push to result. */ |
|
369
|
392
|
100
|
|
|
|
|
} else if (c->header_mode) { |
|
370
|
|
|
|
|
|
|
/* Subsequent row: zip against headers into hash. */ |
|
371
|
54
|
|
|
|
|
|
HV *row_hv = build_header_row_hv(aTHX_ c->headers, c->current_row); |
|
372
|
52
|
|
|
|
|
|
av_push(c->result, newRV_noinc((SV *)row_hv)); |
|
373
|
52
|
|
|
|
|
|
av_clear(c->current_row); |
|
374
|
|
|
|
|
|
|
} else { |
|
375
|
338
|
|
|
|
|
|
av_push(c->result, newRV_noinc((SV *)c->current_row)); |
|
376
|
338
|
|
|
|
|
|
c->current_row = newAV(); |
|
377
|
|
|
|
|
|
|
} |
|
378
|
|
|
|
|
|
|
} |
|
379
|
857
|
|
|
|
|
|
return 0; |
|
380
|
|
|
|
|
|
|
} |
|
381
|
|
|
|
|
|
|
|
|
382
|
|
|
|
|
|
|
/* Field callback for as_callback == 1: invoke user code per row. */ |
|
383
|
|
|
|
|
|
|
static int |
|
384
|
920529
|
|
|
|
|
|
each_cb(const char *field, size_t len, int eor, void *ud) |
|
385
|
|
|
|
|
|
|
{ |
|
386
|
920529
|
|
|
|
|
|
dispatch_ctx_t *c = (dispatch_ctx_t *)ud; |
|
387
|
|
|
|
|
|
|
dTHXa(c->my_perl); |
|
388
|
920529
|
|
|
|
|
|
int is_null = (len == SEPARATED_FIELD_NULL_LEN); |
|
389
|
920529
|
50
|
|
|
|
|
SV *sv = make_field_sv(aTHX_ field, is_null ? 0 : (STRLEN)len, is_null, |
|
390
|
|
|
|
|
|
|
c->empty_is_undef, c->binary); |
|
391
|
920529
|
|
|
|
|
|
av_push(c->row_av, sv); |
|
392
|
920529
|
100
|
|
|
|
|
if (eor) { |
|
393
|
|
|
|
|
|
|
/* Header mode: first row is consumed as headers, no callback. */ |
|
394
|
110214
|
100
|
|
|
|
|
if (c->header_mode && !c->headers) { |
|
|
|
100
|
|
|
|
|
|
|
395
|
5
|
|
|
|
|
|
check_no_duplicate_headers(aTHX_ c->row_av); |
|
396
|
|
|
|
|
|
|
/* Steal row_av as headers; allocate fresh row_av for next row. */ |
|
397
|
5
|
|
|
|
|
|
c->headers = c->row_av; |
|
398
|
5
|
|
|
|
|
|
c->row_av = newAV(); |
|
399
|
5
|
|
|
|
|
|
return 0; |
|
400
|
|
|
|
|
|
|
} |
|
401
|
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
/* Build the arg the callback sees: AV (default) or HV (header). */ |
|
403
|
|
|
|
|
|
|
SV *rowref; |
|
404
|
110209
|
100
|
|
|
|
|
if (c->header_mode) { |
|
405
|
15
|
|
|
|
|
|
HV *row_hv = build_header_row_hv(aTHX_ c->headers, c->row_av); |
|
406
|
15
|
|
|
|
|
|
rowref = newRV_noinc((SV *)row_hv); |
|
407
|
|
|
|
|
|
|
} else { |
|
408
|
110194
|
|
|
|
|
|
rowref = newRV_inc((SV *)c->row_av); /* +1, not consumed */ |
|
409
|
|
|
|
|
|
|
} |
|
410
|
110209
|
|
|
|
|
|
sv_2mortal(rowref); |
|
411
|
|
|
|
|
|
|
|
|
412
|
110209
|
|
|
|
|
|
dSP; |
|
413
|
110209
|
|
|
|
|
|
ENTER; SAVETMPS; |
|
414
|
110209
|
50
|
|
|
|
|
PUSHMARK(SP); |
|
415
|
110209
|
50
|
|
|
|
|
XPUSHs(rowref); |
|
416
|
110209
|
|
|
|
|
|
PUTBACK; |
|
417
|
|
|
|
|
|
|
|
|
418
|
|
|
|
|
|
|
int count; |
|
419
|
110209
|
|
|
|
|
|
I32 flags = G_DISCARD | G_EVAL; |
|
420
|
110209
|
|
|
|
|
|
count = call_sv(c->cb, flags); |
|
421
|
110209
|
|
|
|
|
|
SPAGAIN; |
|
422
|
|
|
|
|
|
|
PERL_UNUSED_VAR(count); |
|
423
|
|
|
|
|
|
|
|
|
424
|
|
|
|
|
|
|
/* Did the callback die? Stash the message and abort the parse. */ |
|
425
|
110209
|
50
|
|
|
|
|
SV *errsv = ERRSV; |
|
426
|
110209
|
100
|
|
|
|
|
if (SvTRUE(errsv)) { |
|
427
|
3
|
|
|
|
|
|
c->die_msg = newSVsv(errsv); |
|
428
|
3
|
50
|
|
|
|
|
FREETMPS; LEAVE; |
|
429
|
3
|
|
|
|
|
|
av_clear(c->row_av); /* prepare for cleanup */ |
|
430
|
3
|
|
|
|
|
|
return 1; /* tell parser to abort */ |
|
431
|
|
|
|
|
|
|
} |
|
432
|
|
|
|
|
|
|
|
|
433
|
110206
|
50
|
|
|
|
|
FREETMPS; LEAVE; |
|
434
|
110206
|
|
|
|
|
|
av_clear(c->row_av); |
|
435
|
|
|
|
|
|
|
} |
|
436
|
920521
|
|
|
|
|
|
return 0; |
|
437
|
|
|
|
|
|
|
} |
|
438
|
|
|
|
|
|
|
|
|
439
|
|
|
|
|
|
|
/* The shared dispatcher. `input_pv` / `input_len` is the byte buffer |
|
440
|
|
|
|
|
|
|
* to parse; `opts` is fully resolved. as_callback selects collect vs |
|
441
|
|
|
|
|
|
|
* callback mode. cb is the user code in callback mode (NULL otherwise). |
|
442
|
|
|
|
|
|
|
* |
|
443
|
|
|
|
|
|
|
* Returns: |
|
444
|
|
|
|
|
|
|
* collect mode (as_callback == 0): AV* of rowrefs (caller must mortalise) |
|
445
|
|
|
|
|
|
|
* callback mode (as_callback == 1): NULL (no return value) |
|
446
|
|
|
|
|
|
|
* |
|
447
|
|
|
|
|
|
|
* On parse error, croaks. On callback-die, croaks with the propagated msg. |
|
448
|
|
|
|
|
|
|
*/ |
|
449
|
|
|
|
|
|
|
static AV * |
|
450
|
148
|
|
|
|
|
|
separated_parse_dispatch(pTHX_ const char *input_pv, STRLEN input_len, |
|
451
|
|
|
|
|
|
|
const separated_options_t *opts, |
|
452
|
|
|
|
|
|
|
int as_callback, SV *cb, |
|
453
|
|
|
|
|
|
|
AV *explicit_headers) |
|
454
|
|
|
|
|
|
|
{ |
|
455
|
|
|
|
|
|
|
dispatch_ctx_t ctx; |
|
456
|
148
|
|
|
|
|
|
memset(&ctx, 0, sizeof ctx); |
|
457
|
|
|
|
|
|
|
#ifdef PERL_IMPLICIT_CONTEXT |
|
458
|
|
|
|
|
|
|
ctx.my_perl = aTHX; |
|
459
|
|
|
|
|
|
|
#endif |
|
460
|
148
|
|
|
|
|
|
ctx.empty_is_undef = opts->empty_is_undef; |
|
461
|
148
|
|
|
|
|
|
ctx.binary = opts->binary; |
|
462
|
|
|
|
|
|
|
/* Force header_mode on when explicit names were supplied, so |
|
463
|
|
|
|
|
|
|
* callbacks emit hashrefs from row 0. Caller is responsible for |
|
464
|
|
|
|
|
|
|
* passing this only when meaningful (e.g. only on the read side). */ |
|
465
|
148
|
100
|
|
|
|
|
ctx.header_mode = opts->header || (explicit_headers != NULL); |
|
|
|
50
|
|
|
|
|
|
|
466
|
148
|
|
|
|
|
|
ctx.headers = explicit_headers; /* takes ownership */ |
|
467
|
148
|
|
|
|
|
|
ctx.as_callback = as_callback; |
|
468
|
|
|
|
|
|
|
|
|
469
|
|
|
|
|
|
|
separated_field_cb fcb; |
|
470
|
148
|
100
|
|
|
|
|
if (as_callback) { |
|
471
|
10
|
|
|
|
|
|
ctx.cb = cb; |
|
472
|
10
|
|
|
|
|
|
ctx.row_av = newAV(); |
|
473
|
10
|
|
|
|
|
|
fcb = each_cb; |
|
474
|
|
|
|
|
|
|
} else { |
|
475
|
138
|
|
|
|
|
|
ctx.result = newAV(); |
|
476
|
138
|
|
|
|
|
|
ctx.current_row = newAV(); |
|
477
|
138
|
|
|
|
|
|
fcb = collect_cb; |
|
478
|
|
|
|
|
|
|
} |
|
479
|
|
|
|
|
|
|
|
|
480
|
148
|
|
|
|
|
|
size_t err_off = 0; |
|
481
|
148
|
|
|
|
|
|
long rc = separated_parse(input_pv, input_len, opts, fcb, &ctx, &err_off); |
|
482
|
|
|
|
|
|
|
|
|
483
|
145
|
100
|
|
|
|
|
if (rc < 0) { |
|
484
|
|
|
|
|
|
|
/* Cleanup. */ |
|
485
|
8
|
100
|
|
|
|
|
if (as_callback) { |
|
486
|
1
|
|
|
|
|
|
SvREFCNT_dec((SV *)ctx.row_av); |
|
487
|
|
|
|
|
|
|
} else { |
|
488
|
7
|
|
|
|
|
|
SvREFCNT_dec((SV *)ctx.current_row); |
|
489
|
7
|
|
|
|
|
|
SvREFCNT_dec((SV *)ctx.result); |
|
490
|
|
|
|
|
|
|
} |
|
491
|
8
|
50
|
|
|
|
|
if (ctx.headers) SvREFCNT_dec((SV *)ctx.headers); |
|
492
|
8
|
100
|
|
|
|
|
if (ctx.die_msg) { |
|
493
|
|
|
|
|
|
|
STRLEN dlen; |
|
494
|
1
|
|
|
|
|
|
const char *dpv = SvPV(ctx.die_msg, dlen); |
|
495
|
|
|
|
|
|
|
/* Re-raise the original die message verbatim. Not using |
|
496
|
|
|
|
|
|
|
* croak_sv (5.13.1+) for 5.8/5.10 compat. */ |
|
497
|
1
|
|
|
|
|
|
SV *msg_mortal = sv_2mortal(ctx.die_msg); |
|
498
|
|
|
|
|
|
|
PERL_UNUSED_VAR(msg_mortal); |
|
499
|
1
|
|
|
|
|
|
croak("%.*s", (int)dlen, dpv); |
|
500
|
|
|
|
|
|
|
} |
|
501
|
7
|
|
|
|
|
|
croak("File::Raw::Separated: %s at byte offset %lu", |
|
502
|
|
|
|
|
|
|
separated_strerror((separated_err_t)rc), (unsigned long)err_off); |
|
503
|
|
|
|
|
|
|
} |
|
504
|
|
|
|
|
|
|
|
|
505
|
137
|
100
|
|
|
|
|
if (as_callback) { |
|
506
|
9
|
|
|
|
|
|
SvREFCNT_dec((SV *)ctx.row_av); |
|
507
|
9
|
100
|
|
|
|
|
if (ctx.headers) SvREFCNT_dec((SV *)ctx.headers); |
|
508
|
9
|
|
|
|
|
|
return NULL; |
|
509
|
|
|
|
|
|
|
} |
|
510
|
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
/* Trailing in-progress row — should always be empty if the parser |
|
512
|
|
|
|
|
|
|
* finished successfully. Free it. */ |
|
513
|
128
|
|
|
|
|
|
SvREFCNT_dec((SV *)ctx.current_row); |
|
514
|
128
|
100
|
|
|
|
|
if (ctx.headers) SvREFCNT_dec((SV *)ctx.headers); |
|
515
|
128
|
|
|
|
|
|
return ctx.result; |
|
516
|
|
|
|
|
|
|
} |
|
517
|
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
#define SEPARATED_STREAM_CHUNK 65536 |
|
520
|
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
static void |
|
522
|
19
|
|
|
|
|
|
separated_parse_dispatch_stream(pTHX_ const char *path, |
|
523
|
|
|
|
|
|
|
const separated_options_t *opts, |
|
524
|
|
|
|
|
|
|
SV *cb, |
|
525
|
|
|
|
|
|
|
AV *explicit_headers) |
|
526
|
|
|
|
|
|
|
{ |
|
527
|
|
|
|
|
|
|
dispatch_ctx_t ctx; |
|
528
|
19
|
|
|
|
|
|
memset(&ctx, 0, sizeof ctx); |
|
529
|
|
|
|
|
|
|
#ifdef PERL_IMPLICIT_CONTEXT |
|
530
|
|
|
|
|
|
|
ctx.my_perl = aTHX; |
|
531
|
|
|
|
|
|
|
#endif |
|
532
|
19
|
|
|
|
|
|
ctx.empty_is_undef = opts->empty_is_undef; |
|
533
|
19
|
|
|
|
|
|
ctx.binary = opts->binary; |
|
534
|
19
|
100
|
|
|
|
|
ctx.header_mode = opts->header || (explicit_headers != NULL); |
|
|
|
50
|
|
|
|
|
|
|
535
|
19
|
|
|
|
|
|
ctx.headers = explicit_headers; /* takes ownership */ |
|
536
|
19
|
|
|
|
|
|
ctx.as_callback = 1; |
|
537
|
19
|
|
|
|
|
|
ctx.cb = cb; |
|
538
|
19
|
|
|
|
|
|
ctx.row_av = newAV(); |
|
539
|
|
|
|
|
|
|
|
|
540
|
19
|
|
|
|
|
|
separated_ctx_t *parser = separated_init(opts, each_cb, &ctx); |
|
541
|
19
|
50
|
|
|
|
|
if (!parser) { |
|
542
|
0
|
|
|
|
|
|
SvREFCNT_dec((SV *)ctx.row_av); |
|
543
|
0
|
|
|
|
|
|
croak("File::Raw::Separated: out of memory initialising parser"); |
|
544
|
|
|
|
|
|
|
} |
|
545
|
|
|
|
|
|
|
|
|
546
|
19
|
|
|
|
|
|
int fd = PerlLIO_open(path, O_RDONLY); |
|
547
|
19
|
100
|
|
|
|
|
if (fd < 0) { |
|
548
|
1
|
|
|
|
|
|
int saved_errno = errno; |
|
549
|
1
|
|
|
|
|
|
separated_free(parser); |
|
550
|
1
|
|
|
|
|
|
SvREFCNT_dec((SV *)ctx.row_av); |
|
551
|
1
|
|
|
|
|
|
croak("File::Raw::Separated: cannot open %s: %s", |
|
552
|
|
|
|
|
|
|
path, Strerror(saved_errno)); |
|
553
|
|
|
|
|
|
|
} |
|
554
|
|
|
|
|
|
|
|
|
555
|
|
|
|
|
|
|
/* Local buffer per call. Stack-allocated so concurrent calls in |
|
556
|
|
|
|
|
|
|
* different threads don't collide on a static. */ |
|
557
|
|
|
|
|
|
|
char buf[SEPARATED_STREAM_CHUNK]; |
|
558
|
18
|
|
|
|
|
|
separated_err_t parse_err = SEPARATED_OK; |
|
559
|
18
|
|
|
|
|
|
int read_errno = 0; |
|
560
|
|
|
|
|
|
|
SSize_t n; |
|
561
|
|
|
|
|
|
|
|
|
562
|
359
|
100
|
|
|
|
|
while ((n = PerlLIO_read(fd, buf, sizeof buf)) > 0) { |
|
563
|
343
|
|
|
|
|
|
parse_err = separated_feed(parser, buf, (size_t)n); |
|
564
|
343
|
100
|
|
|
|
|
if (parse_err != SEPARATED_OK) break; |
|
565
|
|
|
|
|
|
|
} |
|
566
|
18
|
50
|
|
|
|
|
if (n < 0) read_errno = errno; |
|
567
|
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
/* Only call _finish on success — on error the context is already |
|
569
|
|
|
|
|
|
|
* sticky-failed and _finish would just no-op anyway, but staying |
|
570
|
|
|
|
|
|
|
* symmetric makes intent clearer. */ |
|
571
|
18
|
100
|
|
|
|
|
if (parse_err == SEPARATED_OK && read_errno == 0) { |
|
|
|
50
|
|
|
|
|
|
|
572
|
16
|
|
|
|
|
|
parse_err = separated_finish(parser); |
|
573
|
|
|
|
|
|
|
} |
|
574
|
|
|
|
|
|
|
|
|
575
|
18
|
|
|
|
|
|
PerlLIO_close(fd); |
|
576
|
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
/* Pull diagnostics out before freeing the parser. */ |
|
578
|
18
|
|
|
|
|
|
size_t err_off = (parse_err != SEPARATED_OK) |
|
579
|
18
|
100
|
|
|
|
|
? separated_offset(parser) : 0; |
|
580
|
18
|
|
|
|
|
|
SV *die_msg = ctx.die_msg; |
|
581
|
18
|
|
|
|
|
|
ctx.die_msg = NULL; |
|
582
|
|
|
|
|
|
|
|
|
583
|
18
|
|
|
|
|
|
separated_free(parser); |
|
584
|
18
|
|
|
|
|
|
SvREFCNT_dec((SV *)ctx.row_av); |
|
585
|
18
|
100
|
|
|
|
|
if (ctx.headers) SvREFCNT_dec((SV *)ctx.headers); |
|
586
|
|
|
|
|
|
|
|
|
587
|
|
|
|
|
|
|
/* Order: callback-die > read error > parse error. The first |
|
588
|
|
|
|
|
|
|
* cleanly explains user code aborting; the second is always |
|
589
|
|
|
|
|
|
|
* recoverable info; the third is our domain. */ |
|
590
|
18
|
100
|
|
|
|
|
if (die_msg) { |
|
591
|
|
|
|
|
|
|
STRLEN dlen; |
|
592
|
1
|
|
|
|
|
|
const char *dpv = SvPV(die_msg, dlen); |
|
593
|
1
|
|
|
|
|
|
SV *m = sv_2mortal(die_msg); |
|
594
|
|
|
|
|
|
|
PERL_UNUSED_VAR(m); |
|
595
|
1
|
|
|
|
|
|
croak("%.*s", (int)dlen, dpv); |
|
596
|
|
|
|
|
|
|
} |
|
597
|
17
|
50
|
|
|
|
|
if (read_errno) { |
|
598
|
0
|
|
|
|
|
|
croak("File::Raw::Separated: read error on %s: %s", |
|
599
|
|
|
|
|
|
|
path, Strerror(read_errno)); |
|
600
|
|
|
|
|
|
|
} |
|
601
|
17
|
100
|
|
|
|
|
if (parse_err != SEPARATED_OK) { |
|
602
|
1
|
|
|
|
|
|
croak("File::Raw::Separated: %s at byte offset %lu in %s", |
|
603
|
|
|
|
|
|
|
separated_strerror(parse_err), |
|
604
|
|
|
|
|
|
|
(unsigned long)err_off, path); |
|
605
|
|
|
|
|
|
|
} |
|
606
|
16
|
|
|
|
|
|
} |
|
607
|
|
|
|
|
|
|
|
|
608
|
|
|
|
|
|
|
/* ============================================================ |
|
609
|
|
|
|
|
|
|
* Plugin integration with File::Raw |
|
610
|
|
|
|
|
|
|
* |
|
611
|
|
|
|
|
|
|
* BOOT registers two plugins ("csv" and "tsv") via file_register_plugin |
|
612
|
|
|
|
|
|
|
* (declared in include/file_plugin.h). Each plugin's `state` slot points |
|
613
|
|
|
|
|
|
|
* to a static separated_options_t carrying the dialect's defaults. |
|
614
|
|
|
|
|
|
|
* |
|
615
|
|
|
|
|
|
|
* The READ phase fires from File::Raw::slurp($p, plugin => 'csv', ...). |
|
616
|
|
|
|
|
|
|
* Per-call options arrive in ctx->options as an HV; we layer them on |
|
617
|
|
|
|
|
|
|
* top of *(ctx->plugin_state) and parse the slurped bytes into AoA. |
|
618
|
|
|
|
|
|
|
* |
|
619
|
|
|
|
|
|
|
* No global enable/disable knob: callers without a `plugin =>` opt get |
|
620
|
|
|
|
|
|
|
* the unmodified bytes back from File::Raw, by definition. |
|
621
|
|
|
|
|
|
|
* ============================================================ */ |
|
622
|
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
/* Per-dialect default options. The plugin struct's `state` field points |
|
624
|
|
|
|
|
|
|
* here; sep_read copies into a stack-local before merging ctx->options |
|
625
|
|
|
|
|
|
|
* on top, so concurrent calls don't fight over the defaults table. */ |
|
626
|
|
|
|
|
|
|
static separated_options_t csv_default_opts; |
|
627
|
|
|
|
|
|
|
static separated_options_t tsv_default_opts; |
|
628
|
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
static SV * |
|
630
|
59
|
|
|
|
|
|
sep_read(pTHX_ FilePluginContext *ctx) |
|
631
|
|
|
|
|
|
|
{ |
|
632
|
|
|
|
|
|
|
separated_options_t local; |
|
633
|
|
|
|
|
|
|
STRLEN len; |
|
634
|
|
|
|
|
|
|
const char *pv; |
|
635
|
|
|
|
|
|
|
AV *result; |
|
636
|
|
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
/* Start from the dialect defaults. plugin_state always points to |
|
638
|
|
|
|
|
|
|
* one of csv_default_opts / tsv_default_opts; treat NULL as csv |
|
639
|
|
|
|
|
|
|
* defensively. */ |
|
640
|
59
|
50
|
|
|
|
|
if (ctx->plugin_state) |
|
641
|
59
|
|
|
|
|
|
local = *(const separated_options_t *)ctx->plugin_state; |
|
642
|
|
|
|
|
|
|
else |
|
643
|
0
|
|
|
|
|
|
separated_options_init_csv(&local); |
|
644
|
|
|
|
|
|
|
|
|
645
|
|
|
|
|
|
|
/* Merge the per-call options HV on top. ctx->options is the same |
|
646
|
|
|
|
|
|
|
* HV file_plugin_dispatch_read built from the variadic XSUB args; |
|
647
|
|
|
|
|
|
|
* decode_opts ignores the 'plugin' key (added to VALID_OPT_KEYS in |
|
648
|
|
|
|
|
|
|
* an earlier pass) and the 'dialect' key (likewise ignored). |
|
649
|
|
|
|
|
|
|
* |
|
650
|
|
|
|
|
|
|
* No seed_opts_for_dialect call here — the dialect is fully implied |
|
651
|
|
|
|
|
|
|
* by which plugin fired, and the defaults already live in *local. */ |
|
652
|
59
|
50
|
|
|
|
|
if (ctx->options) decode_opts(aTHX_ ctx->options, &local); |
|
653
|
|
|
|
|
|
|
|
|
654
|
57
|
50
|
|
|
|
|
if (!ctx->data) return &PL_sv_undef; |
|
655
|
57
|
|
|
|
|
|
pv = SvPV(ctx->data, len); |
|
656
|
|
|
|
|
|
|
{ |
|
657
|
57
|
|
|
|
|
|
AV *xhdr = extract_explicit_headers(aTHX_ ctx->options); |
|
658
|
54
|
|
|
|
|
|
result = separated_parse_dispatch(aTHX_ pv, len, &local, 0, NULL, xhdr); |
|
659
|
|
|
|
|
|
|
} |
|
660
|
|
|
|
|
|
|
/* result is a fresh AV with refcount 1; wrap without bumping. */ |
|
661
|
51
|
|
|
|
|
|
return newRV_noinc((SV *)result); |
|
662
|
|
|
|
|
|
|
} |
|
663
|
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
/* ============================================================ |
|
665
|
|
|
|
|
|
|
* sep_write - WRITE phase |
|
666
|
|
|
|
|
|
|
* |
|
667
|
|
|
|
|
|
|
* Fires from File::Raw::spew / append / atomic_spew when the caller |
|
668
|
|
|
|
|
|
|
* passes plugin => 'csv' (or 'tsv'). Serialises an arrayref of arrayref |
|
669
|
|
|
|
|
|
|
* rows into bytes following RFC 4180 conventions: fields containing the |
|
670
|
|
|
|
|
|
|
* separator, quote character, CR, or LF are quoted; embedded quote |
|
671
|
|
|
|
|
|
|
* characters are doubled (or backslash-escaped if opts.escape is set). |
|
672
|
|
|
|
|
|
|
* |
|
673
|
|
|
|
|
|
|
* Hashref rows are not currently accepted (would require an explicit |
|
674
|
|
|
|
|
|
|
* header => [keys] order to be deterministic). Undef fields emit as |
|
675
|
|
|
|
|
|
|
* empty. |
|
676
|
|
|
|
|
|
|
* ============================================================ */ |
|
677
|
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
static SV * |
|
679
|
13
|
|
|
|
|
|
sep_write(pTHX_ FilePluginContext *ctx) |
|
680
|
|
|
|
|
|
|
{ |
|
681
|
|
|
|
|
|
|
separated_options_t o; |
|
682
|
|
|
|
|
|
|
AV *rows; |
|
683
|
|
|
|
|
|
|
SSize_t nrows, i, j; |
|
684
|
|
|
|
|
|
|
char *buf; |
|
685
|
|
|
|
|
|
|
STRLEN buf_len, buf_cap; |
|
686
|
|
|
|
|
|
|
char eol[2]; |
|
687
|
|
|
|
|
|
|
int eol_len; |
|
688
|
|
|
|
|
|
|
SV *out; |
|
689
|
|
|
|
|
|
|
|
|
690
|
13
|
50
|
|
|
|
|
if (ctx->plugin_state) |
|
691
|
13
|
|
|
|
|
|
o = *(const separated_options_t *)ctx->plugin_state; |
|
692
|
|
|
|
|
|
|
else |
|
693
|
0
|
|
|
|
|
|
separated_options_init_csv(&o); |
|
694
|
13
|
50
|
|
|
|
|
if (ctx->options) decode_opts(aTHX_ ctx->options, &o); |
|
695
|
|
|
|
|
|
|
|
|
696
|
12
|
50
|
|
|
|
|
if (!ctx->data || !SvROK(ctx->data) || |
|
|
|
100
|
|
|
|
|
|
|
697
|
11
|
50
|
|
|
|
|
SvTYPE(SvRV(ctx->data)) != SVt_PVAV) |
|
698
|
1
|
|
|
|
|
|
croak("File::Raw::Separated: write expects an arrayref of rows"); |
|
699
|
|
|
|
|
|
|
|
|
700
|
11
|
|
|
|
|
|
rows = (AV *)SvRV(ctx->data); |
|
701
|
11
|
|
|
|
|
|
nrows = av_len(rows) + 1; |
|
702
|
|
|
|
|
|
|
|
|
703
|
|
|
|
|
|
|
/* EOL. AUTO degrades to LF for write since we have nothing to |
|
704
|
|
|
|
|
|
|
* auto-detect; CRLF and CR honour the explicit pin. */ |
|
705
|
11
|
|
|
|
|
|
switch (o.eol_mode) { |
|
706
|
1
|
|
|
|
|
|
case SEPARATED_EOL_CRLF: eol[0] = '\r'; eol[1] = '\n'; eol_len = 2; break; |
|
707
|
0
|
|
|
|
|
|
case SEPARATED_EOL_CR: eol[0] = '\r'; eol_len = 1; break; |
|
708
|
10
|
|
|
|
|
|
default: eol[0] = '\n'; eol_len = 1; break; |
|
709
|
|
|
|
|
|
|
} |
|
710
|
|
|
|
|
|
|
|
|
711
|
11
|
|
|
|
|
|
buf_cap = 4096; |
|
712
|
11
|
|
|
|
|
|
Newx(buf, buf_cap, char); |
|
713
|
11
|
|
|
|
|
|
buf_len = 0; |
|
714
|
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
#define SEP_BUF_ENSURE(n) do { \ |
|
716
|
|
|
|
|
|
|
STRLEN _need = buf_len + (STRLEN)(n); \ |
|
717
|
|
|
|
|
|
|
if (_need > buf_cap) { \ |
|
718
|
|
|
|
|
|
|
while (_need > buf_cap) buf_cap *= 2; \ |
|
719
|
|
|
|
|
|
|
Renew(buf, buf_cap, char); \ |
|
720
|
|
|
|
|
|
|
} \ |
|
721
|
|
|
|
|
|
|
} while (0) |
|
722
|
|
|
|
|
|
|
|
|
723
|
35
|
100
|
|
|
|
|
for (i = 0; i < nrows; i++) { |
|
724
|
25
|
|
|
|
|
|
SV **rowp = av_fetch(rows, i, 0); |
|
725
|
|
|
|
|
|
|
AV *row; |
|
726
|
|
|
|
|
|
|
SSize_t nfields; |
|
727
|
|
|
|
|
|
|
|
|
728
|
25
|
50
|
|
|
|
|
if (!rowp || !*rowp || !SvROK(*rowp) || |
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
729
|
24
|
50
|
|
|
|
|
SvTYPE(SvRV(*rowp)) != SVt_PVAV) { |
|
730
|
1
|
|
|
|
|
|
Safefree(buf); |
|
731
|
1
|
|
|
|
|
|
croak("File::Raw::Separated: row %ld is not an arrayref", (long)i); |
|
732
|
|
|
|
|
|
|
} |
|
733
|
24
|
|
|
|
|
|
row = (AV *)SvRV(*rowp); |
|
734
|
24
|
|
|
|
|
|
nfields = av_len(row) + 1; |
|
735
|
|
|
|
|
|
|
|
|
736
|
67
|
100
|
|
|
|
|
for (j = 0; j < nfields; j++) { |
|
737
|
43
|
|
|
|
|
|
SV **fieldp = av_fetch(row, j, 0); |
|
738
|
|
|
|
|
|
|
STRLEN flen; |
|
739
|
|
|
|
|
|
|
const char *fpv; |
|
740
|
43
|
|
|
|
|
|
int needs_quote = 0; |
|
741
|
|
|
|
|
|
|
|
|
742
|
43
|
100
|
|
|
|
|
if (j > 0) { SEP_BUF_ENSURE(1); buf[buf_len++] = (char)o.sep; } |
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
743
|
|
|
|
|
|
|
|
|
744
|
43
|
50
|
|
|
|
|
if (!fieldp || !*fieldp || !SvOK(*fieldp)) continue; |
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
745
|
42
|
|
|
|
|
|
fpv = SvPV(*fieldp, flen); |
|
746
|
|
|
|
|
|
|
|
|
747
|
|
|
|
|
|
|
/* Decide if quoting is needed. Only relevant when the |
|
748
|
|
|
|
|
|
|
* dialect actually has a quote char. TSV with quote=-1 |
|
749
|
|
|
|
|
|
|
* emits raw and is the caller's problem if it contains tab |
|
750
|
|
|
|
|
|
|
* or newline. */ |
|
751
|
42
|
100
|
|
|
|
|
if (o.quote >= 0) { |
|
752
|
|
|
|
|
|
|
STRLEN k; |
|
753
|
87
|
100
|
|
|
|
|
for (k = 0; k < flen; k++) { |
|
754
|
55
|
|
|
|
|
|
char c = fpv[k]; |
|
755
|
55
|
100
|
|
|
|
|
if (c == (char)o.sep || c == (char)o.quote || |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
756
|
51
|
50
|
|
|
|
|
c == '\n' || c == '\r') { |
|
757
|
4
|
|
|
|
|
|
needs_quote = 1; |
|
758
|
4
|
|
|
|
|
|
break; |
|
759
|
|
|
|
|
|
|
} |
|
760
|
|
|
|
|
|
|
} |
|
761
|
|
|
|
|
|
|
} |
|
762
|
|
|
|
|
|
|
|
|
763
|
42
|
100
|
|
|
|
|
if (needs_quote) { |
|
764
|
|
|
|
|
|
|
STRLEN k; |
|
765
|
|
|
|
|
|
|
/* worst case: every byte doubles + open + close quote */ |
|
766
|
4
|
50
|
|
|
|
|
SEP_BUF_ENSURE(flen * 2 + 2); |
|
|
|
0
|
|
|
|
|
|
|
767
|
4
|
|
|
|
|
|
buf[buf_len++] = (char)o.quote; |
|
768
|
48
|
100
|
|
|
|
|
for (k = 0; k < flen; k++) { |
|
769
|
44
|
|
|
|
|
|
char c = fpv[k]; |
|
770
|
44
|
100
|
|
|
|
|
if (c == (char)o.quote) { |
|
771
|
3
|
50
|
|
|
|
|
if (o.escape >= 0) |
|
772
|
0
|
|
|
|
|
|
buf[buf_len++] = (char)o.escape; |
|
773
|
|
|
|
|
|
|
else |
|
774
|
3
|
|
|
|
|
|
buf[buf_len++] = (char)o.quote; /* RFC 4180 */ |
|
775
|
|
|
|
|
|
|
} |
|
776
|
44
|
|
|
|
|
|
buf[buf_len++] = c; |
|
777
|
|
|
|
|
|
|
} |
|
778
|
4
|
|
|
|
|
|
buf[buf_len++] = (char)o.quote; |
|
779
|
|
|
|
|
|
|
} else { |
|
780
|
38
|
50
|
|
|
|
|
SEP_BUF_ENSURE(flen); |
|
|
|
0
|
|
|
|
|
|
|
781
|
38
|
|
|
|
|
|
memcpy(buf + buf_len, fpv, flen); |
|
782
|
38
|
|
|
|
|
|
buf_len += flen; |
|
783
|
|
|
|
|
|
|
} |
|
784
|
|
|
|
|
|
|
} |
|
785
|
|
|
|
|
|
|
|
|
786
|
24
|
50
|
|
|
|
|
SEP_BUF_ENSURE(eol_len); |
|
|
|
0
|
|
|
|
|
|
|
787
|
24
|
|
|
|
|
|
memcpy(buf + buf_len, eol, eol_len); |
|
788
|
24
|
|
|
|
|
|
buf_len += eol_len; |
|
789
|
|
|
|
|
|
|
} |
|
790
|
|
|
|
|
|
|
|
|
791
|
|
|
|
|
|
|
#undef SEP_BUF_ENSURE |
|
792
|
|
|
|
|
|
|
|
|
793
|
10
|
|
|
|
|
|
out = newSVpvn(buf, buf_len); |
|
794
|
10
|
|
|
|
|
|
Safefree(buf); |
|
795
|
10
|
|
|
|
|
|
return out; |
|
796
|
|
|
|
|
|
|
} |
|
797
|
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
/* ============================================================ |
|
799
|
|
|
|
|
|
|
* sep_stream - STREAM phase |
|
800
|
|
|
|
|
|
|
* |
|
801
|
|
|
|
|
|
|
* Fires from File::Raw::each_line($p, $cb, plugin => 'csv'). File::Raw |
|
802
|
|
|
|
|
|
|
* opens the file and feeds us chunks; we own the parser context across |
|
803
|
|
|
|
|
|
|
* calls via FilePluginContext::call_state. On the EOF call we flush |
|
804
|
|
|
|
|
|
|
* any trailing field/row, free the parser, and clear call_state. |
|
805
|
|
|
|
|
|
|
* |
|
806
|
|
|
|
|
|
|
* The user's callback is invoked once per emitted record (arrayref or |
|
807
|
|
|
|
|
|
|
* hashref under header mode), driven by the same each_cb the in-memory |
|
808
|
|
|
|
|
|
|
* callback variant uses. |
|
809
|
|
|
|
|
|
|
* ============================================================ */ |
|
810
|
|
|
|
|
|
|
|
|
811
|
|
|
|
|
|
|
typedef struct { |
|
812
|
|
|
|
|
|
|
dispatch_ctx_t disp; |
|
813
|
|
|
|
|
|
|
separated_ctx_t *parser; |
|
814
|
|
|
|
|
|
|
separated_options_t opts; /* parser copies internally; keep for clarity */ |
|
815
|
|
|
|
|
|
|
int destroyed; |
|
816
|
|
|
|
|
|
|
} sep_stream_state_t; |
|
817
|
|
|
|
|
|
|
|
|
818
|
|
|
|
|
|
|
static void |
|
819
|
8
|
|
|
|
|
|
sep_stream_state_free(pTHX_ sep_stream_state_t *st) |
|
820
|
|
|
|
|
|
|
{ |
|
821
|
8
|
50
|
|
|
|
|
if (!st || st->destroyed) return; |
|
|
|
50
|
|
|
|
|
|
|
822
|
8
|
|
|
|
|
|
st->destroyed = 1; |
|
823
|
8
|
50
|
|
|
|
|
if (st->parser) { separated_free(st->parser); st->parser = NULL; } |
|
824
|
8
|
50
|
|
|
|
|
if (st->disp.row_av) { SvREFCNT_dec((SV *)st->disp.row_av); st->disp.row_av = NULL; } |
|
825
|
8
|
100
|
|
|
|
|
if (st->disp.headers) { SvREFCNT_dec((SV *)st->disp.headers); st->disp.headers = NULL; } |
|
826
|
8
|
100
|
|
|
|
|
if (st->disp.die_msg) { SvREFCNT_dec(st->disp.die_msg); st->disp.die_msg = NULL; } |
|
827
|
8
|
|
|
|
|
|
Safefree(st); |
|
828
|
|
|
|
|
|
|
} |
|
829
|
|
|
|
|
|
|
|
|
830
|
|
|
|
|
|
|
static int |
|
831
|
17
|
|
|
|
|
|
sep_stream(pTHX_ FilePluginContext *ctx, const char *chunk, size_t len, int eof) |
|
832
|
|
|
|
|
|
|
{ |
|
833
|
17
|
|
|
|
|
|
sep_stream_state_t *st = (sep_stream_state_t *)ctx->call_state; |
|
834
|
|
|
|
|
|
|
separated_err_t rc; |
|
835
|
|
|
|
|
|
|
|
|
836
|
|
|
|
|
|
|
/* First call: build state from defaults + per-call opts, init parser. */ |
|
837
|
17
|
100
|
|
|
|
|
if (!st) { |
|
838
|
|
|
|
|
|
|
/* Extract / validate explicit headers BEFORE allocating state - |
|
839
|
|
|
|
|
|
|
* a validation croak here would otherwise leak the partially- |
|
840
|
|
|
|
|
|
|
* built state (no destructor has been hooked up yet). */ |
|
841
|
8
|
|
|
|
|
|
AV *xhdr = extract_explicit_headers(aTHX_ ctx->options); |
|
842
|
|
|
|
|
|
|
|
|
843
|
8
|
|
|
|
|
|
Newxz(st, 1, sep_stream_state_t); |
|
844
|
|
|
|
|
|
|
|
|
845
|
8
|
50
|
|
|
|
|
if (ctx->plugin_state) |
|
846
|
8
|
|
|
|
|
|
st->opts = *(const separated_options_t *)ctx->plugin_state; |
|
847
|
|
|
|
|
|
|
else |
|
848
|
0
|
|
|
|
|
|
separated_options_init_csv(&st->opts); |
|
849
|
8
|
50
|
|
|
|
|
if (ctx->options) decode_opts(aTHX_ ctx->options, &st->opts); |
|
850
|
|
|
|
|
|
|
|
|
851
|
|
|
|
|
|
|
#ifdef PERL_IMPLICIT_CONTEXT |
|
852
|
|
|
|
|
|
|
st->disp.my_perl = aTHX; |
|
853
|
|
|
|
|
|
|
#endif |
|
854
|
8
|
|
|
|
|
|
st->disp.empty_is_undef = st->opts.empty_is_undef; |
|
855
|
8
|
|
|
|
|
|
st->disp.binary = st->opts.binary; |
|
856
|
8
|
|
|
|
|
|
st->disp.headers = xhdr; /* takes ownership; NULL means "use first row" */ |
|
857
|
8
|
100
|
|
|
|
|
st->disp.header_mode = st->opts.header || (xhdr != NULL); |
|
|
|
50
|
|
|
|
|
|
|
858
|
8
|
|
|
|
|
|
st->disp.as_callback = 1; |
|
859
|
8
|
|
|
|
|
|
st->disp.cb = ctx->callback; |
|
860
|
8
|
|
|
|
|
|
st->disp.row_av = newAV(); |
|
861
|
|
|
|
|
|
|
|
|
862
|
8
|
|
|
|
|
|
st->parser = separated_init(&st->opts, each_cb, &st->disp); |
|
863
|
8
|
50
|
|
|
|
|
if (!st->parser) { |
|
864
|
0
|
|
|
|
|
|
sep_stream_state_free(aTHX_ st); |
|
865
|
0
|
|
|
|
|
|
ctx->cancel = 1; |
|
866
|
0
|
|
|
|
|
|
croak("File::Raw::Separated: out of memory initialising parser"); |
|
867
|
|
|
|
|
|
|
} |
|
868
|
8
|
|
|
|
|
|
ctx->call_state = st; |
|
869
|
|
|
|
|
|
|
} |
|
870
|
|
|
|
|
|
|
|
|
871
|
17
|
100
|
|
|
|
|
if (chunk && len > 0) { |
|
|
|
50
|
|
|
|
|
|
|
872
|
10
|
|
|
|
|
|
rc = separated_feed(st->parser, chunk, len); |
|
873
|
10
|
100
|
|
|
|
|
if (rc != SEPARATED_OK) { |
|
874
|
1
|
|
|
|
|
|
SV *die_msg = st->disp.die_msg; |
|
875
|
1
|
|
|
|
|
|
size_t off = separated_offset(st->parser); |
|
876
|
1
|
|
|
|
|
|
ctx->cancel = 1; |
|
877
|
1
|
50
|
|
|
|
|
if (die_msg) { |
|
878
|
|
|
|
|
|
|
STRLEN dlen; |
|
879
|
1
|
|
|
|
|
|
SV *m = newSVsv(die_msg); |
|
880
|
1
|
|
|
|
|
|
const char *dpv = SvPV(m, dlen); |
|
881
|
1
|
|
|
|
|
|
sep_stream_state_free(aTHX_ st); |
|
882
|
1
|
|
|
|
|
|
ctx->call_state = NULL; |
|
883
|
1
|
|
|
|
|
|
sv_2mortal(m); |
|
884
|
1
|
|
|
|
|
|
croak("%.*s", (int)dlen, dpv); |
|
885
|
|
|
|
|
|
|
} |
|
886
|
0
|
|
|
|
|
|
sep_stream_state_free(aTHX_ st); |
|
887
|
0
|
|
|
|
|
|
ctx->call_state = NULL; |
|
888
|
0
|
|
|
|
|
|
croak("File::Raw::Separated: %s at byte offset %lu", |
|
889
|
|
|
|
|
|
|
separated_strerror(rc), (unsigned long)off); |
|
890
|
|
|
|
|
|
|
} |
|
891
|
|
|
|
|
|
|
} |
|
892
|
|
|
|
|
|
|
|
|
893
|
16
|
100
|
|
|
|
|
if (eof) { |
|
894
|
7
|
|
|
|
|
|
rc = separated_finish(st->parser); |
|
895
|
7
|
50
|
|
|
|
|
if (rc != SEPARATED_OK) { |
|
896
|
0
|
|
|
|
|
|
SV *die_msg = st->disp.die_msg; |
|
897
|
0
|
|
|
|
|
|
size_t off = separated_offset(st->parser); |
|
898
|
0
|
|
|
|
|
|
ctx->cancel = 1; |
|
899
|
0
|
0
|
|
|
|
|
if (die_msg) { |
|
900
|
|
|
|
|
|
|
STRLEN dlen; |
|
901
|
0
|
|
|
|
|
|
SV *m = newSVsv(die_msg); |
|
902
|
0
|
|
|
|
|
|
const char *dpv = SvPV(m, dlen); |
|
903
|
0
|
|
|
|
|
|
sep_stream_state_free(aTHX_ st); |
|
904
|
0
|
|
|
|
|
|
ctx->call_state = NULL; |
|
905
|
0
|
|
|
|
|
|
sv_2mortal(m); |
|
906
|
0
|
|
|
|
|
|
croak("%.*s", (int)dlen, dpv); |
|
907
|
|
|
|
|
|
|
} |
|
908
|
0
|
|
|
|
|
|
sep_stream_state_free(aTHX_ st); |
|
909
|
0
|
|
|
|
|
|
ctx->call_state = NULL; |
|
910
|
0
|
|
|
|
|
|
croak("File::Raw::Separated: %s at byte offset %lu", |
|
911
|
|
|
|
|
|
|
separated_strerror(rc), (unsigned long)off); |
|
912
|
|
|
|
|
|
|
} |
|
913
|
7
|
|
|
|
|
|
sep_stream_state_free(aTHX_ st); |
|
914
|
7
|
|
|
|
|
|
ctx->call_state = NULL; |
|
915
|
|
|
|
|
|
|
} |
|
916
|
|
|
|
|
|
|
|
|
917
|
16
|
|
|
|
|
|
return 0; |
|
918
|
|
|
|
|
|
|
} |
|
919
|
|
|
|
|
|
|
|
|
920
|
|
|
|
|
|
|
/* Plugin descriptors. Static-storage lifetime so the registry's |
|
921
|
|
|
|
|
|
|
* non-owning pointer stays valid for the life of the process. */ |
|
922
|
|
|
|
|
|
|
static FilePlugin csv_plugin; |
|
923
|
|
|
|
|
|
|
static FilePlugin tsv_plugin; |
|
924
|
|
|
|
|
|
|
|
|
925
|
|
|
|
|
|
|
/* ============================================================ |
|
926
|
|
|
|
|
|
|
* Per-XSUB helpers |
|
927
|
|
|
|
|
|
|
* |
|
928
|
|
|
|
|
|
|
* The nine XSUBs (parse_buf / parse_buf_each / parse_stream and the |
|
929
|
|
|
|
|
|
|
* six dialect-pinned csv_ / tsv_ variants) all do the same work modulo |
|
930
|
|
|
|
|
|
|
* dialect pinning. Bodies live here so each XSUB is a one-liner; the |
|
931
|
|
|
|
|
|
|
* dialect-prefixed variants used to be pure-Perl wrappers in the .pm |
|
932
|
|
|
|
|
|
|
* that mutated the opts hash via _pin_dialect - that's gone now. |
|
933
|
|
|
|
|
|
|
* ============================================================ */ |
|
934
|
|
|
|
|
|
|
|
|
935
|
|
|
|
|
|
|
static HV * |
|
936
|
115
|
|
|
|
|
|
opts_to_hv(pTHX_ const char *fn, SV *opts) |
|
937
|
|
|
|
|
|
|
{ |
|
938
|
115
|
100
|
|
|
|
|
if (!opts || !SvOK(opts)) return NULL; |
|
|
|
50
|
|
|
|
|
|
|
939
|
50
|
100
|
|
|
|
|
if (!SvROK(opts) || SvTYPE(SvRV(opts)) != SVt_PVHV) |
|
|
|
50
|
|
|
|
|
|
|
940
|
1
|
|
|
|
|
|
croak("%s: options argument must be a hashref", fn); |
|
941
|
49
|
|
|
|
|
|
return (HV *)SvRV(opts); |
|
942
|
|
|
|
|
|
|
} |
|
943
|
|
|
|
|
|
|
|
|
944
|
|
|
|
|
|
|
static SV * |
|
945
|
85
|
|
|
|
|
|
do_parse_buf(pTHX_ const char *fn, SV *input, SV *opts, dialect_pin_t pin) |
|
946
|
|
|
|
|
|
|
{ |
|
947
|
|
|
|
|
|
|
separated_options_t o; |
|
948
|
85
|
|
|
|
|
|
HV *opts_hv = opts_to_hv(aTHX_ fn, opts); |
|
949
|
|
|
|
|
|
|
STRLEN ilen; |
|
950
|
|
|
|
|
|
|
const char *ipv; |
|
951
|
|
|
|
|
|
|
AV *result; |
|
952
|
|
|
|
|
|
|
|
|
953
|
85
|
|
|
|
|
|
seed_opts_for_dialect(aTHX_ opts_hv, &o, pin); |
|
954
|
85
|
|
|
|
|
|
decode_opts(aTHX_ opts_hv, &o); |
|
955
|
|
|
|
|
|
|
|
|
956
|
84
|
|
|
|
|
|
ipv = SvPV(input, ilen); |
|
957
|
|
|
|
|
|
|
{ |
|
958
|
84
|
|
|
|
|
|
AV *xhdr = extract_explicit_headers(aTHX_ opts_hv); |
|
959
|
84
|
|
|
|
|
|
result = separated_parse_dispatch(aTHX_ ipv, ilen, &o, 0, NULL, xhdr); |
|
960
|
|
|
|
|
|
|
} |
|
961
|
77
|
|
|
|
|
|
return newRV_noinc((SV *)result); |
|
962
|
|
|
|
|
|
|
} |
|
963
|
|
|
|
|
|
|
|
|
964
|
|
|
|
|
|
|
static void |
|
965
|
11
|
|
|
|
|
|
do_parse_buf_each(pTHX_ const char *fn, SV *input, SV *code, SV *opts, |
|
966
|
|
|
|
|
|
|
dialect_pin_t pin) |
|
967
|
|
|
|
|
|
|
{ |
|
968
|
|
|
|
|
|
|
separated_options_t o; |
|
969
|
|
|
|
|
|
|
HV *opts_hv; |
|
970
|
|
|
|
|
|
|
STRLEN ilen; |
|
971
|
|
|
|
|
|
|
const char *ipv; |
|
972
|
|
|
|
|
|
|
|
|
973
|
11
|
100
|
|
|
|
|
if (!SvROK(code) || SvTYPE(SvRV(code)) != SVt_PVCV) |
|
|
|
50
|
|
|
|
|
|
|
974
|
1
|
|
|
|
|
|
croak("%s: code argument must be a CODE ref", fn); |
|
975
|
10
|
|
|
|
|
|
opts_hv = opts_to_hv(aTHX_ fn, opts); |
|
976
|
10
|
|
|
|
|
|
seed_opts_for_dialect(aTHX_ opts_hv, &o, pin); |
|
977
|
10
|
|
|
|
|
|
decode_opts(aTHX_ opts_hv, &o); |
|
978
|
|
|
|
|
|
|
|
|
979
|
10
|
|
|
|
|
|
ipv = SvPV(input, ilen); |
|
980
|
|
|
|
|
|
|
{ |
|
981
|
10
|
|
|
|
|
|
AV *xhdr = extract_explicit_headers(aTHX_ opts_hv); |
|
982
|
10
|
|
|
|
|
|
(void)separated_parse_dispatch(aTHX_ ipv, ilen, &o, 1, code, xhdr); |
|
983
|
|
|
|
|
|
|
} |
|
984
|
9
|
|
|
|
|
|
} |
|
985
|
|
|
|
|
|
|
|
|
986
|
|
|
|
|
|
|
static void |
|
987
|
21
|
|
|
|
|
|
do_parse_stream(pTHX_ const char *fn, SV *path, SV *code, SV *opts, |
|
988
|
|
|
|
|
|
|
dialect_pin_t pin) |
|
989
|
|
|
|
|
|
|
{ |
|
990
|
|
|
|
|
|
|
separated_options_t o; |
|
991
|
|
|
|
|
|
|
HV *opts_hv; |
|
992
|
|
|
|
|
|
|
STRLEN plen; |
|
993
|
|
|
|
|
|
|
const char *path_pv; |
|
994
|
|
|
|
|
|
|
|
|
995
|
21
|
50
|
|
|
|
|
if (!SvOK(path)) croak("%s: path must be defined", fn); |
|
996
|
21
|
100
|
|
|
|
|
if (!SvROK(code) || SvTYPE(SvRV(code)) != SVt_PVCV) |
|
|
|
50
|
|
|
|
|
|
|
997
|
1
|
|
|
|
|
|
croak("%s: code argument must be a CODE ref", fn); |
|
998
|
20
|
|
|
|
|
|
opts_hv = opts_to_hv(aTHX_ fn, opts); |
|
999
|
19
|
|
|
|
|
|
seed_opts_for_dialect(aTHX_ opts_hv, &o, pin); |
|
1000
|
19
|
|
|
|
|
|
decode_opts(aTHX_ opts_hv, &o); |
|
1001
|
|
|
|
|
|
|
|
|
1002
|
19
|
|
|
|
|
|
path_pv = SvPV(path, plen); |
|
1003
|
|
|
|
|
|
|
PERL_UNUSED_VAR(plen); |
|
1004
|
|
|
|
|
|
|
{ |
|
1005
|
19
|
|
|
|
|
|
AV *xhdr = extract_explicit_headers(aTHX_ opts_hv); |
|
1006
|
19
|
|
|
|
|
|
separated_parse_dispatch_stream(aTHX_ path_pv, &o, code, xhdr); |
|
1007
|
|
|
|
|
|
|
} |
|
1008
|
16
|
|
|
|
|
|
} |
|
1009
|
|
|
|
|
|
|
|
|
1010
|
|
|
|
|
|
|
/* ============================================================ |
|
1011
|
|
|
|
|
|
|
* Import dispatcher |
|
1012
|
|
|
|
|
|
|
* |
|
1013
|
|
|
|
|
|
|
* `use File::Raw::Separated qw(import|:all|:unified|:csv|:tsv|)` |
|
1014
|
|
|
|
|
|
|
* lands in XS_File__Raw__Separated_import, which walks the requested |
|
1015
|
|
|
|
|
|
|
* names and `newXS`'s "${caller}::file_${name}" -> the matching XSUB |
|
1016
|
|
|
|
|
|
|
* pointer into the caller's symbol table. Mirrors File::Raw's import |
|
1017
|
|
|
|
|
|
|
* (file.c XS_file_import) — same `file_` prefix convention so the two |
|
1018
|
|
|
|
|
|
|
* modules compose: `use File::Raw qw(import); use File::Raw::Separated |
|
1019
|
|
|
|
|
|
|
* qw(import);` lands `file_slurp` *and* `file_parse_buf` etc. in the |
|
1020
|
|
|
|
|
|
|
* same package without collision. |
|
1021
|
|
|
|
|
|
|
* |
|
1022
|
|
|
|
|
|
|
* The xs_func slots are populated at BOOT time by looking up each |
|
1023
|
|
|
|
|
|
|
* already-registered XSUB via get_cv() and stashing CvXSUB(cv); avoids |
|
1024
|
|
|
|
|
|
|
* fragile forward-declarations of static XSUBs that xsubpp may have |
|
1025
|
|
|
|
|
|
|
* emitted with PERL_EUPXS_ALWAYS_EXPORT either set or not. |
|
1026
|
|
|
|
|
|
|
* ============================================================ */ |
|
1027
|
|
|
|
|
|
|
|
|
1028
|
|
|
|
|
|
|
typedef struct { |
|
1029
|
|
|
|
|
|
|
const char *name; |
|
1030
|
|
|
|
|
|
|
XSUBADDR_t xs_func; |
|
1031
|
|
|
|
|
|
|
} ImportEntry; |
|
1032
|
|
|
|
|
|
|
|
|
1033
|
|
|
|
|
|
|
/* Index ranges used by the tag handlers below. Keep in sync. */ |
|
1034
|
|
|
|
|
|
|
#define IMPORT_UNIFIED_LO 0 |
|
1035
|
|
|
|
|
|
|
#define IMPORT_UNIFIED_HI 3 /* exclusive */ |
|
1036
|
|
|
|
|
|
|
#define IMPORT_CSV_LO 3 |
|
1037
|
|
|
|
|
|
|
#define IMPORT_CSV_HI 6 |
|
1038
|
|
|
|
|
|
|
#define IMPORT_TSV_LO 6 |
|
1039
|
|
|
|
|
|
|
#define IMPORT_TSV_HI 9 |
|
1040
|
|
|
|
|
|
|
|
|
1041
|
|
|
|
|
|
|
static ImportEntry g_import_funcs[] = { |
|
1042
|
|
|
|
|
|
|
/* :unified */ |
|
1043
|
|
|
|
|
|
|
{ "parse_buf", NULL }, |
|
1044
|
|
|
|
|
|
|
{ "parse_buf_each", NULL }, |
|
1045
|
|
|
|
|
|
|
{ "parse_stream", NULL }, |
|
1046
|
|
|
|
|
|
|
/* :csv */ |
|
1047
|
|
|
|
|
|
|
{ "csv_parse_buf", NULL }, |
|
1048
|
|
|
|
|
|
|
{ "csv_parse_buf_each", NULL }, |
|
1049
|
|
|
|
|
|
|
{ "csv_parse_stream", NULL }, |
|
1050
|
|
|
|
|
|
|
/* :tsv */ |
|
1051
|
|
|
|
|
|
|
{ "tsv_parse_buf", NULL }, |
|
1052
|
|
|
|
|
|
|
{ "tsv_parse_buf_each", NULL }, |
|
1053
|
|
|
|
|
|
|
{ "tsv_parse_stream", NULL }, |
|
1054
|
|
|
|
|
|
|
{ NULL, NULL } |
|
1055
|
|
|
|
|
|
|
}; |
|
1056
|
|
|
|
|
|
|
|
|
1057
|
|
|
|
|
|
|
static void |
|
1058
|
39
|
|
|
|
|
|
populate_import_table(pTHX) |
|
1059
|
|
|
|
|
|
|
{ |
|
1060
|
|
|
|
|
|
|
int i; |
|
1061
|
390
|
100
|
|
|
|
|
for (i = 0; g_import_funcs[i].name; i++) { |
|
1062
|
|
|
|
|
|
|
char full[256]; |
|
1063
|
|
|
|
|
|
|
CV *cv; |
|
1064
|
351
|
|
|
|
|
|
snprintf(full, sizeof full, |
|
1065
|
|
|
|
|
|
|
"File::Raw::Separated::%s", g_import_funcs[i].name); |
|
1066
|
351
|
|
|
|
|
|
cv = get_cv(full, 0); |
|
1067
|
351
|
50
|
|
|
|
|
if (!cv || !CvISXSUB(cv)) |
|
|
|
50
|
|
|
|
|
|
|
1068
|
0
|
|
|
|
|
|
croak("File::Raw::Separated boot: missing XSUB '%s'", full); |
|
1069
|
351
|
|
|
|
|
|
g_import_funcs[i].xs_func = CvXSUB(cv); |
|
1070
|
|
|
|
|
|
|
} |
|
1071
|
39
|
|
|
|
|
|
} |
|
1072
|
|
|
|
|
|
|
|
|
1073
|
|
|
|
|
|
|
static void |
|
1074
|
243
|
|
|
|
|
|
install_one(pTHX_ const char *pkg, const ImportEntry *e) |
|
1075
|
|
|
|
|
|
|
{ |
|
1076
|
|
|
|
|
|
|
char full[256]; |
|
1077
|
243
|
|
|
|
|
|
snprintf(full, sizeof full, "%s::file_%s", pkg, e->name); |
|
1078
|
243
|
|
|
|
|
|
newXS(full, e->xs_func, __FILE__); |
|
1079
|
243
|
|
|
|
|
|
} |
|
1080
|
|
|
|
|
|
|
|
|
1081
|
|
|
|
|
|
|
static void |
|
1082
|
27
|
|
|
|
|
|
install_range(pTHX_ const char *pkg, int lo, int hi) |
|
1083
|
|
|
|
|
|
|
{ |
|
1084
|
|
|
|
|
|
|
int i; |
|
1085
|
270
|
100
|
|
|
|
|
for (i = lo; i < hi; i++) install_one(aTHX_ pkg, &g_import_funcs[i]); |
|
1086
|
27
|
|
|
|
|
|
} |
|
1087
|
|
|
|
|
|
|
|
|
1088
|
|
|
|
|
|
|
XS_EXTERNAL(XS_File__Raw__Separated_import); |
|
1089
|
39
|
|
|
|
|
|
XS_EXTERNAL(XS_File__Raw__Separated_import) |
|
1090
|
|
|
|
|
|
|
{ |
|
1091
|
39
|
|
|
|
|
|
dXSARGS; |
|
1092
|
39
|
50
|
|
|
|
|
const char *pkg = CopSTASHPV(PL_curcop); |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
1093
|
|
|
|
|
|
|
int i, j; |
|
1094
|
|
|
|
|
|
|
int matched; |
|
1095
|
|
|
|
|
|
|
|
|
1096
|
|
|
|
|
|
|
/* No imports requested: bare `use File::Raw::Separated;` lands |
|
1097
|
|
|
|
|
|
|
* here with items==1 (just the package name). Plugin BOOT has |
|
1098
|
|
|
|
|
|
|
* already registered csv/tsv with File::Raw — nothing more to do. */ |
|
1099
|
39
|
100
|
|
|
|
|
if (items <= 1) XSRETURN_EMPTY; |
|
1100
|
|
|
|
|
|
|
|
|
1101
|
54
|
100
|
|
|
|
|
for (i = 1; i < items; i++) { |
|
1102
|
|
|
|
|
|
|
STRLEN len; |
|
1103
|
27
|
|
|
|
|
|
const char *arg = SvPV(ST(i), len); |
|
1104
|
|
|
|
|
|
|
|
|
1105
|
27
|
50
|
|
|
|
|
if (len > 0 && arg[0] == ':') { |
|
|
|
50
|
|
|
|
|
|
|
1106
|
0
|
0
|
|
|
|
|
if (len == 4 && memcmp(arg, ":all", 4) == 0) { |
|
|
|
0
|
|
|
|
|
|
|
1107
|
0
|
|
|
|
|
|
install_range(aTHX_ pkg, 0, IMPORT_TSV_HI); |
|
1108
|
27
|
|
|
|
|
|
continue; |
|
1109
|
|
|
|
|
|
|
} |
|
1110
|
0
|
0
|
|
|
|
|
if (len == 8 && memcmp(arg, ":unified", 8) == 0) { |
|
|
|
0
|
|
|
|
|
|
|
1111
|
0
|
|
|
|
|
|
install_range(aTHX_ pkg, IMPORT_UNIFIED_LO, IMPORT_UNIFIED_HI); |
|
1112
|
0
|
|
|
|
|
|
continue; |
|
1113
|
|
|
|
|
|
|
} |
|
1114
|
0
|
0
|
|
|
|
|
if (len == 4 && memcmp(arg, ":csv", 4) == 0) { |
|
|
|
0
|
|
|
|
|
|
|
1115
|
0
|
|
|
|
|
|
install_range(aTHX_ pkg, IMPORT_CSV_LO, IMPORT_CSV_HI); |
|
1116
|
0
|
|
|
|
|
|
continue; |
|
1117
|
|
|
|
|
|
|
} |
|
1118
|
0
|
0
|
|
|
|
|
if (len == 4 && memcmp(arg, ":tsv", 4) == 0) { |
|
|
|
0
|
|
|
|
|
|
|
1119
|
0
|
|
|
|
|
|
install_range(aTHX_ pkg, IMPORT_TSV_LO, IMPORT_TSV_HI); |
|
1120
|
0
|
|
|
|
|
|
continue; |
|
1121
|
|
|
|
|
|
|
} |
|
1122
|
0
|
|
|
|
|
|
warn("File::Raw::Separated: unknown tag '%.*s'", (int)len, arg); |
|
1123
|
0
|
|
|
|
|
|
continue; |
|
1124
|
|
|
|
|
|
|
} |
|
1125
|
|
|
|
|
|
|
|
|
1126
|
|
|
|
|
|
|
/* Bare `import` is shorthand for `:all`, matching the File::Raw |
|
1127
|
|
|
|
|
|
|
* idiom: `use File::Raw qw(import);`. */ |
|
1128
|
27
|
50
|
|
|
|
|
if (len == 6 && memcmp(arg, "import", 6) == 0) { |
|
|
|
50
|
|
|
|
|
|
|
1129
|
27
|
|
|
|
|
|
install_range(aTHX_ pkg, 0, IMPORT_TSV_HI); |
|
1130
|
27
|
|
|
|
|
|
continue; |
|
1131
|
|
|
|
|
|
|
} |
|
1132
|
|
|
|
|
|
|
|
|
1133
|
0
|
|
|
|
|
|
matched = 0; |
|
1134
|
0
|
0
|
|
|
|
|
for (j = 0; g_import_funcs[j].name; j++) { |
|
1135
|
0
|
0
|
|
|
|
|
if (strlen(g_import_funcs[j].name) == len |
|
1136
|
0
|
0
|
|
|
|
|
&& memcmp(arg, g_import_funcs[j].name, len) == 0) { |
|
1137
|
0
|
|
|
|
|
|
install_one(aTHX_ pkg, &g_import_funcs[j]); |
|
1138
|
0
|
|
|
|
|
|
matched = 1; |
|
1139
|
0
|
|
|
|
|
|
break; |
|
1140
|
|
|
|
|
|
|
} |
|
1141
|
|
|
|
|
|
|
} |
|
1142
|
0
|
0
|
|
|
|
|
if (!matched) |
|
1143
|
0
|
|
|
|
|
|
warn("File::Raw::Separated: '%.*s' is not exported", |
|
1144
|
|
|
|
|
|
|
(int)len, arg); |
|
1145
|
|
|
|
|
|
|
} |
|
1146
|
|
|
|
|
|
|
|
|
1147
|
27
|
|
|
|
|
|
XSRETURN_EMPTY; |
|
1148
|
|
|
|
|
|
|
} |
|
1149
|
|
|
|
|
|
|
|
|
1150
|
|
|
|
|
|
|
/* ============================================================ |
|
1151
|
|
|
|
|
|
|
* XS surface |
|
1152
|
|
|
|
|
|
|
* ============================================================ */ |
|
1153
|
|
|
|
|
|
|
|
|
1154
|
|
|
|
|
|
|
MODULE = File::Raw::Separated PACKAGE = File::Raw::Separated |
|
1155
|
|
|
|
|
|
|
|
|
1156
|
|
|
|
|
|
|
PROTOTYPES: DISABLE |
|
1157
|
|
|
|
|
|
|
|
|
1158
|
|
|
|
|
|
|
BOOT: |
|
1159
|
|
|
|
|
|
|
/* Seed the per-dialect defaults the plugins point at. */ |
|
1160
|
39
|
|
|
|
|
|
separated_options_init_csv(&csv_default_opts); |
|
1161
|
39
|
|
|
|
|
|
separated_options_init_tsv(&tsv_default_opts); |
|
1162
|
|
|
|
|
|
|
|
|
1163
|
|
|
|
|
|
|
/* Build and register the CSV plugin. Only the READ phase is wired |
|
1164
|
|
|
|
|
|
|
* for now; WRITE/RECORD/STREAM stay NULL until the parser core |
|
1165
|
|
|
|
|
|
|
* grows a serialiser and File::Raw teaches each_line/grep_lines |
|
1166
|
|
|
|
|
|
|
* the plugin pipeline. */ |
|
1167
|
39
|
|
|
|
|
|
csv_plugin.name = "csv"; |
|
1168
|
39
|
|
|
|
|
|
csv_plugin.read_fn = sep_read; |
|
1169
|
39
|
|
|
|
|
|
csv_plugin.write_fn = sep_write; |
|
1170
|
39
|
|
|
|
|
|
csv_plugin.record_fn = NULL; |
|
1171
|
39
|
|
|
|
|
|
csv_plugin.stream_fn = sep_stream; |
|
1172
|
39
|
|
|
|
|
|
csv_plugin.state = &csv_default_opts; |
|
1173
|
39
|
50
|
|
|
|
|
if (file_register_plugin(aTHX_ &csv_plugin) <= 0) |
|
1174
|
0
|
|
|
|
|
|
warn("File::Raw::Separated: failed to register 'csv' plugin"); |
|
1175
|
|
|
|
|
|
|
|
|
1176
|
39
|
|
|
|
|
|
tsv_plugin.name = "tsv"; |
|
1177
|
39
|
|
|
|
|
|
tsv_plugin.read_fn = sep_read; |
|
1178
|
39
|
|
|
|
|
|
tsv_plugin.write_fn = sep_write; |
|
1179
|
39
|
|
|
|
|
|
tsv_plugin.record_fn = NULL; |
|
1180
|
39
|
|
|
|
|
|
tsv_plugin.stream_fn = sep_stream; |
|
1181
|
39
|
|
|
|
|
|
tsv_plugin.state = &tsv_default_opts; |
|
1182
|
39
|
50
|
|
|
|
|
if (file_register_plugin(aTHX_ &tsv_plugin) <= 0) |
|
1183
|
0
|
|
|
|
|
|
warn("File::Raw::Separated: failed to register 'tsv' plugin"); |
|
1184
|
|
|
|
|
|
|
|
|
1185
|
|
|
|
|
|
|
/* Populate g_import_funcs[].xs_func from the just-registered XSUBs; |
|
1186
|
|
|
|
|
|
|
* the dispatcher uses these pointers when stamping `file_*` aliases |
|
1187
|
|
|
|
|
|
|
* into callers' packages. Boot order: xsubpp emits the newXS_deffile |
|
1188
|
|
|
|
|
|
|
* registrations *before* this initialisation block, so get_cv() is |
|
1189
|
|
|
|
|
|
|
* guaranteed to find each one. */ |
|
1190
|
39
|
|
|
|
|
|
populate_import_table(aTHX); |
|
1191
|
|
|
|
|
|
|
|
|
1192
|
|
|
|
|
|
|
/* Override Exporter::import (we no longer inherit it; .pm has been |
|
1193
|
|
|
|
|
|
|
* stripped of the Exporter glue) with our XS dispatcher so |
|
1194
|
|
|
|
|
|
|
* `use File::Raw::Separated qw(...)` lands in our import directly. */ |
|
1195
|
39
|
|
|
|
|
|
newXS("File::Raw::Separated::import", |
|
1196
|
|
|
|
|
|
|
XS_File__Raw__Separated_import, __FILE__); |
|
1197
|
|
|
|
|
|
|
|
|
1198
|
|
|
|
|
|
|
|
|
1199
|
|
|
|
|
|
|
# ===================================================================== |
|
1200
|
|
|
|
|
|
|
# Parser entry points |
|
1201
|
|
|
|
|
|
|
# ===================================================================== |
|
1202
|
|
|
|
|
|
|
# |
|
1203
|
|
|
|
|
|
|
# Nine XSUBs, three logical groups, all thin shims over the do_* helpers |
|
1204
|
|
|
|
|
|
|
# above: |
|
1205
|
|
|
|
|
|
|
# |
|
1206
|
|
|
|
|
|
|
# Unified (dialect read from opts hash, defaults to csv): |
|
1207
|
|
|
|
|
|
|
# parse_buf($input [, \%opts]) -> \@rows |
|
1208
|
|
|
|
|
|
|
# parse_buf_each($input, $cb [, \%opts]) |
|
1209
|
|
|
|
|
|
|
# parse_stream($path, $cb [, \%opts]) |
|
1210
|
|
|
|
|
|
|
# |
|
1211
|
|
|
|
|
|
|
# CSV-pinned (dialect key in opts ignored): |
|
1212
|
|
|
|
|
|
|
# csv_parse_buf, csv_parse_buf_each, csv_parse_stream |
|
1213
|
|
|
|
|
|
|
# |
|
1214
|
|
|
|
|
|
|
# TSV-pinned: |
|
1215
|
|
|
|
|
|
|
# tsv_parse_buf, tsv_parse_buf_each, tsv_parse_stream |
|
1216
|
|
|
|
|
|
|
# |
|
1217
|
|
|
|
|
|
|
# xsubpp registers them in package File::Raw::Separated; users get them |
|
1218
|
|
|
|
|
|
|
# under `file_` prefix in their own namespace via `use File::Raw::Separated |
|
1219
|
|
|
|
|
|
|
# qw(import|:all|:unified|:csv|:tsv|<name>)` (see import dispatcher above). |
|
1220
|
|
|
|
|
|
|
# |
|
1221
|
|
|
|
|
|
|
# parse_stream / *_parse_stream open the file directly via PerlLIO, |
|
1222
|
|
|
|
|
|
|
# bypassing File::Raw's read hook (no recursion / no double-parse). |
|
1223
|
|
|
|
|
|
|
|
|
1224
|
|
|
|
|
|
|
SV * |
|
1225
|
|
|
|
|
|
|
parse_buf(input, opts = NULL) |
|
1226
|
|
|
|
|
|
|
SV *input |
|
1227
|
|
|
|
|
|
|
SV *opts |
|
1228
|
|
|
|
|
|
|
CODE: |
|
1229
|
1
|
|
|
|
|
|
RETVAL = do_parse_buf(aTHX_ "parse_buf", input, opts, DIALECT_AUTO); |
|
1230
|
|
|
|
|
|
|
OUTPUT: |
|
1231
|
|
|
|
|
|
|
RETVAL |
|
1232
|
|
|
|
|
|
|
|
|
1233
|
|
|
|
|
|
|
SV * |
|
1234
|
|
|
|
|
|
|
csv_parse_buf(input, opts = NULL) |
|
1235
|
|
|
|
|
|
|
SV *input |
|
1236
|
|
|
|
|
|
|
SV *opts |
|
1237
|
|
|
|
|
|
|
CODE: |
|
1238
|
70
|
|
|
|
|
|
RETVAL = do_parse_buf(aTHX_ "csv_parse_buf", input, opts, DIALECT_CSV); |
|
1239
|
|
|
|
|
|
|
OUTPUT: |
|
1240
|
|
|
|
|
|
|
RETVAL |
|
1241
|
|
|
|
|
|
|
|
|
1242
|
|
|
|
|
|
|
SV * |
|
1243
|
|
|
|
|
|
|
tsv_parse_buf(input, opts = NULL) |
|
1244
|
|
|
|
|
|
|
SV *input |
|
1245
|
|
|
|
|
|
|
SV *opts |
|
1246
|
|
|
|
|
|
|
CODE: |
|
1247
|
14
|
|
|
|
|
|
RETVAL = do_parse_buf(aTHX_ "tsv_parse_buf", input, opts, DIALECT_TSV); |
|
1248
|
|
|
|
|
|
|
OUTPUT: |
|
1249
|
|
|
|
|
|
|
RETVAL |
|
1250
|
|
|
|
|
|
|
|
|
1251
|
|
|
|
|
|
|
|
|
1252
|
|
|
|
|
|
|
void |
|
1253
|
|
|
|
|
|
|
parse_buf_each(input, code, opts = NULL) |
|
1254
|
|
|
|
|
|
|
SV *input |
|
1255
|
|
|
|
|
|
|
SV *code |
|
1256
|
|
|
|
|
|
|
SV *opts |
|
1257
|
|
|
|
|
|
|
PPCODE: |
|
1258
|
0
|
|
|
|
|
|
do_parse_buf_each(aTHX_ "parse_buf_each", input, code, opts, DIALECT_AUTO); |
|
1259
|
0
|
|
|
|
|
|
XSRETURN_EMPTY; |
|
1260
|
|
|
|
|
|
|
|
|
1261
|
|
|
|
|
|
|
void |
|
1262
|
|
|
|
|
|
|
csv_parse_buf_each(input, code, opts = NULL) |
|
1263
|
|
|
|
|
|
|
SV *input |
|
1264
|
|
|
|
|
|
|
SV *code |
|
1265
|
|
|
|
|
|
|
SV *opts |
|
1266
|
|
|
|
|
|
|
PPCODE: |
|
1267
|
9
|
|
|
|
|
|
do_parse_buf_each(aTHX_ "csv_parse_buf_each", input, code, opts, DIALECT_CSV); |
|
1268
|
7
|
|
|
|
|
|
XSRETURN_EMPTY; |
|
1269
|
|
|
|
|
|
|
|
|
1270
|
|
|
|
|
|
|
void |
|
1271
|
|
|
|
|
|
|
tsv_parse_buf_each(input, code, opts = NULL) |
|
1272
|
|
|
|
|
|
|
SV *input |
|
1273
|
|
|
|
|
|
|
SV *code |
|
1274
|
|
|
|
|
|
|
SV *opts |
|
1275
|
|
|
|
|
|
|
PPCODE: |
|
1276
|
2
|
|
|
|
|
|
do_parse_buf_each(aTHX_ "tsv_parse_buf_each", input, code, opts, DIALECT_TSV); |
|
1277
|
2
|
|
|
|
|
|
XSRETURN_EMPTY; |
|
1278
|
|
|
|
|
|
|
|
|
1279
|
|
|
|
|
|
|
|
|
1280
|
|
|
|
|
|
|
void |
|
1281
|
|
|
|
|
|
|
parse_stream(path, code, opts = NULL) |
|
1282
|
|
|
|
|
|
|
SV *path |
|
1283
|
|
|
|
|
|
|
SV *code |
|
1284
|
|
|
|
|
|
|
SV *opts |
|
1285
|
|
|
|
|
|
|
PPCODE: |
|
1286
|
1
|
|
|
|
|
|
do_parse_stream(aTHX_ "parse_stream", path, code, opts, DIALECT_AUTO); |
|
1287
|
1
|
|
|
|
|
|
XSRETURN_EMPTY; |
|
1288
|
|
|
|
|
|
|
|
|
1289
|
|
|
|
|
|
|
void |
|
1290
|
|
|
|
|
|
|
csv_parse_stream(path, code, opts = NULL) |
|
1291
|
|
|
|
|
|
|
SV *path |
|
1292
|
|
|
|
|
|
|
SV *code |
|
1293
|
|
|
|
|
|
|
SV *opts |
|
1294
|
|
|
|
|
|
|
PPCODE: |
|
1295
|
20
|
|
|
|
|
|
do_parse_stream(aTHX_ "csv_parse_stream", path, code, opts, DIALECT_CSV); |
|
1296
|
15
|
|
|
|
|
|
XSRETURN_EMPTY; |
|
1297
|
|
|
|
|
|
|
|
|
1298
|
|
|
|
|
|
|
void |
|
1299
|
|
|
|
|
|
|
tsv_parse_stream(path, code, opts = NULL) |
|
1300
|
|
|
|
|
|
|
SV *path |
|
1301
|
|
|
|
|
|
|
SV *code |
|
1302
|
|
|
|
|
|
|
SV *opts |
|
1303
|
|
|
|
|
|
|
PPCODE: |
|
1304
|
0
|
|
|
|
|
|
do_parse_stream(aTHX_ "tsv_parse_stream", path, code, opts, DIALECT_TSV); |
|
1305
|
0
|
|
|
|
|
|
XSRETURN_EMPTY; |
|
1306
|
|
|
|
|
|
|
|