File Coverage

Separated.xs
Criterion Covered Total %
statement 421 492 85.5
branch 282 426 66.2
condition n/a
subroutine n/a
pod n/a
total 703 918 76.5


line stmt bran cond sub pod time code
1             /*
2             * Separated.xs - Perl XS bindings for the File::Raw::Separated parser core.
3             *
4             * Surface:
5             * In-memory primitives:
6             * parse_buf($scalar [, \%opts]) -> \@rows
7             * parse_buf_each($scalar, $code [, \%opts]) ;callback per row
8             * parse_stream($path, $code [, \%opts]) ;chunked file streamer
9             * plus dialect-pinning aliases csv_* and tsv_* (Perl-side, .pm)
10             *
11             * File::Raw plugin integration:
12             * At BOOT we register two plugins ("csv", "tsv") via
13             * include/file_plugin.h. They expose a READ phase that turns
14             * File::Raw::slurp($p, plugin => 'csv', ...) into AoA. Per-call
15             * options arrive through ctx->options (a per-call HV) and merge
16             * on top of the dialect's defaults held in ctx->plugin_state.
17             * There is no more global hook state, no enable/disable, no
18             * get/set/with_options scaffolding - all of that lived to back the
19             * old hook system; the plugin model passes options inline.
20             */
21              
22             #define PERL_NO_GET_CONTEXT
23             #include "EXTERN.h"
24             #include "perl.h"
25             #include "XSUB.h"
26              
27             #include "include/separated_parser.h"
28             /* file_plugin.h comes from File::Raw via ExtUtils::Depends -- the
29             consumer Makefile.PL adds the right -I to find it. */
30             #include "file_plugin.h"
31              
32             #include
33             #include
34             #include
35             #include
36              
37             /* XS_EXTERNAL was added in 5.16; older perls (5.10/5.14) need this
38             fallback or our `import` XSUB forward-decl + definition won't
39             expand and BOOT can't take its address. */
40             #ifndef XS_EXTERNAL
41             # define XS_EXTERNAL(name) XS(name)
42             #endif
43              
44             /* ============================================================
45             * Option decoding
46             *
47             * Reads a Perl hashref of options into a separated_options_t.
48             * Unknown keys croak (catches typos like 'seperator').
49             * Caller is expected to have already seeded sensible defaults
50             * before calling this (so the merge is "user opts on top of defaults").
51             * ============================================================ */
52              
53             static const char *VALID_OPT_KEYS[] = {
54             "sep", "quote", "escape", "strict", "eol", "trim",
55             "empty_is_undef", "binary", "header", "max_field_len",
56             /* dialect: selects the seeded defaults (csv | tsv). Consumed by
57             * seed_opts_for_dialect() before decode_opts() runs; listed here so
58             * known_opt() doesn't reject it during the merge sweep. */
59             "dialect",
60             /* plugin: present in the HV that File::Raw builds for its dispatch
61             * call (e.g. slurp($p, plugin => 'csv', sep => ';')). The plugin
62             * machinery uses it to look us up; for our merge sweep it's a
63             * known-and-ignored key. */
64             "plugin",
65             NULL
66             };
67              
68             static int
69 166           known_opt(const char *key, STRLEN klen)
70             {
71             const char *const *p;
72 1462 100         for (p = VALID_OPT_KEYS; *p; p++) {
73 1460 100         if (strlen(*p) == klen && memcmp(*p, key, klen) == 0) return 1;
    100          
74             }
75 2           return 0;
76             }
77              
78             /* Take the first byte of a string SV, croaking if empty.
79             * Used for sep / quote / escape (each must be exactly one byte;
80             * single-byte ASCII for v0.01 — multi-byte separators are a future stretch). */
81             static int
82 18           sv_first_byte(pTHX_ SV *sv, const char *what)
83             {
84             STRLEN len;
85 18           const char *p = SvPV(sv, len);
86 18 50         if (len < 1) croak("File::Raw::Separated: %s must be a non-empty string", what);
87 18           return (unsigned char)p[0];
88             }
89              
90             static separated_eol_t
91 10           sv_to_eol(pTHX_ SV *sv)
92             {
93             STRLEN len;
94 10           const char *p = SvPV(sv, len);
95 10 100         if (len == 4 && memcmp(p, "auto", 4) == 0) return SEPARATED_EOL_AUTO;
    100          
96 9 100         if (len == 2 && memcmp(p, "lf", 2) == 0) return SEPARATED_EOL_LF;
    100          
97 5 100         if (len == 4 && memcmp(p, "crlf", 4) == 0) return SEPARATED_EOL_CRLF;
    50          
98 3 100         if (len == 2 && memcmp(p, "cr", 2) == 0) return SEPARATED_EOL_CR;
    50          
99 2           croak("File::Raw::Separated: eol must be one of auto|lf|crlf|cr (got '%.*s')",
100             (int)len, p);
101             }
102              
/* How seed_opts_for_dialect() chooses the dialect whose defaults it
 * seeds. A CSV or TSV pin forces that dialect no matter what the
 * caller's hash says; AUTO consults the hash's `dialect` key and falls
 * back to CSV. The dialect-prefixed XSUBs (csv_parse_buf,
 * tsv_parse_stream, ...) are described as forwarding with a non-AUTO pin. */
typedef enum {
    DIALECT_AUTO = 0,   /* decide from the options hash (default CSV) */
    DIALECT_CSV  = 1,   /* always CSV defaults */
    DIALECT_TSV  = 2    /* always TSV defaults */
} dialect_pin_t;
113              
114             /* Read the optional `dialect` key from a user-supplied options hashref
115             * (may be NULL) and seed *opts with the corresponding defaults.
116             * If `pin` is CSV/TSV the hash's dialect key is ignored entirely.
117             * Defaults to CSV. Croaks on an unknown dialect string.
118             * This MUST be called before decode_opts() so user-supplied keys layer
119             * cleanly on top of the dialect's defaults. */
120             static void
121 114           seed_opts_for_dialect(pTHX_ HV *hv, separated_options_t *opts, dialect_pin_t pin)
122             {
123             SV **slot;
124 114 100         if (pin == DIALECT_CSV) { separated_options_init_csv(opts); return; }
125 18 100         if (pin == DIALECT_TSV) { separated_options_init_tsv(opts); return; }
126 2 50         if (hv && (slot = hv_fetchs(hv, "dialect", 0)) && *slot && SvOK(*slot)) {
    50          
    0          
    0          
127             STRLEN dlen;
128 0           const char *dpv = SvPV(*slot, dlen);
129 0 0         if (dlen == 3 && memcmp(dpv, "csv", 3) == 0) {
    0          
130 0           separated_options_init_csv(opts);
131 0           return;
132             }
133 0 0         if (dlen == 3 && memcmp(dpv, "tsv", 3) == 0) {
    0          
134 0           separated_options_init_tsv(opts);
135 0           return;
136             }
137 0           croak("File::Raw::Separated: dialect must be 'csv' or 'tsv' (got '%.*s')",
138             (int)dlen, dpv);
139             }
140             /* default: CSV */
141 2           separated_options_init_csv(opts);
142             }
143              
144             /* Same as above, but takes a plain SV instead of looking up a hash key.
145             * Used by the class-method state setters (set_options('csv'|'tsv', ...)). */
146             static int
147 0           parse_dialect_sv(pTHX_ SV *sv, const char *fn)
148             {
149             STRLEN dlen;
150             const char *dpv;
151 0 0         if (!sv || !SvOK(sv))
    0          
152 0           croak("%s: dialect (first arg) must be 'csv' or 'tsv'", fn);
153 0           dpv = SvPV(sv, dlen);
154 0 0         if (dlen == 3 && memcmp(dpv, "csv", 3) == 0) return 0; /* csv slot */
    0          
155 0 0         if (dlen == 3 && memcmp(dpv, "tsv", 3) == 0) return 1; /* tsv slot */
    0          
156 0           croak("%s: dialect must be 'csv' or 'tsv' (got '%.*s')",
157             fn, (int)dlen, dpv);
158             }
159              
160             /* Merge an options hashref (may be NULL or undef) into *opts.
161             * Croaks on unknown key or wrong-shape value. */
162             static void
163 194           decode_opts(pTHX_ HV *hv, separated_options_t *opts)
164             {
165 194 100         if (!hv) return;
166              
167 129           hv_iterinit(hv);
168             HE *he;
169 291 100         while ((he = hv_iternext(hv))) {
170             I32 klen_i;
171 166           const char *key = hv_iterkey(he, &klen_i);
172 166           STRLEN klen = (STRLEN)klen_i;
173 166           SV *val = hv_iterval(hv, he);
174              
175 166 100         if (!known_opt(key, klen)) {
176 2           croak("File::Raw::Separated: unknown option '%.*s'",
177             (int)klen, key);
178             }
179              
180             /* Treat undef value as "use default" — i.e. skip; gives callers
181             * a way to express "I don't care, use the seeded default". */
182 164 100         if (!SvOK(val)) continue;
183              
184 157 100         if (klen == 3 && memcmp(key, "sep", 3) == 0) opts->sep = sv_first_byte(aTHX_ val, "sep");
    100          
185 145 100         else if (klen == 5 && memcmp(key, "quote", 5) == 0) opts->quote = sv_first_byte(aTHX_ val, "quote");
    50          
186 142 100         else if (klen == 6 && memcmp(key, "escape",6) == 0) opts->escape= sv_first_byte(aTHX_ val, "escape");
    100          
187 139 100         else if (klen == 6 && memcmp(key, "strict",6) == 0) opts->strict= SvTRUE(val) ? 1 : 0;
    100          
188 132 100         else if (klen == 3 && memcmp(key, "eol", 3) == 0) opts->eol_mode = sv_to_eol(aTHX_ val);
    50          
189 122 100         else if (klen == 4 && memcmp(key, "trim", 4) == 0) opts->trim = SvTRUE(val) ? 1 : 0;
    50          
190 120 100         else if (klen == 14 && memcmp(key, "empty_is_undef", 14) == 0) opts->empty_is_undef = SvTRUE(val) ? 1 : 0;
    50          
191 118 100         else if (klen == 6 && memcmp(key, "binary",6) == 0) opts->binary= SvTRUE(val) ? 1 : 0;
    100          
192 115 100         else if (klen == 6 && memcmp(key, "header",6) == 0) opts->header= SvTRUE(val) ? 1 : 0;
    100          
193 78 100         else if (klen == 13 && memcmp(key, "max_field_len", 13) == 0) {
    50          
194 1           IV n = SvIV(val);
195 1 50         if (n < 0) croak("File::Raw::Separated: max_field_len must be >= 0");
196 1           opts->max_field_len = (size_t)n;
197             }
198             }
199             }
200              
201             /* ============================================================
202             * Dispatcher state — passed through the C parser as user-data
203             * ============================================================ */
204              
205             typedef struct {
206             #ifdef PERL_IMPLICIT_CONTEXT
207             PerlInterpreter *my_perl; /* used by dTHXa(c->my_perl) in callbacks */
208             #endif
209             AV *result; /* used in collect mode (as_callback == 0) */
210             AV *current_row; /* AV reused across rows in collect mode */
211             SV *cb; /* user callback in callback mode (1) */
212             AV *row_av; /* one reusable AV for callback mode */
213             int as_callback;
214             int empty_is_undef;
215             int binary;
216             /* Header mode: when 1, first emitted row is consumed as keys and
217             * subsequent rows are emitted as hashrefs keyed by those names. */
218             int header_mode;
219             AV *headers; /* NULL until first row consumed in header mode */
220             /* When the user callback dies, we propagate via a stash. */
221             SV *die_msg;
222             } dispatch_ctx_t;
223              
224             /* Build a single field SV from the parser's borrowed pointer. */
225             static SV *
226 921389           make_field_sv(pTHX_ const char *field, STRLEN len, int is_null,
227             int empty_is_undef, int binary)
228             {
229             PERL_UNUSED_VAR(empty_is_undef);
230 921389 100         if (is_null) return newSV(0); /* PL_sv_undef would be SVREADONLY */
231 921387 50         SV *sv = newSVpvn(field ? field : "", len);
232 921387 100         if (!binary) sv_utf8_decode(sv);
233 921387           return sv;
234             }
235              
236             /* Header-mode helpers (used by both collect_cb and each_cb).
237             *
238             * Contract:
239             * - First row in header mode is taken as the header. Duplicate keys
240             * croak. Subsequent calls see ctx->headers != NULL.
241             * - Subsequent rows are zipped against the header into a fresh HV.
242             * Row arity > header arity croaks. Row arity < header arity pads
243             * trailing keys with undef.
244             * - Field SVs are copied into the HV via newSVsv (the source AV gets
245             * av_clear'd or freed afterwards). */
246              
247             static void
248 34           check_no_duplicate_headers(pTHX_ AV *headers)
249             {
250 34           HV *seen = newHV();
251 34           SSize_t n = av_len(headers) + 1;
252             SSize_t i;
253 115 100         for (i = 0; i < n; i++) {
254 83           SV **kp = av_fetch(headers, i, 0);
255             STRLEN klen;
256             const char *kpv;
257 83 50         if (!kp || !*kp) continue;
    50          
258 83           kpv = SvPV(*kp, klen);
259 83 100         if (hv_exists(seen, kpv, klen)) {
260 2           SvREFCNT_dec((SV *)seen);
261 2           croak("File::Raw::Separated: duplicate header key '%.*s'",
262             (int)klen, kpv);
263             }
264 81           (void)hv_store(seen, kpv, klen, &PL_sv_yes, 0);
265             }
266 32           SvREFCNT_dec((SV *)seen);
267 32           }
268              
269             static HV *
270 69           build_header_row_hv(pTHX_ AV *headers, AV *row)
271             {
272 69           HV *hv = newHV();
273 69           SSize_t hcount = av_len(headers) + 1;
274 69           SSize_t rcount = av_len(row) + 1;
275             SSize_t i;
276 69 100         if (rcount > hcount) {
277 2           SvREFCNT_dec((SV *)hv);
278 2           croak("File::Raw::Separated: row has %ld field(s), header has %ld",
279             (long)rcount, (long)hcount);
280             }
281 235 100         for (i = 0; i < hcount; i++) {
282 168           SV **kp = av_fetch(headers, i, 0);
283             STRLEN klen;
284             const char *kpv;
285             SV *val;
286 168 50         if (!kp || !*kp) continue;
    50          
287 168           kpv = SvPV(*kp, klen);
288 168 100         if (i < rcount) {
289 164           SV **vp = av_fetch(row, i, 0);
290 164 50         val = (vp && *vp) ? newSVsv(*vp) : newSV(0);
    50          
291             } else {
292 4           val = newSV(0);
293             }
294 168           (void)hv_store(hv, kpv, klen, val, 0);
295             }
296 67           return hv;
297             }
298              
299             /* Extract caller-supplied header names from an options HV. Returns:
300             * - NULL if `header` is missing, false-ish, or `header => 1` (the
301             * "consume the file's first row as headers" mode);
302             * - a fresh AV (refcount 1, owned by caller) of header-name SVs when
303             * `header => [name, name, ...]` was supplied. Validates: arrayref,
304             * non-empty, no undef entries, no duplicates.
305             *
306             * The caller installs the returned AV directly into dispatch_ctx_t::
307             * headers BEFORE the parser starts emitting fields, which short-
308             * circuits the each_cb / collect_cb "first row becomes headers" branch
309             * so row 0 is treated as data and emitted as a hashref. */
310             static AV *
311 178           extract_explicit_headers(pTHX_ HV *opts)
312             {
313             SV **slot;
314             SV *val;
315             AV *user_av;
316             AV *out;
317             SSize_t i, n;
318              
319 178 100         if (!opts) return NULL;
320 113           slot = hv_fetchs(opts, "header", 0);
321 113 100         if (!slot || !*slot || !SvOK(*slot)) return NULL;
    50          
    50          
322 37           val = *slot;
323              
324             /* `header => 1` (or any non-arrayref truthy) keeps legacy behaviour. */
325 37 100         if (!SvROK(val) || SvTYPE(SvRV(val)) != SVt_PVAV) return NULL;
    50          
326              
327 20           user_av = (AV *)SvRV(val);
328 20           n = av_len(user_av) + 1;
329 20 100         if (n <= 0)
330 1           croak("File::Raw::Separated: header => [] is empty; "
331             "use header => 1 to consume the file's first row, "
332             "or supply at least one name");
333              
334 19           out = newAV();
335 19           av_extend(out, n - 1);
336 68 100         for (i = 0; i < n; i++) {
337 50           SV **kp = av_fetch(user_av, i, 0);
338 50 50         if (!kp || !*kp || !SvOK(*kp)) {
    50          
    100          
339 1           SvREFCNT_dec((SV *)out);
340 1           croak("File::Raw::Separated: header => [...] entry %ld is undef",
341             (long)i);
342             }
343             /* Copy to detach from the caller's arrayref. */
344 49           av_push(out, newSVsv(*kp));
345             }
346             /* Reuses the same dup-check the implicit path uses for symmetry. */
347 18           check_no_duplicate_headers(aTHX_ out);
348 17           return out;
349             }
350              
351             /* Field callback for as_callback == 0: accumulate into AoA (or
352             * arrayref-of-hashref if header_mode). */
353             static int
354 860           collect_cb(const char *field, size_t len, int eor, void *ud)
355             {
356 860           dispatch_ctx_t *c = (dispatch_ctx_t *)ud;
357             dTHXa(c->my_perl);
358 860           int is_null = (len == SEPARATED_FIELD_NULL_LEN);
359 860 100         SV *sv = make_field_sv(aTHX_ field, is_null ? 0 : (STRLEN)len, is_null,
360             c->empty_is_undef, c->binary);
361 860           av_push(c->current_row, sv);
362 860 100         if (eor) {
363 403 100         if (c->header_mode && !c->headers) {
    100          
364             /* First row is the header. Validate duplicates, then steal. */
365 11           check_no_duplicate_headers(aTHX_ c->current_row);
366 10           c->headers = c->current_row;
367 10           c->current_row = newAV();
368             /* Do NOT push to result. */
369 392 100         } else if (c->header_mode) {
370             /* Subsequent row: zip against headers into hash. */
371 54           HV *row_hv = build_header_row_hv(aTHX_ c->headers, c->current_row);
372 52           av_push(c->result, newRV_noinc((SV *)row_hv));
373 52           av_clear(c->current_row);
374             } else {
375 338           av_push(c->result, newRV_noinc((SV *)c->current_row));
376 338           c->current_row = newAV();
377             }
378             }
379 857           return 0;
380             }
381              
382             /* Field callback for as_callback == 1: invoke user code per row. */
383             static int
384 920529           each_cb(const char *field, size_t len, int eor, void *ud)
385             {
386 920529           dispatch_ctx_t *c = (dispatch_ctx_t *)ud;
387             dTHXa(c->my_perl);
388 920529           int is_null = (len == SEPARATED_FIELD_NULL_LEN);
389 920529 50         SV *sv = make_field_sv(aTHX_ field, is_null ? 0 : (STRLEN)len, is_null,
390             c->empty_is_undef, c->binary);
391 920529           av_push(c->row_av, sv);
392 920529 100         if (eor) {
393             /* Header mode: first row is consumed as headers, no callback. */
394 110214 100         if (c->header_mode && !c->headers) {
    100          
395 5           check_no_duplicate_headers(aTHX_ c->row_av);
396             /* Steal row_av as headers; allocate fresh row_av for next row. */
397 5           c->headers = c->row_av;
398 5           c->row_av = newAV();
399 5           return 0;
400             }
401              
402             /* Build the arg the callback sees: AV (default) or HV (header). */
403             SV *rowref;
404 110209 100         if (c->header_mode) {
405 15           HV *row_hv = build_header_row_hv(aTHX_ c->headers, c->row_av);
406 15           rowref = newRV_noinc((SV *)row_hv);
407             } else {
408 110194           rowref = newRV_inc((SV *)c->row_av); /* +1, not consumed */
409             }
410 110209           sv_2mortal(rowref);
411              
412 110209           dSP;
413 110209           ENTER; SAVETMPS;
414 110209 50         PUSHMARK(SP);
415 110209 50         XPUSHs(rowref);
416 110209           PUTBACK;
417              
418             int count;
419 110209           I32 flags = G_DISCARD | G_EVAL;
420 110209           count = call_sv(c->cb, flags);
421 110209           SPAGAIN;
422             PERL_UNUSED_VAR(count);
423              
424             /* Did the callback die? Stash the message and abort the parse. */
425 110209 50         SV *errsv = ERRSV;
426 110209 100         if (SvTRUE(errsv)) {
427 3           c->die_msg = newSVsv(errsv);
428 3 50         FREETMPS; LEAVE;
429 3           av_clear(c->row_av); /* prepare for cleanup */
430 3           return 1; /* tell parser to abort */
431             }
432              
433 110206 50         FREETMPS; LEAVE;
434 110206           av_clear(c->row_av);
435             }
436 920521           return 0;
437             }
438              
439             /* The shared dispatcher. `input_pv` / `input_len` is the byte buffer
440             * to parse; `opts` is fully resolved. as_callback selects collect vs
441             * callback mode. cb is the user code in callback mode (NULL otherwise).
442             *
443             * Returns:
444             * collect mode (as_callback == 0): AV* of rowrefs (caller must mortalise)
445             * callback mode (as_callback == 1): NULL (no return value)
446             *
447             * On parse error, croaks. On callback-die, croaks with the propagated msg.
448             */
449             static AV *
450 148           separated_parse_dispatch(pTHX_ const char *input_pv, STRLEN input_len,
451             const separated_options_t *opts,
452             int as_callback, SV *cb,
453             AV *explicit_headers)
454             {
455             dispatch_ctx_t ctx;
456 148           memset(&ctx, 0, sizeof ctx);
457             #ifdef PERL_IMPLICIT_CONTEXT
458             ctx.my_perl = aTHX;
459             #endif
460 148           ctx.empty_is_undef = opts->empty_is_undef;
461 148           ctx.binary = opts->binary;
462             /* Force header_mode on when explicit names were supplied, so
463             * callbacks emit hashrefs from row 0. Caller is responsible for
464             * passing this only when meaningful (e.g. only on the read side). */
465 148 100         ctx.header_mode = opts->header || (explicit_headers != NULL);
    50          
466 148           ctx.headers = explicit_headers; /* takes ownership */
467 148           ctx.as_callback = as_callback;
468              
469             separated_field_cb fcb;
470 148 100         if (as_callback) {
471 10           ctx.cb = cb;
472 10           ctx.row_av = newAV();
473 10           fcb = each_cb;
474             } else {
475 138           ctx.result = newAV();
476 138           ctx.current_row = newAV();
477 138           fcb = collect_cb;
478             }
479              
480 148           size_t err_off = 0;
481 148           long rc = separated_parse(input_pv, input_len, opts, fcb, &ctx, &err_off);
482              
483 145 100         if (rc < 0) {
484             /* Cleanup. */
485 8 100         if (as_callback) {
486 1           SvREFCNT_dec((SV *)ctx.row_av);
487             } else {
488 7           SvREFCNT_dec((SV *)ctx.current_row);
489 7           SvREFCNT_dec((SV *)ctx.result);
490             }
491 8 50         if (ctx.headers) SvREFCNT_dec((SV *)ctx.headers);
492 8 100         if (ctx.die_msg) {
493             STRLEN dlen;
494 1           const char *dpv = SvPV(ctx.die_msg, dlen);
495             /* Re-raise the original die message verbatim. Not using
496             * croak_sv (5.13.1+) for 5.8/5.10 compat. */
497 1           SV *msg_mortal = sv_2mortal(ctx.die_msg);
498             PERL_UNUSED_VAR(msg_mortal);
499 1           croak("%.*s", (int)dlen, dpv);
500             }
501 7           croak("File::Raw::Separated: %s at byte offset %lu",
502             separated_strerror((separated_err_t)rc), (unsigned long)err_off);
503             }
504              
505 137 100         if (as_callback) {
506 9           SvREFCNT_dec((SV *)ctx.row_av);
507 9 100         if (ctx.headers) SvREFCNT_dec((SV *)ctx.headers);
508 9           return NULL;
509             }
510              
511             /* Trailing in-progress row — should always be empty if the parser
512             * finished successfully. Free it. */
513 128           SvREFCNT_dec((SV *)ctx.current_row);
514 128 100         if (ctx.headers) SvREFCNT_dec((SV *)ctx.headers);
515 128           return ctx.result;
516             }
517              
518              
519             #define SEPARATED_STREAM_CHUNK 65536
520              
521             static void
522 19           separated_parse_dispatch_stream(pTHX_ const char *path,
523             const separated_options_t *opts,
524             SV *cb,
525             AV *explicit_headers)
526             {
527             dispatch_ctx_t ctx;
528 19           memset(&ctx, 0, sizeof ctx);
529             #ifdef PERL_IMPLICIT_CONTEXT
530             ctx.my_perl = aTHX;
531             #endif
532 19           ctx.empty_is_undef = opts->empty_is_undef;
533 19           ctx.binary = opts->binary;
534 19 100         ctx.header_mode = opts->header || (explicit_headers != NULL);
    50          
535 19           ctx.headers = explicit_headers; /* takes ownership */
536 19           ctx.as_callback = 1;
537 19           ctx.cb = cb;
538 19           ctx.row_av = newAV();
539              
540 19           separated_ctx_t *parser = separated_init(opts, each_cb, &ctx);
541 19 50         if (!parser) {
542 0           SvREFCNT_dec((SV *)ctx.row_av);
543 0           croak("File::Raw::Separated: out of memory initialising parser");
544             }
545              
546 19           int fd = PerlLIO_open(path, O_RDONLY);
547 19 100         if (fd < 0) {
548 1           int saved_errno = errno;
549 1           separated_free(parser);
550 1           SvREFCNT_dec((SV *)ctx.row_av);
551 1           croak("File::Raw::Separated: cannot open %s: %s",
552             path, Strerror(saved_errno));
553             }
554              
555             /* Local buffer per call. Stack-allocated so concurrent calls in
556             * different threads don't collide on a static. */
557             char buf[SEPARATED_STREAM_CHUNK];
558 18           separated_err_t parse_err = SEPARATED_OK;
559 18           int read_errno = 0;
560             SSize_t n;
561              
562 359 100         while ((n = PerlLIO_read(fd, buf, sizeof buf)) > 0) {
563 343           parse_err = separated_feed(parser, buf, (size_t)n);
564 343 100         if (parse_err != SEPARATED_OK) break;
565             }
566 18 50         if (n < 0) read_errno = errno;
567              
568             /* Only call _finish on success — on error the context is already
569             * sticky-failed and _finish would just no-op anyway, but staying
570             * symmetric makes intent clearer. */
571 18 100         if (parse_err == SEPARATED_OK && read_errno == 0) {
    50          
572 16           parse_err = separated_finish(parser);
573             }
574              
575 18           PerlLIO_close(fd);
576              
577             /* Pull diagnostics out before freeing the parser. */
578 18           size_t err_off = (parse_err != SEPARATED_OK)
579 18 100         ? separated_offset(parser) : 0;
580 18           SV *die_msg = ctx.die_msg;
581 18           ctx.die_msg = NULL;
582              
583 18           separated_free(parser);
584 18           SvREFCNT_dec((SV *)ctx.row_av);
585 18 100         if (ctx.headers) SvREFCNT_dec((SV *)ctx.headers);
586              
587             /* Order: callback-die > read error > parse error. The first
588             * cleanly explains user code aborting; the second is always
589             * recoverable info; the third is our domain. */
590 18 100         if (die_msg) {
591             STRLEN dlen;
592 1           const char *dpv = SvPV(die_msg, dlen);
593 1           SV *m = sv_2mortal(die_msg);
594             PERL_UNUSED_VAR(m);
595 1           croak("%.*s", (int)dlen, dpv);
596             }
597 17 50         if (read_errno) {
598 0           croak("File::Raw::Separated: read error on %s: %s",
599             path, Strerror(read_errno));
600             }
601 17 100         if (parse_err != SEPARATED_OK) {
602 1           croak("File::Raw::Separated: %s at byte offset %lu in %s",
603             separated_strerror(parse_err),
604             (unsigned long)err_off, path);
605             }
606 16           }
607              
608             /* ============================================================
609             * Plugin integration with File::Raw
610             *
611             * BOOT registers two plugins ("csv" and "tsv") via file_register_plugin
612             * (declared in include/file_plugin.h). Each plugin's `state` slot points
613             * to a static separated_options_t carrying the dialect's defaults.
614             *
615             * The READ phase fires from File::Raw::slurp($p, plugin => 'csv', ...).
616             * Per-call options arrive in ctx->options as an HV; we layer them on
617             * top of *(ctx->plugin_state) and parse the slurped bytes into AoA.
618             *
619             * No global enable/disable knob: callers without a `plugin =>` opt get
620             * the unmodified bytes back from File::Raw, by definition.
621             * ============================================================ */
622              
623             /* Per-dialect default options. The plugin struct's `state` field points
624             * here; sep_read copies into a stack-local before merging ctx->options
625             * on top, so concurrent calls don't fight over the defaults table. */
626             static separated_options_t csv_default_opts;
627             static separated_options_t tsv_default_opts;
628              
629             static SV *
630 59           sep_read(pTHX_ FilePluginContext *ctx)
631             {
632             separated_options_t local;
633             STRLEN len;
634             const char *pv;
635             AV *result;
636              
637             /* Start from the dialect defaults. plugin_state always points to
638             * one of csv_default_opts / tsv_default_opts; treat NULL as csv
639             * defensively. */
640 59 50         if (ctx->plugin_state)
641 59           local = *(const separated_options_t *)ctx->plugin_state;
642             else
643 0           separated_options_init_csv(&local);
644              
645             /* Merge the per-call options HV on top. ctx->options is the same
646             * HV file_plugin_dispatch_read built from the variadic XSUB args;
647             * decode_opts ignores the 'plugin' key (added to VALID_OPT_KEYS in
648             * an earlier pass) and the 'dialect' key (likewise ignored).
649             *
650             * No seed_opts_for_dialect call here — the dialect is fully implied
651             * by which plugin fired, and the defaults already live in *local. */
652 59 50         if (ctx->options) decode_opts(aTHX_ ctx->options, &local);
653              
654 57 50         if (!ctx->data) return &PL_sv_undef;
655 57           pv = SvPV(ctx->data, len);
656             {
657 57           AV *xhdr = extract_explicit_headers(aTHX_ ctx->options);
658 54           result = separated_parse_dispatch(aTHX_ pv, len, &local, 0, NULL, xhdr);
659             }
660             /* result is a fresh AV with refcount 1; wrap without bumping. */
661 51           return newRV_noinc((SV *)result);
662             }
663              
664             /* ============================================================
665             * sep_write - WRITE phase
666             *
667             * Fires from File::Raw::spew / append / atomic_spew when the caller
668             * passes plugin => 'csv' (or 'tsv'). Serialises an arrayref of arrayref
669             * rows into bytes following RFC 4180 conventions: fields containing the
670             * separator, quote character, CR, or LF are quoted; embedded quote
671             * characters are doubled (or backslash-escaped if opts.escape is set).
672             *
673             * Hashref rows are not currently accepted (would require an explicit
674             * header => [keys] order to be deterministic). Undef fields emit as
675             * empty.
676             * ============================================================ */
677              
678             static SV *
679 13           sep_write(pTHX_ FilePluginContext *ctx)
680             {
681             separated_options_t o;
682             AV *rows;
683             SSize_t nrows, i, j;
684             char *buf;
685             STRLEN buf_len, buf_cap;
686             char eol[2];
687             int eol_len;
688             SV *out;
689              
690 13 50         if (ctx->plugin_state)
691 13           o = *(const separated_options_t *)ctx->plugin_state;
692             else
693 0           separated_options_init_csv(&o);
694 13 50         if (ctx->options) decode_opts(aTHX_ ctx->options, &o);
695              
696 12 50         if (!ctx->data || !SvROK(ctx->data) ||
    100          
697 11 50         SvTYPE(SvRV(ctx->data)) != SVt_PVAV)
698 1           croak("File::Raw::Separated: write expects an arrayref of rows");
699              
700 11           rows = (AV *)SvRV(ctx->data);
701 11           nrows = av_len(rows) + 1;
702              
703             /* EOL. AUTO degrades to LF for write since we have nothing to
704             * auto-detect; CRLF and CR honour the explicit pin. */
705 11           switch (o.eol_mode) {
706 1           case SEPARATED_EOL_CRLF: eol[0] = '\r'; eol[1] = '\n'; eol_len = 2; break;
707 0           case SEPARATED_EOL_CR: eol[0] = '\r'; eol_len = 1; break;
708 10           default: eol[0] = '\n'; eol_len = 1; break;
709             }
710              
711 11           buf_cap = 4096;
712 11           Newx(buf, buf_cap, char);
713 11           buf_len = 0;
714              
715             #define SEP_BUF_ENSURE(n) do { \
716             STRLEN _need = buf_len + (STRLEN)(n); \
717             if (_need > buf_cap) { \
718             while (_need > buf_cap) buf_cap *= 2; \
719             Renew(buf, buf_cap, char); \
720             } \
721             } while (0)
722              
723 35 100         for (i = 0; i < nrows; i++) {
724 25           SV **rowp = av_fetch(rows, i, 0);
725             AV *row;
726             SSize_t nfields;
727              
728 25 50         if (!rowp || !*rowp || !SvROK(*rowp) ||
    50          
    100          
729 24 50         SvTYPE(SvRV(*rowp)) != SVt_PVAV) {
730 1           Safefree(buf);
731 1           croak("File::Raw::Separated: row %ld is not an arrayref", (long)i);
732             }
733 24           row = (AV *)SvRV(*rowp);
734 24           nfields = av_len(row) + 1;
735              
736 67 100         for (j = 0; j < nfields; j++) {
737 43           SV **fieldp = av_fetch(row, j, 0);
738             STRLEN flen;
739             const char *fpv;
740 43           int needs_quote = 0;
741              
742 43 100         if (j > 0) { SEP_BUF_ENSURE(1); buf[buf_len++] = (char)o.sep; }
    50          
    0          
743              
744 43 50         if (!fieldp || !*fieldp || !SvOK(*fieldp)) continue;
    50          
    100          
745 42           fpv = SvPV(*fieldp, flen);
746              
747             /* Decide if quoting is needed. Only relevant when the
748             * dialect actually has a quote char. TSV with quote=-1
749             * emits raw and is the caller's problem if it contains tab
750             * or newline. */
751 42 100         if (o.quote >= 0) {
752             STRLEN k;
753 87 100         for (k = 0; k < flen; k++) {
754 55           char c = fpv[k];
755 55 100         if (c == (char)o.sep || c == (char)o.quote ||
    100          
    100          
756 51 50         c == '\n' || c == '\r') {
757 4           needs_quote = 1;
758 4           break;
759             }
760             }
761             }
762              
763 42 100         if (needs_quote) {
764             STRLEN k;
765             /* worst case: every byte doubles + open + close quote */
766 4 50         SEP_BUF_ENSURE(flen * 2 + 2);
    0          
767 4           buf[buf_len++] = (char)o.quote;
768 48 100         for (k = 0; k < flen; k++) {
769 44           char c = fpv[k];
770 44 100         if (c == (char)o.quote) {
771 3 50         if (o.escape >= 0)
772 0           buf[buf_len++] = (char)o.escape;
773             else
774 3           buf[buf_len++] = (char)o.quote; /* RFC 4180 */
775             }
776 44           buf[buf_len++] = c;
777             }
778 4           buf[buf_len++] = (char)o.quote;
779             } else {
780 38 50         SEP_BUF_ENSURE(flen);
    0          
781 38           memcpy(buf + buf_len, fpv, flen);
782 38           buf_len += flen;
783             }
784             }
785              
786 24 50         SEP_BUF_ENSURE(eol_len);
    0          
787 24           memcpy(buf + buf_len, eol, eol_len);
788 24           buf_len += eol_len;
789             }
790              
791             #undef SEP_BUF_ENSURE
792              
793 10           out = newSVpvn(buf, buf_len);
794 10           Safefree(buf);
795 10           return out;
796             }
797              
798             /* ============================================================
799             * sep_stream - STREAM phase
800             *
801             * Fires from File::Raw::each_line($p, $cb, plugin => 'csv'). File::Raw
802             * opens the file and feeds us chunks; we own the parser context across
803             * calls via FilePluginContext::call_state. On the EOF call we flush
804             * any trailing field/row, free the parser, and clear call_state.
805             *
806             * The user's callback is invoked once per emitted record (arrayref or
807             * hashref under header mode), driven by the same each_cb the in-memory
808             * callback variant uses.
809             * ============================================================ */
810              
811             typedef struct { /* per-call STREAM state, kept in ctx->call_state */
812             dispatch_ctx_t disp; /* handed to separated_init as each_cb's user data */
813             separated_ctx_t *parser; /* from separated_init; released with separated_free */
814             separated_options_t opts; /* parser copies internally; keep for clarity */
815             int destroyed; /* set by sep_stream_state_free before it releases the fields */
816             } sep_stream_state_t;
817              
818             static void
819 8           sep_stream_state_free(pTHX_ sep_stream_state_t *st)
820             {
821 8 50         if (!st || st->destroyed) return;
    50          
822 8           st->destroyed = 1;
823 8 50         if (st->parser) { separated_free(st->parser); st->parser = NULL; }
824 8 50         if (st->disp.row_av) { SvREFCNT_dec((SV *)st->disp.row_av); st->disp.row_av = NULL; }
825 8 100         if (st->disp.headers) { SvREFCNT_dec((SV *)st->disp.headers); st->disp.headers = NULL; }
826 8 100         if (st->disp.die_msg) { SvREFCNT_dec(st->disp.die_msg); st->disp.die_msg = NULL; }
827 8           Safefree(st);
828             }
829              
830             static int
831 17           sep_stream(pTHX_ FilePluginContext *ctx, const char *chunk, size_t len, int eof)
832             {
833 17           sep_stream_state_t *st = (sep_stream_state_t *)ctx->call_state;
834             separated_err_t rc;
835              
836             /* First call: build state from defaults + per-call opts, init parser. */
837 17 100         if (!st) {
838             /* Extract / validate explicit headers BEFORE allocating state -
839             * a validation croak here would otherwise leak the partially-
840             * built state (no destructor has been hooked up yet). */
841 8           AV *xhdr = extract_explicit_headers(aTHX_ ctx->options);
842              
843 8           Newxz(st, 1, sep_stream_state_t);
844              
845 8 50         if (ctx->plugin_state)
846 8           st->opts = *(const separated_options_t *)ctx->plugin_state;
847             else
848 0           separated_options_init_csv(&st->opts);
849 8 50         if (ctx->options) decode_opts(aTHX_ ctx->options, &st->opts);
850              
851             #ifdef PERL_IMPLICIT_CONTEXT
852             st->disp.my_perl = aTHX;
853             #endif
854 8           st->disp.empty_is_undef = st->opts.empty_is_undef;
855 8           st->disp.binary = st->opts.binary;
856 8           st->disp.headers = xhdr; /* takes ownership; NULL means "use first row" */
857 8 100         st->disp.header_mode = st->opts.header || (xhdr != NULL);
    50          
858 8           st->disp.as_callback = 1;
859 8           st->disp.cb = ctx->callback;
860 8           st->disp.row_av = newAV();
861              
862 8           st->parser = separated_init(&st->opts, each_cb, &st->disp);
863 8 50         if (!st->parser) {
864 0           sep_stream_state_free(aTHX_ st);
865 0           ctx->cancel = 1;
866 0           croak("File::Raw::Separated: out of memory initialising parser");
867             }
868 8           ctx->call_state = st;
869             }
870              
871 17 100         if (chunk && len > 0) {
    50          
872 10           rc = separated_feed(st->parser, chunk, len);
873 10 100         if (rc != SEPARATED_OK) {
874 1           SV *die_msg = st->disp.die_msg;
875 1           size_t off = separated_offset(st->parser);
876 1           ctx->cancel = 1;
877 1 50         if (die_msg) {
878             STRLEN dlen;
879 1           SV *m = newSVsv(die_msg);
880 1           const char *dpv = SvPV(m, dlen);
881 1           sep_stream_state_free(aTHX_ st);
882 1           ctx->call_state = NULL;
883 1           sv_2mortal(m);
884 1           croak("%.*s", (int)dlen, dpv);
885             }
886 0           sep_stream_state_free(aTHX_ st);
887 0           ctx->call_state = NULL;
888 0           croak("File::Raw::Separated: %s at byte offset %lu",
889             separated_strerror(rc), (unsigned long)off);
890             }
891             }
892              
893 16 100         if (eof) {
894 7           rc = separated_finish(st->parser);
895 7 50         if (rc != SEPARATED_OK) {
896 0           SV *die_msg = st->disp.die_msg;
897 0           size_t off = separated_offset(st->parser);
898 0           ctx->cancel = 1;
899 0 0         if (die_msg) {
900             STRLEN dlen;
901 0           SV *m = newSVsv(die_msg);
902 0           const char *dpv = SvPV(m, dlen);
903 0           sep_stream_state_free(aTHX_ st);
904 0           ctx->call_state = NULL;
905 0           sv_2mortal(m);
906 0           croak("%.*s", (int)dlen, dpv);
907             }
908 0           sep_stream_state_free(aTHX_ st);
909 0           ctx->call_state = NULL;
910 0           croak("File::Raw::Separated: %s at byte offset %lu",
911             separated_strerror(rc), (unsigned long)off);
912             }
913 7           sep_stream_state_free(aTHX_ st);
914 7           ctx->call_state = NULL;
915             }
916              
917 16           return 0;
918             }
919              
920             /* Plugin descriptors; fields are filled in and registered at BOOT. Static
921             * storage so the registry's non-owning pointer stays valid for the process. */
922             static FilePlugin csv_plugin;
923             static FilePlugin tsv_plugin;
924              
925             /* ============================================================
926             * Per-XSUB helpers
927             *
928             * The nine XSUBs (parse_buf / parse_buf_each / parse_stream and the
929             * six dialect-pinned csv_ / tsv_ variants) all do the same work modulo
930             * dialect pinning. Bodies live here so each XSUB is a one-liner; the
931             * dialect-prefixed variants used to be pure-Perl wrappers in the .pm
932             * that mutated the opts hash via _pin_dialect - that's gone now.
933             * ============================================================ */
934              
935             static HV *
936 115           opts_to_hv(pTHX_ const char *fn, SV *opts)
937             {
938 115 100         if (!opts || !SvOK(opts)) return NULL;
    50          
939 50 100         if (!SvROK(opts) || SvTYPE(SvRV(opts)) != SVt_PVHV)
    50          
940 1           croak("%s: options argument must be a hashref", fn);
941 49           return (HV *)SvRV(opts);
942             }
943              
944             static SV *
945 85           do_parse_buf(pTHX_ const char *fn, SV *input, SV *opts, dialect_pin_t pin)
946             {
947             separated_options_t o;
948 85           HV *opts_hv = opts_to_hv(aTHX_ fn, opts);
949             STRLEN ilen;
950             const char *ipv;
951             AV *result;
952              
953 85           seed_opts_for_dialect(aTHX_ opts_hv, &o, pin);
954 85           decode_opts(aTHX_ opts_hv, &o);
955              
956 84           ipv = SvPV(input, ilen);
957             {
958 84           AV *xhdr = extract_explicit_headers(aTHX_ opts_hv);
959 84           result = separated_parse_dispatch(aTHX_ ipv, ilen, &o, 0, NULL, xhdr);
960             }
961 77           return newRV_noinc((SV *)result);
962             }
963              
964             static void
965 11           do_parse_buf_each(pTHX_ const char *fn, SV *input, SV *code, SV *opts,
966             dialect_pin_t pin)
967             {
968             separated_options_t o;
969             HV *opts_hv;
970             STRLEN ilen;
971             const char *ipv;
972              
973 11 100         if (!SvROK(code) || SvTYPE(SvRV(code)) != SVt_PVCV)
    50          
974 1           croak("%s: code argument must be a CODE ref", fn);
975 10           opts_hv = opts_to_hv(aTHX_ fn, opts);
976 10           seed_opts_for_dialect(aTHX_ opts_hv, &o, pin);
977 10           decode_opts(aTHX_ opts_hv, &o);
978              
979 10           ipv = SvPV(input, ilen);
980             {
981 10           AV *xhdr = extract_explicit_headers(aTHX_ opts_hv);
982 10           (void)separated_parse_dispatch(aTHX_ ipv, ilen, &o, 1, code, xhdr);
983             }
984 9           }
985              
986             static void
987 21           do_parse_stream(pTHX_ const char *fn, SV *path, SV *code, SV *opts,
988             dialect_pin_t pin)
989             {
990             separated_options_t o;
991             HV *opts_hv;
992             STRLEN plen;
993             const char *path_pv;
994              
995 21 50         if (!SvOK(path)) croak("%s: path must be defined", fn);
996 21 100         if (!SvROK(code) || SvTYPE(SvRV(code)) != SVt_PVCV)
    50          
997 1           croak("%s: code argument must be a CODE ref", fn);
998 20           opts_hv = opts_to_hv(aTHX_ fn, opts);
999 19           seed_opts_for_dialect(aTHX_ opts_hv, &o, pin);
1000 19           decode_opts(aTHX_ opts_hv, &o);
1001              
1002 19           path_pv = SvPV(path, plen);
1003             PERL_UNUSED_VAR(plen);
1004             {
1005 19           AV *xhdr = extract_explicit_headers(aTHX_ opts_hv);
1006 19           separated_parse_dispatch_stream(aTHX_ path_pv, &o, code, xhdr);
1007             }
1008 16           }
1009              
1010             /* ============================================================
1011             * Import dispatcher
1012             *
1013             * `use File::Raw::Separated qw(import|:all|:unified|:csv|:tsv|FUNCTION_NAME)`
1014             * lands in XS_File__Raw__Separated_import, which walks the requested
1015             * names and `newXS`'s "${caller}::file_${name}" -> the matching XSUB
1016             * pointer into the caller's symbol table. Mirrors File::Raw's import
1017             * (file.c XS_file_import) — same `file_` prefix convention so the two
1018             * modules compose: `use File::Raw qw(import); use File::Raw::Separated
1019             * qw(import);` lands `file_slurp` *and* `file_parse_buf` etc. in the
1020             * same package without collision.
1021             *
1022             * The xs_func slots are populated at BOOT time by looking up each
1023             * already-registered XSUB via get_cv() and stashing CvXSUB(cv); avoids
1024             * fragile forward-declarations of static XSUBs that xsubpp may have
1025             * emitted with PERL_EUPXS_ALWAYS_EXPORT either set or not.
1026             * ============================================================ */
1027              
1028             typedef struct { /* one importable function */
1029             const char *name; /* XSUB name without package; installed as file_NAME */
1030             XSUBADDR_t xs_func; /* NULL until populate_import_table stores CvXSUB here */
1031             } ImportEntry;
1032              
1033             /* Index ranges into g_import_funcs used by the tag handlers. Keep in sync. */
1034             #define IMPORT_UNIFIED_LO 0
1035             #define IMPORT_UNIFIED_HI 3 /* exclusive */
1036             #define IMPORT_CSV_LO 3
1037             #define IMPORT_CSV_HI 6
1038             #define IMPORT_TSV_LO 6
1039             #define IMPORT_TSV_HI 9
1040              
1041             static ImportEntry g_import_funcs[] = {
1042             /* :unified */
1043             { "parse_buf", NULL },
1044             { "parse_buf_each", NULL },
1045             { "parse_stream", NULL },
1046             /* :csv */
1047             { "csv_parse_buf", NULL },
1048             { "csv_parse_buf_each", NULL },
1049             { "csv_parse_stream", NULL },
1050             /* :tsv */
1051             { "tsv_parse_buf", NULL },
1052             { "tsv_parse_buf_each", NULL },
1053             { "tsv_parse_stream", NULL },
1054             { NULL, NULL } /* sentinel: table walks stop at name == NULL */
1055             };
1056              
1057             static void
1058 39           populate_import_table(pTHX)
1059             {
1060             int i;
1061 390 100         for (i = 0; g_import_funcs[i].name; i++) {
1062             char full[256];
1063             CV *cv;
1064 351           snprintf(full, sizeof full,
1065             "File::Raw::Separated::%s", g_import_funcs[i].name);
1066 351           cv = get_cv(full, 0);
1067 351 50         if (!cv || !CvISXSUB(cv))
    50          
1068 0           croak("File::Raw::Separated boot: missing XSUB '%s'", full);
1069 351           g_import_funcs[i].xs_func = CvXSUB(cv);
1070             }
1071 39           }
1072              
1073             static void
1074 243           install_one(pTHX_ const char *pkg, const ImportEntry *e)
1075             {
1076             char full[256];
1077 243           snprintf(full, sizeof full, "%s::file_%s", pkg, e->name);
1078 243           newXS(full, e->xs_func, __FILE__);
1079 243           }
1080              
1081             static void
1082 27           install_range(pTHX_ const char *pkg, int lo, int hi)
1083             {
1084             int i;
1085 270 100         for (i = lo; i < hi; i++) install_one(aTHX_ pkg, &g_import_funcs[i]);
1086 27           }
1087              
1088             XS_EXTERNAL(XS_File__Raw__Separated_import);
1089 39           XS_EXTERNAL(XS_File__Raw__Separated_import)
1090             {
1091 39           dXSARGS;
1092 39 50         const char *pkg = CopSTASHPV(PL_curcop);
    50          
    50          
    50          
    0          
    50          
    50          
1093             int i, j;
1094             int matched;
1095              
1096             /* No imports requested: bare `use File::Raw::Separated;` lands
1097             * here with items==1 (just the package name). Plugin BOOT has
1098             * already registered csv/tsv with File::Raw — nothing more to do. */
1099 39 100         if (items <= 1) XSRETURN_EMPTY;
1100              
1101 54 100         for (i = 1; i < items; i++) {
1102             STRLEN len;
1103 27           const char *arg = SvPV(ST(i), len);
1104              
1105 27 50         if (len > 0 && arg[0] == ':') {
    50          
1106 0 0         if (len == 4 && memcmp(arg, ":all", 4) == 0) {
    0          
1107 0           install_range(aTHX_ pkg, 0, IMPORT_TSV_HI);
1108 27           continue;
1109             }
1110 0 0         if (len == 8 && memcmp(arg, ":unified", 8) == 0) {
    0          
1111 0           install_range(aTHX_ pkg, IMPORT_UNIFIED_LO, IMPORT_UNIFIED_HI);
1112 0           continue;
1113             }
1114 0 0         if (len == 4 && memcmp(arg, ":csv", 4) == 0) {
    0          
1115 0           install_range(aTHX_ pkg, IMPORT_CSV_LO, IMPORT_CSV_HI);
1116 0           continue;
1117             }
1118 0 0         if (len == 4 && memcmp(arg, ":tsv", 4) == 0) {
    0          
1119 0           install_range(aTHX_ pkg, IMPORT_TSV_LO, IMPORT_TSV_HI);
1120 0           continue;
1121             }
1122 0           warn("File::Raw::Separated: unknown tag '%.*s'", (int)len, arg);
1123 0           continue;
1124             }
1125              
1126             /* Bare `import` is shorthand for `:all`, matching the File::Raw
1127             * idiom: `use File::Raw qw(import);`. */
1128 27 50         if (len == 6 && memcmp(arg, "import", 6) == 0) {
    50          
1129 27           install_range(aTHX_ pkg, 0, IMPORT_TSV_HI);
1130 27           continue;
1131             }
1132              
1133 0           matched = 0;
1134 0 0         for (j = 0; g_import_funcs[j].name; j++) {
1135 0 0         if (strlen(g_import_funcs[j].name) == len
1136 0 0         && memcmp(arg, g_import_funcs[j].name, len) == 0) {
1137 0           install_one(aTHX_ pkg, &g_import_funcs[j]);
1138 0           matched = 1;
1139 0           break;
1140             }
1141             }
1142 0 0         if (!matched)
1143 0           warn("File::Raw::Separated: '%.*s' is not exported",
1144             (int)len, arg);
1145             }
1146              
1147 27           XSRETURN_EMPTY;
1148             }
1149              
1150             /* ============================================================
1151             * XS surface
1152             * ============================================================ */
1153              
1154             MODULE = File::Raw::Separated PACKAGE = File::Raw::Separated
1155              
1156             PROTOTYPES: DISABLE
1157              
1158             BOOT:
1159             /* Seed the per-dialect defaults the plugins point at. */
1160 39           separated_options_init_csv(&csv_default_opts);
1161 39           separated_options_init_tsv(&tsv_default_opts);
1162              
1163             /* Build and register the CSV plugin. READ (sep_read), WRITE
1164             * (sep_write) and STREAM (sep_stream) are wired; RECORD stays NULL.
1165             * `state` points at the dialect defaults seeded above, which the
1166             * phase callbacks copy before applying per-call options. */
1167 39           csv_plugin.name = "csv";
1168 39           csv_plugin.read_fn = sep_read;
1169 39           csv_plugin.write_fn = sep_write;
1170 39           csv_plugin.record_fn = NULL;
1171 39           csv_plugin.stream_fn = sep_stream;
1172 39           csv_plugin.state = &csv_default_opts;
1173 39 50         if (file_register_plugin(aTHX_ &csv_plugin) <= 0)
1174 0           warn("File::Raw::Separated: failed to register 'csv' plugin");
1175              
1176 39           tsv_plugin.name = "tsv";
1177 39           tsv_plugin.read_fn = sep_read;
1178 39           tsv_plugin.write_fn = sep_write;
1179 39           tsv_plugin.record_fn = NULL;
1180 39           tsv_plugin.stream_fn = sep_stream;
1181 39           tsv_plugin.state = &tsv_default_opts;
1182 39 50         if (file_register_plugin(aTHX_ &tsv_plugin) <= 0)
1183 0           warn("File::Raw::Separated: failed to register 'tsv' plugin");
1184              
1185             /* Populate g_import_funcs[].xs_func from the just-registered XSUBs;
1186             * the dispatcher uses these pointers when stamping `file_*` aliases
1187             * into callers' packages. Boot order: xsubpp emits the newXS_deffile
1188             * registrations *before* this initialisation block, so get_cv() is
1189             * guaranteed to find each one. */
1190 39           populate_import_table(aTHX);
1191              
1192             /* Override Exporter::import (we no longer inherit it; .pm has been
1193             * stripped of the Exporter glue) with our XS dispatcher so
1194             * `use File::Raw::Separated qw(...)` lands in our import directly. */
1195 39           newXS("File::Raw::Separated::import",
1196             XS_File__Raw__Separated_import, __FILE__);
1197              
1198              
1199             # =====================================================================
1200             # Parser entry points
1201             # =====================================================================
1202             #
1203             # Nine XSUBs, three logical groups, all thin shims over the do_* helpers
1204             # above:
1205             #
1206             # Unified (dialect read from opts hash, defaults to csv):
1207             # parse_buf($input [, \%opts]) -> \@rows
1208             # parse_buf_each($input, $cb [, \%opts])
1209             # parse_stream($path, $cb [, \%opts])
1210             #
1211             # CSV-pinned (dialect key in opts ignored):
1212             # csv_parse_buf, csv_parse_buf_each, csv_parse_stream
1213             #
1214             # TSV-pinned:
1215             # tsv_parse_buf, tsv_parse_buf_each, tsv_parse_stream
1216             #
1217             # xsubpp registers them in package File::Raw::Separated; users get them
1218             # under `file_` prefix in their own namespace via `use File::Raw::Separated
1219             # qw(import|:all|:unified|:csv|:tsv|FUNCTION_NAME)` (see import dispatcher above).
1220             #
1221             # parse_stream / *_parse_stream open the file directly via PerlLIO,
1222             # bypassing File::Raw's read hook (no recursion / no double-parse).
1223              
SV *
parse_buf(SV *input, SV *opts = NULL)
    CODE:
        /* Unified entry point: no dialect is pinned (DIALECT_AUTO). */
        RETVAL = do_parse_buf(aTHX_ "parse_buf", input, opts, DIALECT_AUTO);
    OUTPUT:
        RETVAL
1232              
SV *
csv_parse_buf(SV *input, SV *opts = NULL)
    CODE:
        /* Same as parse_buf with the dialect pinned to DIALECT_CSV. */
        RETVAL = do_parse_buf(aTHX_ "csv_parse_buf", input, opts, DIALECT_CSV);
    OUTPUT:
        RETVAL
1241              
SV *
tsv_parse_buf(SV *input, SV *opts = NULL)
    CODE:
        /* Same as parse_buf with the dialect pinned to DIALECT_TSV. */
        RETVAL = do_parse_buf(aTHX_ "tsv_parse_buf", input, opts, DIALECT_TSV);
    OUTPUT:
        RETVAL
1250              
1251              
void
parse_buf_each(SV *input, SV *code, SV *opts = NULL)
    CODE:
        /* Unified callback variant: no dialect is pinned; returns nothing. */
        do_parse_buf_each(aTHX_ "parse_buf_each", input, code, opts, DIALECT_AUTO);
1260              
void
csv_parse_buf_each(SV *input, SV *code, SV *opts = NULL)
    CODE:
        /* Callback variant with the dialect pinned to DIALECT_CSV; returns nothing. */
        do_parse_buf_each(aTHX_ "csv_parse_buf_each", input, code, opts, DIALECT_CSV);
1269              
void
tsv_parse_buf_each(SV *input, SV *code, SV *opts = NULL)
    CODE:
        /* Callback variant with the dialect pinned to DIALECT_TSV; returns nothing. */
        do_parse_buf_each(aTHX_ "tsv_parse_buf_each", input, code, opts, DIALECT_TSV);
1278              
1279              
void
parse_stream(SV *path, SV *code, SV *opts = NULL)
    CODE:
        /* Unified file streamer: no dialect is pinned; returns nothing. */
        do_parse_stream(aTHX_ "parse_stream", path, code, opts, DIALECT_AUTO);
1288              
void
csv_parse_stream(SV *path, SV *code, SV *opts = NULL)
    CODE:
        /* File streamer with the dialect pinned to DIALECT_CSV; returns nothing. */
        do_parse_stream(aTHX_ "csv_parse_stream", path, code, opts, DIALECT_CSV);
1297              
void
tsv_parse_stream(SV *path, SV *code, SV *opts = NULL)
    CODE:
        /* File streamer with the dialect pinned to DIALECT_TSV; returns nothing. */
        do_parse_stream(aTHX_ "tsv_parse_stream", path, code, opts, DIALECT_TSV);
1306