File Coverage

src/mds_render_html.c
Criterion Covered Total %
statement 725 807 89.8
branch 668 996 67.0
condition n/a
subroutine n/a
pod n/a
total 1393 1803 77.2


, 0 inside */ \n"); break; \n"); break; \n"); break;
line stmt bran cond sub pod time code
1             /* mds_render_html.c — SAX consumer that writes HTML into an mds_buf.
2             *
3             * Handles the block kinds the scanner emits
4             * (paragraph, heading, thematic-break, fenced code, quote, list, list_item)
5             * plus pass-through inline text (HTML-escaped) and pre-escaped raw.
6             *
7             * Captures aTHX via tTHX in render_state since the inline `mds_buf_write`
8             * needs the interpreter context.
9             */
10              
11             #include "mds_render_html.h"
12             #include "mds_buf.h"
13             #include "mds_ir.h"
14             #include "mds.h"
15             #include "mds_entity.h"
16              
17             #include "EXTERN.h"
18             #include "perl.h"
19              
20             #include
21             #include
22             #include
23              
24             #define CLOSE_STACK_MAX 1024
25              
26             typedef enum {
27             CLOSE_NONE = 0,
28             CLOSE_P,
29             CLOSE_H1, CLOSE_H2, CLOSE_H3, CLOSE_H4, CLOSE_H5, CLOSE_H6,
30             CLOSE_CODE_FENCED,
31             CLOSE_CODE_INDENTED,
32             CLOSE_BLOCKQUOTE,
33             CLOSE_OL,
34             CLOSE_UL,
35             CLOSE_OL_TIGHT,
36             CLOSE_UL_TIGHT,
37             CLOSE_LI,
38             CLOSE_TABLE,
39             CLOSE_THEAD,
40             CLOSE_TBODY,
41             CLOSE_TR,
42             CLOSE_TH,
43             CLOSE_TD,
44             CLOSE_TIGHT_PARA,
45             CLOSE_FN_SECTION,
46             CLOSE_FN_DEF,
47             CLOSE_FN_DEF_SKIP,
48             CLOSE_NOOP
49             } close_kind;
50              
51             typedef struct {
52             #ifdef MULTIPLICITY
53             tTHX my_perl; /* captured interpreter, used via dTHXa */
54             #endif
55             mds_buf* buf;
56             close_kind closes[CLOSE_STACK_MAX];
57             int top;
58             /* tight_stack[i] = 1 if the i-th open container forces tight rendering
59             * (suppress <p>) for direct paragraph children, 0 otherwise. Push on
60             * every container enter that affects paragraph rendering (LIST,
61             * BLOCKQUOTE); pop on close. Paragraph checks tight_stack[tight_top-1].
62             * BLOCKQUOTE pushes 0 to shield its inner paragraphs from an outer
63             * tight list. Loose LIST also pushes 0 to shield deeper paragraphs. */
64             unsigned char tight_stack[CLOSE_STACK_MAX];
65             int tight_top;
66             int tight_depth; /* legacy counter; still tracks >0 = any tight list active */
67             int image_depth; /* >0 = inside an <img>; collect alt text */
68             char* alt; /* malloc'd alt-text accumulator */
69             size_t alt_len;
70             size_t alt_cap;
71             unsigned flags; /* MDS_FLAG_* bitmask */
72             int li_check_pending; /* >0 = look for [ ]/[x] at start of first text in LIST_ITEM */
73             int li_first_block; /* 1 = waiting for first block child after <li> */
  • 74             int need_nl_next; /* 1 = emit '\n' before next block (after suppressed tight para) */
    75             int in_thead; /* 1 inside <thead> */
    76             const mds_align* tbl_aligns; /* current table column alignments */
    77             unsigned tbl_ncols; /* column count */
    78             unsigned tbl_col; /* next cell index in current row */
    79             /* GFM autolink: coalesce consecutive cb_text events so the scanner can
    80             * see across intra-word `_`/`*` splits emitted by the inline tokenizer. */
    81             char* pending_text;
    82             size_t pending_len;
    83             size_t pending_cap;
    84             /* Tier E.1 — footnote tracking. fn_labels[i] = arena-borrowed label pointer;
    85             * fn_uses[i] = instance counter (incremented per FOOTNOTE_REF).
    86             * fn_count = number of distinct labels used. Indices double as 1-based
    87             * footnote ordinals (first-use order). fn_skip is set while we are
    88             * inside a FOOTNOTE_DEF whose label was never referenced; it
    89             * suppresses all output between enter/leave of that def. fn_in_def is
    90             * the 1-based ordinal of the def we are currently rendering (so the
    91             * leave callback knows what backref label/idx to emit). fn_section_open
    92             * tracks whether we have already written the
      headers. */
    93             const char** fn_labels; /* arena-borrowed label pointers */
    94             size_t* fn_label_lens; /* parallel label lengths */
    95             unsigned* fn_uses; /* parallel use counts */
    96             size_t fn_count;
    97             size_t fn_cap;
    98             int fn_skip; /* inside an unused def */
    99             unsigned fn_in_def; /* 1-based ordinal of current def, 0 if none */
    100             const char* fn_in_def_label; /* label of current def (for backref) */
    101             size_t fn_in_def_label_len;
    102             } render_state;
    103              
    104             /* Forward decls for helpers defined later but used by early callbacks. */
    105             static void flush_pending_text(pTHX_ render_state* st);
    106             static unsigned fn_lookup(render_state* st, const char* s, size_t n);
    107             static unsigned fn_register(render_state* st, const char* s, size_t n);
    108             static void write_fn_label_attr(pTHX_ mds_buf* b, const char* s, size_t n);
    109              
    110             /* Static escape tables: zero entry means byte is safe (run/memcpy through);
    111             * non-zero entries supply the replacement string and length. Built once at
    112             * load with the four chars that matter in HTML text/attribute context. */
    113             typedef struct { const char* rep; unsigned char rlen; } mds_esc_entry;
    114              
    115             static const mds_esc_entry mds_escape_text[256] = {
    116             ['<'] = { "&lt;", 4 },
    117             ['>'] = { "&gt;", 4 },
    118             ['&'] = { "&amp;", 5 },
    119             ['"'] = { "&quot;", 6 },
    120             };
    121              
    122             /* Image info stack: 32 nested images max — generous. */
    123             #define IMG_STACK_MAX 32
    124             typedef struct {
    125             const char* href; size_t hlen;
    126             const char* title; size_t tlen;
    127             } img_info;
    128             static img_info g_img_stack[IMG_STACK_MAX];
    129             static int g_img_stack_top = 0;
    130              
    131 100           static void alt_append(render_state* st, const char* s, size_t n) {
    132 100 50         if (n == 0) return;
    133 100 100         if (st->alt_len + n + 1 > st->alt_cap) {
    134 73 50         size_t nc = st->alt_cap ? st->alt_cap * 2 : 64;
    135 73 50         while (nc < st->alt_len + n + 1) nc *= 2;
    136 73           st->alt = (char*)realloc(st->alt, nc);
    137 73           st->alt_cap = nc;
    138             }
    139 100           memcpy(st->alt + st->alt_len, s, n);
    140 100           st->alt_len += n;
    141             }
    142              
    143             /* The XS hook keeps this single per-call instance alive in stack scope and
    144             * passes &st as ud_storage. mds_render_html_install() merely wires the
    145             * callbacks and stores `buf` (the rest is already initialised). */
    146              
    147 5561           static void push_close(render_state* st, close_kind k) {
    148 5561 50         if (st->top < CLOSE_STACK_MAX) st->closes[st->top++] = k;
    149 5561           }
    150 5561           static close_kind pop_close(render_state* st) {
    151 5561 50         return st->top > 0 ? st->closes[--st->top] : CLOSE_NONE;
    152             }
    153              
    154             /* SWAR-accelerated HTML escape. Every rendered text byte flows
    155             * through here, so the inner loop matters a lot. We process 8 bytes at a
    156             * time with the classic "has-zero" trick to detect whether the word
    157             * contains ANY of '<', '>', '&', '"'. Clean words advance the run
    158             * pointer without touching `mds_buf_write` until we either hit a hot
    159             * byte or reach the tail. On corpora dominated by plain prose this skips
    160             * ~7/8 of the per-byte work. */
    161             #define MDS_HASZ(x) (((x) - 0x0101010101010101ULL) & ~(x) & 0x8080808080808080ULL)
    162 14628           static inline uint64_t mds_escape_hot64(uint64_t w) {
    163             /* '<'=0x3C '>'=0x3E '&'=0x26 '"'=0x22 */
    164 14628           uint64_t a = w ^ 0x3C3C3C3C3C3C3C3CULL;
    165 14628           uint64_t b = w ^ 0x3E3E3E3E3E3E3E3EULL;
    166 14628           uint64_t c = w ^ 0x2626262626262626ULL;
    167 14628           uint64_t d = w ^ 0x2222222222222222ULL;
    168 14628           return MDS_HASZ(a) | MDS_HASZ(b) | MDS_HASZ(c) | MDS_HASZ(d);
    169             }
    170              
    171 5845           MDS_HOT static void html_escape(pTHX_ mds_buf* b, const char* s, size_t n) {
    172 5845           const char* run = s;
    173 5845           const char* end = s + n;
    174 5845           const char* p = s;
    175             /* Fast 8-byte SWAR skip over clean prose. Unaligned loads are fine
    176             * on arm64/x86_64; misaligned reads near the page boundary are safe
    177             * because we never dereference past `end`. */
    178 20473 100         while ((size_t)(end - p) >= 8) {
    179             uint64_t w;
    180             int k;
    181 14628           memcpy(&w, p, 8);
    182 14628 100         if (MDS_LIKELY(!mds_escape_hot64(w))) { p += 8; continue; }
    183             /* Word has at least one hot byte: emit any of the 8 that match
    184             * via the standard table, then continue. */
    185 1296 100         for (k = 0; k < 8; k++) {
    186 1152           const mds_esc_entry e = mds_escape_text[(unsigned char)p[k]];
    187 1152 100         if (!e.rlen) continue;
    188 193 100         if (p + k > run) mds_buf_write(aTHX_ b, run, (size_t)((p + k) - run));
        50          
    189 193 50         mds_buf_write(aTHX_ b, e.rep, e.rlen);
    190 193           run = p + k + 1;
    191             }
    192 144           p += 8;
    193             }
    194             /* Scalar tail (<8 bytes). */
    195 22174 100         for (; p < end; p++) {
    196 16329           const mds_esc_entry e = mds_escape_text[(unsigned char)*p];
    197 16329 100         if (!e.rlen) continue;
    198 242 100         if (p > run) mds_buf_write(aTHX_ b, run, (size_t)(p - run));
        50          
    199 242 50         mds_buf_write(aTHX_ b, e.rep, e.rlen);
    200 242           run = p + 1;
    201             }
    202 5845 100         if (run < end) mds_buf_write(aTHX_ b, run, (size_t)(end - run));
        100          
    203 5845           }
    204             #undef MDS_HASZ
    205              
    206             /* HTML-escape with backslash unescaping for ASCII punctuation. Used for
    207             * link/image title attributes which honour §6.1 backslash escapes. */
    208 157           static void html_escape_unesc(pTHX_ mds_buf* b, const char* s, size_t n) {
    209             /* expand_entity_at writes through `out` which is just `b` for titles. */
    210             extern size_t mds_expand_entity_at(const char* s, size_t i, size_t n,
    211             char* out, size_t* outlen);
    212             size_t i;
    213             mds_esc_entry e;
    214             unsigned char nx;
    215             char ebuf[8];
    216             size_t elen;
    217             size_t k;
    218             size_t z;
    219              
    220 157           i = 0;
    221 1141 100         while (i < n) {
    222 984 100         if (s[i] == '\\' && i + 1 < n) {
        50          
    223 15           nx = (unsigned char)s[i+1];
    224 15 50         if ((nx >= 0x21 && nx <= 0x2F) ||
        100          
    225 3 50         (nx >= 0x3A && nx <= 0x40) ||
        50          
    226 3 50         (nx >= 0x5B && nx <= 0x60) ||
        50          
    227 3 50         (nx >= 0x7B && nx <= 0x7E)) {
        0          
    228 12           e = mds_escape_text[nx];
    229 12 100         if (e.rlen) mds_buf_write(aTHX_ b, e.rep, e.rlen);
        50          
    230             else mds_buf_write(aTHX_ b, (const char*)&nx, 1);
    231 12           i += 2;
    232 12           continue;
    233             }
    234             }
    235 972 100         if (s[i] == '&') {
    236 30           k = mds_expand_entity_at(s, i, n, ebuf, &elen);
    237 30 100         if (k) {
    238             /* Re-emit decoded bytes through html_escape_text so chars
    239             * like '<' '&' '"' get re-escaped. */
    240 60 100         for (z = 0; z < elen; z++) {
    241 39           e = mds_escape_text[(unsigned char)ebuf[z]];
    242 39 100         if (e.rlen) mds_buf_write(aTHX_ b, e.rep, e.rlen);
        50          
    243 36 50         else mds_buf_write(aTHX_ b, ebuf + z, 1);
    244             }
    245 21           i += k;
    246 21           continue;
    247             }
    248             }
    249 951           e = mds_escape_text[(unsigned char)s[i]];
    250 951 100         if (e.rlen) mds_buf_write(aTHX_ b, e.rep, e.rlen);
        50          
    251 936 50         else mds_buf_write(aTHX_ b, s + i, 1);
    252 951           i++;
    253             }
    254 157           }
    255              
    256             /* Decode an entity at s[i..] — either &name; or &#NNN; / &#xHH; — into
    257             * `out` (up to 8 bytes is enough for any single entity; multi-codepoint
    258             * entities like ≧̸ need 5). Returns the number of source bytes
    259             * consumed (0 = not an entity), and stores the UTF-8 length in *outlen. */
    260 51           size_t mds_expand_entity_at(const char* s, size_t i, size_t n,
    261             char* out, size_t* outlen) {
    262             size_t q;
    263             unsigned long cp;
    264             size_t digits;
    265             char c;
    266             size_t blen;
    267             size_t name_start;
    268             const mds_entity* e;
    269             mds_entity ent_scratch;
    270              
    271 51 50         if (i >= n || s[i] != '&') return 0;
        50          
    272 51           q = i + 1;
    273 51 50         if (q < n && s[q] == '#') {
        50          
    274 0           q++;
    275 0           cp = 0;
    276 0           digits = 0;
    277 0 0         if (q < n && (s[q] == 'x' || s[q] == 'X')) {
        0          
        0          
    278 0           q++;
    279 0 0         while (q < n && digits < 6 && isxdigit((unsigned char)s[q])) {
        0          
        0          
    280 0           c = s[q];
    281 0 0         cp = cp * 16 + (c <= '9' ? c - '0' :
        0          
    282 0           (c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
    283 0           q++; digits++;
    284             }
    285             } else {
    286 0 0         while (q < n && digits < 7 && s[q] >= '0' && s[q] <= '9') {
        0          
        0          
        0          
    287 0           cp = cp * 10 + (unsigned long)(s[q] - '0');
    288 0           q++; digits++;
    289             }
    290             }
    291 0 0         if (!digits || q >= n || s[q] != ';') return 0;
        0          
        0          
    292 0           q++;
    293 0 0         if (cp == 0 || cp > 0x10FFFF || (cp >= 0xD800 && cp <= 0xDFFF))
        0          
        0          
        0          
    294 0           cp = 0xFFFD;
    295 0 0         if (cp < 0x80) {
    296 0           out[0] = (char)cp; blen = 1;
    297 0 0         } else if (cp < 0x800) {
    298 0           out[0] = (char)(0xC0 | (cp >> 6));
    299 0           out[1] = (char)(0x80 | (cp & 0x3F)); blen = 2;
    300 0 0         } else if (cp < 0x10000) {
    301 0           out[0] = (char)(0xE0 | (cp >> 12));
    302 0           out[1] = (char)(0x80 | ((cp >> 6) & 0x3F));
    303 0           out[2] = (char)(0x80 | (cp & 0x3F)); blen = 3;
    304             } else {
    305 0           out[0] = (char)(0xF0 | (cp >> 18));
    306 0           out[1] = (char)(0x80 | ((cp >> 12) & 0x3F));
    307 0           out[2] = (char)(0x80 | ((cp >> 6) & 0x3F));
    308 0           out[3] = (char)(0x80 | (cp & 0x3F)); blen = 4;
    309             }
    310 0           *outlen = blen;
    311 0           return q - i;
    312             }
    313 51           name_start = q;
    314 222 100         while (q < n && isalnum((unsigned char)s[q])) q++;
        100          
    315 51 100         if (q == name_start || q >= n || s[q] != ';') return 0;
        100          
        100          
    316 36           e = mds_entity_lookup(s + name_start, q - name_start, &ent_scratch);
    317 36 50         if (!e) return 0;
    318 36 50         if (e->ulen > 8) return 0;
    319 36           memcpy(out, e->utf8, e->ulen);
    320 36           *outlen = e->ulen;
    321 36           return (q + 1) - i;
    322             }
    323              
    324             /* Returns 1 if URL starts with a scheme we refuse to emit (xss vectors)
    325             * unless MDS_FLAG_UNSAFE is set. `for_image` permits data: image MIME. */
    326 470           static int url_is_dangerous(const char* s, size_t n, int for_image) {
    327             size_t i;
    328             size_t j;
    329             char c;
    330             size_t sl;
    331             const char* p;
    332             size_t rem;
    333              
    334             /* Skip leading whitespace (already trimmed upstream but be defensive). */
    335 470           i = 0;
    336 470 100         while (i < n && (unsigned char)s[i] <= 0x20) i++;
        50          
    337             /* Find optional scheme ending at ':'. */
    338 470           j = i;
    339 1249 100         while (j < n) {
    340 1190           c = s[j];
    341 1190 100         if (c == ':') break;
    342 1110 100         if (!isalnum((unsigned char)c) && c != '+' && c != '-' && c != '.') return 0;
        100          
        100          
        100          
    343 779           j++;
    344             }
    345 139 100         if (j >= n || j == i) return 0;
        50          
    346 80           sl = j - i;
    347             /* Case-insensitive scheme compare. */
    348             #define SCHEME_EQ(lit) (sl == (sizeof(lit) - 1) && strncasecmp(s + i, (lit), sl) == 0)
    349 80 100         if (SCHEME_EQ("javascript") || SCHEME_EQ("vbscript") || SCHEME_EQ("file"))
        50          
        100          
        50          
        100          
        50          
    350 2           return 1;
    351 78 100         if (SCHEME_EQ("data")) {
        100          
    352 1 50         if (!for_image) return 1;
    353             /* Allow only data:image/{gif,png,jpeg,webp}[;...]. */
    354 0           p = s + j + 1;
    355 0           rem = n - (j + 1);
    356 0 0         if (rem < 6 || strncasecmp(p, "image/", 6) != 0) return 1;
        0          
    357 0           p += 6; rem -= 6;
    358 0 0         if ((rem >= 3 && strncasecmp(p, "gif", 3) == 0) ||
        0          
        0          
    359 0 0         (rem >= 3 && strncasecmp(p, "png", 3) == 0) ||
        0          
    360 0 0         (rem >= 4 && strncasecmp(p, "jpeg", 4) == 0) ||
        0          
    361 0 0         (rem >= 4 && strncasecmp(p, "webp", 4) == 0))
    362 0           return 0;
    363 0           return 1;
    364             }
    365             #undef SCHEME_EQ
    366 77           return 0;
    367             }
    368              
    369             /* ------------------ callbacks ------------------ */
    370              
    371 5561           static void cb_enter_block(void* ud, mds_block_type t, const mds_block_detail* d) {
    372 5561           render_state* st = (render_state*)ud;
    373             dTHXa(st->my_perl);
    374 5561           mds_buf* b = st->buf;
    375             /* While we are inside an unreferenced footnote def, swallow all
    376             * nested block events. Push CLOSE_NOOP so the matching leave pops
    377             * cleanly without disturbing the close stack. The FOOTNOTE_DEF
    378             * event itself is fired BEFORE fn_skip is set (in this function),
    379             * so the dispatch below still runs for that one event. */
    380 5561 50         if (st->fn_skip && t != MDS_BLK_FOOTNOTE_DEF && t != MDS_BLK_FOOTNOTES_SECTION) {
        0          
        0          
    381 0           push_close(st, CLOSE_NOOP);
    382 0           return;
    383             }
    384 5561           flush_pending_text(aTHX_ st);
    385             /* Decide if this block starts on the same line as <li> (tight para)
  • 386             * or on a new line. li_first_block fires only for the very first child
    387             * of a list item; need_nl_next fires after a suppressed tight paragraph
    388             * when any further block follows in the same item. */
    389 5561 100         if (st->li_first_block) {
    390             int tight_para;
    391 573           st->li_first_block = 0;
    392 1098           tight_para = (t == MDS_BLK_PARAGRAPH &&
    393 1098 100         st->tight_top > 0 &&
        50          
    394 525 100         st->tight_stack[st->tight_top - 1]);
    395 573 100         if (!tight_para) MDS_BUF_LIT(b, "\n");
    396 4988 100         } else if (st->need_nl_next) {
    397 53 50         st->need_nl_next = 0;
    398             MDS_BUF_LIT(b, "\n");
    399             }
    400              
    401 5561           switch (t) {
    402 0           case MDS_BLK_DOC:
    403 0           push_close(st, CLOSE_NOOP);
    404 0           break;
    405 2880           case MDS_BLK_PARAGRAPH: {
    406 3579 100         int suppress = (st->tight_top > 0 &&
    407 699 100         st->tight_stack[st->tight_top - 1]);
    408 2880 100         if (suppress) {
    409 393           push_close(st, CLOSE_TIGHT_PARA);
    410             } else {
    411             MDS_BUF_LIT(b, "

    ");

    412 2487           push_close(st, CLOSE_P);
    413             }
    414 2880           break;
    415             }
    416 216           case MDS_BLK_HEADING: {
    417             int lvl;
    418             char open[5];
    419 216           lvl = d->u.heading.level;
    420 216 50         if (lvl < 1) lvl = 1; else if (lvl > 6) lvl = 6;
        50          
    421 216           open[0] = '<';
    422 216           open[1] = 'h';
    423 216           open[2] = (char)('0' + lvl);
    424 216           open[3] = '>';
    425 216 50         open[4] = 0;
    426             mds_buf_write(aTHX_ b, open, 4);
    427 216           push_close(st, (close_kind)(CLOSE_H1 + (lvl - 1)));
    428 216           break;
    429             }
    430 104 50         case MDS_BLK_THEMATIC_BREAK:
    431             MDS_BUF_LIT(b, "
    \n");
    432 104           push_close(st, CLOSE_NOOP);
    433 104           break;
    434 200           case MDS_BLK_HTML:
    435             /* Raw HTML block: children are EV_RAW text; nothing to wrap. */
    436 200           push_close(st, CLOSE_NOOP);
    437 200           break;
    438 114           case MDS_BLK_CODE_FENCED: {
    439 114           const char* info = d->u.code_fenced.info;
    440 114           size_t il = d->u.code_fenced.info_len;
    441             /* Trim info to first word for the language class. */
    442 114           size_t lang_len = 0;
    443 217 100         while (lang_len < il && info[lang_len] != ' ' && info[lang_len] != '\t')
        100          
        50          
    444 103           lang_len++;
    445 114 100         if (lang_len > 0) {
    446             MDS_BUF_LIT(b, "
    
    
    447 19           html_escape_unesc(aTHX_ b, info, lang_len);
    448             MDS_BUF_LIT(b, "\">");
    449             } else {
    450             MDS_BUF_LIT(b, "
    "); 
    451             }
    452 114           push_close(st, CLOSE_CODE_FENCED);
    453 114           break;
    454             }
    455 170 50         case MDS_BLK_CODE_INDENTED:
    456             MDS_BUF_LIT(b, "
    "); 
    457 170           push_close(st, CLOSE_CODE_INDENTED);
    458 170           break;
    459 176 100         case MDS_BLK_QUOTE:
    460             MDS_BUF_LIT(b, "
    \n");
    461 176 50         if (st->tight_top < CLOSE_STACK_MAX)
    462 176           st->tight_stack[st->tight_top++] = 0;
    463 176           push_close(st, CLOSE_BLOCKQUOTE);
    464 176           break;
    465 377           case MDS_BLK_LIST: {
    466 377           int tight = d->u.list.is_tight;
    467 377 100         if (tight) st->tight_depth++;
    468 377 50         if (st->tight_top < CLOSE_STACK_MAX)
    469 377 100         st->tight_stack[st->tight_top++] = (unsigned char)(tight ? 1 : 0);
    470 377 100         if (d->u.list.is_ordered) {
    471 97 100         if (d->u.list.start != 1) {
    472             char buf[64];
    473 28           int n = snprintf(buf, sizeof buf, "
      \n",
    474 28           d->u.list.start);
    475 28 50         if (n > 0) mds_buf_write(aTHX_ b, buf, (size_t)n);
        50          
    476             } else {
    477             MDS_BUF_LIT(b, "
      \n");
    478             }
    479 97 100         push_close(st, tight ? CLOSE_OL_TIGHT : CLOSE_OL);
    480             } else {
    481             MDS_BUF_LIT(b, "
      \n");
    482 280 100         push_close(st, tight ? CLOSE_UL_TIGHT : CLOSE_UL);
    483             }
    484 377           break;
    485             }
    486 591 50         case MDS_BLK_LIST_ITEM:
    487             MDS_BUF_LIT(b, "
  • ");
  • 488 591           st->li_check_pending = 1;
    489 591           st->li_first_block = 1;
    490 591           st->need_nl_next = 0;
    491 591           push_close(st, CLOSE_LI);
    492 591           break;
    493 68 50         case MDS_BLK_TABLE:
    494             MDS_BUF_LIT(b, "\n"); \n"); \n"); \n");
    495 68           st->tbl_aligns = d->u.table.aligns;
    496 68           st->tbl_ncols = d->u.table.ncols;
    497 68           push_close(st, CLOSE_TABLE);
    498 68           break;
    499 68 50         case MDS_BLK_TABLE_HEAD:
    500             MDS_BUF_LIT(b, "
    501 68           st->in_thead = 1;
    502 68           push_close(st, CLOSE_THEAD);
    503 68           break;
    504 56 50         case MDS_BLK_TABLE_BODY:
    505             MDS_BUF_LIT(b, "
    506 56           st->in_thead = 0;
    507 56           push_close(st, CLOSE_TBODY);
    508 56           break;
    509 164 50         case MDS_BLK_TABLE_ROW:
    510             MDS_BUF_LIT(b, "
    511 164           st->tbl_col = 0;
    512 164           push_close(st, CLOSE_TR);
    513 164           break;
    514 350           case MDS_BLK_TABLE_CELL: {
    515 350 50         mds_align al = (st->tbl_col < st->tbl_ncols && st->tbl_aligns)
    516 700 50         ? st->tbl_aligns[st->tbl_col] : MDS_ALIGN_NONE;
    517 350 100         if (MDS_LIKELY(al == MDS_ALIGN_NONE)) {
    518             /* Fast path — 4-byte literal in a single buf write.
    519             * synth-tables corpus hits this on every cell. */
    520 308 100         if (st->in_thead) MDS_BUF_LIT(b, "");
    521             else MDS_BUF_LIT(b, "");
    522             } else {
    523 42 100         const char* tag = st->in_thead ? "th" : "td";
    524 42 50         const char* aname = al == MDS_ALIGN_LEFT ? "left"
    525 42 100         : al == MDS_ALIGN_RIGHT ? "right" : "center";
        100          
    526             mds_buf_write(aTHX_ b, "<", 1);
    527             mds_buf_write(aTHX_ b, tag, 2);
    528             MDS_BUF_LIT(b, " align=\"");
    529 42 50         mds_buf_write(aTHX_ b, aname, strlen(aname));
    530             MDS_BUF_LIT(b, "\">");
    531             }
    532 350           st->tbl_col++;
    533 350 100         push_close(st, st->in_thead ? CLOSE_TH : CLOSE_TD);
    534 350           break;
    535             }
    536 0           default:
    537 0           push_close(st, CLOSE_NOOP);
    538 0           break;
    539 9 100         case MDS_BLK_FOOTNOTES_SECTION:
    540             MDS_BUF_LIT(b, "
    \n
      \n");
    541 9           push_close(st, CLOSE_FN_SECTION);
    542 9           break;
    543 18           case MDS_BLK_FOOTNOTE_DEF: {
    544 18           const char* lab = d->u.footnote_def.label;
    545 18           size_t ll = d->u.footnote_def.label_len;
    546 18           unsigned idx = fn_lookup(st, lab, ll);
    547 18 50         if (!idx) {
    548             /* Unreferenced def: drop the entire subtree. */
    549 0           st->fn_skip = 1;
    550 0           push_close(st, CLOSE_FN_DEF_SKIP);
    551 0           break;
    552             }
    553 18           st->fn_in_def = idx;
    554 18           st->fn_in_def_label = lab;
    555 18 50         st->fn_in_def_label_len = ll;
    556             MDS_BUF_LIT(b, "
  • 557 18           write_fn_label_attr(aTHX_ b, lab, ll);
    558             MDS_BUF_LIT(b, "\">\n");
    559 18           push_close(st, CLOSE_FN_DEF);
    560 18           break;
    561             }
    562             }
    563             }
    564              
    565 5561           static void cb_leave_block(void* ud, mds_block_type t) {
    566 5561           render_state* st = (render_state*)ud;
    567             dTHXa(st->my_perl);
    568 5561           mds_buf* b = st->buf;
    569             close_kind k;
    570             (void)t;
    571             /* Mirror cb_enter_block: in fn_skip mode, nested events were given
    572             * CLOSE_NOOP on the stack so pop here and return without flushing. */
    573 5561 50         if (st->fn_skip) {
    574 0           k = pop_close(st);
    575 0 0         if (k == CLOSE_FN_DEF_SKIP) {
    576 0           st->fn_skip = 0;
    577             }
    578 0           return;
    579             }
    580 5561           flush_pending_text(aTHX_ st);
    581 5561 50         if (st->li_check_pending == 2) {
    582             MDS_BUF_LIT(b, "[");
    583             }
    584 5561           st->li_check_pending = 0;
    585 5561           k = pop_close(st);
    586 5561           switch (k) {
    587 304           case CLOSE_NONE:
    588             case CLOSE_NOOP:
    589 304           break;
    590 4974 100         case CLOSE_P: MDS_BUF_LIT(b, "

    \n"); break;
    591 186 50         case CLOSE_H1: MDS_BUF_LIT(b, "\n"); break;
    592 156 50         case CLOSE_H2: MDS_BUF_LIT(b, "\n"); break;
    593 60 50         case CLOSE_H3: MDS_BUF_LIT(b, "\n"); break;
    594 8 50         case CLOSE_H4: MDS_BUF_LIT(b, "\n"); break;
    595 14 50         case CLOSE_H5: MDS_BUF_LIT(b, "\n"); break;
    596 8 50         case CLOSE_H6: MDS_BUF_LIT(b, "\n"); break;
    597 228 100         case CLOSE_CODE_FENCED: MDS_BUF_LIT(b, "\n"); break;
    598 340 50         case CLOSE_CODE_INDENTED: MDS_BUF_LIT(b, "\n"); break;
    599 176 100         case CLOSE_BLOCKQUOTE: MDS_BUF_LIT(b, "\n");
    600 176 50         if (st->tight_top > 0) st->tight_top--; break;
    601 42 50         case CLOSE_OL: MDS_BUF_LIT(b, "\n");
    602 42 50         if (st->tight_top > 0) st->tight_top--; break;
    603 59 50         case CLOSE_UL: MDS_BUF_LIT(b, "\n");
    604 59 50         if (st->tight_top > 0) st->tight_top--; break;
    605 55 50         case CLOSE_OL_TIGHT: MDS_BUF_LIT(b, "\n"); st->tight_depth--;
    606 55 50         if (st->tight_top > 0) st->tight_top--; break;
    607 221 50         case CLOSE_UL_TIGHT: MDS_BUF_LIT(b, "\n"); st->tight_depth--;
    608 221 50         if (st->tight_top > 0) st->tight_top--; break;
    609 591 50         case CLOSE_LI: st->need_nl_next = 0; st->li_first_block = 0;
    610 591           MDS_BUF_LIT(b, "\n"); break;
    611 393           case CLOSE_TIGHT_PARA: st->need_nl_next = 1; break;
    612 136 100         case CLOSE_TABLE: MDS_BUF_LIT(b, "
    \n"); break;
    613 136 50         case CLOSE_THEAD: MDS_BUF_LIT(b, "\n"); break;
    614 112 100         case CLOSE_TBODY: MDS_BUF_LIT(b, "
    615 328 100         case CLOSE_TR: MDS_BUF_LIT(b, "
    616 282 50         case CLOSE_TH: MDS_BUF_LIT(b, "\n"); break;
    617 418 100         case CLOSE_TD: MDS_BUF_LIT(b, "
    618 18 50         case CLOSE_FN_SECTION: MDS_BUF_LIT(b, "\n\n"); break;
    619 0           case CLOSE_FN_DEF_SKIP: st->fn_skip = 0; break;
    620 18           case CLOSE_FN_DEF: {
    621             /* Sub-scanned body has now emitted all its blocks. If the very
    622             * last bytes are `

    \n` we rewind 5 bytes and inject the
    623             * backref(s) before the

    ; otherwise the def ended in a
    624             * code block / quote / etc. and the backref goes after as a
    625             * bare . One backref per ref-instance; first is plain `โ†ฉ`,
    626             * subsequent are `โ†ฉN`. */
    627             unsigned idx;
    628             unsigned uses;
    629             int inject_in_p;
    630             unsigned u;
    631             char nbuf[16];
    632             int nn;
    633             char ibuf[32];
    634             int in;
    635              
    636 18           idx = st->fn_in_def;
    637 18 50         uses = (idx >= 1 && idx <= st->fn_count) ? st->fn_uses[idx - 1] : 1;
        50          
    638 18           inject_in_p = 0;
    639 18 50         if (b->cur - b->base >= 5 && memcmp(b->cur - 5, "

    \n", 5) == 0) {
        100          
    640 15           b->cur -= 5;
    641 15           inject_in_p = 1;
    642             }
    643 42 100         for (u = 1; u <= uses; u++) {
    644             /* Inside

    : separate anchors from preceding content with

    645             * a space. As a bare trailing anchor (after /
    646             * etc.) no leading space โ€” the newline from the prior block
    647             * already provides the visual gap. Subsequent anchors in the
    648             * same line always get a single space separator. */
    649 24 100         if (inject_in_p || u > 1) MDS_BUF_LIT(b, " ");
        50          
    650             MDS_BUF_LIT(b, "
    651 24           write_fn_label_attr(aTHX_ b, st->fn_in_def_label, st->fn_in_def_label_len);
    652 24 100         if (u > 1) {
    653 6           nn = snprintf(nbuf, sizeof nbuf, "-%u", u);
    654 6 50         if (nn > 0) mds_buf_write(aTHX_ b, nbuf, (size_t)nn);
        50          
    655             }
    656             MDS_BUF_LIT(b, "\" class=\"footnote-backref\" data-footnote-backref data-footnote-backref-idx=\"");
    657 24 100         if (u == 1) in = snprintf(ibuf, sizeof ibuf, "%u", idx);
    658 6           else in = snprintf(ibuf, sizeof ibuf, "%u-%u", idx, u);
    659 24 50         if (in > 0) mds_buf_write(aTHX_ b, ibuf, (size_t)in);
        50          
    660             MDS_BUF_LIT(b, "\" aria-label=\"Back to reference ");
    661 24 50         if (in > 0) mds_buf_write(aTHX_ b, ibuf, (size_t)in);
        50          
    662             MDS_BUF_LIT(b, "\">\xe2\x86\xa9");
    663 24 100         if (u > 1) {
    664             MDS_BUF_LIT(b, "");
    665 6           nn = snprintf(nbuf, sizeof nbuf, "%u", u);
    666 6 50         if (nn > 0) mds_buf_write(aTHX_ b, nbuf, (size_t)nn);
        50          
    667             MDS_BUF_LIT(b, "");
    668             }
    669             MDS_BUF_LIT(b, "");
    670             }
    671 18 100         if (inject_in_p) {
    672             MDS_BUF_LIT(b, "

    \n");
    673             } else {
    674             MDS_BUF_LIT(b, "\n");
    675             }
    676             MDS_BUF_LIT(b, "\n");
    677 18           st->fn_in_def = 0;
    678 18           st->fn_in_def_label = NULL;
    679 18           st->fn_in_def_label_len = 0;
    680 18           break;
    681             }
    682             }
    683             }
    684              
    685             /* ------------------ GFM autolink extension (ยง6.9) ------------------ */
    686              
    687             static void write_url_attr(pTHX_ mds_buf* b, const char* s, size_t n);
    688              
    689             /* GFM footnote label encoder. Percent-encodes bytes that would be unsafe
    690             * in an HTML attribute / URL fragment. Per GFM expected output, leaves
    691             * unreserved + a few sub-delim bytes literal: `( ) ! * - . _ ~ /` and
    692             * alphanumerics. Everything else (incl. control bytes, space, `"`, `<`,
    693             * `>`, `&`, `\\`, `^`, backtick, `{ | }`, `[ ]`, `%`, `?`, `#`, `+`,
    694             * `=`, `,`, `;`, `:`, `'`, `@`, `$`, `/`) is %XX-encoded. Multi-byte
    695             * UTF-8 sequences are encoded byte-by-byte. */
    696 90           static void write_fn_label_attr(pTHX_ mds_buf* b, const char* s, size_t n) {
    697             static const char hex[] = "0123456789ABCDEF";
    698             size_t i;
    699 1110 100         for (i = 0; i < n; i++) {
    700 1020           unsigned char c = (unsigned char)s[i];
    701 918 100         int safe = (c >= '0' && c <= '9') ||
    702 996 100         (c >= 'A' && c <= 'Z') ||
        50          
    703 996 100         (c >= 'a' && c <= 'z') ||
        50          
    704 162 100         c == '(' || c == ')' || c == '!' || c == '*' ||
        100          
        50          
        50          
    705 2124 100         c == '-' || c == '.' || c == '_' || c == '~' ||
        100          
        50          
        50          
        50          
    706 84 100         c == '/';
    707 1020 100         if (safe) {
    708             mds_buf_write(aTHX_ b, (const char*)&c, 1);
    709             } else {
    710 72 50         char esc[3] = { '%', hex[c >> 4], hex[c & 0xF] };
    711             mds_buf_write(aTHX_ b, esc, 3);
    712             }
    713             }
    714 90           }
    715              
    716             /* Look up label in render_state's used set. Returns 1-based ordinal
    717             * if found, 0 if not. Linear scan; footnote counts are tiny. */
    718 42           static unsigned fn_lookup(render_state* st, const char* s, size_t n) {
    719             size_t i;
    720 78 100         for (i = 0; i < st->fn_count; i++) {
    721 60 100         if (st->fn_label_lens[i] == n &&
    722 24 50         memcmp(st->fn_labels[i], s, n) == 0) {
    723 24           return (unsigned)(i + 1);
    724             }
    725             }
    726 18           return 0;
    727             }
    728              
    729             /* Register or increment usage. Returns 1-based ordinal. */
    730 24           static unsigned fn_register(render_state* st, const char* s, size_t n) {
    731 24           unsigned idx = fn_lookup(st, s, n);
    732 24 100         if (idx) {
    733 6           st->fn_uses[idx - 1]++;
    734 6           return idx;
    735             }
    736 18 100         if (st->fn_count == st->fn_cap) {
    737 9 50         size_t nc = st->fn_cap ? st->fn_cap * 2 : 8;
    738 9           st->fn_labels = (const char**)realloc(st->fn_labels, nc * sizeof(*st->fn_labels));
    739 9           st->fn_label_lens = (size_t*)realloc(st->fn_label_lens, nc * sizeof(*st->fn_label_lens));
    740 9           st->fn_uses = (unsigned*)realloc(st->fn_uses, nc * sizeof(*st->fn_uses));
    741 9           st->fn_cap = nc;
    742             }
    743             /* Label bytes are stable in the arena (footnote table arena_dup). */
    744 18           st->fn_labels[st->fn_count] = s;
    745 18           st->fn_label_lens[st->fn_count] = n;
    746 18           st->fn_uses[st->fn_count] = 1;
    747 18           st->fn_count++;
    748 18           return (unsigned)st->fn_count;
    749             }
    750              
    751 10171           static int gfm_word_byte(unsigned char c) {
    752 10171 100         return isalnum(c) || c == '_';
        100          
    753             }
    754              
    755             /* Domain scanner. p..end points at start of would-be domain. Returns
    756             * length on success (>=1 dot, valid segments, no underscores in last
    757             * two segments), else 0. */
    758 137           static size_t gfm_scan_domain_ex(const char* p, const char* end, int need_dot) {
    759             const char* q;
    760             int dots;
    761             const char* last_dot;
    762             const char* prev_dot;
    763             const char* check_end;
    764             const char* r;
    765             const char* l_s;
    766             const char* l_e;
    767             const char* m_s;
    768             const char* m_e;
    769              
    770 137           q = p;
    771 137           dots = 0;
    772 137           last_dot = NULL;
    773 137           prev_dot = NULL;
    774 1527 100         while (q < end) {
    775 1459           unsigned char c = (unsigned char)*q;
    776 1459 100         if (c == '.') {
    777 176 100         if (q == p || q[-1] == '.') break;
        50          
    778 164           prev_dot = last_dot; last_dot = q; dots++;
    779 164           q++; continue;
    780             }
    781 1283 100         if (isalnum(c) || c == '-' || c == '_' || c >= 0x80) { q++; continue; }
        50          
        100          
        100          
    782 57           break;
    783             }
    784 137 100         if (q == p) return 0;
    785 125 100         if (need_dot && dots == 0) return 0;
        50          
    786             /* Permit trailing dot in domain; strip-trail handles it. Roll q back
    787             * just past the dot for the underscore check. */
    788 125           check_end = q;
    789 125 50         if (check_end > p && check_end[-1] == '.') {
        100          
    790 9           check_end--;
    791             /* recompute last_dot/prev_dot if the trailing dot was the last one */
    792 9 50         if (last_dot == check_end) {
    793 9           last_dot = prev_dot;
    794 9           prev_dot = NULL;
    795 72 50         for (r = p; r < last_dot; r++) if (*r == '.') prev_dot = r;
        100          
    796             }
    797             }
    798 125 100         if (last_dot) {
    799             /* No underscores in last two segments. */
    800 122           l_s = last_dot + 1;
    801 122           l_e = check_end;
    802 467 100         for (r = l_s; r < l_e; r++) if (*r == '_') return 0;
        100          
    803 119 100         m_s = prev_dot ? prev_dot + 1 : p;
    804 119           m_e = last_dot;
    805 844 100         for (r = m_s; r < m_e; r++) if (*r == '_') return 0;
        100          
    806             } else {
    807             /* No dot at all (only allowed when need_dot==0): also disallow `_`. */
    808 24 50         for (r = p; r < check_end; r++) if (*r == '_') return 0;
        100          
    809             }
    810 119           return (size_t)(q - p);
    811             }
    812              
    813 96           static size_t gfm_scan_domain(const char* p, const char* end) {
    814 96           return gfm_scan_domain_ex(p, end, 1);
    815             }
    816              
    817             /* Strip GFM trailing-punct from URL run p..q. Returns new q. */
    818 125           static const char* gfm_strip_trail(const char* p, const char* q) {
    819 152 50         while (q > p) {
    820 152           unsigned char c = (unsigned char)q[-1];
    821 152 50         if (c == '?' || c == '!' || c == '.' || c == ',' ||
        50          
        100          
        50          
        50          
    822 134 50         c == ':' || c == '*' || c == '_' || c == '~' ||
        50          
        50          
        100          
    823 131 100         c == '\'' || c == '"') {
    824 24           q--; continue;
    825             }
    826 128 100         if (c == ')') {
    827 3           int opens = 0, closes = 0;
    828             const char* r;
    829 48 100         for (r = p; r < q; r++) {
    830 45 50         if (*r == '(') opens++;
    831 45 100         else if (*r == ')') closes++;
    832             }
    833 3 50         if (closes > opens) { q--; continue; }
    834 0           break;
    835             }
    836 125 50         if (c == ';') {
    837 0           const char* r = q - 1;
    838 0 0         while (r > p && isalnum((unsigned char)*r)) r--;
        0          
    839 0 0         if (r > p && *r == '&') { q = r; continue; }
        0          
    840             }
    841 125           break;
    842             }
    843 125           return q;
    844             }
    845              
    846             /* Scan body bytes for a URL after a scheme (http/https/ftp) or after a
    847             * www. prefix. Returns length consumed past start of domain, 0 if no
    848             * valid URL. */
    849 41           static size_t gfm_scan_url_body(const char* p, const char* end) {
    850             size_t dl;
    851             const char* q;
    852 41           dl = gfm_scan_domain_ex(p, end, 0);
    853 41 50         if (!dl) return 0;
    854 41           q = p + dl;
    855             /* Optional path: byte run until whitespace, '<', or end. */
    856 89 100         while (q < end) {
    857 57           unsigned char c = (unsigned char)*q;
    858 57 100         if (c <= 0x20 || c == '<') break;
        50          
    859 48           q++;
    860             }
    861             /* Strip trailing punctuation, applied to entire URL [p,q). */
    862 41           q = gfm_strip_trail(p, q);
    863 41           return (size_t)(q - p);
    864             }
    865              
    866             /* Email body scan starting at @. base is start of text run so we can
    867             * walk backwards. Sets *l_out to local-part length before @, *d_out to
    868             * domain length after @. Returns 1 on success. */
    869 72           static int gfm_scan_email_at(const char* base, const char* at, const char* end,
    870             size_t* l_out, size_t* d_out) {
    871             const char* L;
    872             const char* D;
    873             size_t dl;
    874             const char* de;
    875              
    876             /* local part: [a-zA-Z0-9._+-] walking back */
    877 72           L = at;
    878 552 100         while (L > base) {
    879 480           unsigned char c = (unsigned char)L[-1];
    880 480 100         if (isalnum(c) || c == '.' || c == '_' || c == '+' || c == '-') L--;
        100          
        100          
        100          
        50          
    881             else break;
    882             }
    883 72 50         if (L == at) return 0;
    884 72 50         if (*L == '.') return 0; /* can't start with dot */
    885             /* domain: alphanum, _, -, ., must have a dot, no _ in last two segs */
    886 72           D = at + 1;
    887 72           dl = gfm_scan_domain(D, end);
    888 72 100         if (!dl) return 0;
    889             /* Trailing punct stripping on domain. */
    890 60           de = gfm_strip_trail(D, D + dl);
    891 60           dl = (size_t)(de - D);
    892 60 50         if (!dl) return 0;
    893 60           *l_out = (size_t)(at - L);
    894 60           *d_out = dl;
    895 60           return 1;
    896             }
    897              
    898             /* Emit a chunk that may contain autolinks; non-autolink bytes go through
    899             * html_escape. Caller must have already handled li_check / image alt. */
    900             /* Emit a chunk that may contain autolinks; non-autolink bytes go through
    901             * html_escape. Caller must have already handled li_check / image alt. */
    902             static void gfm_emit_autolinked(pTHX_ render_state* st,
    903             const char* s, size_t n);
    904              
    905             /* Flush any buffered text accumulated from consecutive cb_text events.
    906             * Called by every non-text event so the AUTOLINK scanner sees the whole
    907             * contiguous text run (the inline tokenizer splits at intra-word `_`/`*`). */
    908 14947           static void flush_pending_text(pTHX_ render_state* st) {
    909             size_t n;
    910 14947 100         if (st->pending_len == 0) return;
    911 1415           n = st->pending_len;
    912 1415           st->pending_len = 0; /* clear first so re-entry is safe */
    913 1415           gfm_emit_autolinked(aTHX_ st, st->pending_text, n);
    914             }
    915              
    916 1415           static void gfm_emit_autolinked(pTHX_ render_state* st,
    917             const char* s, size_t n) {
    918 1415           mds_buf* b = st->buf;
    919 1415           const char* end = s + n;
    920 1415           const char* run = s;
    921 1415           const char* p = s;
    922 13001 100         while (p < end) {
    923             unsigned char c;
    924             int boundary;
    925             size_t url_len;
    926             size_t scheme_len;
    927             int is_email;
    928             const char* scheme_prefix;
    929             size_t dl;
    930              
    931 11586           c = (unsigned char)*p;
    932 11586 100         boundary = (p == s) ? 1 : !gfm_word_byte((unsigned char)p[-1]);
        100          
    933 11586 100         if (!boundary) { p++; continue; }
    934             /* http://, https://, ftp:// */
    935 3395           url_len = 0; scheme_len = 0; is_email = 0;
    936 3395           scheme_prefix = NULL;
    937 3395 100         if (c == 'h' && (size_t)(end - p) >= 7 &&
        100          
    938 81 100         memcmp(p, "http://", 7) == 0) {
    939 35           scheme_len = 7;
    940 35           url_len = gfm_scan_url_body(p + 7, end);
    941 3360 100         } else if (c == 'h' && (size_t)(end - p) >= 8 &&
        100          
    942 40 100         memcmp(p, "https://", 8) == 0) {
    943 6           scheme_len = 8;
    944 6           url_len = gfm_scan_url_body(p + 8, end);
    945 3354 100         } else if (c == 'f' && (size_t)(end - p) >= 6 &&
        100          
    946 16 50         memcmp(p, "ftp://", 6) == 0) {
    947 0           scheme_len = 6;
    948 0           url_len = gfm_scan_url_body(p + 6, end);
    949 3354 100         } else if (c == 'w' && (size_t)(end - p) >= 4 &&
        50          
    950 38 100         memcmp(p, "www.", 4) == 0) {
    951 24           scheme_len = 0;
    952             /* www. variant: require at least one further dot in domain. */
    953 24           dl = gfm_scan_domain(p, end);
    954 24 100         if (dl) {
    955             /* Optional path. */
    956 18           const char* qq = p + dl;
    957 39 100         while (qq < end) {
    958 24           unsigned char x = (unsigned char)*qq;
    959 24 100         if (x <= 0x20 || x == '<') break;
        50          
    960 21           qq++;
    961             }
    962 18           qq = gfm_strip_trail(p, qq);
    963 18           url_len = (size_t)(qq - p);
    964             }
    965 24 100         if (url_len) scheme_prefix = "http://";
    966             }
    967 3395 100         if (scheme_len || scheme_prefix) {
        100          
    968 59 50         if (url_len) {
    969 59 100         size_t total = (scheme_prefix ? url_len : scheme_len + url_len);
    970             /* flush text before */
    971 59 100         if (p > run) html_escape(aTHX_ b, run, (size_t)(p - run));
    972             MDS_BUF_LIT(b, "
    973 59 100         if (scheme_prefix) mds_buf_write(aTHX_ b, scheme_prefix, strlen(scheme_prefix));
        50          
    974 59           write_url_attr(aTHX_ b, p, total);
    975             MDS_BUF_LIT(b, "\">");
    976 59           html_escape(aTHX_ b, p, total);
    977             MDS_BUF_LIT(b, "");
    978 59           p += total;
    979 59           run = p;
    980 59           continue;
    981             }
    982             }
    983             /* xmpp:user@host or mailto:user@host */
    984 3336 100         if (c == 'x' && (size_t)(end - p) >= 5 && memcmp(p, "xmpp:", 5) == 0) {
        100          
        100          
    985 12           const char* at = (const char*)memchr(p + 5, '@', (size_t)(end - p - 5));
    986 12 50         if (at) {
    987             size_t ll, dl;
    988 12 50         if (gfm_scan_email_at(p + 5, at, end, &ll, &dl) &&
    989 12 50         (const char*)at - ll == p + 5) {
    990 12           size_t total = 5 + ll + 1 + dl;
    991             /* xmpp: includes optional /path in the autolink. */
    992 12           const char* q = p + total;
    993 12 100         if (q < end && *q == '/') {
        100          
    994 57 100         while (q < end) {
    995 51           unsigned char x = (unsigned char)*q;
    996 51 50         if (x <= 0x20 || x == '<') break;
        50          
    997 51           q++;
    998             }
    999 6           q = gfm_strip_trail(p + total, q);
    1000 6           total = (size_t)(q - p);
    1001             }
    1002 12 50         if (p > run) html_escape(aTHX_ b, run, (size_t)(p - run));
    1003             MDS_BUF_LIT(b, "
    1004 12           write_url_attr(aTHX_ b, p, total);
    1005             MDS_BUF_LIT(b, "\">");
    1006 12           html_escape(aTHX_ b, p, total);
    1007             MDS_BUF_LIT(b, "");
    1008 12           p += total;
    1009 12           run = p;
    1010 12           continue;
    1011             }
    1012             }
    1013             }
    1014 3324 100         if (c == 'm' && (size_t)(end - p) >= 7 && memcmp(p, "mailto:", 7) == 0) {
        100          
        100          
    1015 21           const char* at = (const char*)memchr(p + 7, '@', (size_t)(end - p - 7));
    1016 21 50         if (at) {
    1017             size_t ll, dl;
    1018 21 50         if (gfm_scan_email_at(p + 7, at, end, &ll, &dl) &&
    1019 21 50         (const char*)at - ll == p + 7) {
    1020 21           size_t total = 7 + ll + 1 + dl;
    1021 21 100         if (p > run) html_escape(aTHX_ b, run, (size_t)(p - run));
    1022             MDS_BUF_LIT(b, "
    1023 21           write_url_attr(aTHX_ b, p + 7, total - 7);
    1024             MDS_BUF_LIT(b, "\">");
    1025 21           html_escape(aTHX_ b, p, total);
    1026             MDS_BUF_LIT(b, "");
    1027 21           p += total;
    1028 21           run = p;
    1029 21           continue;
    1030             }
    1031             }
    1032             }
    1033             /* Bare email: look forward for '@', not just at this byte. */
    1034 3303 100         if (isalnum(c) || c == '.' || c == '_' || c == '+' || c == '-') {
        100          
        100          
        50          
        100          
    1035             /* Find next '@' within this text run, bounded by whitespace. */
    1036 2585           const char* q = p;
    1037 12358 100         while (q < end) {
    1038 11248           unsigned char x = (unsigned char)*q;
    1039 11248 100         if (x == '@') break;
    1040 11209 100         if (!(isalnum(x) || x == '.' || x == '_' || x == '+' || x == '-')) {
        100          
        100          
        100          
        100          
    1041 1436           q = NULL; break;
    1042             }
    1043 9773           q++;
    1044             }
    1045 2585 100         if (q && q < end && *q == '@') {
        100          
        50          
    1046             size_t ll, dl;
    1047 39 100         if (gfm_scan_email_at(p, q, end, &ll, &dl) &&
    1048 27 50         q - ll == p) {
    1049 27           size_t total = ll + 1 + dl;
    1050 27 100         if (p > run) html_escape(aTHX_ b, run, (size_t)(p - run));
    1051             MDS_BUF_LIT(b, "
    1052 27           write_url_attr(aTHX_ b, p, total);
    1053             MDS_BUF_LIT(b, "\">");
    1054 27           html_escape(aTHX_ b, p, total);
    1055             MDS_BUF_LIT(b, "");
    1056 27           p += total;
    1057 27           run = p;
    1058 27           continue;
    1059             }
    1060             }
    1061             }
    1062 3276           p++;
    1063             }
    1064 1415 100         if (run < end) html_escape(aTHX_ b, run, (size_t)(end - run));
    1065 1415           }
    1066              
    1067 5888           static void cb_text(void* ud, const char* s, size_t n) {
    1068 5888           render_state* st = (render_state*)ud;
    1069             dTHXa(st->my_perl);
    1070 5888 50         if (st->fn_skip) return;
    1071 5888 100         if (st->li_check_pending) {
    1072             /* Inline tokenizer may emit `[` as its own text event, then the
    1073             * remainder. Coalesce up to two text events to detect [ ]/[x]/[X]
    1074             * followed by space. State 1 = fresh LI; state 2 = saw lone `[`. */
    1075 612 100         if (st->li_check_pending == 1 && n == 1 && s[0] == '[') {
        100          
        100          
    1076 60           st->li_check_pending = 2;
    1077 60           return;
    1078             }
    1079 552 100         if (st->li_check_pending == 2) {
    1080 60           st->li_check_pending = 0;
    1081 60 50         if (n >= 3 && s[1] == ']' && s[2] == ' ' &&
        50          
        50          
    1082 60 100         (s[0] == ' ' || s[0] == 'x' || s[0] == 'X')) {
        100          
        50          
    1083 36 100         if (s[0] == ' ')
    1084 19 50         MDS_BUF_LIT(st->buf, " ");
    1085             else
    1086 17 100         MDS_BUF_LIT(st->buf, " ");
    1087 36           s += 3; n -= 3;
    1088 36 50         if (n == 0) return;
    1089             } else {
    1090             /* Not a task marker โ€” flush buffered `[` then fall through. */
    1091 24 50         MDS_BUF_LIT(st->buf, "[");
    1092             }
    1093             } else {
    1094 492           st->li_check_pending = 0;
    1095 492 100         if (n >= 4 && s[0] == '[' && s[2] == ']' && s[3] == ' ' &&
        50          
        0          
        0          
    1096 0 0         (s[1] == ' ' || s[1] == 'x' || s[1] == 'X')) {
        0          
        0          
    1097 0 0         if (s[1] == ' ')
    1098 0 0         MDS_BUF_LIT(st->buf, " ");
    1099             else
    1100 0 0         MDS_BUF_LIT(st->buf, " ");
    1101 0           s += 4; n -= 4;
    1102 0 0         if (n == 0) return;
    1103             }
    1104             }
    1105             }
    1106 5828 100         if (st->image_depth > 0) {
    1107             /* For alt: append raw bytes (no HTML escape โ€” done later when
    1108             * emitting alt="..." attribute via html_escape on accumulated). */
    1109 100           alt_append(st, s, n);
    1110 100           return;
    1111             }
    1112 5728 100         if (st->flags & MDS_FLAG_AUTOLINK) {
    1113             /* Accumulate; flushed by any non-text callback. */
    1114 1532           size_t need = st->pending_len + n;
    1115 1532 100         if (need > st->pending_cap) {
    1116             size_t nc;
    1117             char* np;
    1118 448 100         nc = st->pending_cap ? st->pending_cap : 64;
    1119 457 100         while (nc < need) nc *= 2;
    1120 448           np = (char*)realloc(st->pending_text, nc);
    1121 448 50         if (!np) { /* OOM: bypass coalescing for this chunk */
    1122 0           flush_pending_text(aTHX_ st);
    1123 0           gfm_emit_autolinked(aTHX_ st, s, n);
    1124 0           return;
    1125             }
    1126 448           st->pending_text = np;
    1127 448           st->pending_cap = nc;
    1128             }
    1129 1532           memcpy(st->pending_text + st->pending_len, s, n);
    1130 1532           st->pending_len += n;
    1131 1532           return;
    1132             }
    1133 4196           html_escape(aTHX_ st->buf, s, n);
    1134             }
    1135              
    1136 339           static void cb_raw(void* ud, const char* s, size_t n) {
    1137 339           render_state* st = (render_state*)ud;
    1138             dTHXa(st->my_perl);
    1139 339 50         if (st->fn_skip) return;
    1140 339           flush_pending_text(aTHX_ st);
    1141 339 50         if (st->image_depth > 0) {
    1142             /* raw HTML inside alt: stripped to text per spec; just append as text */
    1143 0           alt_append(st, s, n);
    1144 0           return;
    1145             }
    1146 339 100         if (st->flags & MDS_FLAG_DISALLOW_RAW_HTML) {
    1147             /* GFM ยง6.11: rewrite the opening `<` of these tags as `<`. */
    1148             static const char* const banned[] = {
    1149             "title", "textarea", "style", "xmp", "iframe",
    1150             "noembed", "noframes", "script", "plaintext", NULL
    1151             };
    1152 57           const char* run = s;
    1153 57           const char* end = s + n;
    1154             const char* p;
    1155             const char* q;
    1156             const char* name;
    1157             size_t nlen;
    1158             int hit;
    1159             int i;
    1160             size_t bl;
    1161 1578 100         for (p = s; p < end; p++) {
    1162 1521 100         if (*p != '<') continue;
    1163 90           q = p + 1;
    1164 90 50         if (q < end && *q == '/') q++;
        100          
    1165 90           name = q;
    1166 420 50         while (q < end && isalnum((unsigned char)*q)) q++;
        100          
    1167 90           nlen = (size_t)(q - name);
    1168 90 100         if (nlen == 0) continue;
    1169 74           hit = 0;
    1170 648 100         for (i = 0; banned[i]; i++) {
    1171 590           bl = strlen(banned[i]);
    1172 590 100         if (nlen == bl && strncasecmp(name, banned[i], bl) == 0) {
        100          
    1173 16           hit = 1; break;
    1174             }
    1175             }
    1176 74 100         if (!hit) continue;
    1177 16 100         if (p > run) mds_buf_write(aTHX_ st->buf, run, (size_t)(p - run));
        50          
    1178 16 50         mds_buf_write(aTHX_ st->buf, "<", 4);
    1179 16           run = p + 1;
    1180             }
    1181 57 50         if (run < end) mds_buf_write(aTHX_ st->buf, run, (size_t)(end - run));
        50          
    1182 57           return;
    1183             }
    1184 282 50         mds_buf_write(aTHX_ st->buf, s, n);
    1185             }
    1186              
    1187             /* ------------------ inline callbacks ------------------ */
    1188              
    1189 586           static void write_url_attr_impl(pTHX_ mds_buf* b, const char* s, size_t n, int unesc) {
    1190             /* Percent-escape URL: leave already-safe chars; escape <>"'`{}|\ ^ and ws.
    1191             * Then HTML-escape & to &. When unesc=1, also unescape \X for ASCII
    1192             * punctuation (ยง6.1 โ€” backslash escapes work in link destinations and
    1193             * titles, but NOT in autolinks). Entities (named + numeric) are expanded
    1194             * before percent-encoding so the resulting UTF-8 bytes are escaped. */
    1195             static const char hex[] = "0123456789ABCDEF";
    1196             size_t i;
    1197 6658 100         for (i = 0; i < n; ) {
    1198 6072           unsigned char c = (unsigned char)s[i];
    1199 6072 100         if (unesc && c == '\\' && i + 1 < n) {
        100          
        50          
    1200 36           unsigned char nx = (unsigned char)s[i+1];
    1201 36 50         if ((nx >= 0x21 && nx <= 0x2F) ||
        100          
        50          
    1202 9 100         (nx >= 0x3A && nx <= 0x40) ||
        50          
    1203 6 50         (nx >= 0x5B && nx <= 0x60) ||
        50          
    1204 0 0         (nx >= 0x7B && nx <= 0x7E)) {
    1205 30           c = nx;
    1206 30           i += 2;
    1207 30           goto emit_byte;
    1208             }
    1209             }
    1210 6042 100         if (c == '&') {
    1211             char ebuf[8]; size_t elen;
    1212 21           size_t k = mds_expand_entity_at(s, i, n, ebuf, &elen);
    1213 21 100         if (k) {
    1214             /* Re-feed decoded bytes through the same loop logic, so
    1215             * non-ASCII gets percent-encoded and '&' itself becomes
    1216             * & (CommonMark expects e.g. & โ†’ %26? No, spec
    1217             * says ENT decoded to '&' renders as & in href, and
    1218             * ä โ†’ รค โ†’ %C3%A4). */
    1219             size_t z;
    1220 45 100         for (z = 0; z < elen; z++) {
    1221 30           unsigned char ec = (unsigned char)ebuf[z];
    1222 60 50         if (ec == '&') { mds_buf_write(aTHX_ b, "&", 5); continue; }
    1223 30 50         if (ec <= 0x20 || ec == 0x7f ||
        50          
    1224 30 50         ec >= 0x80 ||
    1225 0 0         ec == '"' || ec == '<' || ec == '>' || ec == '`' ||
        0          
        0          
        0          
    1226 0 0         ec == '{' || ec == '}' || ec == '|' || ec == '\\' ||
        0          
        0          
        0          
    1227 0 0         ec == '^' || ec == '[' || ec == ']') {
        0          
        0          
    1228 30 50         char esc[3] = { '%', hex[ec >> 4], hex[ec & 0xF] };
    1229             mds_buf_write(aTHX_ b, esc, 3);
    1230 30           continue;
    1231             }
    1232             mds_buf_write(aTHX_ b, (const char*)&ec, 1);
    1233             }
    1234 15           i += k;
    1235 15           continue;
    1236             }
    1237             mds_buf_write(aTHX_ b, "&", 5);
    1238 6           i++;
    1239 6           continue;
    1240             }
    1241 6021           i++;
    1242 6051           emit_byte:
    1243 6051 100         if (c <= 0x20 || c == 0x7f || c >= 0x80 ||
        50          
        100          
    1244 5979 100         c == '"' || c == '<' || c == '>' || c == '`' ||
        50          
        50          
        100          
    1245 5964 50         c == '{' || c == '}' || c == '|' || c == '\\' ||
        50          
        50          
        100          
    1246 5949 50         c == '^' || c == '[' || c == ']') {
        100          
        100          
    1247 117 50         char esc[3] = { '%', hex[c >> 4], hex[c & 0xF] };
    1248             mds_buf_write(aTHX_ b, esc, 3);
    1249 117           continue;
    1250             }
    1251             mds_buf_write(aTHX_ b, (const char*)&c, 1);
    1252             }
    1253 586           }
    1254              
    1255 529           static void write_url_attr(pTHX_ mds_buf* b, const char* s, size_t n) {
    1256 529           write_url_attr_impl(aTHX_ b, s, n, /*unesc=*/1);
    1257 529           }
    1258 57           static void write_url_attr_raw(pTHX_ mds_buf* b, const char* s, size_t n) {
    1259 57           write_url_attr_impl(aTHX_ b, s, n, /*unesc=*/0);
    1260 57           }
    1261              
    1262 1743           static void cb_enter_inline(void* ud, mds_inline_type t, const mds_inline_detail* d) {
    1263 1743           render_state* st = (render_state*)ud;
    1264             dTHXa(st->my_perl);
    1265 1743           mds_buf* b = st->buf;
    1266 1743 50         if (st->fn_skip) return;
    1267 1743           flush_pending_text(aTHX_ st);
    1268             /* If the LIST_ITEM had a lone `[` buffered, flush it before this inline. */
    1269 1743 50         if (st->li_check_pending == 2) {
    1270             MDS_BUF_LIT(b, "[");
    1271 0           st->li_check_pending = 0;
    1272 1743 100         } else if (st->li_check_pending && t != MDS_INL_TEXT) {
        50          
    1273 6           st->li_check_pending = 0;
    1274             }
    1275             /* Inside an image: alt text is plain text per CommonMark. Suppress all
    1276             * markup wrappers; child TEXT/RAW bytes are routed through alt_append. */
    1277 1743 100         if (st->image_depth > 0 && t != MDS_INL_IMAGE) {
        100          
    1278 21           return;
    1279             }
    1280 1722           switch (t) {
    1281 656 50         case MDS_INL_EMPH: MDS_BUF_LIT(b, ""); break;
    1282 474 50         case MDS_INL_STRONG: MDS_BUF_LIT(b, ""); break;
    1283 52 50         case MDS_INL_STRIKE: MDS_BUF_LIT(b, ""); break;
    1284 288 100         case MDS_INL_CODE: MDS_BUF_LIT(b, ""); break;
    1285 322           case MDS_INL_SOFTBREAK:
    1286 322 100         if (st->flags & MDS_FLAG_HARD_BREAKS) MDS_BUF_LIT(b, "
    \n");
    1287             else MDS_BUF_LIT(b, "\n");
    1288 322           break;
    1289 58 50         case MDS_INL_LINEBREAK: MDS_BUF_LIT(b, "
    \n"); break;
    1290 337 50         case MDS_INL_LINK: {
    1291             MDS_BUF_LIT(b, "
    1292 674           if ((st->flags & MDS_FLAG_UNSAFE) ||
    1293 337           !url_is_dangerous(d->u.link.href, d->u.link.href_len, 0)) {
    1294 334           write_url_attr(aTHX_ b, d->u.link.href, d->u.link.href_len);
    1295             }
    1296             MDS_BUF_LIT(b, "\"");
    1297 337 100         if (d->u.link.title_len) {
    1298             MDS_BUF_LIT(b, " title=\"");
    1299 100           html_escape_unesc(aTHX_ b, d->u.link.title, d->u.link.title_len);
    1300             MDS_BUF_LIT(b, "\"");
    1301             }
    1302             MDS_BUF_LIT(b, ">");
    1303 337           break;
    1304             }
    1305 79           case MDS_INL_IMAGE: {
    1306 79 100         if (st->image_depth == 0) {
    1307 76           st->alt_len = 0;
    1308             }
    1309 79           st->image_depth++;
    1310 79 50         if (g_img_stack_top < IMG_STACK_MAX) {
    1311 79           g_img_stack[g_img_stack_top].href = d->u.image.href;
    1312 79           g_img_stack[g_img_stack_top].hlen = d->u.image.href_len;
    1313 79           g_img_stack[g_img_stack_top].title = d->u.image.title;
    1314 79           g_img_stack[g_img_stack_top].tlen = d->u.image.title_len;
    1315 79           g_img_stack_top++;
    1316             }
    1317 79           break;
    1318             }
    1319 57 50         case MDS_INL_AUTOLINK: {
    1320             MDS_BUF_LIT(b, "
    1321 57 100         if (d->u.autolink.is_email) {
    1322             MDS_BUF_LIT(b, "mailto:");
    1323             }
    1324 114           if ((st->flags & MDS_FLAG_UNSAFE) ||
    1325 57           !url_is_dangerous(d->u.autolink.uri, d->u.autolink.uri_len, 0)) {
    1326 57           write_url_attr_raw(aTHX_ b, d->u.autolink.uri, d->u.autolink.uri_len);
    1327             }
    1328             MDS_BUF_LIT(b, "\">");
    1329 57           html_escape(aTHX_ b, d->u.autolink.uri, d->u.autolink.uri_len);
    1330 57           break;
    1331             }
    1332 139           case MDS_INL_HTML_INLINE:
    1333             /* opens nothing; children come via cb_raw */
    1334 139           break;
    1335 24           case MDS_INL_FOOTNOTE_REF: {
    1336             const char* lab;
    1337             size_t ll;
    1338             unsigned idx;
    1339             unsigned uses;
    1340             char ibuf[16];
    1341             int in;
    1342             char nbuf[16];
    1343             int nn;
    1344              
    1345 24           lab = d->u.footnote_ref.label;
    1346 24           ll = d->u.footnote_ref.label_len;
    1347 24           idx = fn_register(st, lab, ll);
    1348 24 50         uses = st->fn_uses[idx - 1];
    1349             MDS_BUF_LIT(b, "
    1350 24           write_fn_label_attr(aTHX_ b, lab, ll);
    1351             MDS_BUF_LIT(b, "\" id=\"fnref-");
    1352 24           write_fn_label_attr(aTHX_ b, lab, ll);
    1353 24 100         if (uses > 1) {
    1354 6           nn = snprintf(nbuf, sizeof nbuf, "-%u", uses);
    1355 6 50         if (nn > 0) mds_buf_write(aTHX_ b, nbuf, (size_t)nn);
        50          
    1356             }
    1357             MDS_BUF_LIT(b, "\" data-footnote-ref>");
    1358 24           in = snprintf(ibuf, sizeof ibuf, "%u", idx);
    1359 24 50         if (in > 0) mds_buf_write(aTHX_ b, ibuf, (size_t)in);
        50          
    1360             MDS_BUF_LIT(b, "");
    1361 24           break;
    1362             }
    1363 0           case MDS_INL_TEXT:
    1364             case MDS_INL__COUNT:
    1365 0           break;
    1366             }
    1367             }
    1368              
    1369             /* (image-info stack defined above) */
    1370              
    1371 1743           static void cb_leave_inline(void* ud, mds_inline_type t) {
    1372 1743           render_state* st = (render_state*)ud;
    1373             dTHXa(st->my_perl);
    1374 1743           mds_buf* b = st->buf;
    1375 1743 50         if (st->fn_skip) return;
    1376 1743           flush_pending_text(aTHX_ st);
    1377             /* Suppress closing wrappers while we are still inside an image (alt mode).
    1378             * The leave for the image itself still fires this function with image_depth
    1379             * about to be decremented, so only short-circuit for nested non-image kinds. */
    1380 1743 100         if (st->image_depth > 0 && t != MDS_INL_IMAGE) {
        100          
    1381 21           return;
    1382             }
    1383 1722           switch (t) {
    1384 656 50         case MDS_INL_EMPH: MDS_BUF_LIT(b, ""); break;
    1385 474 50         case MDS_INL_STRONG: MDS_BUF_LIT(b, ""); break;
    1386 52 100         case MDS_INL_STRIKE: MDS_BUF_LIT(b, ""); break;
    1387 288 50         case MDS_INL_CODE: MDS_BUF_LIT(b, ""); break;
    1388 674 50         case MDS_INL_LINK: MDS_BUF_LIT(b, ""); break;
    1389 114 50         case MDS_INL_AUTOLINK: MDS_BUF_LIT(b, ""); break;
    1390 24           case MDS_INL_FOOTNOTE_REF: break; /* closing tags emitted in enter */
    1391 79           case MDS_INL_IMAGE: {
    1392 79           st->image_depth--;
    1393 79 50         if (g_img_stack_top > 0) g_img_stack_top--;
    1394 79 100         if (st->image_depth == 0) {
    1395 76           mds_buf* rb = st->buf;
    1396 76 50         img_info inf = g_img_stack[g_img_stack_top];
    1397             MDS_BUF_LIT(rb, "
    1398 152           if ((st->flags & MDS_FLAG_UNSAFE) ||
    1399 76           !url_is_dangerous(inf.href, inf.hlen, 1)) {
    1400 76           write_url_attr(aTHX_ rb, inf.href, inf.hlen);
    1401             }
    1402             MDS_BUF_LIT(rb, "\" alt=\"");
    1403 76 100         if (st->alt_len) html_escape(aTHX_ rb, st->alt, st->alt_len);
    1404             MDS_BUF_LIT(rb, "\"");
    1405 76 100         if (inf.tlen) {
    1406             MDS_BUF_LIT(rb, " title=\"");
    1407 38           html_escape_unesc(aTHX_ rb, inf.title, inf.tlen);
    1408             MDS_BUF_LIT(rb, "\"");
    1409             }
    1410             MDS_BUF_LIT(rb, " />");
    1411 76           st->alt_len = 0;
    1412             }
    1413 79           break;
    1414             }
    1415 490           case MDS_INL_SOFTBREAK:
    1416             case MDS_INL_LINEBREAK:
    1417             case MDS_INL_HTML_INLINE:
    1418             case MDS_INL_TEXT:
    1419             case MDS_INL__COUNT:
    1420 490           break;
    1421             }
    1422             }
    1423              
    1424             /* ------------------ install ------------------ */
    1425              
    1426             /* The caller allocates a `render_state` (on the stack) and passes &st as
    1427             * ud_storage. We initialise its `buf` field, capture the current aTHX,
    1428             * and wire up the callbacks. ud_out is set to point at the state. */
    1429 2443           void mds_render_html_install(mds_callbacks* cb, void** ud_out, mds_buf* buf,
    1430             unsigned flags) {
    1431             dTHX;
    1432 2443           render_state* st = (render_state*)*ud_out;
    1433             #ifdef MULTIPLICITY
    1434             st->my_perl = aTHX;
    1435             #endif
    1436 2443           st->buf = buf;
    1437 2443           st->top = 0;
    1438 2443           st->tight_depth = 0;
    1439 2443           st->image_depth = 0;
    1440 2443           st->alt = NULL; st->alt_len = 0; st->alt_cap = 0;
    1441 2443           st->flags = flags;
    1442 2443           st->li_check_pending = 0;
    1443 2443           st->in_thead = 0;
    1444 2443           st->tbl_aligns = NULL;
    1445 2443           st->tbl_ncols = 0;
    1446 2443           st->tbl_col = 0;
    1447 2443           st->pending_text = NULL;
    1448 2443           st->pending_len = 0;
    1449 2443           st->pending_cap = 0;
    1450 2443           st->fn_labels = NULL;
    1451 2443           st->fn_label_lens = NULL;
    1452 2443           st->fn_uses = NULL;
    1453 2443           st->fn_count = 0;
    1454 2443           st->fn_cap = 0;
    1455 2443           st->fn_skip = 0;
    1456 2443           st->fn_in_def = 0;
    1457 2443           st->fn_in_def_label = NULL;
    1458 2443           st->fn_in_def_label_len = 0;
    1459 2443           g_img_stack_top = 0;
    1460              
    1461 2443           cb->enter_block = cb_enter_block;
    1462 2443           cb->leave_block = cb_leave_block;
    1463 2443           cb->enter_inline = cb_enter_inline;
    1464 2443           cb->leave_inline = cb_leave_inline;
    1465 2443           cb->text = cb_text;
    1466 2443           cb->raw = cb_raw;
    1467 2443           }
    1468              
    1469 45           int mds_render_html_used_footnote(void* ud, size_t i,
    1470             const char** label_out,
    1471             size_t* label_len_out) {
    1472 45           render_state* st = (render_state*)ud;
    1473 45 50         if (!st || i >= st->fn_count) return 0;
        100          
    1474 36 50         if (label_out) *label_out = st->fn_labels[i];
    1475 36 50         if (label_len_out) *label_len_out = st->fn_label_lens[i];
    1476 36           return 1;
    1477             }