File Coverage

src/mds.c
Criterion Covered Total %
statement 46 49 93.8
branch 17 22 77.2
condition n/a
subroutine n/a
pod n/a
total 63 71 88.7


line stmt bran cond sub pod time code
1             /* mds.c — public entry point for the new Markdown::Simple parser.
2             *
3             * Pipeline: arena init -> block scanner -> SAX -> HTML renderer.
4             * The inline tokeniser HTML-escapes text bytes downstream.
5             */
6              
7             #include "mds.h"
8             #include "mds_buf.h"
9             #include "mds_ctx.h"
10             #include "mds_arena.h"
11             #include "mds_block.h"
12             #include "mds_render_html.h"
13             #include "simd/mds_simd.h"
14              
15             #include <stddef.h>
16             #include <stdint.h>
17             #include <string.h>
18              
19             /* Opaque blob sized comfortably larger than the renderer's private
20             * `render_state`. The renderer placement-initialises this via the
21             * pointer returned in ud_out. Keeping it stack-local keeps the render
22             * state per-call and alloc-free (mds_last_arena_profile is still a global). */
23             struct mds_render_state_blob {
24             unsigned char bytes[16 * 1024];
25             };
26              
27 2443           int mds_render_html_to_sv_ex(pTHX_
28             const char* input,
29             size_t len,
30             unsigned flags,
31             SV* output_sv,
32             mds_arena* borrowed_arena,
33             mds_block_scratch* borrowed_scratch) {
34             mds_buf buf;
35             struct mds_render_state_blob blob;
36             void* ud;
37             mds_callbacks cb;
38             mds_ctx ctx;
39             mds_arena local_arena;
40             mds_arena* arena;
41              
42             /* Pass raw `len` so mds_buf_init_for_input owns the output-size
43             * multiplier; avoids double-inflation. */
44 2443           mds_buf_init_for_input(aTHX_ &buf, output_sv, input, len);
45              
46 2443           memset(&blob, 0, sizeof blob);
47 2443           ud = &blob;
48              
49 2443           memset(&cb, 0, sizeof cb);
50 2443           mds_render_html_install(&cb, &ud, &buf, flags);
51              
52 2443           memset(&ctx, 0, sizeof ctx);
53 2443           ctx.input = input;
54 2443           ctx.len = len;
55 2443           ctx.flags = flags;
56 2443           ctx.cb = cb;
57 2443           ctx.ud = ud;
58 2443           ctx.scratch = borrowed_scratch;
59              
60             /* If a persistent (borrowed) arena was provided, parse with
61             * it and reset (not free) it at the end so the warm head page
62             * survives between parses. Otherwise initialise a stack-local per-call arena. */
63 2443 100         if (borrowed_arena) {
64 221           arena = borrowed_arena;
65             } else {
66 2222           arena = &local_arena;
67 2222           mds_arena_init(arena);
68             }
69             /* mds_ctx embeds the arena by value, so callees that take &ctx.arena
70             * see the same storage; we copy in here and copy back out on every exit path. */
71 2443           ctx.arena = *arena;
72              
73             /* ---- Preprocessing: validate UTF-8, build line index. ---- */
74             {
75 4886           const mds_simd_ops* ops = (flags & MDS_FLAG_NO_SIMD)
76 1           ? mds_simd_ops_scalar()
77 2443 100         : mds_simd_get();
78              
79 2443 100         if ((flags & MDS_FLAG_STRICT_UTF8) && len) {
    50          
80 2 100         if (MDS_UNLIKELY(!ops->validate_utf8(input, len))) {
81 1           mds_arena_snapshot(&ctx.arena, &mds_last_arena_profile);
82 1           *arena = ctx.arena;
83 1 50         if (borrowed_arena) mds_arena_reset(arena);
84 1           else mds_arena_free(arena);
85 1           mds_buf_finalize(aTHX_ &buf);
86 1           return -1; /* malformed UTF-8 */
87             }
88             }
89              
90 2442 100         if (len && len <= 0xFFFFFFFFu) {
    50          
91             /* Conservative cap: at most one newline per byte, so `len` slots always
92             * fit. NOTE(review): cap * sizeof(uint32_t) can wrap if size_t is 32-bit; confirm. */
93 2440           size_t cap = len;
94 2440           uint32_t* offs = (uint32_t*)mds_arena_alloc(&ctx.arena,
95             cap * sizeof(uint32_t));
96 2440 50         if (offs) {
97 2440           size_t n = ops->find_newlines(input, len, offs, cap);
98 2440 50         if (n == (size_t)-1) {
99 0           ctx.line_idx_overflow = 1;
100 0           ctx.line_offsets = NULL;
101 0           ctx.n_lines = 0;
102             } else {
103 2440           ctx.line_offsets = offs;
104 2440           ctx.n_lines = n;
105             }
106             }
107             }
108             /* ctx.line_offsets is precomputed and validated by tests, but the
109             * scanner still does its own per-line walk. */
110             }
111              
112 2442           mds_block_scan(&ctx);
113              
114 2442           mds_arena_snapshot(&ctx.arena, &mds_last_arena_profile);
115 2442           *arena = ctx.arena;
116 2442 100         if (borrowed_arena) mds_arena_reset(arena);
117 2221           else mds_arena_free(arena);
118 2442           mds_buf_finalize(aTHX_ &buf);
119 2442           return 0;
120             }
121              
122 2222           int mds_render_html_to_sv(pTHX_
123             const char* input,
124             size_t len,
125             unsigned flags,
126             SV* output_sv) {
127 2222           return mds_render_html_to_sv_ex(aTHX_ input, len, flags, output_sv, NULL, NULL);
128             }
129              
130             /* Last-parse arena snapshot, written by every mds_render_html_to_sv_ex call
131             * (and thus mds_render_html_to_sv), error return included. Not thread-safe;
132             * intended for profiling scripts (bench/profile_arena.pl). */
133             mds_arena_profile mds_last_arena_profile;