File Coverage

src/mds_entity.h
Criterion Covered Total %
statement 12 12 100.0
branch 8 8 100.0
condition n/a
subroutine n/a
pod n/a
total 20 20 100.0


line stmt bran cond sub pod time code
1             /* mds_entity.h — minimal HTML named-entity table used by inline scanner.
2             *
3             * This table is the high-frequency subset (XML5 mandatory entities plus
4             * the most common typographic ones). Names missing from it fall through
5             * to the full HTML5 named list (~2200 entries) in mds_entities_full.h,
6             * presumably generated via tools/gen_entities.pl (TODO confirm). Names
7             * in neither table are a miss, leaving the '&name;' literal untouched.
8             *
9             * The fast path is a tiny linear scan: < 50 entries, called rarely.
10             */
11             #ifndef MDS_ENTITY_H
12             #define MDS_ENTITY_H
13              
14             #include <stddef.h>
15             #include <string.h>
16              
17             typedef struct {
18             const char* name; /* without leading '&' and trailing ';' */
19             size_t nlen;
20             const char* utf8; /* replacement bytes */
21             size_t ulen;
22             } mds_entity;
23              
24             static const mds_entity MDS_ENTITIES[] = {
25             /* core 5 (XML) */
26             { "amp", 3, "&", 1 },
27             { "lt", 2, "<", 1 },
28             { "gt", 2, ">", 1 },
29             { "quot", 4, "\"", 1 },
30             { "apos", 4, "'", 1 },
31             /* high-frequency typographic / spacing */
32             { "nbsp", 4, "\xc2\xa0", 2 },
33             { "copy", 4, "\xc2\xa9", 2 },
34             { "reg", 3, "\xc2\xae", 2 },
35             { "trade", 5, "\xe2\x84\xa2", 3 },
36             { "hellip", 6, "\xe2\x80\xa6", 3 },
37             { "mdash", 5, "\xe2\x80\x94", 3 },
38             { "ndash", 5, "\xe2\x80\x93", 3 },
39             { "lsquo", 5, "\xe2\x80\x98", 3 },
40             { "rsquo", 5, "\xe2\x80\x99", 3 },
41             { "ldquo", 5, "\xe2\x80\x9c", 3 },
42             { "rdquo", 5, "\xe2\x80\x9d", 3 },
43             { "laquo", 5, "\xc2\xab", 2 },
44             { "raquo", 5, "\xc2\xbb", 2 },
45             { "deg", 3, "\xc2\xb0", 2 },
46             { "plusmn", 6, "\xc2\xb1", 2 },
47             { "times", 5, "\xc3\x97", 2 },
48             { "divide", 6, "\xc3\xb7", 2 },
49             { "para", 4, "\xc2\xb6", 2 },
50             { "sect", 4, "\xc2\xa7", 2 },
51             { "middot", 6, "\xc2\xb7", 2 },
52             { "bull", 4, "\xe2\x80\xa2", 3 },
53             { "dagger", 6, "\xe2\x80\xa0", 3 },
54             { "Dagger", 6, "\xe2\x80\xa1", 3 },
55             { "permil", 6, "\xe2\x80\xb0", 3 },
56             { "euro", 4, "\xe2\x82\xac", 3 },
57             { "pound", 5, "\xc2\xa3", 2 },
58             { "yen", 3, "\xc2\xa5", 2 },
59             { "cent", 4, "\xc2\xa2", 2 },
60             };
61             #define MDS_ENTITY_COUNT (sizeof(MDS_ENTITIES)/sizeof(MDS_ENTITIES[0]))
62              
63             #include "mds_entities_full.h"
64              
65             /* Look up a named entity (without leading '&' / trailing ';').
66             *
67             * On a hit in the high-frequency MDS_ENTITIES table, returns a pointer
68             * to the static entry directly (scratch untouched).
69             *
70             * On a hit in the full HTML5 table, copies the row into *scratch (which
71             * must be supplied by the caller — typically a stack local) and returns
72             * scratch.
73             *
74             * Returns NULL on a miss.
75             *
76             * NB: this used to cache the slow-path hit in a thread-local static so
77             * the signature could be a plain (name, n). That emitted a PT_TLS
78             * program header into the .so which OpenBSD's ld.so refuses to dlopen
79             * ("unsupported TLS program header"). Passing scratch from the caller
80             * eliminates the TLS dependency entirely and is also strictly more
81             * thread-safe: the buffer lives in the caller's stack frame, so there
82             * is no shared state to race on at all. */
83             static inline const mds_entity*
84 83           mds_entity_lookup(const char* name, size_t n, mds_entity* scratch) {
85             const mds_entity_full* f;
86             size_t i;
87             /* Fast path: small high-frequency table (linear scan). */
88 2222 100         for (i = 0; i < MDS_ENTITY_COUNT; i++) {
89 2159 100         if (MDS_ENTITIES[i].nlen == n &&
90 419 100         memcmp(MDS_ENTITIES[i].name, name, n) == 0)
91 20           return &MDS_ENTITIES[i];
92             }
93             /* Slow path: full HTML5 table (binary search). Materialise into the
94             * caller-supplied scratch so callers don't have to care which table
95             * hit. */
96 63           f = mds_entity_full_lookup(name, n);
97 63 100         if (!f) return NULL;
98 54           scratch->name = f->name;
99 54           scratch->nlen = f->nlen;
100 54           scratch->utf8 = f->utf8;
101 54           scratch->ulen = f->ulen;
102 54           return scratch;
103             }
104              
105             #endif