| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
/* src/simd/mds_simd_dispatch.c — runtime backend selection. |
|
2
|
|
|
|
|
|
|
* |
|
3
|
|
|
|
|
|
|
* Selection order: |
|
4
|
|
|
|
|
|
|
* 1. If `mds_simd_force_scalar(1)` has been called, OR the env var |
|
5
|
|
|
|
|
|
|
* MARKDOWN_SIMPLE_NO_SIMD is set to a truthy value, return scalar. |
|
6
|
|
|
|
|
|
|
* 2. If compiled with -DMDS_HAVE_NEON (expected on aarch64, where NEON is a mandatory ISA extension): return NEON. |
|
7
|
|
|
|
|
|
|
* 3. On x86 (32- or 64-bit): probe CPUID — AVX2 > SSE2 > scalar. |
|
8
|
|
|
|
|
|
|
* (Stub builds without -DMDS_HAVE_* compile flags fall through.) |
|
9
|
|
|
|
|
|
|
* 4. Otherwise scalar. |
|
10
|
|
|
|
|
|
|
* |
|
11
|
|
|
|
|
|
|
* The selected ops table is cached in a static after first call. |
|
12
|
|
|
|
|
|
|
* `mds_simd_force_scalar()` invalidates the cache. |
|
13
|
|
|
|
|
|
|
*/ |
|
14
|
|
|
|
|
|
|
#include "mds_simd.h" |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
#include |
|
17
|
|
|
|
|
|
|
#include |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || defined(_M_IX86) |
|
20
|
|
|
|
|
|
|
# define MDS_SIMD_X86 1 |
|
21
|
|
|
|
|
|
|
# if defined(_MSC_VER) |
|
22
|
|
|
|
|
|
|
# include |
|
23
|
|
|
|
|
|
|
# else |
|
24
|
|
|
|
|
|
|
# include |
|
25
|
|
|
|
|
|
|
# endif |
|
26
|
|
|
|
|
|
|
#endif |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
static int s_force_scalar = 0; |
|
29
|
|
|
|
|
|
|
static const mds_simd_ops* s_cached = 0; |
|
30
|
|
|
|
|
|
|
static const char* s_cached_name = "scalar"; |
|
31
|
|
|
|
|
|
|
|
|
32
|
732
|
|
|
|
|
|
/* Report whether the MARKDOWN_SIMPLE_NO_SIMD environment variable asks for
 * the scalar backend.
 *
 * Returns 0 when the variable is unset, empty, or exactly "0"; returns 1 for
 * every other value. */
static int env_no_simd(void)
{
    const char* setting = getenv("MARKDOWN_SIMPLE_NO_SIMD");

    if (setting == NULL || setting[0] == '\0') {
        return 0;
    }

    /* A lone "0" is the only non-empty spelling treated as "off". */
    if (setting[0] == '0' && setting[1] == '\0') {
        return 0;
    }

    return 1;
}
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
#ifdef MDS_SIMD_X86 |
|
41
|
1462
|
|
|
|
|
|
/* Run CPUID for the given leaf and subleaf.
 *
 * The four result registers are stored in regs[0..3] in the order
 * EAX, EBX, ECX, EDX. */
static void cpuid_call(unsigned leaf, unsigned subleaf, unsigned regs[4])
{
# if defined(_MSC_VER)
    /* __cpuidex fills an int array; copy it out element by element as unsigned. */
    int raw[4];
    int i;

    __cpuidex(raw, (int)leaf, (int)subleaf);
    for (i = 0; i < 4; ++i) {
        regs[i] = (unsigned)raw[i];
    }
# else
    unsigned eax, ebx, ecx, edx;

    __cpuid_count(leaf, subleaf, eax, ebx, ecx, edx);
    regs[0] = eax;
    regs[1] = ebx;
    regs[2] = ecx;
    regs[3] = edx;
# endif
}
|
54
|
|
|
|
|
|
|
|
|
55
|
0
|
|
|
|
|
|
/* Return 1 when CPUID leaf 1 reports SSE2 (EDX bit 26), otherwise 0. */
static int x86_has_sse2(void)
{
    const unsigned sse2_mask = 1u << 26; /* leaf 1, EDX bit 26 */
    unsigned leaf1[4];

    cpuid_call(1, 0, leaf1);
    return (leaf1[3] & sse2_mask) ? 1 : 0;
}
|
61
|
|
|
|
|
|
|
|
|
62
|
731
|
|
|
|
|
|
/* Read the low 32 bits of extended control register 0 (XCR0) via XGETBV.
 *
 * Only call this after CPUID leaf 1 has reported OSXSAVE (ECX bit 27);
 * XGETBV raises #UD when the OS has not enabled XSAVE. */
static unsigned x86_read_xcr0(void)
{
# if defined(_MSC_VER) && !defined(__clang__)
    return (unsigned)_xgetbv(0);
# else
    /* Inline asm avoids requiring -mxsave for the intrinsic. */
    unsigned lo, hi;
    __asm__ volatile("xgetbv" : "=a"(lo), "=d"(hi) : "c"(0u));
    (void)hi;
    return lo;
# endif
}

/* Return 1 when AVX2 code can actually be executed, otherwise 0.
 *
 * Three things must hold:
 *   - CPUID leaf 1 reports OSXSAVE (ECX bit 27) and AVX (ECX bit 28);
 *   - XCR0 has both the SSE (bit 1) and AVX (bit 2) state bits set, i.e. the
 *     OS saves and restores the YMM registers;
 *   - CPUID leaf 7, subleaf 0 reports AVX2 (EBX bit 5).
 *
 * Checking only the leaf-7 bit is not enough: a CPU can advertise AVX2 while
 * the OS or hypervisor has left YMM state disabled, in which case AVX2
 * instructions fault. */
static int x86_has_avx2(void)
{
    const unsigned osxsave_avx = (1u << 27) | (1u << 28); /* leaf 1, ECX */
    const unsigned xcr0_sse_avx = (1u << 1) | (1u << 2);  /* XMM + YMM state */
    unsigned r[4];

    cpuid_call(0, 0, r);
    if (r[0] < 7) return 0; /* leaf 7 not available */

    cpuid_call(1, 0, r);
    if ((r[2] & osxsave_avx) != osxsave_avx) return 0;

    if ((x86_read_xcr0() & xcr0_sse_avx) != xcr0_sse_avx) return 0;

    cpuid_call(7, 0, r);
    return (r[1] & (1u << 5)) != 0; /* EBX bit 5 */
}
|
70
|
|
|
|
|
|
|
#endif /* MDS_SIMD_X86 */ |
|
71
|
|
|
|
|
|
|
|
|
72
|
1429
|
|
|
|
|
|
static void pick(void) |
|
73
|
|
|
|
|
|
|
{ |
|
74
|
1429
|
100
|
|
|
|
|
if (s_force_scalar || env_no_simd()) { |
|
|
|
100
|
|
|
|
|
|
|
75
|
698
|
|
|
|
|
|
s_cached = mds_simd_ops_scalar(); |
|
76
|
698
|
|
|
|
|
|
s_cached_name = "scalar"; |
|
77
|
698
|
|
|
|
|
|
return; |
|
78
|
|
|
|
|
|
|
} |
|
79
|
|
|
|
|
|
|
#ifdef MDS_HAVE_NEON |
|
80
|
|
|
|
|
|
|
/* aarch64 always; on 32-bit ARM the build system gates this. */ |
|
81
|
|
|
|
|
|
|
s_cached = mds_simd_ops_neon(); |
|
82
|
|
|
|
|
|
|
s_cached_name = "neon"; |
|
83
|
|
|
|
|
|
|
return; |
|
84
|
|
|
|
|
|
|
#endif |
|
85
|
|
|
|
|
|
|
#ifdef MDS_SIMD_X86 |
|
86
|
|
|
|
|
|
|
# ifdef MDS_HAVE_AVX2 |
|
87
|
731
|
50
|
|
|
|
|
if (x86_has_avx2()) { |
|
88
|
731
|
|
|
|
|
|
s_cached = mds_simd_ops_avx2(); |
|
89
|
731
|
|
|
|
|
|
s_cached_name = "avx2"; |
|
90
|
731
|
|
|
|
|
|
return; |
|
91
|
|
|
|
|
|
|
} |
|
92
|
|
|
|
|
|
|
# endif |
|
93
|
|
|
|
|
|
|
# ifdef MDS_HAVE_SSE2 |
|
94
|
0
|
0
|
|
|
|
|
if (x86_has_sse2()) { |
|
95
|
0
|
|
|
|
|
|
s_cached = mds_simd_ops_sse2(); |
|
96
|
0
|
|
|
|
|
|
s_cached_name = "sse2"; |
|
97
|
0
|
|
|
|
|
|
return; |
|
98
|
|
|
|
|
|
|
} |
|
99
|
|
|
|
|
|
|
# endif |
|
100
|
|
|
|
|
|
|
#endif |
|
101
|
0
|
|
|
|
|
|
s_cached = mds_simd_ops_scalar(); |
|
102
|
0
|
|
|
|
|
|
s_cached_name = "scalar"; |
|
103
|
|
|
|
|
|
|
} |
|
104
|
|
|
|
|
|
|
|
|
105
|
3631
|
|
|
|
|
|
const mds_simd_ops* mds_simd_get(void) |
|
106
|
|
|
|
|
|
|
{ |
|
107
|
3631
|
100
|
|
|
|
|
if (!s_cached) pick(); |
|
108
|
3631
|
|
|
|
|
|
return s_cached; |
|
109
|
|
|
|
|
|
|
} |
|
110
|
|
|
|
|
|
|
|
|
111
|
5
|
|
|
|
|
|
const char* mds_simd_backend(void) |
|
112
|
|
|
|
|
|
|
{ |
|
113
|
5
|
50
|
|
|
|
|
if (!s_cached) pick(); |
|
114
|
5
|
|
|
|
|
|
return s_cached_name; |
|
115
|
|
|
|
|
|
|
} |
|
116
|
|
|
|
|
|
|
|
|
117
|
1394
|
|
|
|
|
|
void mds_simd_force_scalar(int on) |
|
118
|
|
|
|
|
|
|
{ |
|
119
|
1394
|
|
|
|
|
|
s_force_scalar = on ? 1 : 0; |
|
120
|
1394
|
|
|
|
|
|
s_cached = 0; /* invalidate; next call to _get() re-picks */ |
|
121
|
1394
|
|
|
|
|
|
s_cached_name = "scalar"; |
|
122
|
1394
|
|
|
|
|
|
} |