| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
/* |
|
2
|
|
|
|
|
|
|
* xxHash - Extremely Fast Hash algorithm |
|
3
|
|
|
|
|
|
|
* Copyright (C) 2020 Yann Collet |
|
4
|
|
|
|
|
|
|
* |
|
5
|
|
|
|
|
|
|
* BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php) |
|
6
|
|
|
|
|
|
|
* |
|
7
|
|
|
|
|
|
|
* Redistribution and use in source and binary forms, with or without |
|
8
|
|
|
|
|
|
|
* modification, are permitted provided that the following conditions are |
|
9
|
|
|
|
|
|
|
* met: |
|
10
|
|
|
|
|
|
|
* |
|
11
|
|
|
|
|
|
|
* * Redistributions of source code must retain the above copyright |
|
12
|
|
|
|
|
|
|
* notice, this list of conditions and the following disclaimer. |
|
13
|
|
|
|
|
|
|
* * Redistributions in binary form must reproduce the above |
|
14
|
|
|
|
|
|
|
* copyright notice, this list of conditions and the following disclaimer |
|
15
|
|
|
|
|
|
|
* in the documentation and/or other materials provided with the |
|
16
|
|
|
|
|
|
|
* distribution. |
|
17
|
|
|
|
|
|
|
* |
|
18
|
|
|
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
|
19
|
|
|
|
|
|
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
|
20
|
|
|
|
|
|
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
|
21
|
|
|
|
|
|
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
|
22
|
|
|
|
|
|
|
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
|
23
|
|
|
|
|
|
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
|
24
|
|
|
|
|
|
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
|
25
|
|
|
|
|
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
|
26
|
|
|
|
|
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
|
27
|
|
|
|
|
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
|
28
|
|
|
|
|
|
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
29
|
|
|
|
|
|
|
* |
|
30
|
|
|
|
|
|
|
* You can contact the author at: |
|
31
|
|
|
|
|
|
|
* - xxHash homepage: https://www.xxhash.com |
|
32
|
|
|
|
|
|
|
* - xxHash source repository: https://github.com/Cyan4973/xxHash |
|
33
|
|
|
|
|
|
|
*/ |
|
34
|
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
/*! |
|
37
|
|
|
|
|
|
|
* @file xxh_x86dispatch.c |
|
38
|
|
|
|
|
|
|
* |
|
39
|
|
|
|
|
|
|
* Automatic dispatcher code for the @ref xxh3_family on x86-based targets. |
|
40
|
|
|
|
|
|
|
* |
|
41
|
|
|
|
|
|
|
* Optional add-on. |
|
42
|
|
|
|
|
|
|
* |
|
43
|
|
|
|
|
|
|
* **Compile this file with the default flags for your target.** Do not compile |
|
44
|
|
|
|
|
|
|
* with flags like `-mavx*`, `-march=native`, or `/arch:AVX*`, there will be |
|
45
|
|
|
|
|
|
|
* an error. See @ref XXH_X86DISPATCH_ALLOW_AVX for details. |
|
46
|
|
|
|
|
|
|
* |
|
47
|
|
|
|
|
|
|
* @defgroup dispatch x86 Dispatcher |
|
48
|
|
|
|
|
|
|
* @{ |
|
49
|
|
|
|
|
|
|
*/ |
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
#if defined (__cplusplus) |
|
52
|
|
|
|
|
|
|
extern "C" { |
|
53
|
|
|
|
|
|
|
#endif |
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
/* Refuse to build for anything that is neither 32-bit nor 64-bit x86. */
#if !defined(__x86_64__) && !defined(__i386__) \
 && !defined(_M_IX86)    && !defined(_M_X64)
#  error "Dispatching is currently only supported on x86 and x86_64."
#endif
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
/*! |
|
60
|
|
|
|
|
|
|
* @def XXH_X86DISPATCH_ALLOW_AVX |
|
61
|
|
|
|
|
|
|
* @brief Disables the AVX sanity check. |
|
62
|
|
|
|
|
|
|
* |
|
63
|
|
|
|
|
|
|
* Don't compile xxh_x86dispatch.c with options like `-mavx*`, `-march=native`, |
|
64
|
|
|
|
|
|
|
* or `/arch:AVX*`. It is intended to be compiled for the minimum target, and |
|
65
|
|
|
|
|
|
|
* it selectively enables SSE2, AVX2, and AVX512 when it is needed. |
|
66
|
|
|
|
|
|
|
* |
|
67
|
|
|
|
|
|
|
* Using this option _globally_ allows this feature, and therefore makes it |
|
68
|
|
|
|
|
|
|
* undefined behavior to execute on any CPU without said feature. |
|
69
|
|
|
|
|
|
|
* |
|
70
|
|
|
|
|
|
|
* Even if the source code isn't directly using AVX intrinsics in a function, |
|
71
|
|
|
|
|
|
|
* the compiler can still generate AVX code from autovectorization and by |
|
72
|
|
|
|
|
|
|
* "upgrading" SSE2 intrinsics to use the VEX prefixes (a.k.a. AVX128). |
|
73
|
|
|
|
|
|
|
* |
|
74
|
|
|
|
|
|
|
* Use the same flags that you use to compile the rest of the program; this |
|
75
|
|
|
|
|
|
|
* file will safely generate SSE2, AVX2, and AVX512 without these flags. |
|
76
|
|
|
|
|
|
|
* |
|
77
|
|
|
|
|
|
|
* Define XXH_X86DISPATCH_ALLOW_AVX to ignore this check, and feel free to open |
|
78
|
|
|
|
|
|
|
* an issue if there is a target in the future where AVX is a default feature. |
|
79
|
|
|
|
|
|
|
*/ |
|
80
|
|
|
|
|
|
|
/* Documentation builds only: define the macro when XXH_DOXYGEN is set. */
#if defined(XXH_DOXYGEN)
#  define XXH_X86DISPATCH_ALLOW_AVX
#endif

/*
 * Sanity check: stop the build when the compiler reports AVX as enabled
 * (__AVX__), unless the user opted in through XXH_X86DISPATCH_ALLOW_AVX.
 */
#if !defined(XXH_X86DISPATCH_ALLOW_AVX) && defined(__AVX__)
#  error "Do not compile xxh_x86dispatch.c with AVX enabled! See the comment above."
#endif
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
/*
 * XXH_HAS_INCLUDE(header): forwards to __has_include when the preprocessor
 * provides it, and evaluates to 0 otherwise. Only usable inside #if / #elif.
 */
#ifndef __has_include
#  define XXH_HAS_INCLUDE(header) 0
#else
#  define XXH_HAS_INCLUDE(header) __has_include(header)
#endif
|
93
|
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
/*! |
|
95
|
|
|
|
|
|
|
* @def XXH_DISPATCH_SCALAR |
|
96
|
|
|
|
|
|
|
* @brief Enables/disables dispatching the scalar code path. |
|
97
|
|
|
|
|
|
|
* |
|
98
|
|
|
|
|
|
|
* If this is defined to 0, SSE2 support is assumed. This reduces code size |
|
99
|
|
|
|
|
|
|
* when the scalar path is not needed. |
|
100
|
|
|
|
|
|
|
* |
|
101
|
|
|
|
|
|
|
* This is automatically defined to 0 when... |
|
102
|
|
|
|
|
|
|
* - SSE2 support is enabled in the compiler |
|
103
|
|
|
|
|
|
|
* - Targeting x86_64 |
|
104
|
|
|
|
|
|
|
* - Targeting Android x86 |
|
105
|
|
|
|
|
|
|
* - Targeting macOS |
|
106
|
|
|
|
|
|
|
*/ |
|
107
|
|
|
|
|
|
|
/*
 * Auto-detect XXH_DISPATCH_SCALAR unless the user already defined it.
 *
 * The scalar path is dropped (0) whenever SSE2 can be assumed: SSE2 enabled
 * in the compiler, an x86_64 target, Android, or an Apple platform.
 * The Apple test must use __APPLE__; the previous spelling (__APPLEv__) is
 * never defined by any compiler, so that case silently never matched.
 */
#ifndef XXH_DISPATCH_SCALAR
#  if defined(__SSE2__) || (defined(_M_IX86_FP) && _M_IX86_FP >= 2) /* SSE2 on by default */ \
   || defined(__x86_64__) || defined(_M_X64)                        /* x86_64 */ \
   || defined(__ANDROID__) || defined(__APPLE__)                    /* Android or macOS */
#    define XXH_DISPATCH_SCALAR 0 /* disable */
#  else
#    define XXH_DISPATCH_SCALAR 1
#  endif
#endif
|
116
|
|
|
|
|
|
|
/*! |
|
117
|
|
|
|
|
|
|
* @def XXH_DISPATCH_AVX2 |
|
118
|
|
|
|
|
|
|
* @brief Enables/disables dispatching for AVX2. |
|
119
|
|
|
|
|
|
|
* |
|
120
|
|
|
|
|
|
|
* This is automatically detected if it is not defined. |
|
121
|
|
|
|
|
|
|
* - GCC 4.7 and later are known to support AVX2, but >4.9 is required |
|
122
|
|
|
|
|
|
|
* to get the AVX2 intrinsics and typedefs without -mavx -mavx2. |
|
123
|
|
|
|
|
|
|
* - Visual Studio 2013 Update 2 and later are known to support AVX2. |
|
124
|
|
|
|
|
|
|
* - The GCC/Clang internal header `<avx2intrin.h>` is detected. While this is |
|
125
|
|
|
|
|
|
|
* not allowed to be included directly, it still appears in the builtin |
|
126
|
|
|
|
|
|
|
* include path and is detectable with `__has_include`. |
|
127
|
|
|
|
|
|
|
* |
|
128
|
|
|
|
|
|
|
* @see XXH_AVX2 |
|
129
|
|
|
|
|
|
|
*/ |
|
130
|
|
|
|
|
|
|
#ifndef XXH_DISPATCH_AVX2 |
|
131
|
|
|
|
|
|
|
# if (defined(__GNUC__) && (__GNUC__ > 4)) /* GCC 5.0+ */ \ |
|
132
|
|
|
|
|
|
|
|| (defined(_MSC_VER) && _MSC_VER >= 1900) /* VS 2015+ */ \ |
|
133
|
|
|
|
|
|
|
|| (defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 180030501) /* VS 2013 Update 2 */ \ |
|
134
|
|
|
|
|
|
|
|| XXH_HAS_INCLUDE() /* GCC/Clang internal header */ |
|
135
|
|
|
|
|
|
|
# define XXH_DISPATCH_AVX2 1 /* enable dispatch towards AVX2 */ |
|
136
|
|
|
|
|
|
|
# else |
|
137
|
|
|
|
|
|
|
# define XXH_DISPATCH_AVX2 0 |
|
138
|
|
|
|
|
|
|
# endif |
|
139
|
|
|
|
|
|
|
#endif /* XXH_DISPATCH_AVX2 */ |
|
140
|
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
/*! |
|
142
|
|
|
|
|
|
|
* @def XXH_DISPATCH_AVX512 |
|
143
|
|
|
|
|
|
|
* @brief Enables/disables dispatching for AVX512. |
|
144
|
|
|
|
|
|
|
* |
|
145
|
|
|
|
|
|
|
* Automatically detected if one of the following conditions is met: |
|
146
|
|
|
|
|
|
|
* - GCC 4.9 and later are known to support AVX512. |
|
147
|
|
|
|
|
|
|
* - Visual Studio 2017 and later are known to support AVX512. |
|
148
|
|
|
|
|
|
|
* - The GCC/Clang internal header `<avx512fintrin.h>` is detected. While this |
|
149
|
|
|
|
|
|
|
* is not allowed to be included directly, it still appears in the builtin |
|
150
|
|
|
|
|
|
|
* include path and is detectable with `__has_include`. |
|
151
|
|
|
|
|
|
|
* |
|
152
|
|
|
|
|
|
|
* @see XXH_AVX512 |
|
153
|
|
|
|
|
|
|
*/ |
|
154
|
|
|
|
|
|
|
#ifndef XXH_DISPATCH_AVX512 |
|
155
|
|
|
|
|
|
|
# if (defined(__GNUC__) \ |
|
156
|
|
|
|
|
|
|
&& (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 9))) /* GCC 4.9+ */ \ |
|
157
|
|
|
|
|
|
|
|| (defined(_MSC_VER) && _MSC_VER >= 1910) /* VS 2017+ */ \ |
|
158
|
|
|
|
|
|
|
|| XXH_HAS_INCLUDE() /* GCC/Clang internal header */ |
|
159
|
|
|
|
|
|
|
# define XXH_DISPATCH_AVX512 1 /* enable dispatch towards AVX512 */ |
|
160
|
|
|
|
|
|
|
# else |
|
161
|
|
|
|
|
|
|
# define XXH_DISPATCH_AVX512 0 |
|
162
|
|
|
|
|
|
|
# endif |
|
163
|
|
|
|
|
|
|
#endif /* XXH_DISPATCH_AVX512 */ |
|
164
|
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
/*! |
|
166
|
|
|
|
|
|
|
* @def XXH_TARGET_SSE2 |
|
167
|
|
|
|
|
|
|
* @brief Allows a function to be compiled with SSE2 intrinsics. |
|
168
|
|
|
|
|
|
|
* |
|
169
|
|
|
|
|
|
|
* Uses `__attribute__((__target__("sse2")))` on GCC to allow SSE2 to be used |
|
170
|
|
|
|
|
|
|
* even with `-mno-sse2`. |
|
171
|
|
|
|
|
|
|
* |
|
172
|
|
|
|
|
|
|
* @def XXH_TARGET_AVX2 |
|
173
|
|
|
|
|
|
|
* @brief Like @ref XXH_TARGET_SSE2, but for AVX2. |
|
174
|
|
|
|
|
|
|
* |
|
175
|
|
|
|
|
|
|
* @def XXH_TARGET_AVX512 |
|
176
|
|
|
|
|
|
|
* @brief Like @ref XXH_TARGET_SSE2, but for AVX512. |
|
177
|
|
|
|
|
|
|
*/ |
|
178
|
|
|
|
|
|
|
/*
 * Pull in the intrinsic headers and define the per-function target
 * attributes. On GCC-compatible compilers the attributes enable the
 * instruction set for a single function; on MSVC they expand to nothing.
 */
#if defined(__GNUC__)
#  include <emmintrin.h> /* SSE2 */
#  if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512
#    include <immintrin.h> /* AVX2, AVX512F */
#  endif
#  define XXH_TARGET_SSE2   __attribute__((__target__("sse2")))
#  define XXH_TARGET_AVX2   __attribute__((__target__("avx2")))
#  define XXH_TARGET_AVX512 __attribute__((__target__("avx512f")))
#elif defined(_MSC_VER)
#  include <intrin.h>
#  define XXH_TARGET_SSE2
#  define XXH_TARGET_AVX2
#  define XXH_TARGET_AVX512
#else
#  error "Dispatching is currently not supported for your compiler."
#endif
|
194
|
|
|
|
|
|
|
|
|
195
|
|
|
|
|
|
|
/*
 * Debug logging.
 *
 * With XXH_DISPATCH_DEBUG defined, XXH_debugPrint(str) writes the message to
 * stderr and flushes all output streams. Otherwise it is a no-op and NDEBUG
 * is forced on before <assert.h> is included, which disables assert().
 *
 * The logging form is wrapped in do { } while (0) so that it behaves as a
 * single statement, e.g. in an unbraced if/else.
 */
#ifdef XXH_DISPATCH_DEBUG
/* debug logging */
#  include <stdio.h>
#  define XXH_debugPrint(str) \
       do { fprintf(stderr, "DEBUG: xxHash dispatch: %s \n", (str)); fflush(NULL); } while (0)
#else
#  define XXH_debugPrint(str) ((void)0)
#  undef NDEBUG /* avoid redefinition */
#  define NDEBUG
#endif
#include <assert.h>
|
205
|
|
|
|
|
|
|
|
|
206
|
|
|
|
|
|
|
#define XXH_INLINE_ALL |
|
207
|
|
|
|
|
|
|
#define XXH_X86DISPATCH |
|
208
|
|
|
|
|
|
|
#include "xxhash.h" |
|
209
|
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
/* |
|
211
|
|
|
|
|
|
|
* Support both AT&T and Intel dialects |
|
212
|
|
|
|
|
|
|
* |
|
213
|
|
|
|
|
|
|
* GCC doesn't convert AT&T syntax to Intel syntax, and will error out if |
|
214
|
|
|
|
|
|
|
* compiled with -masm=intel. Instead, it supports dialect switching with |
|
215
|
|
|
|
|
|
|
* curly braces: { AT&T syntax | Intel syntax } |
|
216
|
|
|
|
|
|
|
* |
|
217
|
|
|
|
|
|
|
* Clang's integrated assembler automatically converts AT&T syntax to Intel if |
|
218
|
|
|
|
|
|
|
* needed, making the dialect switching useless (it isn't even supported). |
|
219
|
|
|
|
|
|
|
* |
|
220
|
|
|
|
|
|
|
* Note: Comments are written in the inline assembly itself. |
|
221
|
|
|
|
|
|
|
*/ |
|
222
|
|
|
|
|
|
|
/*
 * XXH_I_ATT(intel, att): one line of inline assembly, given in both dialects.
 * Everything except Clang gets the "{ AT&T | Intel }" dialect-switch form;
 * Clang gets only the AT&T text. Each line ends with "\n\t".
 */
#if !defined(__clang__)
#  define XXH_I_ATT(intel, att) "{" att "|" intel "}\n\t"
#else
#  define XXH_I_ATT(intel, att) att "\n\t"
#endif
|
227
|
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
/*! |
|
229
|
|
|
|
|
|
|
* @internal |
|
230
|
|
|
|
|
|
|
* @brief Runs CPUID. |
|
231
|
|
|
|
|
|
|
* |
|
232
|
|
|
|
|
|
|
* @param eax, ecx The parameters to pass to CPUID, %eax and %ecx respectively. |
|
233
|
|
|
|
|
|
|
* @param abcd The array to store the result in, `{ eax, ebx, ecx, edx }` |
|
234
|
|
|
|
|
|
|
*/ |
|
235
|
|
|
|
|
|
|
/*
 * Executes CPUID with the given %eax / %ecx inputs and stores the four
 * result registers in abcd as { eax, ebx, ecx, edx }.
 */
static void XXH_cpuid(xxh_u32 eax, xxh_u32 ecx, xxh_u32* abcd)
{
#if defined(_MSC_VER)
    /*
     * __cpuidex() is declared with int parameters; passing xxh_u32* directly
     * is a pointer-type mismatch (an error when built as C++), so convert
     * explicitly.
     */
    __cpuidex((int*)abcd, (int)eax, (int)ecx);
#else
    xxh_u32 ebx, edx;
    /* Each branch is a complete __asm__ statement; nothing is shared across #if. */
# if defined(__i386__) && defined(__PIC__)
    __asm__(
        "# Call CPUID\n\t"
        "#\n\t"
        "# On 32-bit x86 with PIC enabled, we are not allowed to overwrite\n\t"
        "# EBX, so we use EDI instead.\n\t"
        XXH_I_ATT("mov edi, ebx", "movl %%ebx, %%edi")
        XXH_I_ATT("cpuid", "cpuid" )
        XXH_I_ATT("xchg edi, ebx", "xchgl %%ebx, %%edi")
        : "=D" (ebx), "+a" (eax), "+c" (ecx), "=d" (edx));
# else
    __asm__(
        "# Call CPUID\n\t"
        XXH_I_ATT("cpuid", "cpuid")
        : "=b" (ebx), "+a" (eax), "+c" (ecx), "=d" (edx));
# endif
    abcd[0] = eax;
    abcd[1] = ebx;
    abcd[2] = ecx;
    abcd[3] = edx;
#endif
}
|
264
|
|
|
|
|
|
|
|
|
265
|
|
|
|
|
|
|
/* |
|
266
|
|
|
|
|
|
|
* Modified version of Intel's guide |
|
267
|
|
|
|
|
|
|
* https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family |
|
268
|
|
|
|
|
|
|
*/ |
|
269
|
|
|
|
|
|
|
|
|
270
|
|
|
|
|
|
|
#if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512 |
|
271
|
|
|
|
|
|
|
/*! |
|
272
|
|
|
|
|
|
|
* @internal |
|
273
|
|
|
|
|
|
|
* @brief Runs `XGETBV`. |
|
274
|
|
|
|
|
|
|
* |
|
275
|
|
|
|
|
|
|
* While the CPU may support AVX2, the operating system might not properly save |
|
276
|
|
|
|
|
|
|
* the full YMM/ZMM registers. |
|
277
|
|
|
|
|
|
|
* |
|
278
|
|
|
|
|
|
|
* xgetbv is used for detecting this: Any compliant operating system will define |
|
279
|
|
|
|
|
|
|
* a set of flags in the xcr0 register indicating how it saves the AVX registers. |
|
280
|
|
|
|
|
|
|
* |
|
281
|
|
|
|
|
|
|
* You can manually disable this flag on Windows by running, as admin: |
|
282
|
|
|
|
|
|
|
* |
|
283
|
|
|
|
|
|
|
* bcdedit.exe /set xsavedisable 1 |
|
284
|
|
|
|
|
|
|
* |
|
285
|
|
|
|
|
|
|
* and rebooting. Run the same command with 0 to re-enable it. |
|
286
|
|
|
|
|
|
|
*/ |
|
287
|
|
|
|
|
|
|
static xxh_u64 XXH_xgetbv(void) |
|
288
|
|
|
|
|
|
|
{ |
|
289
|
|
|
|
|
|
|
#if defined(_MSC_VER) |
|
290
|
|
|
|
|
|
|
return _xgetbv(0); /* min VS2010 SP1 compiler is required */ |
|
291
|
|
|
|
|
|
|
#else |
|
292
|
|
|
|
|
|
|
xxh_u32 xcr0_lo, xcr0_hi; |
|
293
|
0
|
|
|
|
|
|
__asm__( |
|
294
|
|
|
|
|
|
|
"# Call XGETBV\n\t" |
|
295
|
|
|
|
|
|
|
"#\n\t" |
|
296
|
|
|
|
|
|
|
"# Older assemblers (e.g. macOS's ancient GAS version) don't support\n\t" |
|
297
|
|
|
|
|
|
|
"# the XGETBV opcode, so we encode it by hand instead.\n\t" |
|
298
|
|
|
|
|
|
|
"# See for details.\n\t" |
|
299
|
|
|
|
|
|
|
".byte 0x0f, 0x01, 0xd0\n\t" |
|
300
|
|
|
|
|
|
|
: "=a" (xcr0_lo), "=d" (xcr0_hi) : "c" (0)); |
|
301
|
0
|
|
|
|
|
|
return xcr0_lo | ((xxh_u64)xcr0_hi << 32); |
|
302
|
|
|
|
|
|
|
#endif |
|
303
|
|
|
|
|
|
|
} |
|
304
|
|
|
|
|
|
|
#endif |
|
305
|
|
|
|
|
|
|
|
|
306
|
|
|
|
|
|
|
/*
 * Feature bit masks tested by XXH_featureTest().
 *
 * They are AND-ed with unsigned register values (xxh_u32 CPUID output and the
 * xxh_u64 XGETBV result), so they are unsigned constants to avoid mixing
 * signed and unsigned operands.
 */
/* CPUID leaf 1, EDX: SSE2 (bit 26). */
#define XXH_SSE2_CPUID_MASK     (1U << 26)
/* CPUID leaf 1, ECX: XSAVE (bit 26) and OSXSAVE (bit 27). */
#define XXH_OSXSAVE_CPUID_MASK  ((1U << 26) | (1U << 27))
/* CPUID leaf 7, EBX: AVX2 (bit 5). */
#define XXH_AVX2_CPUID_MASK     (1U << 5)
/* XGETBV result: bits 1 and 2 (required before AVX2 is used). */
#define XXH_AVX2_XGETBV_MASK    ((1U << 2) | (1U << 1))
/* CPUID leaf 7, EBX: AVX512F (bit 16). */
#define XXH_AVX512F_CPUID_MASK  (1U << 16)
/* XGETBV result: bits 5-7 on top of the AVX2 bits (required before AVX512). */
#define XXH_AVX512F_XGETBV_MASK ((7U << 5) | (1U << 2) | (1U << 1))
|
312
|
|
|
|
|
|
|
|
|
313
|
|
|
|
|
|
|
/*! |
|
314
|
|
|
|
|
|
|
* @internal |
|
315
|
|
|
|
|
|
|
* @brief Returns the best XXH3 implementation. |
|
316
|
|
|
|
|
|
|
* |
|
317
|
|
|
|
|
|
|
* Runs various CPUID/XGETBV tests to try and determine the best implementation. |
|
318
|
|
|
|
|
|
|
* |
|
319
|
|
|
|
|
|
|
* @return The best @ref XXH_VECTOR implementation. |
|
320
|
|
|
|
|
|
|
* @see XXH_VECTOR_TYPES |
|
321
|
|
|
|
|
|
|
*/ |
|
322
|
0
|
|
|
|
|
|
static int XXH_featureTest(void) |
|
323
|
|
|
|
|
|
|
{ |
|
324
|
|
|
|
|
|
|
xxh_u32 abcd[4]; |
|
325
|
|
|
|
|
|
|
xxh_u32 max_leaves; |
|
326
|
|
|
|
|
|
|
int best = XXH_SCALAR; |
|
327
|
|
|
|
|
|
|
#if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512 |
|
328
|
|
|
|
|
|
|
xxh_u64 xgetbv_val; |
|
329
|
|
|
|
|
|
|
#endif |
|
330
|
|
|
|
|
|
|
#if defined(__GNUC__) && defined(__i386__) |
|
331
|
|
|
|
|
|
|
xxh_u32 cpuid_supported; |
|
332
|
|
|
|
|
|
|
__asm__( |
|
333
|
|
|
|
|
|
|
"# For the sake of ruthless backwards compatibility, check if CPUID\n\t" |
|
334
|
|
|
|
|
|
|
"# is supported in the EFLAGS on i386.\n\t" |
|
335
|
|
|
|
|
|
|
"# This is not necessary on x86_64 - CPUID is mandatory.\n\t" |
|
336
|
|
|
|
|
|
|
"# The ID flag (bit 21) in the EFLAGS register indicates support\n\t" |
|
337
|
|
|
|
|
|
|
"# for the CPUID instruction. If a software procedure can set and\n\t" |
|
338
|
|
|
|
|
|
|
"# clear this flag, the processor executing the procedure supports\n\t" |
|
339
|
|
|
|
|
|
|
"# the CPUID instruction.\n\t" |
|
340
|
|
|
|
|
|
|
"# \n\t" |
|
341
|
|
|
|
|
|
|
"#\n\t" |
|
342
|
|
|
|
|
|
|
"# Routine is from .\n\t" |
|
343
|
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
"# Save EFLAGS\n\t" |
|
345
|
|
|
|
|
|
|
XXH_I_ATT("pushfd", "pushfl" ) |
|
346
|
|
|
|
|
|
|
"# Store EFLAGS\n\t" |
|
347
|
|
|
|
|
|
|
XXH_I_ATT("pushfd", "pushfl" ) |
|
348
|
|
|
|
|
|
|
"# Invert the ID bit in stored EFLAGS\n\t" |
|
349
|
|
|
|
|
|
|
XXH_I_ATT("xor dword ptr[esp], 0x200000", "xorl $0x200000, (%%esp)") |
|
350
|
|
|
|
|
|
|
"# Load stored EFLAGS (with ID bit inverted)\n\t" |
|
351
|
|
|
|
|
|
|
XXH_I_ATT("popfd", "popfl" ) |
|
352
|
|
|
|
|
|
|
"# Store EFLAGS again (ID bit may or not be inverted)\n\t" |
|
353
|
|
|
|
|
|
|
XXH_I_ATT("pushfd", "pushfl" ) |
|
354
|
|
|
|
|
|
|
"# eax = modified EFLAGS (ID bit may or may not be inverted)\n\t" |
|
355
|
|
|
|
|
|
|
XXH_I_ATT("pop eax", "popl %%eax" ) |
|
356
|
|
|
|
|
|
|
"# eax = whichever bits were changed\n\t" |
|
357
|
|
|
|
|
|
|
XXH_I_ATT("xor eax, dword ptr[esp]", "xorl (%%esp), %%eax" ) |
|
358
|
|
|
|
|
|
|
"# Restore original EFLAGS\n\t" |
|
359
|
|
|
|
|
|
|
XXH_I_ATT("popfd", "popfl" ) |
|
360
|
|
|
|
|
|
|
"# eax = zero if ID bit can't be changed, else non-zero\n\t" |
|
361
|
|
|
|
|
|
|
XXH_I_ATT("and eax, 0x200000", "andl $0x200000, %%eax" ) |
|
362
|
|
|
|
|
|
|
: "=a" (cpuid_supported) :: "cc"); |
|
363
|
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
if (XXH_unlikely(!cpuid_supported)) { |
|
365
|
|
|
|
|
|
|
XXH_debugPrint("CPUID support is not detected!"); |
|
366
|
|
|
|
|
|
|
return best; |
|
367
|
|
|
|
|
|
|
} |
|
368
|
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
#endif |
|
370
|
|
|
|
|
|
|
/* Check how many CPUID pages we have */ |
|
371
|
|
|
|
|
|
|
XXH_cpuid(0, 0, abcd); |
|
372
|
|
|
|
|
|
|
max_leaves = abcd[0]; |
|
373
|
|
|
|
|
|
|
|
|
374
|
|
|
|
|
|
|
/* Shouldn't happen on hardware, but happens on some QEMU configs. */ |
|
375
|
0
|
0
|
|
|
|
|
if (XXH_unlikely(max_leaves == 0)) { |
|
376
|
|
|
|
|
|
|
XXH_debugPrint("Max CPUID leaves == 0!"); |
|
377
|
|
|
|
|
|
|
return best; |
|
378
|
|
|
|
|
|
|
} |
|
379
|
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
/* Check for SSE2, OSXSAVE and xgetbv */ |
|
381
|
|
|
|
|
|
|
XXH_cpuid(1, 0, abcd); |
|
382
|
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
/* |
|
384
|
|
|
|
|
|
|
* Test for SSE2. The check is redundant on x86_64, but it doesn't hurt. |
|
385
|
|
|
|
|
|
|
*/ |
|
386
|
0
|
0
|
|
|
|
|
if (XXH_unlikely((abcd[3] & XXH_SSE2_CPUID_MASK) != XXH_SSE2_CPUID_MASK)) |
|
387
|
|
|
|
|
|
|
return best; |
|
388
|
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
XXH_debugPrint("SSE2 support detected."); |
|
390
|
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
best = XXH_SSE2; |
|
392
|
|
|
|
|
|
|
#if XXH_DISPATCH_AVX2 || XXH_DISPATCH_AVX512 |
|
393
|
|
|
|
|
|
|
/* Make sure we have enough leaves */ |
|
394
|
0
|
0
|
|
|
|
|
if (XXH_unlikely(max_leaves < 7)) |
|
395
|
|
|
|
|
|
|
return best; |
|
396
|
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
/* Test for OSXSAVE and XGETBV */ |
|
398
|
0
|
0
|
|
|
|
|
if ((abcd[2] & XXH_OSXSAVE_CPUID_MASK) != XXH_OSXSAVE_CPUID_MASK) |
|
399
|
|
|
|
|
|
|
return best; |
|
400
|
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
/* CPUID check for AVX features */ |
|
402
|
|
|
|
|
|
|
XXH_cpuid(7, 0, abcd); |
|
403
|
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
xgetbv_val = XXH_xgetbv(); |
|
405
|
|
|
|
|
|
|
#if XXH_DISPATCH_AVX2 |
|
406
|
|
|
|
|
|
|
/* Validate that AVX2 is supported by the CPU */ |
|
407
|
0
|
0
|
|
|
|
|
if ((abcd[1] & XXH_AVX2_CPUID_MASK) != XXH_AVX2_CPUID_MASK) |
|
408
|
|
|
|
|
|
|
return best; |
|
409
|
|
|
|
|
|
|
|
|
410
|
|
|
|
|
|
|
/* Validate that the OS supports YMM registers */ |
|
411
|
0
|
0
|
|
|
|
|
if ((xgetbv_val & XXH_AVX2_XGETBV_MASK) != XXH_AVX2_XGETBV_MASK) { |
|
412
|
|
|
|
|
|
|
XXH_debugPrint("AVX2 supported by the CPU, but not the OS."); |
|
413
|
|
|
|
|
|
|
return best; |
|
414
|
|
|
|
|
|
|
} |
|
415
|
|
|
|
|
|
|
|
|
416
|
|
|
|
|
|
|
/* AVX2 supported */ |
|
417
|
|
|
|
|
|
|
XXH_debugPrint("AVX2 support detected."); |
|
418
|
|
|
|
|
|
|
best = XXH_AVX2; |
|
419
|
|
|
|
|
|
|
#endif |
|
420
|
|
|
|
|
|
|
#if XXH_DISPATCH_AVX512 |
|
421
|
|
|
|
|
|
|
/* Check if AVX512F is supported by the CPU */ |
|
422
|
0
|
0
|
|
|
|
|
if ((abcd[1] & XXH_AVX512F_CPUID_MASK) != XXH_AVX512F_CPUID_MASK) { |
|
423
|
|
|
|
|
|
|
XXH_debugPrint("AVX512F not supported by CPU"); |
|
424
|
|
|
|
|
|
|
return best; |
|
425
|
|
|
|
|
|
|
} |
|
426
|
|
|
|
|
|
|
|
|
427
|
|
|
|
|
|
|
/* Validate that the OS supports ZMM registers */ |
|
428
|
0
|
0
|
|
|
|
|
if ((xgetbv_val & XXH_AVX512F_XGETBV_MASK) != XXH_AVX512F_XGETBV_MASK) { |
|
429
|
|
|
|
|
|
|
XXH_debugPrint("AVX512F supported by the CPU, but not the OS."); |
|
430
|
|
|
|
|
|
|
return best; |
|
431
|
|
|
|
|
|
|
} |
|
432
|
|
|
|
|
|
|
|
|
433
|
|
|
|
|
|
|
/* AVX512F supported */ |
|
434
|
|
|
|
|
|
|
XXH_debugPrint("AVX512F support detected."); |
|
435
|
|
|
|
|
|
|
best = XXH_AVX512; |
|
436
|
|
|
|
|
|
|
#endif |
|
437
|
|
|
|
|
|
|
#endif |
|
438
|
0
|
|
|
|
|
|
return best; |
|
439
|
|
|
|
|
|
|
} |
|
440
|
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
/* === Vector implementations === */ |
|
443
|
|
|
|
|
|
|
|
|
444
|
|
|
|
|
|
|
/*! |
|
445
|
|
|
|
|
|
|
* @internal |
|
446
|
|
|
|
|
|
|
* @brief Defines the various dispatch functions. |
|
447
|
|
|
|
|
|
|
* |
|
448
|
|
|
|
|
|
|
* TODO: Consolidate? |
|
449
|
|
|
|
|
|
|
* |
|
450
|
|
|
|
|
|
|
* @param suffix The suffix for the functions, e.g. sse2 or scalar |
|
451
|
|
|
|
|
|
|
* @param target XXH_TARGET_* or empty. |
|
452
|
|
|
|
|
|
|
*/ |
|
453
|
|
|
|
|
|
|
/*
 * Generates one family of non-inlined wrappers for a given target.
 * `suffix` selects the XXH3_accumulate_512_ / XXH3_scrambleAcc_ /
 * XXH3_initCustomSecret_ variants; `target` is placed in front of every
 * generated function (XXH_TARGET_* or empty).
 */
#define XXH_DEFINE_DISPATCH_FUNCS(suffix, target) \
 \
/* ---- XXH3 64-bit: built-in secret ---- */ \
XXH_NO_INLINE target XXH64_hash_t \
XXHL64_default_##suffix(const void* XXH_RESTRICT input, size_t len) \
{ \
    return XXH3_hashLong_64b_internal(input, len, \
                                      XXH3_kSecret, sizeof(XXH3_kSecret), \
                                      XXH3_accumulate_512_##suffix, \
                                      XXH3_scrambleAcc_##suffix); \
} \
 \
/* ---- XXH3 64-bit: seeded ---- */ \
XXH_NO_INLINE target XXH64_hash_t \
XXHL64_seed_##suffix(const void* XXH_RESTRICT input, size_t len, \
                     XXH64_hash_t seed) \
{ \
    return XXH3_hashLong_64b_withSeed_internal(input, len, seed, \
                                               XXH3_accumulate_512_##suffix, \
                                               XXH3_scrambleAcc_##suffix, \
                                               XXH3_initCustomSecret_##suffix); \
} \
 \
/* ---- XXH3 64-bit: caller-supplied secret ---- */ \
XXH_NO_INLINE target XXH64_hash_t \
XXHL64_secret_##suffix(const void* XXH_RESTRICT input, size_t len, \
                       const void* secret, size_t secretLen) \
{ \
    return XXH3_hashLong_64b_internal(input, len, \
                                      secret, secretLen, \
                                      XXH3_accumulate_512_##suffix, \
                                      XXH3_scrambleAcc_##suffix); \
} \
 \
/* ---- XXH3 streaming update ---- */ \
XXH_NO_INLINE target XXH_errorcode \
XXH3_update_##suffix(XXH3_state_t* state, const void* input, size_t len) \
{ \
    return XXH3_update(state, (const xxh_u8*)input, len, \
                       XXH3_accumulate_512_##suffix, \
                       XXH3_scrambleAcc_##suffix); \
} \
 \
/* ---- XXH128: built-in secret ---- */ \
XXH_NO_INLINE target XXH128_hash_t \
XXHL128_default_##suffix(const void* XXH_RESTRICT input, size_t len) \
{ \
    return XXH3_hashLong_128b_internal(input, len, \
                                       XXH3_kSecret, sizeof(XXH3_kSecret), \
                                       XXH3_accumulate_512_##suffix, \
                                       XXH3_scrambleAcc_##suffix); \
} \
 \
/* ---- XXH128: caller-supplied secret ---- */ \
XXH_NO_INLINE target XXH128_hash_t \
XXHL128_secret_##suffix(const void* XXH_RESTRICT input, size_t len, \
                        const void* XXH_RESTRICT secret, size_t secretLen) \
{ \
    return XXH3_hashLong_128b_internal(input, len, \
                                       (const xxh_u8*)secret, secretLen, \
                                       XXH3_accumulate_512_##suffix, \
                                       XXH3_scrambleAcc_##suffix); \
} \
 \
/* ---- XXH128: seeded ---- */ \
XXH_NO_INLINE target XXH128_hash_t \
XXHL128_seed_##suffix(const void* XXH_RESTRICT input, size_t len, \
                      XXH64_hash_t seed) \
{ \
    return XXH3_hashLong_128b_withSeed_internal(input, len, seed, \
                                                XXH3_accumulate_512_##suffix, \
                                                XXH3_scrambleAcc_##suffix, \
                                                XXH3_initCustomSecret_##suffix); \
}
|
531
|
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
/* End XXH_DEFINE_DISPATCH_FUNCS */ |
|
533
|
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
#if XXH_DISPATCH_SCALAR |
|
535
|
|
|
|
|
|
|
XXH_DEFINE_DISPATCH_FUNCS(scalar, /* nothing */) |
|
536
|
|
|
|
|
|
|
#endif |
|
537
|
0
|
|
|
|
|
|
XXH_DEFINE_DISPATCH_FUNCS(sse2, XXH_TARGET_SSE2) |
|
538
|
|
|
|
|
|
|
#if XXH_DISPATCH_AVX2 |
|
539
|
0
|
|
|
|
|
|
XXH_DEFINE_DISPATCH_FUNCS(avx2, XXH_TARGET_AVX2) |
|
540
|
|
|
|
|
|
|
#endif |
|
541
|
|
|
|
|
|
|
#if XXH_DISPATCH_AVX512 |
|
542
|
0
|
|
|
|
|
|
XXH_DEFINE_DISPATCH_FUNCS(avx512, XXH_TARGET_AVX512) |
|
543
|
|
|
|
|
|
|
#endif |
|
544
|
|
|
|
|
|
|
#undef XXH_DEFINE_DISPATCH_FUNCS |
|
545
|
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
/* ==== Dispatchers ==== */ |
|
547
|
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
typedef XXH64_hash_t (*XXH3_dispatchx86_hashLong64_default)(const void* XXH_RESTRICT, size_t); |
|
549
|
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
typedef XXH64_hash_t (*XXH3_dispatchx86_hashLong64_withSeed)(const void* XXH_RESTRICT, size_t, XXH64_hash_t); |
|
551
|
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
typedef XXH64_hash_t (*XXH3_dispatchx86_hashLong64_withSecret)(const void* XXH_RESTRICT, size_t, const void* XXH_RESTRICT, size_t); |
|
553
|
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
typedef XXH_errorcode (*XXH3_dispatchx86_update)(XXH3_state_t*, const void*, size_t); |
|
555
|
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
typedef struct { |
|
557
|
|
|
|
|
|
|
XXH3_dispatchx86_hashLong64_default hashLong64_default; |
|
558
|
|
|
|
|
|
|
XXH3_dispatchx86_hashLong64_withSeed hashLong64_seed; |
|
559
|
|
|
|
|
|
|
XXH3_dispatchx86_hashLong64_withSecret hashLong64_secret; |
|
560
|
|
|
|
|
|
|
XXH3_dispatchx86_update update; |
|
561
|
|
|
|
|
|
|
} XXH_dispatchFunctions_s; |
|
562
|
|
|
|
|
|
|
|
|
563
|
|
|
|
|
|
|
#define XXH_NB_DISPATCHES 4 |
|
564
|
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
/*! |
|
566
|
|
|
|
|
|
|
* @internal |
|
567
|
|
|
|
|
|
|
* @brief Table of dispatchers for @ref XXH3_64bits(). |
|
568
|
|
|
|
|
|
|
* |
|
569
|
|
|
|
|
|
|
* @pre The indices must match @ref XXH_VECTOR_TYPE. |
|
570
|
|
|
|
|
|
|
*/ |
|
571
|
|
|
|
|
|
|
static const XXH_dispatchFunctions_s XXH_kDispatch[XXH_NB_DISPATCHES] = { |
|
572
|
|
|
|
|
|
|
#if XXH_DISPATCH_SCALAR |
|
573
|
|
|
|
|
|
|
/* Scalar */ { XXHL64_default_scalar, XXHL64_seed_scalar, XXHL64_secret_scalar, XXH3_update_scalar }, |
|
574
|
|
|
|
|
|
|
#else |
|
575
|
|
|
|
|
|
|
/* Scalar */ { NULL, NULL, NULL, NULL }, |
|
576
|
|
|
|
|
|
|
#endif |
|
577
|
|
|
|
|
|
|
/* SSE2 */ { XXHL64_default_sse2, XXHL64_seed_sse2, XXHL64_secret_sse2, XXH3_update_sse2 }, |
|
578
|
|
|
|
|
|
|
#if XXH_DISPATCH_AVX2 |
|
579
|
|
|
|
|
|
|
/* AVX2 */ { XXHL64_default_avx2, XXHL64_seed_avx2, XXHL64_secret_avx2, XXH3_update_avx2 }, |
|
580
|
|
|
|
|
|
|
#else |
|
581
|
|
|
|
|
|
|
/* AVX2 */ { NULL, NULL, NULL, NULL }, |
|
582
|
|
|
|
|
|
|
#endif |
|
583
|
|
|
|
|
|
|
#if XXH_DISPATCH_AVX512 |
|
584
|
|
|
|
|
|
|
/* AVX512 */ { XXHL64_default_avx512, XXHL64_seed_avx512, XXHL64_secret_avx512, XXH3_update_avx512 } |
|
585
|
|
|
|
|
|
|
#else |
|
586
|
|
|
|
|
|
|
/* AVX512 */ { NULL, NULL, NULL, NULL } |
|
587
|
|
|
|
|
|
|
#endif |
|
588
|
|
|
|
|
|
|
}; |
|
589
|
|
|
|
|
|
|
/*! |
|
590
|
|
|
|
|
|
|
* @internal |
|
591
|
|
|
|
|
|
|
* @brief The selected dispatch table for @ref XXH3_64bits(). |
|
592
|
|
|
|
|
|
|
*/ |
|
593
|
|
|
|
|
|
|
static XXH_dispatchFunctions_s XXH_g_dispatch = { NULL, NULL, NULL, NULL }; |
|
594
|
|
|
|
|
|
|
|
|
595
|
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
typedef XXH128_hash_t (*XXH3_dispatchx86_hashLong128_default)(const void* XXH_RESTRICT, size_t); |
|
597
|
|
|
|
|
|
|
|
|
598
|
|
|
|
|
|
|
typedef XXH128_hash_t (*XXH3_dispatchx86_hashLong128_withSeed)(const void* XXH_RESTRICT, size_t, XXH64_hash_t); |
|
599
|
|
|
|
|
|
|
|
|
600
|
|
|
|
|
|
|
typedef XXH128_hash_t (*XXH3_dispatchx86_hashLong128_withSecret)(const void* XXH_RESTRICT, size_t, const void* XXH_RESTRICT, size_t); |
|
601
|
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
typedef struct { |
|
603
|
|
|
|
|
|
|
XXH3_dispatchx86_hashLong128_default hashLong128_default; |
|
604
|
|
|
|
|
|
|
XXH3_dispatchx86_hashLong128_withSeed hashLong128_seed; |
|
605
|
|
|
|
|
|
|
XXH3_dispatchx86_hashLong128_withSecret hashLong128_secret; |
|
606
|
|
|
|
|
|
|
XXH3_dispatchx86_update update; |
|
607
|
|
|
|
|
|
|
} XXH_dispatch128Functions_s; |
|
608
|
|
|
|
|
|
|
|
|
609
|
|
|
|
|
|
|
|
|
610
|
|
|
|
|
|
|
/*! |
|
611
|
|
|
|
|
|
|
* @internal |
|
612
|
|
|
|
|
|
|
* @brief Table of dispatchers for @ref XXH3_128bits(). |
|
613
|
|
|
|
|
|
|
* |
|
614
|
|
|
|
|
|
|
* @pre The indices must match @ref XXH_VECTOR_TYPE. |
|
615
|
|
|
|
|
|
|
*/ |
|
616
|
|
|
|
|
|
|
static const XXH_dispatch128Functions_s XXH_kDispatch128[XXH_NB_DISPATCHES] = { |
|
617
|
|
|
|
|
|
|
#if XXH_DISPATCH_SCALAR |
|
618
|
|
|
|
|
|
|
/* Scalar */ { XXHL128_default_scalar, XXHL128_seed_scalar, XXHL128_secret_scalar, XXH3_update_scalar }, |
|
619
|
|
|
|
|
|
|
#else |
|
620
|
|
|
|
|
|
|
/* Scalar */ { NULL, NULL, NULL, NULL }, |
|
621
|
|
|
|
|
|
|
#endif |
|
622
|
|
|
|
|
|
|
/* SSE2 */ { XXHL128_default_sse2, XXHL128_seed_sse2, XXHL128_secret_sse2, XXH3_update_sse2 }, |
|
623
|
|
|
|
|
|
|
#if XXH_DISPATCH_AVX2 |
|
624
|
|
|
|
|
|
|
/* AVX2 */ { XXHL128_default_avx2, XXHL128_seed_avx2, XXHL128_secret_avx2, XXH3_update_avx2 }, |
|
625
|
|
|
|
|
|
|
#else |
|
626
|
|
|
|
|
|
|
/* AVX2 */ { NULL, NULL, NULL, NULL }, |
|
627
|
|
|
|
|
|
|
#endif |
|
628
|
|
|
|
|
|
|
#if XXH_DISPATCH_AVX512 |
|
629
|
|
|
|
|
|
|
/* AVX512 */ { XXHL128_default_avx512, XXHL128_seed_avx512, XXHL128_secret_avx512, XXH3_update_avx512 } |
|
630
|
|
|
|
|
|
|
#else |
|
631
|
|
|
|
|
|
|
/* AVX512 */ { NULL, NULL, NULL, NULL } |
|
632
|
|
|
|
|
|
|
#endif |
|
633
|
|
|
|
|
|
|
}; |
|
634
|
|
|
|
|
|
|
|
|
635
|
|
|
|
|
|
|
/*! |
|
636
|
|
|
|
|
|
|
* @internal |
|
637
|
|
|
|
|
|
|
* @brief The selected dispatch table for @ref XXH3_64bits(). |
|
638
|
|
|
|
|
|
|
*/ |
|
639
|
|
|
|
|
|
|
static XXH_dispatch128Functions_s XXH_g_dispatch128 = { NULL, NULL, NULL, NULL }; |
|
640
|
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
/*! |
|
642
|
|
|
|
|
|
|
* @internal |
|
643
|
|
|
|
|
|
|
* @brief Runs a CPUID check and sets the correct dispatch tables. |
|
644
|
|
|
|
|
|
|
*/ |
|
645
|
0
|
|
|
|
|
|
static void XXH_setDispatch(void) |
|
646
|
|
|
|
|
|
|
{ |
|
647
|
0
|
|
|
|
|
|
int vecID = XXH_featureTest(); |
|
648
|
|
|
|
|
|
|
XXH_STATIC_ASSERT(XXH_AVX512 == XXH_NB_DISPATCHES-1); |
|
649
|
|
|
|
|
|
|
assert(XXH_SCALAR <= vecID && vecID <= XXH_AVX512); |
|
650
|
|
|
|
|
|
|
#if !XXH_DISPATCH_SCALAR |
|
651
|
|
|
|
|
|
|
assert(vecID != XXH_SCALAR); |
|
652
|
|
|
|
|
|
|
#endif |
|
653
|
|
|
|
|
|
|
#if !XXH_DISPATCH_AVX512 |
|
654
|
|
|
|
|
|
|
assert(vecID != XXH_AVX512); |
|
655
|
|
|
|
|
|
|
#endif |
|
656
|
|
|
|
|
|
|
#if !XXH_DISPATCH_AVX2 |
|
657
|
|
|
|
|
|
|
assert(vecID != XXH_AVX2); |
|
658
|
|
|
|
|
|
|
#endif |
|
659
|
0
|
|
|
|
|
|
XXH_g_dispatch = XXH_kDispatch[vecID]; |
|
660
|
0
|
|
|
|
|
|
XXH_g_dispatch128 = XXH_kDispatch128[vecID]; |
|
661
|
0
|
|
|
|
|
|
} |
|
662
|
|
|
|
|
|
|
|
|
663
|
|
|
|
|
|
|
|
|
664
|
|
|
|
|
|
|
/* ==== XXH3 public functions ==== */ |
|
665
|
|
|
|
|
|
|
|
|
666
|
|
|
|
|
|
|
static XXH64_hash_t |
|
667
|
|
|
|
|
|
|
XXH3_hashLong_64b_defaultSecret_selection(const void* input, size_t len, |
|
668
|
|
|
|
|
|
|
XXH64_hash_t seed64, const xxh_u8* secret, size_t secretLen) |
|
669
|
|
|
|
|
|
|
{ |
|
670
|
|
|
|
|
|
|
(void)seed64; (void)secret; (void)secretLen; |
|
671
|
0
|
0
|
|
|
|
|
if (XXH_g_dispatch.hashLong64_default == NULL) XXH_setDispatch(); |
|
672
|
0
|
|
|
|
|
|
return XXH_g_dispatch.hashLong64_default(input, len); |
|
673
|
|
|
|
|
|
|
} |
|
674
|
|
|
|
|
|
|
|
|
675
|
0
|
|
|
|
|
|
XXH64_hash_t XXH3_64bits_dispatch(const void* input, size_t len) |
|
676
|
|
|
|
|
|
|
{ |
|
677
|
0
|
|
|
|
|
|
return XXH3_64bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_defaultSecret_selection); |
|
678
|
|
|
|
|
|
|
} |
|
679
|
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
static XXH64_hash_t |
|
681
|
|
|
|
|
|
|
XXH3_hashLong_64b_withSeed_selection(const void* input, size_t len, |
|
682
|
|
|
|
|
|
|
XXH64_hash_t seed64, const xxh_u8* secret, size_t secretLen) |
|
683
|
|
|
|
|
|
|
{ |
|
684
|
|
|
|
|
|
|
(void)secret; (void)secretLen; |
|
685
|
0
|
0
|
|
|
|
|
if (XXH_g_dispatch.hashLong64_seed == NULL) XXH_setDispatch(); |
|
686
|
0
|
|
|
|
|
|
return XXH_g_dispatch.hashLong64_seed(input, len, seed64); |
|
687
|
|
|
|
|
|
|
} |
|
688
|
|
|
|
|
|
|
|
|
689
|
0
|
|
|
|
|
|
/*!
 * @brief Dispatching 64-bit XXH3 hash of @p input (@p len bytes) with @p seed.
 *
 * Calls XXH3_64bits_internal() with @p seed, the XXH3_kSecret buffer
 * and the with-seed selection callback.
 */
XXH64_hash_t XXH3_64bits_withSeed_dispatch(const void* input, size_t len, XXH64_hash_t seed)
{
    XXH64_hash_t const hash = XXH3_64bits_internal(
            input, len,
            seed, XXH3_kSecret, sizeof(XXH3_kSecret),
            XXH3_hashLong_64b_withSeed_selection);
    return hash;
}
|
693
|
|
|
|
|
|
|
|
|
694
|
|
|
|
|
|
|
static XXH64_hash_t |
|
695
|
|
|
|
|
|
|
XXH3_hashLong_64b_withSecret_selection(const void* input, size_t len, |
|
696
|
|
|
|
|
|
|
XXH64_hash_t seed64, const xxh_u8* secret, size_t secretLen) |
|
697
|
|
|
|
|
|
|
{ |
|
698
|
|
|
|
|
|
|
(void)seed64; |
|
699
|
0
|
0
|
|
|
|
|
if (XXH_g_dispatch.hashLong64_secret == NULL) XXH_setDispatch(); |
|
700
|
0
|
|
|
|
|
|
return XXH_g_dispatch.hashLong64_secret(input, len, secret, secretLen); |
|
701
|
|
|
|
|
|
|
} |
|
702
|
|
|
|
|
|
|
|
|
703
|
0
|
|
|
|
|
|
XXH64_hash_t XXH3_64bits_withSecret_dispatch(const void* input, size_t len, const void* secret, size_t secretLen) |
|
704
|
|
|
|
|
|
|
{ |
|
705
|
0
|
|
|
|
|
|
return XXH3_64bits_internal(input, len, 0, secret, secretLen, XXH3_hashLong_64b_withSecret_selection); |
|
706
|
|
|
|
|
|
|
} |
|
707
|
|
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
XXH_errorcode |
|
709
|
0
|
|
|
|
|
|
XXH3_64bits_update_dispatch(XXH3_state_t* state, const void* input, size_t len) |
|
710
|
|
|
|
|
|
|
{ |
|
711
|
0
|
0
|
|
|
|
|
if (XXH_g_dispatch.update == NULL) XXH_setDispatch(); |
|
712
|
0
|
|
|
|
|
|
return XXH_g_dispatch.update(state, (const xxh_u8*)input, len); |
|
713
|
|
|
|
|
|
|
} |
|
714
|
|
|
|
|
|
|
|
|
715
|
|
|
|
|
|
|
|
|
716
|
|
|
|
|
|
|
/* ==== XXH128 public functions ==== */ |
|
717
|
|
|
|
|
|
|
|
|
718
|
|
|
|
|
|
|
static XXH128_hash_t |
|
719
|
|
|
|
|
|
|
XXH3_hashLong_128b_defaultSecret_selection(const void* input, size_t len, |
|
720
|
|
|
|
|
|
|
XXH64_hash_t seed64, const void* secret, size_t secretLen) |
|
721
|
|
|
|
|
|
|
{ |
|
722
|
|
|
|
|
|
|
(void)seed64; (void)secret; (void)secretLen; |
|
723
|
0
|
0
|
|
|
|
|
if (XXH_g_dispatch128.hashLong128_default == NULL) XXH_setDispatch(); |
|
724
|
0
|
|
|
|
|
|
return XXH_g_dispatch128.hashLong128_default(input, len); |
|
725
|
|
|
|
|
|
|
} |
|
726
|
|
|
|
|
|
|
|
|
727
|
0
|
|
|
|
|
|
XXH128_hash_t XXH3_128bits_dispatch(const void* input, size_t len) |
|
728
|
|
|
|
|
|
|
{ |
|
729
|
0
|
|
|
|
|
|
return XXH3_128bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_128b_defaultSecret_selection); |
|
730
|
|
|
|
|
|
|
} |
|
731
|
|
|
|
|
|
|
|
|
732
|
|
|
|
|
|
|
static XXH128_hash_t |
|
733
|
|
|
|
|
|
|
XXH3_hashLong_128b_withSeed_selection(const void* input, size_t len, |
|
734
|
|
|
|
|
|
|
XXH64_hash_t seed64, const void* secret, size_t secretLen) |
|
735
|
|
|
|
|
|
|
{ |
|
736
|
|
|
|
|
|
|
(void)secret; (void)secretLen; |
|
737
|
0
|
0
|
|
|
|
|
if (XXH_g_dispatch128.hashLong128_seed == NULL) XXH_setDispatch(); |
|
738
|
0
|
|
|
|
|
|
return XXH_g_dispatch128.hashLong128_seed(input, len, seed64); |
|
739
|
|
|
|
|
|
|
} |
|
740
|
|
|
|
|
|
|
|
|
741
|
0
|
|
|
|
|
|
/*!
 * @brief Dispatching 128-bit XXH3 hash of @p input (@p len bytes) with @p seed.
 *
 * Calls XXH3_128bits_internal() with @p seed, the XXH3_kSecret buffer
 * and the with-seed selection callback.
 */
XXH128_hash_t XXH3_128bits_withSeed_dispatch(const void* input, size_t len, XXH64_hash_t seed)
{
    XXH128_hash_t const hash = XXH3_128bits_internal(
            input, len,
            seed, XXH3_kSecret, sizeof(XXH3_kSecret),
            XXH3_hashLong_128b_withSeed_selection);
    return hash;
}
|
745
|
|
|
|
|
|
|
|
|
746
|
|
|
|
|
|
|
static XXH128_hash_t |
|
747
|
|
|
|
|
|
|
XXH3_hashLong_128b_withSecret_selection(const void* input, size_t len, |
|
748
|
|
|
|
|
|
|
XXH64_hash_t seed64, const void* secret, size_t secretLen) |
|
749
|
|
|
|
|
|
|
{ |
|
750
|
|
|
|
|
|
|
(void)seed64; |
|
751
|
0
|
0
|
|
|
|
|
if (XXH_g_dispatch128.hashLong128_secret == NULL) XXH_setDispatch(); |
|
752
|
0
|
|
|
|
|
|
return XXH_g_dispatch128.hashLong128_secret(input, len, secret, secretLen); |
|
753
|
|
|
|
|
|
|
} |
|
754
|
|
|
|
|
|
|
|
|
755
|
0
|
|
|
|
|
|
XXH128_hash_t XXH3_128bits_withSecret_dispatch(const void* input, size_t len, const void* secret, size_t secretLen) |
|
756
|
|
|
|
|
|
|
{ |
|
757
|
0
|
|
|
|
|
|
return XXH3_128bits_internal(input, len, 0, secret, secretLen, XXH3_hashLong_128b_withSecret_selection); |
|
758
|
|
|
|
|
|
|
} |
|
759
|
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
XXH_errorcode |
|
761
|
0
|
|
|
|
|
|
XXH3_128bits_update_dispatch(XXH3_state_t* state, const void* input, size_t len) |
|
762
|
|
|
|
|
|
|
{ |
|
763
|
0
|
0
|
|
|
|
|
if (XXH_g_dispatch128.update == NULL) XXH_setDispatch(); |
|
764
|
0
|
|
|
|
|
|
return XXH_g_dispatch128.update(state, (const xxh_u8*)input, len); |
|
765
|
|
|
|
|
|
|
} |
|
766
|
|
|
|
|
|
|
|
|
767
|
|
|
|
|
|
|
#if defined (__cplusplus) |
|
768
|
|
|
|
|
|
|
} |
|
769
|
|
|
|
|
|
|
#endif |
|
770
|
|
|
|
|
|
|
/*! @} */ |