/*
 * libecb - http://software.schmorp.de/pkg/libecb
 *
 * Copyright (©) 2009-2015,2018-2021 Marc Alexander Lehmann
 * Copyright (©) 2011 Emanuele Giaquinta
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modifica-
 * tion, are permitted provided that the following conditions are met:
 *
 *   1.  Redistributions of source code must retain the above copyright notice,
 *       this list of conditions and the following disclaimer.
 *
 *   2.  Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
 * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
 * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
 * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Alternatively, the contents of this file may be used under the terms of
 * the GNU General Public License ("GPL") version 2 or any later version,
 * in which case the provisions of the GPL are applicable instead of
 * the above. If you wish to allow the use of your version of this file
 * only under the terms of the GPL and not to allow others to use your
 * version of this file under the BSD license, indicate your decision
 * by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL. If you do not delete the
 * provisions above, a recipient may use your version of this file under
 * either the BSD or the GPL.
 */

41
|
|
|
|
|
|
|
#ifndef ECB_H |
42
|
|
|
|
|
|
|
#define ECB_H |
43
|
|
|
|
|
|
|
|
/*
 * Library version: 16 bits major in the upper half, 16 bits minor in the
 * lower half (0x00010009 is major 1, minor 9).
 */
#define ECB_VERSION 0x00010009

#include <string.h> /* for memcpy */

/*
 * Fixed-width integer types and ECB_PTRSIZE (size of a pointer in bytes,
 * either 4 or 8).
 *
 * On win32 targets other than MinGW the types are declared by hand and the
 * pointer size follows _WIN64. Everywhere else the types come from
 * <inttypes.h> and the pointer size is derived from INTPTR_MAX (or from
 * ULONG_MAX when INTPTR_MAX is not defined).
 */
#if defined (_WIN32) && !defined (__MINGW32__)
  typedef   signed char   int8_t;
  typedef unsigned char  uint8_t;
  typedef   signed char   int_fast8_t;
  typedef unsigned char  uint_fast8_t;
  typedef   signed short  int16_t;
  typedef unsigned short uint16_t;
  typedef   signed int    int_fast16_t;
  typedef unsigned int   uint_fast16_t;
  typedef   signed int    int32_t;
  typedef unsigned int   uint32_t;
  typedef   signed int    int_fast32_t;
  typedef unsigned int   uint_fast32_t;
  #if __GNUC__
    typedef   signed long long int64_t;
    typedef unsigned long long uint64_t;
  #else /* _MSC_VER || __BORLANDC__ */
    typedef   signed __int64   int64_t;
    typedef unsigned __int64   uint64_t;
  #endif
  typedef  int64_t  int_fast64_t;
  typedef uint64_t uint_fast64_t;
  #ifdef _WIN64
    #define ECB_PTRSIZE 8
    typedef uint64_t uintptr_t;
    typedef  int64_t  intptr_t;
  #else
    #define ECB_PTRSIZE 4
    typedef uint32_t uintptr_t;
    typedef  int32_t  intptr_t;
  #endif
#else
  #include <inttypes.h>
  #if (defined INTPTR_MAX ? INTPTR_MAX : ULONG_MAX) > 0xffffffffU
    #define ECB_PTRSIZE 8
  #else
    #define ECB_PTRSIZE 4
  #endif
#endif

/*
 * amd64 detection via the gcc-style and msvc-style predefined macros.
 * These expand to bare identifiers, so they are only meaningful inside
 * preprocessor conditionals.
 */
#define ECB_GCC_AMD64 (__amd64 || __amd64__ || __x86_64 || __x86_64__)
#define ECB_MSVC_AMD64 (_M_AMD64 || _M_X64)

/* ECB_OPTIMIZE_SIZE may be predefined; otherwise it mirrors __OPTIMIZE_SIZE__ as 0 or 1 */
#ifndef ECB_OPTIMIZE_SIZE
  #if __OPTIMIZE_SIZE__
    #define ECB_OPTIMIZE_SIZE 1
  #else
    #define ECB_OPTIMIZE_SIZE 0
  #endif
#endif

/* work around x32 idiocy by defining proper macros: */
/* exactly one of ECB_AMD64_X32 (_ILP32 set) or ECB_AMD64 is defined on amd64 */
#if ECB_GCC_AMD64 || ECB_MSVC_AMD64
  #if _ILP32
    #define ECB_AMD64_X32 1
  #else
    #define ECB_AMD64 1
  #endif
#endif

/* ECB_64BIT_NATIVE is 1 when pointers are at least 8 bytes or the target is amd64 x32, else 0 */
#if ECB_PTRSIZE >= 8 || ECB_AMD64_X32
  #define ECB_64BIT_NATIVE 1
#else
  #define ECB_64BIT_NATIVE 0
#endif

/* many compilers define __GNUC__ to some versions but then only implement
 * what their idiot authors think are the "more important" extensions,
 * causing enormous grief in return for some better fake benchmark numbers.
 * or so.
 * we try to detect these and simply assume they are not gcc - if they have
 * an issue with that they should have done it right in the first place.
 *
 * ECB_GCC_VERSION(major,minor) is nonzero only for a real gcc of at least
 * that version; for icc, sun studio, llvm and clang it is always 0.
 */
#if !defined __GNUC_MINOR__ || defined __INTEL_COMPILER || defined __SUNPRO_C || defined __SUNPRO_CC || defined __llvm__ || defined __clang__
  #define ECB_GCC_VERSION(major,minor) 0
#else
  #define ECB_GCC_VERSION(major,minor) (__GNUC__ > (major) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
#endif

/* clang version test; only meaningful inside preprocessor conditionals */
#define ECB_CLANG_VERSION(major,minor) (__clang_major__ > (major) || (__clang_major__ == (major) && __clang_minor__ >= (minor)))

/* ECB_CLANG_BUILTIN(x): __has_builtin (x) on clang, 0 everywhere else */
#if __clang__ && defined __has_builtin
  #define ECB_CLANG_BUILTIN(x) __has_builtin (x)
#else
  #define ECB_CLANG_BUILTIN(x) 0
#endif

/* ECB_CLANG_EXTENSION(x): __has_extension (x) on clang, 0 everywhere else */
#if __clang__ && defined __has_extension
  #define ECB_CLANG_EXTENSION(x) __has_extension (x)
#else
  #define ECB_CLANG_EXTENSION(x) 0
#endif

/* C++ dialect tests, based on __cplusplus (the +0 keeps ECB_CPP valid in #if when compiling C) */
#define ECB_CPP   (__cplusplus+0)
#define ECB_CPP11 (__cplusplus >= 201103L)
#define ECB_CPP14 (__cplusplus >= 201402L)
#define ECB_CPP17 (__cplusplus >= 201703L)

/* ECB_C is 1 for C and 0 for C++; ECB_STDC_VERSION is forced to 0 under C++ */
#if ECB_CPP
  #define ECB_C            0
  #define ECB_STDC_VERSION 0
#else
  #define ECB_C            1
  #define ECB_STDC_VERSION __STDC_VERSION__
#endif

/* C dialect tests, always false under C++ */
#define ECB_C99 (ECB_STDC_VERSION >= 199901L)
#define ECB_C11 (ECB_STDC_VERSION >= 201112L)
#define ECB_C17 (ECB_STDC_VERSION >= 201710L)

/* linkage helpers: extern "C" (and a brace pair) for C++, plain extern (and nothing) for C */
#if ECB_CPP
  #define ECB_EXTERN_C extern "C"
  #define ECB_EXTERN_C_BEG ECB_EXTERN_C {
  #define ECB_EXTERN_C_END }
#else
  #define ECB_EXTERN_C extern
  #define ECB_EXTERN_C_BEG
  #define ECB_EXTERN_C_END
#endif

/*****************************************************************************/

/* ECB_NO_THREADS - ecb is not used by multiple threads, ever */
/* ECB_NO_SMP     - ecb might be used in multiple threads, but only on a single cpu */

/* no threads at all implies no smp */
#if ECB_NO_THREADS
  #define ECB_NO_SMP 1
#endif

/* without smp the full fence is an empty statement */
#if ECB_NO_SMP
  #define ECB_MEMORY_FENCE do { } while (0)
#endif

/* http://www-01.ibm.com/support/knowledgecenter/SSGH3R_13.1.0/com.ibm.xlcpp131.aix.doc/compiler_ref/compiler_builtins.html */
#if __xlC__ && ECB_CPP
  #include <builtins.h>
#endif

#if 1400 <= _MSC_VER
  #include <intrin.h> /* fence functions _ReadBarrier, also bit search functions _BitScanReverse */
#endif

/*
 * Memory fences implemented with inline assembly, selected per cpu family.
 * Only compilers that understand gcc-style asm are considered. Architectures
 * that define only ECB_MEMORY_FENCE get their acquire/release variants from
 * the generic fallbacks defined after all fence implementations.
 */
#ifndef ECB_MEMORY_FENCE
  #if ECB_GCC_VERSION(2,5) || defined __INTEL_COMPILER || (__llvm__ && __GNUC__) || __SUNPRO_C >= 0x5110 || __SUNPRO_CC >= 0x5110
    /* the relaxed fence is a pure compiler barrier */
    #define ECB_MEMORY_FENCE_RELAXED   __asm__ __volatile__ ("" : : : "memory")
    #if __i386 || __i386__
      #define ECB_MEMORY_FENCE         __asm__ __volatile__ ("lock; orb $0, -1(%%esp)" : : : "memory")
      #define ECB_MEMORY_FENCE_ACQUIRE __asm__ __volatile__ (""                        : : : "memory")
      #define ECB_MEMORY_FENCE_RELEASE __asm__ __volatile__ (""                        : : : "memory")
    #elif ECB_GCC_AMD64
      #define ECB_MEMORY_FENCE         __asm__ __volatile__ ("mfence"   : : : "memory")
      #define ECB_MEMORY_FENCE_ACQUIRE __asm__ __volatile__ (""         : : : "memory")
      #define ECB_MEMORY_FENCE_RELEASE __asm__ __volatile__ (""         : : : "memory")
    #elif __powerpc__ || __ppc__ || __powerpc64__ || __ppc64__
      #define ECB_MEMORY_FENCE         __asm__ __volatile__ ("sync"     : : : "memory")
    #elif defined __ARM_ARCH_2__ \
      || defined __ARM_ARCH_3__  || defined __ARM_ARCH_3M__  \
      || defined __ARM_ARCH_4__  || defined __ARM_ARCH_4T__  \
      || defined __ARM_ARCH_5__  || defined __ARM_ARCH_5E__  \
      || defined __ARM_ARCH_5T__ || defined __ARM_ARCH_5TE__ \
      || defined __ARM_ARCH_5TEJ__
      /* should not need any, unless running old code on newer cpu - arm doesn't support that */
    #elif defined __ARM_ARCH_6__  || defined __ARM_ARCH_6J__  \
       || defined __ARM_ARCH_6K__ || defined __ARM_ARCH_6ZK__ \
       || defined __ARM_ARCH_6T2__
      #define ECB_MEMORY_FENCE         __asm__ __volatile__ ("mcr p15,0,%0,c7,c10,5" : : "r" (0) : "memory")
    #elif defined __ARM_ARCH_7__  || defined __ARM_ARCH_7A__  \
       || defined __ARM_ARCH_7R__ || defined __ARM_ARCH_7M__
      #define ECB_MEMORY_FENCE         __asm__ __volatile__ ("dmb"      : : : "memory")
    #elif __aarch64__
      #define ECB_MEMORY_FENCE         __asm__ __volatile__ ("dmb ish"  : : : "memory")
    #elif (__sparc || __sparc__) && !(__sparc_v8__ || defined __sparcv8)
      #define ECB_MEMORY_FENCE         __asm__ __volatile__ ("membar #LoadStore | #LoadLoad | #StoreStore | #StoreLoad" : : : "memory")
      #define ECB_MEMORY_FENCE_ACQUIRE __asm__ __volatile__ ("membar #LoadStore | #LoadLoad"                            : : : "memory")
      /* "memory" clobber added so the compiler cannot move stores across the release fence either */
      #define ECB_MEMORY_FENCE_RELEASE __asm__ __volatile__ ("membar #LoadStore             | #StoreStore"              : : : "memory")
    #elif defined __s390__ || defined __s390x__
      #define ECB_MEMORY_FENCE         __asm__ __volatile__ ("bcr 15,0" : : : "memory")
    #elif defined __mips__
      /* GNU/Linux emulates sync on mips1 architectures, so we force its use */
      /* anybody else who still uses mips1 is supposed to send in their version, with detection code. */
      #define ECB_MEMORY_FENCE         __asm__ __volatile__ (".set mips2; sync; .set mips0" : : : "memory")
    #elif defined __alpha__
      #define ECB_MEMORY_FENCE         __asm__ __volatile__ ("mb"       : : : "memory")
    #elif defined __hppa__
      #define ECB_MEMORY_FENCE         __asm__ __volatile__ (""         : : : "memory")
      /* "memory" clobber added: without it this asm does not stop the compiler reordering stores */
      #define ECB_MEMORY_FENCE_RELEASE __asm__ __volatile__ (""         : : : "memory")
    #elif defined __ia64__
      #define ECB_MEMORY_FENCE         __asm__ __volatile__ ("mf"       : : : "memory")
    #elif defined __m68k__
      #define ECB_MEMORY_FENCE         __asm__ __volatile__ (""         : : : "memory")
    #elif defined __m88k__
      #define ECB_MEMORY_FENCE         __asm__ __volatile__ ("tb1 0,%%r0,128" : : : "memory")
    #elif defined __sh__
      #define ECB_MEMORY_FENCE         __asm__ __volatile__ (""         : : : "memory")
    #endif
  #endif
#endif

/*
 * Memory fences implemented with compiler builtins / vendor intrinsics,
 * used when no inline-assembly fence was selected.
 */
#ifndef ECB_MEMORY_FENCE
  #if ECB_GCC_VERSION(4,7)
    /* see comment below (stdatomic.h) about the C11 memory model. */
    #define ECB_MEMORY_FENCE         __atomic_thread_fence (__ATOMIC_SEQ_CST)
    #define ECB_MEMORY_FENCE_ACQUIRE __atomic_thread_fence (__ATOMIC_ACQUIRE)
    #define ECB_MEMORY_FENCE_RELEASE __atomic_thread_fence (__ATOMIC_RELEASE)
    #undef ECB_MEMORY_FENCE_RELAXED
    #define ECB_MEMORY_FENCE_RELAXED __atomic_thread_fence (__ATOMIC_RELAXED)

  #elif ECB_CLANG_EXTENSION(c_atomic)
    /* see comment below (stdatomic.h) about the C11 memory model. */
    #define ECB_MEMORY_FENCE         __c11_atomic_thread_fence (__ATOMIC_SEQ_CST)
    #define ECB_MEMORY_FENCE_ACQUIRE __c11_atomic_thread_fence (__ATOMIC_ACQUIRE)
    #define ECB_MEMORY_FENCE_RELEASE __c11_atomic_thread_fence (__ATOMIC_RELEASE)
    #undef ECB_MEMORY_FENCE_RELAXED
    #define ECB_MEMORY_FENCE_RELAXED __c11_atomic_thread_fence (__ATOMIC_RELAXED)

  #elif ECB_GCC_VERSION(4,4) || defined __INTEL_COMPILER || defined __clang__
    #define ECB_MEMORY_FENCE         __sync_synchronize ()
  #elif _MSC_VER >= 1500 /* VC++ 2008 */
    /* apparently, microsoft broke all the memory barrier stuff in Visual Studio 2008... */
    #pragma intrinsic(_ReadBarrier,_WriteBarrier,_ReadWriteBarrier)
    /* two calls each: wrapped in do/while so the fence stays a single statement (e.g. after an unbraced if) */
    #define ECB_MEMORY_FENCE         do { _ReadWriteBarrier (); MemoryBarrier (); } while (0)
    #define ECB_MEMORY_FENCE_ACQUIRE do { _ReadWriteBarrier (); MemoryBarrier (); } while (0) /* according to msdn, _ReadBarrier is not a load fence */
    #define ECB_MEMORY_FENCE_RELEASE do { _WriteBarrier (); MemoryBarrier (); } while (0)
  #elif _MSC_VER >= 1400 /* VC++ 2005 */
    #pragma intrinsic(_ReadBarrier,_WriteBarrier,_ReadWriteBarrier)
    #define ECB_MEMORY_FENCE         _ReadWriteBarrier ()
    #define ECB_MEMORY_FENCE_ACQUIRE _ReadWriteBarrier () /* according to msdn, _ReadBarrier is not a load fence */
    #define ECB_MEMORY_FENCE_RELEASE _WriteBarrier ()
  #elif defined _WIN32
    #include <windows.h>
    #define ECB_MEMORY_FENCE         MemoryBarrier () /* actually just xchg on x86... scary */
  #elif __SUNPRO_C >= 0x5110 || __SUNPRO_CC >= 0x5110
    #include <mbarrier.h>
    #define ECB_MEMORY_FENCE         __machine_rw_barrier  ()
    #define ECB_MEMORY_FENCE_ACQUIRE __machine_acq_barrier ()
    #define ECB_MEMORY_FENCE_RELEASE __machine_rel_barrier ()
    #define ECB_MEMORY_FENCE_RELAXED __compiler_barrier    ()
  #elif __xlC__
    #define ECB_MEMORY_FENCE         __sync ()
  #endif
#endif

/* memory fences from C11 <stdatomic.h>, when nothing more specific was selected */
#ifndef ECB_MEMORY_FENCE
  #if ECB_C11 && !defined __STDC_NO_ATOMICS__
    /* we assume that these memory fences work on all variables/all memory accesses, */
    /* not just C11 atomics and atomic accesses */
    #include <stdatomic.h>
    #define ECB_MEMORY_FENCE         atomic_thread_fence (memory_order_seq_cst)
    #define ECB_MEMORY_FENCE_ACQUIRE atomic_thread_fence (memory_order_acquire)
    #define ECB_MEMORY_FENCE_RELEASE atomic_thread_fence (memory_order_release)
  #endif
#endif

/*
 * Last-resort memory fence: lock and immediately unlock a file-local pthread
 * mutex. Skipped when ECB_AVOID_PTHREADS is set, in which case
 * ECB_MEMORY_FENCE may stay undefined.
 */
#ifndef ECB_MEMORY_FENCE
  #if !ECB_AVOID_PTHREADS
    /*
     * if you get undefined symbol references to pthread_mutex_lock,
     * or failure to find pthread.h, then you should implement
     * the ECB_MEMORY_FENCE operations for your cpu/compiler
     * OR provide pthread.h and link against the posix thread library
     * of your system.
     */
    #include <pthread.h>
    #define ECB_NEEDS_PTHREADS 1
    #define ECB_MEMORY_FENCE_NEEDS_PTHREADS 1

    static pthread_mutex_t ecb_mf_lock = PTHREAD_MUTEX_INITIALIZER;
    #define ECB_MEMORY_FENCE do { pthread_mutex_lock (&ecb_mf_lock); pthread_mutex_unlock (&ecb_mf_lock); } while (0)
  #endif
#endif

/* any fence flavour not provided above falls back to the full fence, if there is one */
#if !defined ECB_MEMORY_FENCE_ACQUIRE && defined ECB_MEMORY_FENCE
  #define ECB_MEMORY_FENCE_ACQUIRE ECB_MEMORY_FENCE
#endif

#if !defined ECB_MEMORY_FENCE_RELEASE && defined ECB_MEMORY_FENCE
  #define ECB_MEMORY_FENCE_RELEASE ECB_MEMORY_FENCE
#endif

#if !defined ECB_MEMORY_FENCE_RELAXED && defined ECB_MEMORY_FENCE
  #define ECB_MEMORY_FENCE_RELAXED ECB_MEMORY_FENCE /* very heavy-handed */
#endif

/*****************************************************************************/

/* ecb_inline: "static inline" in whatever spelling the compiler accepts, plain "static" otherwise */
#if ECB_CPP
  #define ecb_inline static inline
#elif ECB_GCC_VERSION(2,5)
  #define ecb_inline static __inline__
#elif ECB_C99
  #define ecb_inline static inline
#else
  #define ecb_inline static
#endif

/* ecb_restrict: the restrict qualifier where available, empty otherwise */
#if ECB_GCC_VERSION(3,3)
  #define ecb_restrict __restrict__
#elif ECB_C99
  #define ecb_restrict restrict
#else
  #define ecb_restrict
#endif

/* boolean result type used by the ecb predicates */
typedef int ecb_bool;

/* token pasting and stringification; the non-underscore forms macro-expand their arguments first */
#define ECB_CONCAT_(a, b) a ## b
#define ECB_CONCAT(a, b) ECB_CONCAT_(a, b)
#define ECB_STRINGIFY_(a) # a
#define ECB_STRINGIFY(a) ECB_STRINGIFY_(a)
/* evaluates expr (via the comma operator) and yields its unexpanded source text as a string */
#define ECB_STRINGIFY_EXPR(expr) ((expr), ECB_STRINGIFY_ (expr))

/* storage class/specifier prefix used for every function defined in this header */
#define ecb_function_ ecb_inline

/* ecb_attribute ((list)): gcc-style __attribute__ where supported, nothing otherwise */
#if ECB_GCC_VERSION(3,1) || ECB_CLANG_VERSION(2,8)
  #define ecb_attribute(attrlist)        __attribute__ (attrlist)
#else
  #define ecb_attribute(attrlist)
#endif

/* ecb_is_constant (expr): __builtin_constant_p where available, otherwise always 0 */
#if ECB_GCC_VERSION(3,1) || ECB_CLANG_BUILTIN(__builtin_constant_p)
  #define ecb_is_constant(expr)          __builtin_constant_p (expr)
#else
  /* possible C11 impl for integral types
  typedef struct ecb_is_constant_struct ecb_is_constant_struct;
  #define ecb_is_constant(expr)          _Generic ((1 ? (struct ecb_is_constant_struct *)0 : (void *)((expr) - (expr)), ecb_is_constant_struct *: 0, default: 1)) */

  #define ecb_is_constant(expr)          0
#endif

/* ecb_expect (expr,value): branch prediction hint, the fallback just yields expr */
#if ECB_GCC_VERSION(3,1) || ECB_CLANG_BUILTIN(__builtin_expect)
  #define ecb_expect(expr,value)         __builtin_expect ((expr),(value))
#else
  #define ecb_expect(expr,value)         (expr)
#endif

/* ecb_prefetch (addr,rw,locality): __builtin_prefetch, the fallback expands to nothing */
#if ECB_GCC_VERSION(3,1) || ECB_CLANG_BUILTIN(__builtin_prefetch)
  #define ecb_prefetch(addr,rw,locality) __builtin_prefetch (addr, rw, locality)
#else
  #define ecb_prefetch(addr,rw,locality)
#endif

/* ecb_decltype (x): the type of expression x. */
/* no emulation for ecb_decltype - it stays undefined when neither branch applies */
#if ECB_CPP11
  // older implementations might have problems with decltype(x)::type, work around it
  template<class T> struct ecb_decltype_t { typedef T type; };
  #define ecb_decltype(x) ecb_decltype_t<decltype (x)>::type
#elif ECB_GCC_VERSION(3,0) || ECB_CLANG_VERSION(2,8)
  #define ecb_decltype(x) __typeof__ (x)
#endif

/* ecb_deprecated: marks a declaration as deprecated */
#if _MSC_VER >= 1300
  #define ecb_deprecated __declspec (deprecated)
#else
  #define ecb_deprecated ecb_attribute ((__deprecated__))
#endif

/* ecb_deprecated_message (msg): like ecb_deprecated, with a message where the compiler supports one */
#if _MSC_VER >= 1500
  #define ecb_deprecated_message(msg) __declspec (deprecated (msg))
#elif ECB_GCC_VERSION(4,5)
  /* the parentheses must balance: ecb_attribute ( ( __deprecated__ ( msg ) ) ) */
  #define ecb_deprecated_message(msg) ecb_attribute ((__deprecated__ (msg)))
#else
  #define ecb_deprecated_message(msg) ecb_deprecated
#endif

/* ecb_noinline: asks the compiler not to inline the function */
#if _MSC_VER >= 1400
  #define ecb_noinline __declspec (noinline)
#else
  #define ecb_noinline ecb_attribute ((__noinline__))
#endif

/* thin wrappers around the gcc attributes of the same name */
#define ecb_unused     ecb_attribute ((__unused__))
#define ecb_const      ecb_attribute ((__const__))
#define ecb_pure       ecb_attribute ((__pure__))

/* ecb_noreturn: the function never returns, spelled per language/compiler */
#if ECB_C11 || __IBMC_NORETURN
  /* http://www-01.ibm.com/support/knowledgecenter/SSGH3R_13.1.0/com.ibm.xlcpp131.aix.doc/language_ref/noreturn.html */
  #define ecb_noreturn   _Noreturn
#elif ECB_CPP11
  #define ecb_noreturn   [[noreturn]]
#elif _MSC_VER >= 1200
  /* http://msdn.microsoft.com/en-us/library/k6ktzx3s.aspx */
  #define ecb_noreturn   __declspec (noreturn)
#else
  #define ecb_noreturn   ecb_attribute ((__noreturn__))
#endif

/* these attributes are only enabled for gcc 4.3 or newer, and are empty elsewhere */
#if ECB_GCC_VERSION(4,3)
  #define ecb_artificial ecb_attribute ((__artificial__))
  #define ecb_hot        ecb_attribute ((__hot__))
  #define ecb_cold       ecb_attribute ((__cold__))
#else
  #define ecb_artificial
  #define ecb_hot
  #define ecb_cold
#endif

/* put around conditional expressions if you are very sure that the  */
/* expression is mostly true or mostly false. note that these return */
/* booleans (the expression is normalised with !!), not the expression. */
#define ecb_expect_false(expr) ecb_expect (!!(expr), 0)
#define ecb_expect_true(expr)  ecb_expect (!!(expr), 1)
/* for compatibility to the rest of the world */
#define ecb_likely(expr)   ecb_expect_true  (expr)
#define ecb_unlikely(expr) ecb_expect_false (expr)

454
|
|
|
|
|
|
|
/* count trailing zero bits and count # of one bits */ |
455
|
|
|
|
|
|
|
#if ECB_GCC_VERSION(3,4) \ |
456
|
|
|
|
|
|
|
|| (ECB_CLANG_BUILTIN(__builtin_clz) && ECB_CLANG_BUILTIN(__builtin_clzll) \ |
457
|
|
|
|
|
|
|
&& ECB_CLANG_BUILTIN(__builtin_ctz) && ECB_CLANG_BUILTIN(__builtin_ctzll) \ |
458
|
|
|
|
|
|
|
&& ECB_CLANG_BUILTIN(__builtin_popcount)) |
/* we assume int == 32 bit, long == 32 or 64 bit and long long == 64 bit */
/* ld = index of the highest set bit, ctz = number of trailing zero bits; */
/* clz ^ 31 (resp. ^ 63) turns a leading-zero count into that bit index */
#define ecb_ld32(x)       (__builtin_clz   (x) ^ 31)
#define ecb_ld64(x)       (__builtin_clzll (x) ^ 63)
#define ecb_ctz32(x)      __builtin_ctz      (x)
#define ecb_ctz64(x)      __builtin_ctzll    (x)
#define ecb_popcount32(x) __builtin_popcount (x)
/* no popcountll */
466
|
|
|
|
|
|
|
#else |
467
|
|
|
|
|
|
|
ecb_function_ ecb_const int ecb_ctz32 (uint32_t x); |
468
|
|
|
|
|
|
|
ecb_function_ ecb_const int |
469
|
|
|
|
|
|
|
ecb_ctz32 (uint32_t x) |
470
|
|
|
|
|
|
|
{ |
471
|
|
|
|
|
|
|
#if 1400 <= _MSC_VER && (_M_IX86 || _M_X64 || _M_IA64 || _M_ARM) |
472
|
|
|
|
|
|
|
unsigned long r; |
473
|
|
|
|
|
|
|
_BitScanForward (&r, x); |
474
|
|
|
|
|
|
|
return (int)r; |
475
|
|
|
|
|
|
|
#else |
476
|
|
|
|
|
|
|
int r = 0; |
477
|
|
|
|
|
|
|
|
478
|
|
|
|
|
|
|
x &= ~x + 1; /* this isolates the lowest bit */ |
479
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
#if ECB_branchless_on_i386 |
481
|
|
|
|
|
|
|
r += !!(x & 0xaaaaaaaa) << 0; |
482
|
|
|
|
|
|
|
r += !!(x & 0xcccccccc) << 1; |
483
|
|
|
|
|
|
|
r += !!(x & 0xf0f0f0f0) << 2; |
484
|
|
|
|
|
|
|
r += !!(x & 0xff00ff00) << 3; |
485
|
|
|
|
|
|
|
r += !!(x & 0xffff0000) << 4; |
486
|
|
|
|
|
|
|
#else |
487
|
|
|
|
|
|
|
if (x & 0xaaaaaaaa) r += 1; |
488
|
|
|
|
|
|
|
if (x & 0xcccccccc) r += 2; |
489
|
|
|
|
|
|
|
if (x & 0xf0f0f0f0) r += 4; |
490
|
|
|
|
|
|
|
if (x & 0xff00ff00) r += 8; |
491
|
|
|
|
|
|
|
if (x & 0xffff0000) r += 16; |
492
|
|
|
|
|
|
|
#endif |
493
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
return r; |
495
|
|
|
|
|
|
|
#endif |
496
|
|
|
|
|
|
|
} |
497
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
ecb_function_ ecb_const int ecb_ctz64 (uint64_t x); |
499
|
|
|
|
|
|
|
ecb_function_ ecb_const int |
500
|
|
|
|
|
|
|
ecb_ctz64 (uint64_t x) |
501
|
|
|
|
|
|
|
{ |
502
|
|
|
|
|
|
|
#if 1400 <= _MSC_VER && (_M_X64 || _M_IA64 || _M_ARM) |
503
|
|
|
|
|
|
|
unsigned long r; |
504
|
|
|
|
|
|
|
_BitScanForward64 (&r, x); |
505
|
|
|
|
|
|
|
return (int)r; |
506
|
|
|
|
|
|
|
#else |
507
|
|
|
|
|
|
|
int shift = x & 0xffffffff ? 0 : 32; |
508
|
|
|
|
|
|
|
return ecb_ctz32 (x >> shift) + shift; |
509
|
|
|
|
|
|
|
#endif |
510
|
|
|
|
|
|
|
} |
511
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
ecb_function_ ecb_const int ecb_popcount32 (uint32_t x); |
513
|
|
|
|
|
|
|
ecb_function_ ecb_const int |
514
|
|
|
|
|
|
|
ecb_popcount32 (uint32_t x) |
515
|
|
|
|
|
|
|
{ |
516
|
|
|
|
|
|
|
x -= (x >> 1) & 0x55555555; |
517
|
|
|
|
|
|
|
x = ((x >> 2) & 0x33333333) + (x & 0x33333333); |
518
|
|
|
|
|
|
|
x = ((x >> 4) + x) & 0x0f0f0f0f; |
519
|
|
|
|
|
|
|
x *= 0x01010101; |
520
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
return x >> 24; |
522
|
|
|
|
|
|
|
} |
523
|
|
|
|
|
|
|
|
524
|
|
|
|
|
|
|
ecb_function_ ecb_const int ecb_ld32 (uint32_t x); |
525
|
|
|
|
|
|
|
ecb_function_ ecb_const int ecb_ld32 (uint32_t x) |
526
|
|
|
|
|
|
|
{ |
527
|
|
|
|
|
|
|
#if 1400 <= _MSC_VER && (_M_IX86 || _M_X64 || _M_IA64 || _M_ARM) |
528
|
|
|
|
|
|
|
unsigned long r; |
529
|
|
|
|
|
|
|
_BitScanReverse (&r, x); |
530
|
|
|
|
|
|
|
return (int)r; |
531
|
|
|
|
|
|
|
#else |
532
|
|
|
|
|
|
|
int r = 0; |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
if (x >> 16) { x >>= 16; r += 16; } |
535
|
|
|
|
|
|
|
if (x >> 8) { x >>= 8; r += 8; } |
536
|
|
|
|
|
|
|
if (x >> 4) { x >>= 4; r += 4; } |
537
|
|
|
|
|
|
|
if (x >> 2) { x >>= 2; r += 2; } |
538
|
|
|
|
|
|
|
if (x >> 1) { r += 1; } |
539
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
return r; |
541
|
|
|
|
|
|
|
#endif |
542
|
|
|
|
|
|
|
} |
543
|
|
|
|
|
|
|
|
544
|
|
|
|
|
|
|
ecb_function_ ecb_const int ecb_ld64 (uint64_t x); |
545
|
|
|
|
|
|
|
ecb_function_ ecb_const int ecb_ld64 (uint64_t x) |
546
|
|
|
|
|
|
|
{ |
547
|
|
|
|
|
|
|
#if 1400 <= _MSC_VER && (_M_X64 || _M_IA64 || _M_ARM) |
548
|
|
|
|
|
|
|
unsigned long r; |
549
|
|
|
|
|
|
|
_BitScanReverse64 (&r, x); |
550
|
|
|
|
|
|
|
return (int)r; |
551
|
|
|
|
|
|
|
#else |
552
|
|
|
|
|
|
|
int r = 0; |
553
|
|
|
|
|
|
|
|
554
|
|
|
|
|
|
|
if (x >> 32) { x >>= 32; r += 32; } |
555
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
return r + ecb_ld32 (x); |
557
|
|
|
|
|
|
|
#endif |
558
|
|
|
|
|
|
|
} |
559
|
|
|
|
|
|
|
#endif |
560
|
|
|
|
|
|
|
|
561
|
|
|
|
|
|
|
/* true if at most one bit is set in x, i.e. x is a power of two (or zero) */
ecb_function_ ecb_const ecb_bool ecb_is_pot32 (uint32_t x);
ecb_function_ ecb_const ecb_bool
ecb_is_pot32 (uint32_t x)
{
  /* x - 1 flips the lowest set bit and everything below it */
  return (x & (x - 1)) == 0;
}
563
|
|
|
|
|
|
|
/* true if at most one bit is set in x, i.e. x is a power of two (or zero) */
ecb_function_ ecb_const ecb_bool ecb_is_pot64 (uint64_t x);
ecb_function_ ecb_const ecb_bool
ecb_is_pot64 (uint64_t x)
{
  /* x - 1 flips the lowest set bit and everything below it */
  return (x & (x - 1)) == 0;
}
565
|
|
|
|
|
|
|
|
566
|
|
|
|
|
|
|
/* reverses the order of the 8 bits in x */
ecb_function_ ecb_const uint8_t ecb_bitrev8 (uint8_t x);
ecb_function_ ecb_const uint8_t ecb_bitrev8 (uint8_t x)
{
  /* two multiply-and-mask steps spread copies of the bits, */
  /* the final multiply gathers them, reversed, in bits 16..23 */
  unsigned int spread_a = (x * 0x0802U) & 0x22110U;
  unsigned int spread_b = (x * 0x8020U) & 0x88440U;

  return ((spread_a | spread_b) * 0x10101U) >> 16;
}
572
|
|
|
|
|
|
|
|
573
|
|
|
|
|
|
|
/* reverses the order of the 16 bits in x */
ecb_function_ ecb_const uint16_t ecb_bitrev16 (uint16_t x);
ecb_function_ ecb_const uint16_t ecb_bitrev16 (uint16_t x)
{
  unsigned int v = x;

  /* swap neighbouring bits, then bit pairs, then nibbles, then the two bytes */
  v = ((v & 0xaaaa) >> 1) | ((v & 0x5555) << 1);
  v = ((v & 0xcccc) >> 2) | ((v & 0x3333) << 2);
  v = ((v & 0xf0f0) >> 4) | ((v & 0x0f0f) << 4);
  v = ((v & 0xff00) >> 8) | ((v & 0x00ff) << 8);

  return (uint16_t)v;
}
583
|
|
|
|
|
|
|
|
584
|
|
|
|
|
|
|
/* reverses the order of the 32 bits in x */
ecb_function_ ecb_const uint32_t ecb_bitrev32 (uint32_t x);
ecb_function_ ecb_const uint32_t ecb_bitrev32 (uint32_t x)
{
  uint32_t v = x;

  /* swap ever larger groups: bits, bit pairs, nibbles, bytes and finally the halves */
  v = ((v & 0xaaaaaaaa) >>  1) | ((v & 0x55555555) <<  1);
  v = ((v & 0xcccccccc) >>  2) | ((v & 0x33333333) <<  2);
  v = ((v & 0xf0f0f0f0) >>  4) | ((v & 0x0f0f0f0f) <<  4);
  v = ((v & 0xff00ff00) >>  8) | ((v & 0x00ff00ff) <<  8);
  v = ( v               >> 16) | ( v               << 16);

  return v;
}
595
|
|
|
|
|
|
|
|
596
|
|
|
|
|
|
|
/* popcount64 is only available on 64 bit cpus as gcc builtin */ |
597
|
|
|
|
|
|
|
/* so for this version we are lazy */ |
598
|
|
|
|
|
|
|
ecb_function_ ecb_const int ecb_popcount64 (uint64_t x); |
599
|
|
|
|
|
|
|
ecb_function_ ecb_const int |
600
|
|
|
|
|
|
|
ecb_popcount64 (uint64_t x) |
601
|
|
|
|
|
|
|
{ |
602
|
|
|
|
|
|
|
return ecb_popcount32 (x) + ecb_popcount32 (x >> 32); |
603
|
|
|
|
|
|
|
} |
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
ecb_inline ecb_const uint8_t ecb_rotl8 (uint8_t x, unsigned int count); |
606
|
|
|
|
|
|
|
ecb_inline ecb_const uint8_t ecb_rotr8 (uint8_t x, unsigned int count); |
607
|
|
|
|
|
|
|
ecb_inline ecb_const uint16_t ecb_rotl16 (uint16_t x, unsigned int count); |
608
|
|
|
|
|
|
|
ecb_inline ecb_const uint16_t ecb_rotr16 (uint16_t x, unsigned int count); |
609
|
|
|
|
|
|
|
ecb_inline ecb_const uint32_t ecb_rotl32 (uint32_t x, unsigned int count); |
610
|
|
|
|
|
|
|
ecb_inline ecb_const uint32_t ecb_rotr32 (uint32_t x, unsigned int count); |
611
|
|
|
|
|
|
|
ecb_inline ecb_const uint64_t ecb_rotl64 (uint64_t x, unsigned int count); |
612
|
|
|
|
|
|
|
ecb_inline ecb_const uint64_t ecb_rotr64 (uint64_t x, unsigned int count); |
613
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
ecb_inline ecb_const uint8_t ecb_rotl8 (uint8_t x, unsigned int count) { return (x >> ( 8 - count)) | (x << count); } |
615
|
|
|
|
|
|
|
ecb_inline ecb_const uint8_t ecb_rotr8 (uint8_t x, unsigned int count) { return (x << ( 8 - count)) | (x >> count); } |
616
|
|
|
|
|
|
|
ecb_inline ecb_const uint16_t ecb_rotl16 (uint16_t x, unsigned int count) { return (x >> (16 - count)) | (x << count); } |
617
|
|
|
|
|
|
|
ecb_inline ecb_const uint16_t ecb_rotr16 (uint16_t x, unsigned int count) { return (x << (16 - count)) | (x >> count); } |
618
|
|
|
|
|
|
|
ecb_inline ecb_const uint32_t ecb_rotl32 (uint32_t x, unsigned int count) { return (x >> (32 - count)) | (x << count); } |
619
|
|
|
|
|
|
|
ecb_inline ecb_const uint32_t ecb_rotr32 (uint32_t x, unsigned int count) { return (x << (32 - count)) | (x >> count); } |
620
|
|
|
|
|
|
|
ecb_inline ecb_const uint64_t ecb_rotl64 (uint64_t x, unsigned int count) { return (x >> (64 - count)) | (x << count); } |
621
|
|
|
|
|
|
|
ecb_inline ecb_const uint64_t ecb_rotr64 (uint64_t x, unsigned int count) { return (x << (64 - count)) | (x >> count); } |
622
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
#if ECB_CPP

/* C++ only: overloads that pick the fixed-width implementation from the argument type */

inline uint8_t  ecb_ctz (uint8_t  value) { return ecb_ctz32 (value); }
inline uint16_t ecb_ctz (uint16_t value) { return ecb_ctz32 (value); }
inline uint32_t ecb_ctz (uint32_t value) { return ecb_ctz32 (value); }
inline uint64_t ecb_ctz (uint64_t value) { return ecb_ctz64 (value); }

inline bool ecb_is_pot (uint8_t  value) { return ecb_is_pot32 (value); }
inline bool ecb_is_pot (uint16_t value) { return ecb_is_pot32 (value); }
inline bool ecb_is_pot (uint32_t value) { return ecb_is_pot32 (value); }
inline bool ecb_is_pot (uint64_t value) { return ecb_is_pot64 (value); }

inline int ecb_ld (uint8_t  value) { return ecb_ld32 (value); }
inline int ecb_ld (uint16_t value) { return ecb_ld32 (value); }
inline int ecb_ld (uint32_t value) { return ecb_ld32 (value); }
inline int ecb_ld (uint64_t value) { return ecb_ld64 (value); }

inline int ecb_popcount (uint8_t  value) { return ecb_popcount32 (value); }
inline int ecb_popcount (uint16_t value) { return ecb_popcount32 (value); }
inline int ecb_popcount (uint32_t value) { return ecb_popcount32 (value); }
inline int ecb_popcount (uint64_t value) { return ecb_popcount64 (value); }

inline uint8_t  ecb_bitrev (uint8_t  value) { return ecb_bitrev8  (value); }
inline uint16_t ecb_bitrev (uint16_t value) { return ecb_bitrev16 (value); }
inline uint32_t ecb_bitrev (uint32_t value) { return ecb_bitrev32 (value); }

inline uint8_t  ecb_rotl (uint8_t  value, unsigned int count) { return ecb_rotl8  (value, count); }
inline uint16_t ecb_rotl (uint16_t value, unsigned int count) { return ecb_rotl16 (value, count); }
inline uint32_t ecb_rotl (uint32_t value, unsigned int count) { return ecb_rotl32 (value, count); }
inline uint64_t ecb_rotl (uint64_t value, unsigned int count) { return ecb_rotl64 (value, count); }

inline uint8_t  ecb_rotr (uint8_t  value, unsigned int count) { return ecb_rotr8  (value, count); }
inline uint16_t ecb_rotr (uint16_t value, unsigned int count) { return ecb_rotr16 (value, count); }
inline uint32_t ecb_rotr (uint32_t value, unsigned int count) { return ecb_rotr32 (value, count); }
inline uint64_t ecb_rotr (uint64_t value, unsigned int count) { return ecb_rotr64 (value, count); }

#endif
660
|
|
|
|
|
|
|
|
661
|
|
|
|
|
|
|
#if ECB_GCC_VERSION(4,3) || (ECB_CLANG_BUILTIN(__builtin_bswap32) && ECB_CLANG_BUILTIN(__builtin_bswap64)) |
662
|
|
|
|
|
|
|
#if ECB_GCC_VERSION(4,8) || ECB_CLANG_BUILTIN(__builtin_bswap16) |
663
|
|
|
|
|
|
|
#define ecb_bswap16(x) __builtin_bswap16 (x) |
664
|
|
|
|
|
|
|
#else |
665
|
|
|
|
|
|
|
#define ecb_bswap16(x) (__builtin_bswap32 (x) >> 16) |
666
|
|
|
|
|
|
|
#endif |
667
|
|
|
|
|
|
|
#define ecb_bswap32(x) __builtin_bswap32 (x) |
668
|
|
|
|
|
|
|
#define ecb_bswap64(x) __builtin_bswap64 (x) |
669
|
|
|
|
|
|
|
#elif _MSC_VER |
670
|
|
|
|
|
|
|
#include |
671
|
|
|
|
|
|
|
#define ecb_bswap16(x) ((uint16_t)_byteswap_ushort ((uint16_t)(x))) |
672
|
|
|
|
|
|
|
#define ecb_bswap32(x) ((uint32_t)_byteswap_ulong ((uint32_t)(x))) |
673
|
|
|
|
|
|
|
#define ecb_bswap64(x) ((uint64_t)_byteswap_uint64 ((uint64_t)(x))) |
674
|
|
|
|
|
|
|
#else |
675
|
|
|
|
|
|
|
ecb_function_ ecb_const uint16_t ecb_bswap16 (uint16_t x); |
676
|
|
|
|
|
|
|
ecb_function_ ecb_const uint16_t |
677
|
|
|
|
|
|
|
ecb_bswap16 (uint16_t x) |
678
|
|
|
|
|
|
|
{ |
679
|
|
|
|
|
|
|
return ecb_rotl16 (x, 8); |
680
|
|
|
|
|
|
|
} |
681
|
|
|
|
|
|
|
|
682
|
|
|
|
|
|
|
ecb_function_ ecb_const uint32_t ecb_bswap32 (uint32_t x); |
683
|
|
|
|
|
|
|
ecb_function_ ecb_const uint32_t |
684
|
|
|
|
|
|
|
ecb_bswap32 (uint32_t x) |
685
|
|
|
|
|
|
|
{ |
686
|
|
|
|
|
|
|
return (((uint32_t)ecb_bswap16 (x)) << 16) | ecb_bswap16 (x >> 16); |
687
|
|
|
|
|
|
|
} |
688
|
|
|
|
|
|
|
|
689
|
|
|
|
|
|
|
ecb_function_ ecb_const uint64_t ecb_bswap64 (uint64_t x); |
690
|
|
|
|
|
|
|
ecb_function_ ecb_const uint64_t |
691
|
|
|
|
|
|
|
ecb_bswap64 (uint64_t x) |
692
|
|
|
|
|
|
|
{ |
693
|
|
|
|
|
|
|
return (((uint64_t)ecb_bswap32 (x)) << 32) | ecb_bswap32 (x >> 32); |
694
|
|
|
|
|
|
|
} |
695
|
|
|
|
|
|
|
#endif |
696
|
|
|
|
|
|
|
|
697
|
|
|
|
|
|
|
#if ECB_GCC_VERSION(4,5) || ECB_CLANG_BUILTIN(__builtin_unreachable) |
698
|
|
|
|
|
|
|
#define ecb_unreachable() __builtin_unreachable () |
699
|
|
|
|
|
|
|
#else |
700
|
|
|
|
|
|
|
/* this seems to work fine, but gcc always emits a warning for it :/ */ |
701
|
|
|
|
|
|
|
ecb_inline ecb_noreturn void ecb_unreachable (void); |
702
|
|
|
|
|
|
|
ecb_inline ecb_noreturn void ecb_unreachable (void) { } |
703
|
|
|
|
|
|
|
#endif |
704
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
/* try to tell the compiler that some condition is definitely true: */
/* the path on which cond is false is declared unreachable. */
/* must be used as a statement, followed by a semicolon. */
#define ecb_assume(cond) do { if (!(cond)) ecb_unreachable (); } while (0)
707
|
|
|
|
|
|
|
|
708
|
|
|
|
|
|
|
/*
 * returns 0x44332211 on little endian and 0x11223344 on big endian hosts,
 * i.e. the value of the bytes 0x11, 0x22, 0x33, 0x44 (in memory order)
 * when read as one 32 bit word.
 * as a side effect, ECB_LITTLE_ENDIAN or ECB_BIG_ENDIAN gets defined
 * when the byte order is known at preprocessing time.
 */
ecb_inline ecb_const uint32_t ecb_byteorder_helper (void);
ecb_inline ecb_const uint32_t
ecb_byteorder_helper (void)
{
  /* the union code still generates code under pressure in gcc, */
  /* but less than using pointers, and always seems to */
  /* successfully return a constant. */
  /* the preprocessor tests below exist to avoid the union entirely */
  /* on common architectures, or with a recent enough gcc (>= 4.6). */
#if (defined __BYTE_ORDER__ && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) \
    || ((__i386 || __i386__ || _M_IX86 || ECB_GCC_AMD64 || ECB_MSVC_AMD64) && !__VOS__)
  #define ECB_LITTLE_ENDIAN 1
  return 0x44332211;
#elif (defined __BYTE_ORDER__ && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) \
    || ((__AARCH64EB__ || __MIPSEB__ || __ARMEB__) && !__VOS__)
  #define ECB_BIG_ENDIAN 1
  return 0x11223344;
#else
  /* unknown at preprocessing time: store four known bytes, read them back as a word */
  union
  {
    uint8_t  bytes[4];
    uint32_t word;
  } probe = { { 0x11, 0x22, 0x33, 0x44 } };

  return probe.word;
#endif
}
735
|
|
|
|
|
|
|
|
736
|
|
|
|
|
|
|
/* true if the host stores the most significant byte first */
ecb_inline ecb_const ecb_bool ecb_big_endian (void);
ecb_inline ecb_const ecb_bool
ecb_big_endian (void)
{
  return ecb_byteorder_helper () == 0x11223344;
}

/* true if the host stores the least significant byte first */
ecb_inline ecb_const ecb_bool ecb_little_endian (void);
ecb_inline ecb_const ecb_bool
ecb_little_endian (void)
{
  return ecb_byteorder_helper () == 0x44332211;
}
740
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
/*****************************************************************************/ |
742
|
|
|
|
|
|
|
/* unaligned load/store */ |
743
|
|
|
|
|
|
|
|
744
|
|
|
|
|
|
|
ecb_inline uint_fast16_t ecb_be_u16_to_host (uint_fast16_t v) { return ecb_little_endian () ? ecb_bswap16 (v) : v; } |
745
|
|
|
|
|
|
|
ecb_inline uint_fast32_t ecb_be_u32_to_host (uint_fast32_t v) { return ecb_little_endian () ? ecb_bswap32 (v) : v; } |
746
|
|
|
|
|
|
|
ecb_inline uint_fast64_t ecb_be_u64_to_host (uint_fast64_t v) { return ecb_little_endian () ? ecb_bswap64 (v) : v; } |
747
|
|
|
|
|
|
|
|
748
|
|
|
|
|
|
|
ecb_inline uint_fast16_t ecb_le_u16_to_host (uint_fast16_t v) { return ecb_big_endian () ? ecb_bswap16 (v) : v; } |
749
|
|
|
|
|
|
|
ecb_inline uint_fast32_t ecb_le_u32_to_host (uint_fast32_t v) { return ecb_big_endian () ? ecb_bswap32 (v) : v; } |
750
|
|
|
|
|
|
|
ecb_inline uint_fast64_t ecb_le_u64_to_host (uint_fast64_t v) { return ecb_big_endian () ? ecb_bswap64 (v) : v; } |
751
|
|
|
|
|
|
|
|
752
|
|
|
|
|
|
|
ecb_inline uint_fast16_t ecb_peek_u16_u (const void *ptr) { uint16_t v; memcpy (&v, ptr, sizeof (v)); return v; } |
753
|
|
|
|
|
|
|
ecb_inline uint_fast32_t ecb_peek_u32_u (const void *ptr) { uint32_t v; memcpy (&v, ptr, sizeof (v)); return v; } |
754
|
|
|
|
|
|
|
ecb_inline uint_fast64_t ecb_peek_u64_u (const void *ptr) { uint64_t v; memcpy (&v, ptr, sizeof (v)); return v; } |
755
|
|
|
|
|
|
|
|
756
|
|
|
|
|
|
|
ecb_inline uint_fast16_t ecb_peek_be_u16_u (const void *ptr) { return ecb_be_u16_to_host (ecb_peek_u16_u (ptr)); } |
757
|
|
|
|
|
|
|
ecb_inline uint_fast32_t ecb_peek_be_u32_u (const void *ptr) { return ecb_be_u32_to_host (ecb_peek_u32_u (ptr)); } |
758
|
|
|
|
|
|
|
ecb_inline uint_fast64_t ecb_peek_be_u64_u (const void *ptr) { return ecb_be_u64_to_host (ecb_peek_u64_u (ptr)); } |
759
|
|
|
|
|
|
|
|
760
|
|
|
|
|
|
|
ecb_inline uint_fast16_t ecb_peek_le_u16_u (const void *ptr) { return ecb_le_u16_to_host (ecb_peek_u16_u (ptr)); } |
761
|
|
|
|
|
|
|
ecb_inline uint_fast32_t ecb_peek_le_u32_u (const void *ptr) { return ecb_le_u32_to_host (ecb_peek_u32_u (ptr)); } |
762
|
|
|
|
|
|
|
ecb_inline uint_fast64_t ecb_peek_le_u64_u (const void *ptr) { return ecb_le_u64_to_host (ecb_peek_u64_u (ptr)); } |
763
|
|
|
|
|
|
|
|
764
|
|
|
|
|
|
|
ecb_inline uint_fast16_t ecb_host_to_be_u16 (uint_fast16_t v) { return ecb_little_endian () ? ecb_bswap16 (v) : v; } |
765
|
|
|
|
|
|
|
ecb_inline uint_fast32_t ecb_host_to_be_u32 (uint_fast32_t v) { return ecb_little_endian () ? ecb_bswap32 (v) : v; } |
766
|
|
|
|
|
|
|
ecb_inline uint_fast64_t ecb_host_to_be_u64 (uint_fast64_t v) { return ecb_little_endian () ? ecb_bswap64 (v) : v; } |
767
|
|
|
|
|
|
|
|
768
|
|
|
|
|
|
|
ecb_inline uint_fast16_t ecb_host_to_le_u16 (uint_fast16_t v) { return ecb_big_endian () ? ecb_bswap16 (v) : v; } |
769
|
|
|
|
|
|
|
ecb_inline uint_fast32_t ecb_host_to_le_u32 (uint_fast32_t v) { return ecb_big_endian () ? ecb_bswap32 (v) : v; } |
770
|
|
|
|
|
|
|
ecb_inline uint_fast64_t ecb_host_to_le_u64 (uint_fast64_t v) { return ecb_big_endian () ? ecb_bswap64 (v) : v; } |
771
|
|
|
|
|
|
|
|
772
|
|
|
|
|
|
|
/* store a 16, 32 or 64 bit value in host byte order at ptr; */
/* the bytes are copied with memcpy, so ptr does not need to be aligned */
ecb_inline void
ecb_poke_u16_u (void *ptr, uint16_t v)
{
  memcpy (ptr, &v, sizeof v);
}

ecb_inline void
ecb_poke_u32_u (void *ptr, uint32_t v)
{
  memcpy (ptr, &v, sizeof v);
}

ecb_inline void
ecb_poke_u64_u (void *ptr, uint64_t v)
{
  memcpy (ptr, &v, sizeof v);
}
775
|
|
|
|
|
|
|
|
776
|
|
|
|
|
|
|
/* unaligned store of a host byte order value in big endian format */
ecb_inline void
ecb_poke_be_u16_u (void *ptr, uint_fast16_t v)
{
  uint_fast16_t wire = ecb_host_to_be_u16 (v);

  ecb_poke_u16_u (ptr, wire);
}

ecb_inline void
ecb_poke_be_u32_u (void *ptr, uint_fast32_t v)
{
  uint_fast32_t wire = ecb_host_to_be_u32 (v);

  ecb_poke_u32_u (ptr, wire);
}

ecb_inline void
ecb_poke_be_u64_u (void *ptr, uint_fast64_t v)
{
  uint_fast64_t wire = ecb_host_to_be_u64 (v);

  ecb_poke_u64_u (ptr, wire);
}

/* unaligned store of a host byte order value in little endian format */
ecb_inline void
ecb_poke_le_u16_u (void *ptr, uint_fast16_t v)
{
  uint_fast16_t wire = ecb_host_to_le_u16 (v);

  ecb_poke_u16_u (ptr, wire);
}

ecb_inline void
ecb_poke_le_u32_u (void *ptr, uint_fast32_t v)
{
  uint_fast32_t wire = ecb_host_to_le_u32 (v);

  ecb_poke_u32_u (ptr, wire);
}

ecb_inline void
ecb_poke_le_u64_u (void *ptr, uint_fast64_t v)
{
  uint_fast64_t wire = ecb_host_to_le_u64 (v);

  ecb_poke_u64_u (ptr, wire);
}
783
|
|
|
|
|
|
|
|
784
|
|
|
|
|
|
|
#if ECB_CPP

/* C++ only: type-generic byte swapping, selected by overload on the argument type */
inline uint8_t  ecb_bswap (uint8_t  v) { return v; } /* a single byte has nothing to swap */
inline uint16_t ecb_bswap (uint16_t v) { return ecb_bswap16 (v); }
inline uint32_t ecb_bswap (uint32_t v) { return ecb_bswap32 (v); }
inline uint64_t ecb_bswap (uint64_t v) { return ecb_bswap64 (v); }

/*
 * type-generic loads. T cannot be deduced from the pointer argument, so
 * callers (and the wrappers below) name it explicitly, e.g. ecb_peek<uint32_t> (p).
 * ecb_peek dereferences ptr as a T directly, the _u variants copy the bytes
 * with memcpy instead.
 */
template<typename T> inline T ecb_be_to_host (T v) { return ecb_little_endian () ? ecb_bswap (v) : v; }
template<typename T> inline T ecb_le_to_host (T v) { return ecb_big_endian    () ? ecb_bswap (v) : v; }
template<typename T> inline T ecb_peek       (const void *ptr) { return *(const T *)ptr; }
template<typename T> inline T ecb_peek_be    (const void *ptr) { return ecb_be_to_host (ecb_peek  <T> (ptr)); }
template<typename T> inline T ecb_peek_le    (const void *ptr) { return ecb_le_to_host (ecb_peek  <T> (ptr)); }
template<typename T> inline T ecb_peek_u     (const void *ptr) { T v; memcpy (&v, ptr, sizeof (v)); return v; }
template<typename T> inline T ecb_peek_be_u  (const void *ptr) { return ecb_be_to_host (ecb_peek_u<T> (ptr)); }
template<typename T> inline T ecb_peek_le_u  (const void *ptr) { return ecb_le_to_host (ecb_peek_u<T> (ptr)); }

/* type-generic stores, mirroring the loads above */
template<typename T> inline T ecb_host_to_be (T v) { return ecb_little_endian () ? ecb_bswap (v) : v; }
template<typename T> inline T ecb_host_to_le (T v) { return ecb_big_endian    () ? ecb_bswap (v) : v; }
template<typename T> inline void ecb_poke      (void *ptr, T v) { *(T *)ptr = v; }
template<typename T> inline void ecb_poke_be   (void *ptr, T v) { return ecb_poke  <T> (ptr, ecb_host_to_be (v)); }
template<typename T> inline void ecb_poke_le   (void *ptr, T v) { return ecb_poke  <T> (ptr, ecb_host_to_le (v)); }
template<typename T> inline void ecb_poke_u    (void *ptr, T v) { memcpy (ptr, &v, sizeof (v)); }
template<typename T> inline void ecb_poke_be_u (void *ptr, T v) { return ecb_poke_u<T> (ptr, ecb_host_to_be (v)); }
template<typename T> inline void ecb_poke_le_u (void *ptr, T v) { return ecb_poke_u<T> (ptr, ecb_host_to_le (v)); }

#endif
810
|
|
|
|
|
|
|
|
811
|
|
|
|
|
|
|
/*****************************************************************************/ |
812
|
|
|
|
|
|
|
/* division */ |
813
|
|
|
|
|
|
|
|
814
|
|
|
|
|
|
|
/* ecb_mod (m, n): remainder of m / n that is never negative (for positive n). */
/* both arguments are evaluated more than once. */
#if ECB_GCC_VERSION(3,0) || ECB_C99
  /* C99 tightened the definition of %, so a negative remainder only needs n added */
  #define ecb_mod(m,n) ((m) % (n) + ((m) % (n) < 0 ? (n) : 0))
#else
  /* otherwise avoid applying % to a negative m altogether */
  #define ecb_mod(m,n) ((m) < 0 ? ((n) - 1 - ((-1 - (m)) % (n))) : ((m) % (n)))
#endif
820
|
|
|
|
|
|
|
|
821
|
|
|
|
|
|
|
/*
 * ecb_div_rd (val, div): val / div, rounded down (towards negative infinity).
 * ecb_div_ru (val, div): val / div, rounded up   (towards positive infinity).
 * the formulas below are written for a positive div.
 */
#if ECB_CPP
  template<typename T>
  static inline T ecb_div_rd (T val, T div)
  {
    return val < 0 ? - ((-val + div - 1) / div) : (val          ) / div;
  }
  template<typename T>
  static inline T ecb_div_ru (T val, T div)
  {
    return val < 0 ? - ((-val          ) / div) : (val + div - 1) / div;
  }
#else
  /* macro versions: val and div are evaluated more than once */
  #define ecb_div_rd(val,div) ((val) < 0 ? - ((-(val) + (div) - 1) / (div)) : ((val)            ) / (div))
  #define ecb_div_ru(val,div) ((val) < 0 ? - ((-(val)            ) / (div)) : ((val) + (div) - 1) / (div))
#endif
836
|
|
|
|
|
|
|
|
837
|
|
|
|
|
|
|
/*****************************************************************************/ |
838
|
|
|
|
|
|
|
/* array length */ |
839
|
|
|
|
|
|
|
|
840
|
|
|
|
|
|
|
/* ecb_array_length (name): number of elements of the array name */
#if ecb_cplusplus_does_not_suck
  /* does not work for local types (http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2657.htm) */
  template<typename T, int N>
  static inline int ecb_array_length (const T (&arr)[N])
  {
    return N;
  }
#else
  /* only valid for real arrays, not for pointers */
  #define ecb_array_length(name) (sizeof (name) / sizeof (name [0]))
#endif
850
|
|
|
|
|
|
|
|
851
|
|
|
|
|
|
|
/*****************************************************************************/ |
852
|
|
|
|
|
|
|
/* IEEE 754-2008 half float conversions */ |
853
|
|
|
|
|
|
|
|
854
|
|
|
|
|
|
|
/*
 * converts the bit pattern of an IEEE 754 binary16 (half) value, passed in
 * the low 16 bits of x, into the bit pattern of the equal binary32 value.
 */
ecb_function_ ecb_const uint32_t ecb_binary16_to_binary32 (uint32_t x);
ecb_function_ ecb_const uint32_t
ecb_binary16_to_binary32 (uint32_t x)
{
  unsigned int sign = (x & 0x8000) << (31 - 15); /* sign bit, moved to bit 31 */
  int          expo = (x >> 10) & 0x001f;        /* 5 bit biased exponent */
  unsigned int mant =  x        & 0x03ff;        /* 10 bit fraction */

  if (ecb_expect_false (expo == 31))
    {
      /* infinity or NaN: chosen so that the rebias below yields 255 */
      expo = 255 - (127 - 15);
    }
  else if (ecb_expect_false (!expo))
    {
      if (ecb_expect_true (!mant))
        {
          /* zero: chosen so that the rebias below yields 0 */
          expo = 0 - (127 - 15);
        }
      else
        {
          /* subnormal: shift the leading one up to the implicit bit position */
          unsigned int shift = 10 - ecb_ld32 (mant);

          mant  = (mant << shift) & 0x3ff; /* mask implicit bit */
          expo -= shift - 1;
        }
    }

  /* expo and mant now are normalised, or zero, (or inf or nan): rebias */
  expo += 127 - 15;

  return sign | (expo << 23) | (mant << (23 - 10));
}
885
|
|
|
|
|
|
|
|
886
|
|
|
|
|
|
|
/*
 * converts the bit pattern of an IEEE 754 binary32 (single) value into the
 * bit pattern of the nearest binary16 (half) value, rounding to even.
 * values too large for binary16 become infinity, values too small become
 * zero, and NaNs stay NaNs.
 */
ecb_function_ ecb_const uint16_t ecb_binary32_to_binary16 (uint32_t x);
ecb_function_ ecb_const uint16_t
ecb_binary32_to_binary16 (uint32_t x)
{
  unsigned int s =  (x >> 16) & 0x00008000; /* sign bit, the easy part */
           int e = ((x >> 23) & 0x000000ff) - (127 - 15); /* the desired exponent */
  unsigned int m =   x        & 0x007fffff;

  x &= 0x7fffffff;

  /* if it's within range of binary16 normals, use fast path */
  if (ecb_expect_true (0x38800000 <= x && x <= 0x477fefff))
    {
      /* mantissa round-to-even */
      m += 0x00000fff + ((m >> (23 - 10)) & 1);

      /* handle overflow */
      if (ecb_expect_false (m >= 0x00800000))
        {
          /* the fraction rounded up to 1.0, so the result is exactly */
          /* 2**(e+1) with an all-zero fraction. m does not contain the */
          /* implicit bit here, so halving it would leave a stray 0x200 */
          /* in the result (e.g. 0x3ffff000 would become 3.0, not 2.0). */
          m  = 0;
          e += 1;
        }

      return s | (e << 10) | (m >> (23 - 10));
    }

  /* handle large numbers and infinity */
  if (ecb_expect_true (0x477fefff < x && x <= 0x7f800000))
    return s | 0x7c00;

  /* handle zero, subnormals and small numbers */
  if (ecb_expect_true (x < 0x38800000))
    {
      /* zero */
      if (ecb_expect_true (!x))
        return s;

      /* handle subnormals */

      /* too small, will be zero */
      if (e < (14 - 24)) /* might not be sharp, but is good enough */
        return s;

      m |= 0x00800000; /* make implicit bit explicit */

      /* very tricky - we need to round to the nearest e (+10) bit value */
      {
        unsigned int bits = 14 - e;
        unsigned int half = (1 << (bits - 1)) - 1;
        unsigned int even = (m >> bits) & 1;

        /* if this overflows, we will end up with a normalised number */
        m = (m + half + even) >> bits;
      }

      return s | m;
    }

  /* handle NaNs, preserve leftmost nan bits, but make sure we don't turn them into infinities */
  m >>= 13;

  return s | 0x7c00 | m | !m;
}
949
|
|
|
|
|
|
|
|
950
|
|
|
|
|
|
|
/*******************************************************************************/ |
951
|
|
|
|
|
|
|
/* fast integer to ascii */ |
952
|
|
|
|
|
|
|
|
953
|
|
|
|
|
|
|
/* |
954
|
|
|
|
|
|
|
* This code is pretty complicated because it is general. The idea behind it, |
955
|
|
|
|
|
|
|
* however, is pretty simple: first, the number is multiplied with a scaling |
956
|
|
|
|
|
|
|
* factor (2**bits / 10**(digits-1)) to convert the integer into a fixed-point |
957
|
|
|
|
|
|
|
* number with the first digit in the upper bits. |
958
|
|
|
|
|
|
|
* Then this digit is converted to text and masked out. The resulting number |
959
|
|
|
|
|
|
|
* is then multiplied by 10, by multiplying the fixed point representation |
960
|
|
|
|
|
|
|
* by 5 and shifting the (binary) decimal point one to the right, so a 4.28 |
961
|
|
|
|
|
|
|
* format becomes 5.27, 6.26 and so on. |
962
|
|
|
|
|
|
|
* The rest involves only advancing the pointer if we already generated a |
963
|
|
|
|
|
|
|
* non-zero digit, so leading zeroes are overwritten. |
964
|
|
|
|
|
|
|
*/ |
965
|
|
|
|
|
|
|
|
966
|
|
|
|
|
|
|
// mask of the given type with the lowest "bits" bits set
#define ecb_i2a_mask(type,bits) \
  ((((type)1) << (bits)) - 1)
968
|
|
|
|
|
|
|
|
969
|
|
|
|
|
|
|
// output a single digit. maskvalue is 10**digitidx.
// expands inside the function generated by ecb_i2a_def and uses its locals
// x (fixed-point remainder), ptr (output position) and nz (digit seen flag).
#define ecb_i2a_digit(type,bits,digitmask,maskvalue,digitidx) \
  if (digitmask >= maskvalue) /* compile-time constant: selects how many digits get generated */ \
    { \
      char digit = x >> (bits - digitidx); /* the integer part is the next digit */ \
      *ptr = digit + '0'; /* always store it... */ \
      nz = (digitmask == maskvalue) || nz || digit; /* (the last digit is always kept) */ \
      ptr += nz; /* ...but only advance once a digit is to be kept */ \
      x = (x & ecb_i2a_mask (type, bits - digitidx)) * 5; /* drop digit, then *10: *5 and move the point */ \
    }
979
|
|
|
|
|
|
|
|
980
|
|
|
|
|
|
|
// defines ecb_i2a_<suffix> (ptr, u): write u in decimal at ptr, return the new end.
// the integer is converted to fixed point and the digits are multiplied out,
// highest first. needs magic constants: the number of bits after the point
// (bits), 10**(max. digits - 1) (digitmask), and whether leading zeroes are
// to be output (lz).
#define ecb_i2a_def(suffix,ptr,v,type,bits,digitmask,lz) \
ecb_inline char *ecb_i2a_ ## suffix (char *ptr, uint32_t u) \
{ \
  char nz = lz; /* has a digit been kept yet? */ \
  /* scale u into x.bits fixed-point, leading digit above the point */ \
  type x = u * ((ecb_i2a_mask (type, bits) + digitmask) / digitmask); \
  /* up to 10 digits, each step is a no-op when digitmask is too small */ \
  ecb_i2a_digit (type,bits,digitmask,          1, 0); \
  ecb_i2a_digit (type,bits,digitmask,         10, 1); \
  ecb_i2a_digit (type,bits,digitmask,        100, 2); \
  ecb_i2a_digit (type,bits,digitmask,       1000, 3); \
  ecb_i2a_digit (type,bits,digitmask,      10000, 4); \
  ecb_i2a_digit (type,bits,digitmask,     100000, 5); \
  ecb_i2a_digit (type,bits,digitmask,    1000000, 6); \
  ecb_i2a_digit (type,bits,digitmask,   10000000, 7); \
  ecb_i2a_digit (type,bits,digitmask,  100000000, 8); \
  ecb_i2a_digit (type,bits,digitmask, 1000000000, 9); \
  return ptr; \
}
1001
|
|
|
|
|
|
|
|
1002
|
|
|
|
|
|
|
// predefined versions of the above, for various digits |
1003
|
|
|
|
|
|
|
// ecb_i2a_xN = almost N digits, limit defined by macro |
1004
|
|
|
|
|
|
|
// ecb_i2a_N = up to N digits, leading zeroes suppressed |
1005
|
|
|
|
|
|
|
// ecb_i2a_0N = exactly N digits, including leading zeroes |
1006
|
|
|
|
|
|
|
|
1007
|
|
|
|
|
|
|
// non-leading-zero versions, limited range |
1008
|
|
|
|
|
|
|
#define ECB_I2A_MAX_X5 59074 // limit for ecb_i2a_x5 |
1009
|
|
|
|
|
|
|
#define ECB_I2A_MAX_X10 2932500665 // limit for ecb_i2a_x10 |
1010
|
|
|
|
|
|
|
ecb_i2a_def ( x5, ptr, v, uint32_t, 26, 10000, 0) |
1011
|
|
|
|
|
|
|
ecb_i2a_def (x10, ptr, v, uint64_t, 60, 1000000000, 0) |
1012
|
|
|
|
|
|
|
|
1013
|
|
|
|
|
|
|
// non-leading zero versions, all digits, 4 and 9 are optimal for 32/64 bit |
1014
|
|
|
|
|
|
|
ecb_i2a_def ( 2, ptr, v, uint32_t, 10, 10, 0) |
1015
|
|
|
|
|
|
|
ecb_i2a_def ( 3, ptr, v, uint32_t, 12, 100, 0) |
1016
|
|
|
|
|
|
|
ecb_i2a_def ( 4, ptr, v, uint32_t, 26, 1000, 0) |
1017
|
|
|
|
|
|
|
ecb_i2a_def ( 5, ptr, v, uint64_t, 30, 10000, 0) |
1018
|
|
|
|
|
|
|
ecb_i2a_def ( 6, ptr, v, uint64_t, 36, 100000, 0) |
1019
|
|
|
|
|
|
|
ecb_i2a_def ( 7, ptr, v, uint64_t, 44, 1000000, 0) |
1020
|
|
|
|
|
|
|
ecb_i2a_def ( 8, ptr, v, uint64_t, 50, 10000000, 0) |
1021
|
|
|
|
|
|
|
ecb_i2a_def ( 9, ptr, v, uint64_t, 56, 100000000, 0) |
1022
|
|
|
|
|
|
|
|
1023
|
|
|
|
|
|
|
// leading-zero versions, all digits, 04 and 09 are optimal for 32/64 bit |
1024
|
|
|
|
|
|
|
ecb_i2a_def (02, ptr, v, uint32_t, 10, 10, 1) |
1025
|
|
|
|
|
|
|
ecb_i2a_def (03, ptr, v, uint32_t, 12, 100, 1) |
1026
|
|
|
|
|
|
|
ecb_i2a_def (04, ptr, v, uint32_t, 26, 1000, 1) |
1027
|
|
|
|
|
|
|
ecb_i2a_def (05, ptr, v, uint64_t, 30, 10000, 1) |
1028
|
|
|
|
|
|
|
ecb_i2a_def (06, ptr, v, uint64_t, 36, 100000, 1) |
1029
|
|
|
|
|
|
|
ecb_i2a_def (07, ptr, v, uint64_t, 44, 1000000, 1) |
1030
|
|
|
|
|
|
|
ecb_i2a_def (08, ptr, v, uint64_t, 50, 10000000, 1) |
1031
|
|
|
|
|
|
|
ecb_i2a_def (09, ptr, v, uint64_t, 56, 100000000, 1) |
1032
|
|
|
|
|
|
|
|
1033
|
|
|
|
|
|
|
/* output sizes in characters for the ecb_i2a_* integer converters */
#define ECB_I2A_I32_DIGITS 11 /* sign plus 10 digits */
#define ECB_I2A_U32_DIGITS 10
#define ECB_I2A_I64_DIGITS 20 /* sign plus 19 digits */
#define ECB_I2A_U64_DIGITS 21 /* NOTE(review): UINT64_MAX has 20 digits - confirm the extra character is intended */
#define ECB_I2A_MAX_DIGITS 21 /* largest of the above */
1038
|
|
|
|
|
|
|
|
1039
|
|
|
|
|
|
|
ecb_inline char * |
1040
|
|
|
|
|
|
|
ecb_i2a_u32 (char *ptr, uint32_t u) |
1041
|
|
|
|
|
|
|
{ |
1042
|
|
|
|
|
|
|
#if ECB_64BIT_NATIVE |
1043
|
|
|
|
|
|
|
if (ecb_expect_true (u <= ECB_I2A_MAX_X10)) |
1044
|
|
|
|
|
|
|
ptr = ecb_i2a_x10 (ptr, u); |
1045
|
|
|
|
|
|
|
else // x10 almost, but not fully, covers 32 bit |
1046
|
|
|
|
|
|
|
{ |
1047
|
|
|
|
|
|
|
uint32_t u1 = u % 1000000000; |
1048
|
|
|
|
|
|
|
uint32_t u2 = u / 1000000000; |
1049
|
|
|
|
|
|
|
|
1050
|
|
|
|
|
|
|
*ptr++ = u2 + '0'; |
1051
|
|
|
|
|
|
|
ptr = ecb_i2a_09 (ptr, u1); |
1052
|
|
|
|
|
|
|
} |
1053
|
|
|
|
|
|
|
#else |
1054
|
|
|
|
|
|
|
if (ecb_expect_true (u <= ECB_I2A_MAX_X5)) |
1055
|
|
|
|
|
|
|
ecb_i2a_x5 (ptr, u); |
1056
|
|
|
|
|
|
|
else if (ecb_expect_true (u <= ECB_I2A_MAX_X5 * 10000)) |
1057
|
|
|
|
|
|
|
{ |
1058
|
|
|
|
|
|
|
uint32_t u1 = u % 10000; |
1059
|
|
|
|
|
|
|
uint32_t u2 = u / 10000; |
1060
|
|
|
|
|
|
|
|
1061
|
|
|
|
|
|
|
ptr = ecb_i2a_x5 (ptr, u2); |
1062
|
|
|
|
|
|
|
ptr = ecb_i2a_04 (ptr, u1); |
1063
|
|
|
|
|
|
|
} |
1064
|
|
|
|
|
|
|
else |
1065
|
|
|
|
|
|
|
{ |
1066
|
|
|
|
|
|
|
uint32_t u1 = u % 10000; |
1067
|
|
|
|
|
|
|
uint32_t ua = u / 10000; |
1068
|
|
|
|
|
|
|
uint32_t u2 = ua % 10000; |
1069
|
|
|
|
|
|
|
uint32_t u3 = ua / 10000; |
1070
|
|
|
|
|
|
|
|
1071
|
|
|
|
|
|
|
ptr = ecb_i2a_2 (ptr, u3); |
1072
|
|
|
|
|
|
|
ptr = ecb_i2a_04 (ptr, u2); |
1073
|
|
|
|
|
|
|
ptr = ecb_i2a_04 (ptr, u1); |
1074
|
|
|
|
|
|
|
} |
1075
|
|
|
|
|
|
|
#endif |
1076
|
|
|
|
|
|
|
|
1077
|
|
|
|
|
|
|
return ptr; |
1078
|
|
|
|
|
|
|
} |
1079
|
|
|
|
|
|
|
|
1080
|
|
|
|
|
|
|
/*
 * formats the signed 32 bit value v in decimal, starting at ptr, and
 * returns the advanced write position. a '-' is always stored at ptr [0],
 * but the pointer only moves past it when v is negative.
 */
ecb_inline char *
ecb_i2a_i32 (char *ptr, int32_t v)
{
  const int negative  = v < 0;
  uint32_t  magnitude = (uint32_t)v;

  /* negate in unsigned arithmetic, which is also well-defined for the most negative value */
  if (negative)
    magnitude = -magnitude;

  /* branch-free sign handling: store unconditionally, advance conditionally */
  ptr [0] = '-';
  ptr += negative;

  #if ECB_64BIT_NATIVE
    return ecb_i2a_x10 (ptr, magnitude); // x10 fully covers 31 bit
  #else
    return ecb_i2a_u32 (ptr, magnitude);
  #endif
}
1094
|
|
|
|
|
|
|
|
1095
|
|
|
|
|
|
|
ecb_inline char * |
1096
|
|
|
|
|
|
|
ecb_i2a_u64 (char *ptr, uint64_t u) |
1097
|
|
|
|
|
|
|
{ |
1098
|
|
|
|
|
|
|
#if ECB_64BIT_NATIVE |
1099
|
|
|
|
|
|
|
if (ecb_expect_true (u <= ECB_I2A_MAX_X10)) |
1100
|
|
|
|
|
|
|
ptr = ecb_i2a_x10 (ptr, u); |
1101
|
|
|
|
|
|
|
else if (ecb_expect_false (u <= ECB_I2A_MAX_X10 * 1000000000)) |
1102
|
|
|
|
|
|
|
{ |
1103
|
|
|
|
|
|
|
uint64_t u1 = u % 1000000000; |
1104
|
|
|
|
|
|
|
uint64_t u2 = u / 1000000000; |
1105
|
|
|
|
|
|
|
|
1106
|
|
|
|
|
|
|
ptr = ecb_i2a_x10 (ptr, u2); |
1107
|
|
|
|
|
|
|
ptr = ecb_i2a_09 (ptr, u1); |
1108
|
|
|
|
|
|
|
} |
1109
|
|
|
|
|
|
|
else |
1110
|
|
|
|
|
|
|
{ |
1111
|
|
|
|
|
|
|
uint64_t u1 = u % 1000000000; |
1112
|
|
|
|
|
|
|
uint64_t ua = u / 1000000000; |
1113
|
|
|
|
|
|
|
uint64_t u2 = ua % 1000000000; |
1114
|
|
|
|
|
|
|
uint64_t u3 = ua / 1000000000; |
1115
|
|
|
|
|
|
|
|
1116
|
|
|
|
|
|
|
ptr = ecb_i2a_2 (ptr, u3); |
1117
|
|
|
|
|
|
|
ptr = ecb_i2a_09 (ptr, u2); |
1118
|
|
|
|
|
|
|
ptr = ecb_i2a_09 (ptr, u1); |
1119
|
|
|
|
|
|
|
} |
1120
|
|
|
|
|
|
|
#else |
1121
|
|
|
|
|
|
|
if (ecb_expect_true (u <= ECB_I2A_MAX_X5)) |
1122
|
|
|
|
|
|
|
ptr = ecb_i2a_x5 (ptr, u); |
1123
|
|
|
|
|
|
|
else |
1124
|
|
|
|
|
|
|
{ |
1125
|
|
|
|
|
|
|
uint64_t u1 = u % 10000; |
1126
|
|
|
|
|
|
|
uint64_t u2 = u / 10000; |
1127
|
|
|
|
|
|
|
|
1128
|
|
|
|
|
|
|
ptr = ecb_i2a_u64 (ptr, u2); |
1129
|
|
|
|
|
|
|
ptr = ecb_i2a_04 (ptr, u1); |
1130
|
|
|
|
|
|
|
} |
1131
|
|
|
|
|
|
|
#endif |
1132
|
|
|
|
|
|
|
|
1133
|
|
|
|
|
|
|
return ptr; |
1134
|
|
|
|
|
|
|
} |
1135
|
|
|
|
|
|
|
|
1136
|
|
|
|
|
|
|
/*
 * formats the signed 64 bit value v in decimal, starting at ptr, and
 * returns the advanced write position. a '-' is always stored at ptr [0],
 * but the pointer only moves past it when v is negative.
 */
ecb_inline char *
ecb_i2a_i64 (char *ptr, int64_t v)
{
  const int negative  = v < 0;
  uint64_t  magnitude = (uint64_t)v;

  /* negate in unsigned arithmetic, which is also well-defined for the most negative value */
  if (negative)
    magnitude = -magnitude;

  /* branch-free sign handling: store unconditionally, advance conditionally */
  ptr [0] = '-';
  ptr += negative;

  #if ECB_64BIT_NATIVE
    if (ecb_expect_true (magnitude <= ECB_I2A_MAX_X10))
      return ecb_i2a_x10 (ptr, magnitude);

    uint64_t low9 = magnitude % 1000000000;
    uint64_t rest = magnitude / 1000000000;

    if (ecb_expect_false (magnitude <= ECB_I2A_MAX_X10 * 1000000000))
      ptr = ecb_i2a_x10 (ptr, rest);
    else
      {
        uint64_t mid9 = rest % 1000000000;
        uint64_t top  = rest / 1000000000;

        // 2**63 is 19 digits, so the top is exactly one digit
        *ptr++ = top + '0';
        ptr = ecb_i2a_09 (ptr, mid9);
      }

    return ecb_i2a_09 (ptr, low9);
  #else
    return ecb_i2a_u64 (ptr, magnitude);
  #endif
}
1171
|
|
|
|
|
|
|
|
1172
|
|
|
|
|
|
|
/*******************************************************************************/ |
1173
|
|
|
|
|
|
|
/* floating point stuff, can be disabled by defining ECB_NO_LIBM */ |
1174
|
|
|
|
|
|
|
|
1175
|
|
|
|
|
|
|
/* basically, everything uses "ieee pure-endian" floating point numbers */ |
1176
|
|
|
|
|
|
|
/* the only noteworthy exception is ancient armle, which uses order 43218765 */ |
1177
|
|
|
|
|
|
|
/* ECB_STDFP is set to 1 for the targets enumerated below and to 0 for everything else. */
/* the float/double conversion functions further down memcpy the bits when it is 1 and use a slow emulation when it is 0. */
#if 0 \ |
1178
|
|
|
|
|
|
|
|| __i386 || __i386__ \ |
1179
|
|
|
|
|
|
|
|| ECB_GCC_AMD64 \ |
1180
|
|
|
|
|
|
|
|| __powerpc__ || __ppc__ || __powerpc64__ || __ppc64__ \ |
1181
|
|
|
|
|
|
|
|| defined __s390__ || defined __s390x__ \ |
1182
|
|
|
|
|
|
|
|| defined __mips__ \ |
1183
|
|
|
|
|
|
|
|| defined __alpha__ \ |
1184
|
|
|
|
|
|
|
|| defined __hppa__ \ |
1185
|
|
|
|
|
|
|
|| defined __ia64__ \ |
1186
|
|
|
|
|
|
|
|| defined __m68k__ \ |
1187
|
|
|
|
|
|
|
|| defined __m88k__ \ |
1188
|
|
|
|
|
|
|
|| defined __sh__ \ |
1189
|
|
|
|
|
|
|
|| defined _M_IX86 || defined ECB_MSVC_AMD64 || defined _M_IA64 \ |
1190
|
|
|
|
|
|
|
|| (defined __arm__ && (defined __ARM_EABI__ || defined __EABI__ || defined __VFP_FP__ || defined _WIN32_WCE || defined __ANDROID__)) \ |
1191
|
|
|
|
|
|
|
|| defined __aarch64__ |
1192
|
|
|
|
|
|
|
#define ECB_STDFP 1 |
1193
|
|
|
|
|
|
|
#else |
1194
|
|
|
|
|
|
|
#define ECB_STDFP 0 |
1195
|
|
|
|
|
|
|
#endif |
1196
|
|
|
|
|
|
|
|
1197
|
|
|
|
|
|
|
#ifndef ECB_NO_LIBM |
1198
|
|
|
|
|
|
|
|
1199
|
|
|
|
|
|
|
#include <math.h> /* for frexp*, ldexp*, INFINITY, NAN */
1200
|
|
|
|
|
|
|
|
1201
|
|
|
|
|
|
|
/* only the oldest of old doesn't have this one. solaris. */ |
1202
|
|
|
|
|
|
|
/* ECB_INFINITY: the INFINITY macro when the math header defines it, HUGE_VAL otherwise */
#ifdef INFINITY |
1203
|
|
|
|
|
|
|
#define ECB_INFINITY INFINITY |
1204
|
|
|
|
|
|
|
#else |
1205
|
|
|
|
|
|
|
#define ECB_INFINITY HUGE_VAL |
1206
|
|
|
|
|
|
|
#endif |
1207
|
|
|
|
|
|
|
|
1208
|
|
|
|
|
|
|
/* ECB_NAN: the NAN macro when available; the fallback is ECB_INFINITY, i.e. not an actual not-a-number value */
#ifdef NAN |
1209
|
|
|
|
|
|
|
#define ECB_NAN NAN |
1210
|
|
|
|
|
|
|
#else |
1211
|
|
|
|
|
|
|
#define ECB_NAN ECB_INFINITY |
1212
|
|
|
|
|
|
|
#endif |
1213
|
|
|
|
|
|
|
|
1214
|
|
|
|
|
|
|
/* float versions of ldexp/frexp: map to ldexpf/frexpf when C99 or the tested XOPEN/POSIX levels are advertised, */
/* otherwise go through the double functions and cast the result back to float */
#if ECB_C99 || _XOPEN_VERSION >= 600 || _POSIX_VERSION >= 200112L |
1215
|
|
|
|
|
|
|
#define ecb_ldexpf(x,e) ldexpf ((x), (e)) |
1216
|
|
|
|
|
|
|
#define ecb_frexpf(x,e) frexpf ((x), (e)) |
1217
|
|
|
|
|
|
|
#else |
1218
|
|
|
|
|
|
|
#define ecb_ldexpf(x,e) (float) ldexp ((double) (x), (e)) |
1219
|
|
|
|
|
|
|
#define ecb_frexpf(x,e) (float) frexp ((double) (x), (e)) |
1220
|
|
|
|
|
|
|
#endif |
1221
|
|
|
|
|
|
|
|
1222
|
|
|
|
|
|
|
/* convert a float to ieee single/binary32 */ |
1223
|
|
|
|
|
|
|
ecb_function_ ecb_const uint32_t ecb_float_to_binary32 (float x); |
1224
|
|
|
|
|
|
|
ecb_function_ ecb_const uint32_t |
1225
|
3
|
|
|
|
|
|
ecb_float_to_binary32 (float x) |
1226
|
|
|
|
|
|
|
{ |
1227
|
|
|
|
|
|
|
uint32_t r; |
1228
|
|
|
|
|
|
|
|
1229
|
|
|
|
|
|
|
#if ECB_STDFP |
1230
|
3
|
|
|
|
|
|
memcpy (&r, &x, 4); |
1231
|
|
|
|
|
|
|
#else |
1232
|
|
|
|
|
|
|
/* slow emulation, works for anything but -0 */ |
1233
|
|
|
|
|
|
|
uint32_t m; |
1234
|
|
|
|
|
|
|
int e; |
1235
|
|
|
|
|
|
|
|
1236
|
|
|
|
|
|
|
if (x == 0e0f ) return 0x00000000U; |
1237
|
|
|
|
|
|
|
if (x > +3.40282346638528860e+38f) return 0x7f800000U; |
1238
|
|
|
|
|
|
|
if (x < -3.40282346638528860e+38f) return 0xff800000U; |
1239
|
|
|
|
|
|
|
if (x != x ) return 0x7fbfffffU; |
1240
|
|
|
|
|
|
|
|
1241
|
|
|
|
|
|
|
m = ecb_frexpf (x, &e) * 0x1000000U; |
1242
|
|
|
|
|
|
|
|
1243
|
|
|
|
|
|
|
r = m & 0x80000000U; |
1244
|
|
|
|
|
|
|
|
1245
|
|
|
|
|
|
|
if (r) |
1246
|
|
|
|
|
|
|
m = -m; |
1247
|
|
|
|
|
|
|
|
1248
|
|
|
|
|
|
|
if (e <= -126) |
1249
|
|
|
|
|
|
|
{ |
1250
|
|
|
|
|
|
|
m &= 0xffffffU; |
1251
|
|
|
|
|
|
|
m >>= (-125 - e); |
1252
|
|
|
|
|
|
|
e = -126; |
1253
|
|
|
|
|
|
|
} |
1254
|
|
|
|
|
|
|
|
1255
|
|
|
|
|
|
|
r |= (e + 126) << 23; |
1256
|
|
|
|
|
|
|
r |= m & 0x7fffffU; |
1257
|
|
|
|
|
|
|
#endif |
1258
|
|
|
|
|
|
|
|
1259
|
3
|
|
|
|
|
|
return r; |
1260
|
|
|
|
|
|
|
} |
1261
|
|
|
|
|
|
|
|
1262
|
|
|
|
|
|
|
/* converts an ieee single/binary32 to a float */ |
1263
|
|
|
|
|
|
|
ecb_function_ ecb_const float ecb_binary32_to_float (uint32_t x); |
1264
|
|
|
|
|
|
|
ecb_function_ ecb_const float |
1265
|
9
|
|
|
|
|
|
ecb_binary32_to_float (uint32_t x) |
1266
|
|
|
|
|
|
|
{ |
1267
|
|
|
|
|
|
|
float r; |
1268
|
|
|
|
|
|
|
|
1269
|
|
|
|
|
|
|
#if ECB_STDFP |
1270
|
9
|
|
|
|
|
|
memcpy (&r, &x, 4); |
1271
|
|
|
|
|
|
|
#else |
1272
|
|
|
|
|
|
|
/* emulation, only works for normals and subnormals and +0 */ |
1273
|
|
|
|
|
|
|
int neg = x >> 31; |
1274
|
|
|
|
|
|
|
int e = (x >> 23) & 0xffU; |
1275
|
|
|
|
|
|
|
|
1276
|
|
|
|
|
|
|
x &= 0x7fffffU; |
1277
|
|
|
|
|
|
|
|
1278
|
|
|
|
|
|
|
if (e) |
1279
|
|
|
|
|
|
|
x |= 0x800000U; |
1280
|
|
|
|
|
|
|
else |
1281
|
|
|
|
|
|
|
e = 1; |
1282
|
|
|
|
|
|
|
|
1283
|
|
|
|
|
|
|
/* we distrust ldexpf a bit and do the 2**-24 scaling by an extra multiply */ |
1284
|
|
|
|
|
|
|
r = ecb_ldexpf (x * (0.5f / 0x800000U), e - 126); |
1285
|
|
|
|
|
|
|
|
1286
|
|
|
|
|
|
|
r = neg ? -r : r; |
1287
|
|
|
|
|
|
|
#endif |
1288
|
|
|
|
|
|
|
|
1289
|
9
|
|
|
|
|
|
return r; |
1290
|
|
|
|
|
|
|
} |
1291
|
|
|
|
|
|
|
|
1292
|
|
|
|
|
|
|
/* convert a double to ieee double/binary64 */ |
1293
|
|
|
|
|
|
|
ecb_function_ ecb_const uint64_t ecb_double_to_binary64 (double x); |
1294
|
|
|
|
|
|
|
ecb_function_ ecb_const uint64_t |
1295
|
8
|
|
|
|
|
|
ecb_double_to_binary64 (double x) |
1296
|
|
|
|
|
|
|
{ |
1297
|
|
|
|
|
|
|
uint64_t r; |
1298
|
|
|
|
|
|
|
|
1299
|
|
|
|
|
|
|
#if ECB_STDFP |
1300
|
8
|
|
|
|
|
|
memcpy (&r, &x, 8); |
1301
|
|
|
|
|
|
|
#else |
1302
|
|
|
|
|
|
|
/* slow emulation, works for anything but -0 */ |
1303
|
|
|
|
|
|
|
uint64_t m; |
1304
|
|
|
|
|
|
|
int e; |
1305
|
|
|
|
|
|
|
|
1306
|
|
|
|
|
|
|
if (x == 0e0 ) return 0x0000000000000000U; |
1307
|
|
|
|
|
|
|
if (x > +1.79769313486231470e+308) return 0x7ff0000000000000U; |
1308
|
|
|
|
|
|
|
if (x < -1.79769313486231470e+308) return 0xfff0000000000000U; |
1309
|
|
|
|
|
|
|
if (x != x ) return 0X7ff7ffffffffffffU; |
1310
|
|
|
|
|
|
|
|
1311
|
|
|
|
|
|
|
m = frexp (x, &e) * 0x20000000000000U; |
1312
|
|
|
|
|
|
|
|
1313
|
|
|
|
|
|
|
r = m & 0x8000000000000000;; |
1314
|
|
|
|
|
|
|
|
1315
|
|
|
|
|
|
|
if (r) |
1316
|
|
|
|
|
|
|
m = -m; |
1317
|
|
|
|
|
|
|
|
1318
|
|
|
|
|
|
|
if (e <= -1022) |
1319
|
|
|
|
|
|
|
{ |
1320
|
|
|
|
|
|
|
m &= 0x1fffffffffffffU; |
1321
|
|
|
|
|
|
|
m >>= (-1021 - e); |
1322
|
|
|
|
|
|
|
e = -1022; |
1323
|
|
|
|
|
|
|
} |
1324
|
|
|
|
|
|
|
|
1325
|
|
|
|
|
|
|
r |= ((uint64_t)(e + 1022)) << 52; |
1326
|
|
|
|
|
|
|
r |= m & 0xfffffffffffffU; |
1327
|
|
|
|
|
|
|
#endif |
1328
|
|
|
|
|
|
|
|
1329
|
8
|
|
|
|
|
|
return r; |
1330
|
|
|
|
|
|
|
} |
1331
|
|
|
|
|
|
|
|
1332
|
|
|
|
|
|
|
/* converts an ieee double/binary64 to a double */ |
1333
|
|
|
|
|
|
|
ecb_function_ ecb_const double ecb_binary64_to_double (uint64_t x); |
1334
|
|
|
|
|
|
|
ecb_function_ ecb_const double |
1335
|
8
|
|
|
|
|
|
ecb_binary64_to_double (uint64_t x) |
1336
|
|
|
|
|
|
|
{ |
1337
|
|
|
|
|
|
|
double r; |
1338
|
|
|
|
|
|
|
|
1339
|
|
|
|
|
|
|
#if ECB_STDFP |
1340
|
8
|
|
|
|
|
|
memcpy (&r, &x, 8); |
1341
|
|
|
|
|
|
|
#else |
1342
|
|
|
|
|
|
|
/* emulation, only works for normals and subnormals and +0 */ |
1343
|
|
|
|
|
|
|
int neg = x >> 63; |
1344
|
|
|
|
|
|
|
int e = (x >> 52) & 0x7ffU; |
1345
|
|
|
|
|
|
|
|
1346
|
|
|
|
|
|
|
x &= 0xfffffffffffffU; |
1347
|
|
|
|
|
|
|
|
1348
|
|
|
|
|
|
|
if (e) |
1349
|
|
|
|
|
|
|
x |= 0x10000000000000U; |
1350
|
|
|
|
|
|
|
else |
1351
|
|
|
|
|
|
|
e = 1; |
1352
|
|
|
|
|
|
|
|
1353
|
|
|
|
|
|
|
/* we distrust ldexp a bit and do the 2**-53 scaling by an extra multiply */ |
1354
|
|
|
|
|
|
|
r = ldexp (x * (0.5 / 0x10000000000000U), e - 1022); |
1355
|
|
|
|
|
|
|
|
1356
|
|
|
|
|
|
|
r = neg ? -r : r; |
1357
|
|
|
|
|
|
|
#endif |
1358
|
|
|
|
|
|
|
|
1359
|
8
|
|
|
|
|
|
return r; |
1360
|
|
|
|
|
|
|
} |
1361
|
|
|
|
|
|
|
|
1362
|
|
|
|
|
|
|
/* convert a float to ieee half/binary16 */ |
1363
|
|
|
|
|
|
|
ecb_function_ ecb_const uint16_t ecb_float_to_binary16 (float x); |
1364
|
|
|
|
|
|
|
ecb_function_ ecb_const uint16_t |
1365
|
0
|
|
|
|
|
|
ecb_float_to_binary16 (float x) |
1366
|
|
|
|
|
|
|
{ |
1367
|
0
|
|
|
|
|
|
return ecb_binary32_to_binary16 (ecb_float_to_binary32 (x)); |
1368
|
|
|
|
|
|
|
} |
1369
|
|
|
|
|
|
|
|
1370
|
|
|
|
|
|
|
/* convert an ieee half/binary16 to float */ |
1371
|
|
|
|
|
|
|
ecb_function_ ecb_const float ecb_binary16_to_float (uint16_t x); |
1372
|
|
|
|
|
|
|
ecb_function_ ecb_const float |
1373
|
5
|
|
|
|
|
|
ecb_binary16_to_float (uint16_t x) |
1374
|
|
|
|
|
|
|
{ |
1375
|
5
|
|
|
|
|
|
return ecb_binary32_to_float (ecb_binary16_to_binary32 (x)); |
1376
|
|
|
|
|
|
|
} |
1377
|
|
|
|
|
|
|
|
1378
|
|
|
|
|
|
|
#endif |
1379
|
|
|
|
|
|
|
|
1380
|
|
|
|
|
|
|
#endif |
1381
|
|
|
|
|
|
|
|