File Coverage

ecb.h
Criterion Covered Total %
statement 27 59 45.7
branch 4 22 18.1
condition n/a
subroutine n/a
pod n/a
total 31 81 38.2


line stmt bran cond sub pod time code
1             /*
2             * libecb - http://software.schmorp.de/pkg/libecb
3             *
4             * Copyright (©) 2009-2015,2018-2021 Marc Alexander Lehmann
5             * Copyright (©) 2011 Emanuele Giaquinta
6             * All rights reserved.
7             *
8             * Redistribution and use in source and binary forms, with or without modifica-
9             * tion, are permitted provided that the following conditions are met:
10             *
11             * 1. Redistributions of source code must retain the above copyright notice,
12             * this list of conditions and the following disclaimer.
13             *
14             * 2. Redistributions in binary form must reproduce the above copyright
15             * notice, this list of conditions and the following disclaimer in the
16             * documentation and/or other materials provided with the distribution.
17             *
18             * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
19             * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
20             * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
21             * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
22             * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
23             * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
24             * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25             * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
26             * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
27             * OF THE POSSIBILITY OF SUCH DAMAGE.
28             *
29             * Alternatively, the contents of this file may be used under the terms of
30             * the GNU General Public License ("GPL") version 2 or any later version,
31             * in which case the provisions of the GPL are applicable instead of
32             * the above. If you wish to allow the use of your version of this file
33             * only under the terms of the GPL and not to allow others to use your
34             * version of this file under the BSD license, indicate your decision
35             * by deleting the provisions above and replace them with the notice
36             * and other provisions required by the GPL. If you do not delete the
37             * provisions above, a recipient may use your version of this file under
38             * either the BSD or the GPL.
39             */
40              
41             #ifndef ECB_H
42             #define ECB_H
43              
44             /* 16 bits major, 16 bits minor */
45             #define ECB_VERSION 0x00010009
46              
47             #include <string.h> /* for memcpy */
48              
49             #if defined (_WIN32) && !defined (__MINGW32__)
50             typedef signed char int8_t;
51             typedef unsigned char uint8_t;
52             typedef signed char int_fast8_t;
53             typedef unsigned char uint_fast8_t;
54             typedef signed short int16_t;
55             typedef unsigned short uint16_t;
56             typedef signed int int_fast16_t;
57             typedef unsigned int uint_fast16_t;
58             typedef signed int int32_t;
59             typedef unsigned int uint32_t;
60             typedef signed int int_fast32_t;
61             typedef unsigned int uint_fast32_t;
62             #if __GNUC__
63             typedef signed long long int64_t;
64             typedef unsigned long long uint64_t;
65             #else /* _MSC_VER || __BORLANDC__ */
66             typedef signed __int64 int64_t;
67             typedef unsigned __int64 uint64_t;
68             #endif
69             typedef int64_t int_fast64_t;
70             typedef uint64_t uint_fast64_t;
71             #ifdef _WIN64
72             #define ECB_PTRSIZE 8
73             typedef uint64_t uintptr_t;
74             typedef int64_t intptr_t;
75             #else
76             #define ECB_PTRSIZE 4
77             typedef uint32_t uintptr_t;
78             typedef int32_t intptr_t;
79             #endif
80             #else
81             #include <inttypes.h>
82             #if (defined INTPTR_MAX ? INTPTR_MAX : ULONG_MAX) > 0xffffffffU
83             #define ECB_PTRSIZE 8
84             #else
85             #define ECB_PTRSIZE 4
86             #endif
87             #endif
88              
89             #define ECB_GCC_AMD64 (__amd64 || __amd64__ || __x86_64 || __x86_64__)
90             #define ECB_MSVC_AMD64 (_M_AMD64 || _M_X64)
91              
92             #ifndef ECB_OPTIMIZE_SIZE
93             #if __OPTIMIZE_SIZE__
94             #define ECB_OPTIMIZE_SIZE 1
95             #else
96             #define ECB_OPTIMIZE_SIZE 0
97             #endif
98             #endif
99              
100             /* work around x32 idiocy by defining proper macros */
101             #if ECB_GCC_AMD64 || ECB_MSVC_AMD64
102             #if _ILP32
103             #define ECB_AMD64_X32 1
104             #else
105             #define ECB_AMD64 1
106             #endif
107             #endif
108              
109             #if ECB_PTRSIZE >= 8 || ECB_AMD64_X32
110             #define ECB_64BIT_NATIVE 1
111             #else
112             #define ECB_64BIT_NATIVE 0
113             #endif
114              
115             /* many compilers define __GNUC__ to some versions but then only implement
116             * what their idiot authors think are the "more important" extensions,
117             * causing enormous grief in return for some better fake benchmark numbers.
118             * or so.
119             * we try to detect these and simply assume they are not gcc - if they have
120             * an issue with that they should have done it right in the first place.
121             */
122             #if !defined __GNUC_MINOR__ || defined __INTEL_COMPILER || defined __SUNPRO_C || defined __SUNPRO_CC || defined __llvm__ || defined __clang__
123             #define ECB_GCC_VERSION(major,minor) 0
124             #else
125             #define ECB_GCC_VERSION(major,minor) (__GNUC__ > (major) || (__GNUC__ == (major) && __GNUC_MINOR__ >= (minor)))
126             #endif
127              
128             #define ECB_CLANG_VERSION(major,minor) (__clang_major__ > (major) || (__clang_major__ == (major) && __clang_minor__ >= (minor)))
129              
130             #if __clang__ && defined __has_builtin
131             #define ECB_CLANG_BUILTIN(x) __has_builtin (x)
132             #else
133             #define ECB_CLANG_BUILTIN(x) 0
134             #endif
135              
136             #if __clang__ && defined __has_extension
137             #define ECB_CLANG_EXTENSION(x) __has_extension (x)
138             #else
139             #define ECB_CLANG_EXTENSION(x) 0
140             #endif
141              
142             #define ECB_CPP (__cplusplus+0)
143             #define ECB_CPP11 (__cplusplus >= 201103L)
144             #define ECB_CPP14 (__cplusplus >= 201402L)
145             #define ECB_CPP17 (__cplusplus >= 201703L)
146              
147             #if ECB_CPP
148             #define ECB_C 0
149             #define ECB_STDC_VERSION 0
150             #else
151             #define ECB_C 1
152             #define ECB_STDC_VERSION __STDC_VERSION__
153             #endif
154              
155             #define ECB_C99 (ECB_STDC_VERSION >= 199901L)
156             #define ECB_C11 (ECB_STDC_VERSION >= 201112L)
157             #define ECB_C17 (ECB_STDC_VERSION >= 201710L)
158              
159             #if ECB_CPP
160             #define ECB_EXTERN_C extern "C"
161             #define ECB_EXTERN_C_BEG ECB_EXTERN_C {
162             #define ECB_EXTERN_C_END }
163             #else
164             #define ECB_EXTERN_C extern
165             #define ECB_EXTERN_C_BEG
166             #define ECB_EXTERN_C_END
167             #endif
168              
169             /*****************************************************************************/
170              
171             /* ECB_NO_THREADS - ecb is not used by multiple threads, ever */
172             /* ECB_NO_SMP - ecb might be used in multiple threads, but only on a single cpu */
173              
174             #if ECB_NO_THREADS
175             #define ECB_NO_SMP 1
176             #endif
177              
178             #if ECB_NO_SMP
179             #define ECB_MEMORY_FENCE do { } while (0)
180             #endif
181              
182             /* http://www-01.ibm.com/support/knowledgecenter/SSGH3R_13.1.0/com.ibm.xlcpp131.aix.doc/compiler_ref/compiler_builtins.html */
183             #if __xlC__ && ECB_CPP
184             #include <builtins.h>
185             #endif
186              
187             #if 1400 <= _MSC_VER
188             #include <intrin.h> /* fence functions _ReadBarrier, also bit search functions _BitScanReverse */
189             #endif
190              
191             #ifndef ECB_MEMORY_FENCE
192             #if ECB_GCC_VERSION(2,5) || defined __INTEL_COMPILER || (__llvm__ && __GNUC__) || __SUNPRO_C >= 0x5110 || __SUNPRO_CC >= 0x5110
193             #define ECB_MEMORY_FENCE_RELAXED __asm__ __volatile__ ("" : : : "memory")
194             #if __i386 || __i386__
195             #define ECB_MEMORY_FENCE __asm__ __volatile__ ("lock; orb $0, -1(%%esp)" : : : "memory")
196             #define ECB_MEMORY_FENCE_ACQUIRE __asm__ __volatile__ ("" : : : "memory")
197             #define ECB_MEMORY_FENCE_RELEASE __asm__ __volatile__ ("" : : : "memory")
198             #elif ECB_GCC_AMD64
199             #define ECB_MEMORY_FENCE __asm__ __volatile__ ("mfence" : : : "memory")
200             #define ECB_MEMORY_FENCE_ACQUIRE __asm__ __volatile__ ("" : : : "memory")
201             #define ECB_MEMORY_FENCE_RELEASE __asm__ __volatile__ ("" : : : "memory")
202             #elif __powerpc__ || __ppc__ || __powerpc64__ || __ppc64__
203             #define ECB_MEMORY_FENCE __asm__ __volatile__ ("sync" : : : "memory")
204             #elif defined __ARM_ARCH_2__ \
205             || defined __ARM_ARCH_3__ || defined __ARM_ARCH_3M__ \
206             || defined __ARM_ARCH_4__ || defined __ARM_ARCH_4T__ \
207             || defined __ARM_ARCH_5__ || defined __ARM_ARCH_5E__ \
208             || defined __ARM_ARCH_5T__ || defined __ARM_ARCH_5TE__ \
209             || defined __ARM_ARCH_5TEJ__
210             /* should not need any, unless running old code on newer cpu - arm doesn't support that */
211             #elif defined __ARM_ARCH_6__ || defined __ARM_ARCH_6J__ \
212             || defined __ARM_ARCH_6K__ || defined __ARM_ARCH_6ZK__ \
213             || defined __ARM_ARCH_6T2__
214             #define ECB_MEMORY_FENCE __asm__ __volatile__ ("mcr p15,0,%0,c7,c10,5" : : "r" (0) : "memory")
215             #elif defined __ARM_ARCH_7__ || defined __ARM_ARCH_7A__ \
216             || defined __ARM_ARCH_7R__ || defined __ARM_ARCH_7M__
217             #define ECB_MEMORY_FENCE __asm__ __volatile__ ("dmb" : : : "memory")
218             #elif __aarch64__
219             #define ECB_MEMORY_FENCE __asm__ __volatile__ ("dmb ish" : : : "memory")
220             #elif (__sparc || __sparc__) && !(__sparc_v8__ || defined __sparcv8)
221             #define ECB_MEMORY_FENCE __asm__ __volatile__ ("membar #LoadStore | #LoadLoad | #StoreStore | #StoreLoad" : : : "memory")
222             #define ECB_MEMORY_FENCE_ACQUIRE __asm__ __volatile__ ("membar #LoadStore | #LoadLoad" : : : "memory")
223             #define ECB_MEMORY_FENCE_RELEASE __asm__ __volatile__ ("membar #LoadStore | #StoreStore" : : : "memory") /* release fence; the "memory" clobber also stops the compiler from moving memory accesses across it, like the sibling fences above */
224             #elif defined __s390__ || defined __s390x__
225             #define ECB_MEMORY_FENCE __asm__ __volatile__ ("bcr 15,0" : : : "memory")
226             #elif defined __mips__
227             /* GNU/Linux emulates sync on mips1 architectures, so we force its use */
228             /* anybody else who still uses mips1 is supposed to send in their version, with detection code. */
229             #define ECB_MEMORY_FENCE __asm__ __volatile__ (".set mips2; sync; .set mips0" : : : "memory")
230             #elif defined __alpha__
231             #define ECB_MEMORY_FENCE __asm__ __volatile__ ("mb" : : : "memory")
232             #elif defined __hppa__
233             #define ECB_MEMORY_FENCE __asm__ __volatile__ ("" : : : "memory")
234             #define ECB_MEMORY_FENCE_RELEASE __asm__ __volatile__ ("" : : : "memory") /* no instruction emitted, but the "memory" clobber is required for this to act as a compiler barrier */
235             #elif defined __ia64__
236             #define ECB_MEMORY_FENCE __asm__ __volatile__ ("mf" : : : "memory")
237             #elif defined __m68k__
238             #define ECB_MEMORY_FENCE __asm__ __volatile__ ("" : : : "memory")
239             #elif defined __m88k__
240             #define ECB_MEMORY_FENCE __asm__ __volatile__ ("tb1 0,%%r0,128" : : : "memory")
241             #elif defined __sh__
242             #define ECB_MEMORY_FENCE __asm__ __volatile__ ("" : : : "memory")
243             #endif
244             #endif
245             #endif
246              
247             #ifndef ECB_MEMORY_FENCE
248             #if ECB_GCC_VERSION(4,7)
249             /* see comment below (stdatomic.h) about the C11 memory model. */
250             #define ECB_MEMORY_FENCE __atomic_thread_fence (__ATOMIC_SEQ_CST)
251             #define ECB_MEMORY_FENCE_ACQUIRE __atomic_thread_fence (__ATOMIC_ACQUIRE)
252             #define ECB_MEMORY_FENCE_RELEASE __atomic_thread_fence (__ATOMIC_RELEASE)
253             #undef ECB_MEMORY_FENCE_RELAXED
254             #define ECB_MEMORY_FENCE_RELAXED __atomic_thread_fence (__ATOMIC_RELAXED)
255              
256             #elif ECB_CLANG_EXTENSION(c_atomic)
257             /* see comment below (stdatomic.h) about the C11 memory model. */
258             #define ECB_MEMORY_FENCE __c11_atomic_thread_fence (__ATOMIC_SEQ_CST)
259             #define ECB_MEMORY_FENCE_ACQUIRE __c11_atomic_thread_fence (__ATOMIC_ACQUIRE)
260             #define ECB_MEMORY_FENCE_RELEASE __c11_atomic_thread_fence (__ATOMIC_RELEASE)
261             #undef ECB_MEMORY_FENCE_RELAXED
262             #define ECB_MEMORY_FENCE_RELAXED __c11_atomic_thread_fence (__ATOMIC_RELAXED)
263              
264             #elif ECB_GCC_VERSION(4,4) || defined __INTEL_COMPILER || defined __clang__
265             #define ECB_MEMORY_FENCE __sync_synchronize ()
266             #elif _MSC_VER >= 1500 /* VC++ 2008 */
267             /* apparently, microsoft broke all the memory barrier stuff in Visual Studio 2008... */
268             #pragma intrinsic(_ReadBarrier,_WriteBarrier,_ReadWriteBarrier)
269             #define ECB_MEMORY_FENCE _ReadWriteBarrier (); MemoryBarrier()
270             #define ECB_MEMORY_FENCE_ACQUIRE _ReadWriteBarrier (); MemoryBarrier() /* according to msdn, _ReadBarrier is not a load fence */
271             #define ECB_MEMORY_FENCE_RELEASE _WriteBarrier (); MemoryBarrier()
272             #elif _MSC_VER >= 1400 /* VC++ 2005 */
273             #pragma intrinsic(_ReadBarrier,_WriteBarrier,_ReadWriteBarrier)
274             #define ECB_MEMORY_FENCE _ReadWriteBarrier ()
275             #define ECB_MEMORY_FENCE_ACQUIRE _ReadWriteBarrier () /* according to msdn, _ReadBarrier is not a load fence */
276             #define ECB_MEMORY_FENCE_RELEASE _WriteBarrier ()
277             #elif defined _WIN32
278             #include <windows.h>
279             #define ECB_MEMORY_FENCE MemoryBarrier () /* actually just xchg on x86... scary */
280             #elif __SUNPRO_C >= 0x5110 || __SUNPRO_CC >= 0x5110
281             #include <mbarrier.h>
282             #define ECB_MEMORY_FENCE __machine_rw_barrier ()
283             #define ECB_MEMORY_FENCE_ACQUIRE __machine_acq_barrier ()
284             #define ECB_MEMORY_FENCE_RELEASE __machine_rel_barrier ()
285             #define ECB_MEMORY_FENCE_RELAXED __compiler_barrier ()
286             #elif __xlC__
287             #define ECB_MEMORY_FENCE __sync ()
288             #endif
289             #endif
290              
291             #ifndef ECB_MEMORY_FENCE
292             #if ECB_C11 && !defined __STDC_NO_ATOMICS__
293             /* we assume that these memory fences work on all variables/all memory accesses, */
294             /* not just C11 atomics and atomic accesses */
295             #include <stdatomic.h>
296             #define ECB_MEMORY_FENCE atomic_thread_fence (memory_order_seq_cst)
297             #define ECB_MEMORY_FENCE_ACQUIRE atomic_thread_fence (memory_order_acquire)
298             #define ECB_MEMORY_FENCE_RELEASE atomic_thread_fence (memory_order_release)
299             #endif
300             #endif
301              
302             #ifndef ECB_MEMORY_FENCE
303             #if !ECB_AVOID_PTHREADS
304             /*
305             * if you get undefined symbol references to pthread_mutex_lock,
306             * or failure to find pthread.h, then you should implement
307             * the ECB_MEMORY_FENCE operations for your cpu/compiler
308             * OR provide pthread.h and link against the posix thread library
309             * of your system.
310             */
311             #include <pthread.h>
312             #define ECB_NEEDS_PTHREADS 1
313             #define ECB_MEMORY_FENCE_NEEDS_PTHREADS 1
314              
315             static pthread_mutex_t ecb_mf_lock = PTHREAD_MUTEX_INITIALIZER;
316             #define ECB_MEMORY_FENCE do { pthread_mutex_lock (&ecb_mf_lock); pthread_mutex_unlock (&ecb_mf_lock); } while (0)
317             #endif
318             #endif
319              
320             #if !defined ECB_MEMORY_FENCE_ACQUIRE && defined ECB_MEMORY_FENCE
321             #define ECB_MEMORY_FENCE_ACQUIRE ECB_MEMORY_FENCE
322             #endif
323              
324             #if !defined ECB_MEMORY_FENCE_RELEASE && defined ECB_MEMORY_FENCE
325             #define ECB_MEMORY_FENCE_RELEASE ECB_MEMORY_FENCE
326             #endif
327              
328             #if !defined ECB_MEMORY_FENCE_RELAXED && defined ECB_MEMORY_FENCE
329             #define ECB_MEMORY_FENCE_RELAXED ECB_MEMORY_FENCE /* very heavy-handed */
330             #endif
331              
332             /*****************************************************************************/
333              
334             #if ECB_CPP
335             #define ecb_inline static inline
336             #elif ECB_GCC_VERSION(2,5)
337             #define ecb_inline static __inline__
338             #elif ECB_C99
339             #define ecb_inline static inline
340             #else
341             #define ecb_inline static
342             #endif
343              
344             #if ECB_GCC_VERSION(3,3)
345             #define ecb_restrict __restrict__
346             #elif ECB_C99
347             #define ecb_restrict restrict
348             #else
349             #define ecb_restrict
350             #endif
351              
352             typedef int ecb_bool;
353              
354             #define ECB_CONCAT_(a, b) a ## b
355             #define ECB_CONCAT(a, b) ECB_CONCAT_(a, b)
356             #define ECB_STRINGIFY_(a) # a
357             #define ECB_STRINGIFY(a) ECB_STRINGIFY_(a)
358             #define ECB_STRINGIFY_EXPR(expr) ((expr), ECB_STRINGIFY_ (expr))
359              
360             #define ecb_function_ ecb_inline
361              
362             #if ECB_GCC_VERSION(3,1) || ECB_CLANG_VERSION(2,8)
363             #define ecb_attribute(attrlist) __attribute__ (attrlist)
364             #else
365             #define ecb_attribute(attrlist)
366             #endif
367              
368             #if ECB_GCC_VERSION(3,1) || ECB_CLANG_BUILTIN(__builtin_constant_p)
369             #define ecb_is_constant(expr) __builtin_constant_p (expr)
370             #else
371             /* possible C11 impl for integral types
372             typedef struct ecb_is_constant_struct ecb_is_constant_struct;
373             #define ecb_is_constant(expr) _Generic ((1 ? (struct ecb_is_constant_struct *)0 : (void *)((expr) - (expr)), ecb_is_constant_struct *: 0, default: 1)) */
374              
375             #define ecb_is_constant(expr) 0
376             #endif
377              
378             #if ECB_GCC_VERSION(3,1) || ECB_CLANG_BUILTIN(__builtin_expect)
379             #define ecb_expect(expr,value) __builtin_expect ((expr),(value))
380             #else
381             #define ecb_expect(expr,value) (expr)
382             #endif
383              
384             #if ECB_GCC_VERSION(3,1) || ECB_CLANG_BUILTIN(__builtin_prefetch)
385             #define ecb_prefetch(addr,rw,locality) __builtin_prefetch (addr, rw, locality)
386             #else
387             #define ecb_prefetch(addr,rw,locality)
388             #endif
389              
390             /* no emulation for ecb_decltype */
391             #if ECB_CPP11
392             // older implementations might have problems with decltype(x)::type, work around it
393             template<class T> struct ecb_decltype_t { typedef T type; };
394             #define ecb_decltype(x) ecb_decltype_t<decltype (x)>::type
395             #elif ECB_GCC_VERSION(3,0) || ECB_CLANG_VERSION(2,8)
396             #define ecb_decltype(x) __typeof__ (x)
397             #endif
398              
399             #if _MSC_VER >= 1300
400             #define ecb_deprecated __declspec (deprecated)
401             #else
402             #define ecb_deprecated ecb_attribute ((__deprecated__))
403             #endif
404              
405             #if _MSC_VER >= 1500 /* ecb_deprecated_message(msg): mark a declaration as deprecated, attaching msg where the compiler supports it */
406             #define ecb_deprecated_message(msg) __declspec (deprecated (msg))
407             #elif ECB_GCC_VERSION(4,5)
408             #define ecb_deprecated_message(msg) ecb_attribute ((__deprecated__ (msg))) /* parentheses must balance: ecb_attribute takes the doubly parenthesised attribute list */
409             #else
410             #define ecb_deprecated_message(msg) ecb_deprecated /* no message support: fall back to the plain deprecation marker */
411             #endif
412              
413             #if _MSC_VER >= 1400
414             #define ecb_noinline __declspec (noinline)
415             #else
416             #define ecb_noinline ecb_attribute ((__noinline__))
417             #endif
418              
419             #define ecb_unused ecb_attribute ((__unused__))
420             #define ecb_const ecb_attribute ((__const__))
421             #define ecb_pure ecb_attribute ((__pure__))
422              
423             #if ECB_C11 || __IBMC_NORETURN
424             /* http://www-01.ibm.com/support/knowledgecenter/SSGH3R_13.1.0/com.ibm.xlcpp131.aix.doc/language_ref/noreturn.html */
425             #define ecb_noreturn _Noreturn
426             #elif ECB_CPP11
427             #define ecb_noreturn [[noreturn]]
428             #elif _MSC_VER >= 1200
429             /* http://msdn.microsoft.com/en-us/library/k6ktzx3s.aspx */
430             #define ecb_noreturn __declspec (noreturn)
431             #else
432             #define ecb_noreturn ecb_attribute ((__noreturn__))
433             #endif
434              
435             #if ECB_GCC_VERSION(4,3)
436             #define ecb_artificial ecb_attribute ((__artificial__))
437             #define ecb_hot ecb_attribute ((__hot__))
438             #define ecb_cold ecb_attribute ((__cold__))
439             #else
440             #define ecb_artificial
441             #define ecb_hot
442             #define ecb_cold
443             #endif
444              
445             /* put around conditional expressions if you are very sure that the */
446             /* expression is mostly true or mostly false. note that these return */
447             /* booleans, not the expression. */
448             #define ecb_expect_false(expr) ecb_expect (!!(expr), 0)
449             #define ecb_expect_true(expr) ecb_expect (!!(expr), 1)
450             /* for compatibility to the rest of the world */
451             #define ecb_likely(expr) ecb_expect_true (expr)
452             #define ecb_unlikely(expr) ecb_expect_false (expr)
453              
454             /* count trailing zero bits and count # of one bits */
455             #if ECB_GCC_VERSION(3,4) \
456             || (ECB_CLANG_BUILTIN(__builtin_clz) && ECB_CLANG_BUILTIN(__builtin_clzll) \
457             && ECB_CLANG_BUILTIN(__builtin_ctz) && ECB_CLANG_BUILTIN(__builtin_ctzll) \
458             && ECB_CLANG_BUILTIN(__builtin_popcount))
459             /* we assume int == 32 bit, long == 32 or 64 bit and long long == 64 bit */
460             #define ecb_ld32(x) (__builtin_clz (x) ^ 31)
461             #define ecb_ld64(x) (__builtin_clzll (x) ^ 63)
462             #define ecb_ctz32(x) __builtin_ctz (x)
463             #define ecb_ctz64(x) __builtin_ctzll (x)
464             #define ecb_popcount32(x) __builtin_popcount (x)
465             /* no popcountll */
466             #else
467             ecb_function_ ecb_const int ecb_ctz32 (uint32_t x); /* fallback ecb_ctz32: index of the lowest set bit of x, i.e. the number of trailing zero bits */
468             ecb_function_ ecb_const int
469             ecb_ctz32 (uint32_t x)
470             {
471             #if 1400 <= _MSC_VER && (_M_IX86 || _M_X64 || _M_IA64 || _M_ARM)
472             unsigned long r;
473             _BitScanForward (&r, x); /* compiler intrinsic stores the bit index in r; NOTE(review): result for x == 0 depends on the intrinsic - confirm */
474             return (int)r;
475             #else
476             int r = 0;
477              
478             x &= ~x + 1; /* this isolates the lowest bit */
479              
480             #if ECB_branchless_on_i386 /* not defined in the visible part of this file, so the #else variant is what normally gets compiled */
481             r += !!(x & 0xaaaaaaaa) << 0; /* each mask selects the bit positions whose index has the matching bit set */
482             r += !!(x & 0xcccccccc) << 1;
483             r += !!(x & 0xf0f0f0f0) << 2;
484             r += !!(x & 0xff00ff00) << 3;
485             r += !!(x & 0xffff0000) << 4;
486             #else
487             if (x & 0xaaaaaaaa) r += 1; /* same computation as the branchless variant, written with conditionals */
488             if (x & 0xcccccccc) r += 2;
489             if (x & 0xf0f0f0f0) r += 4;
490             if (x & 0xff00ff00) r += 8;
491             if (x & 0xffff0000) r += 16;
492             #endif
493              
494             return r; /* stays 0 when x == 0, because no mask matches */
495             #endif
496             }
497              
498             ecb_function_ ecb_const int ecb_ctz64 (uint64_t x); /* fallback ecb_ctz64: number of trailing zero bits of a 64 bit value */
499             ecb_function_ ecb_const int
500             ecb_ctz64 (uint64_t x)
501             {
502             #if 1400 <= _MSC_VER && (_M_X64 || _M_IA64 || _M_ARM)
503             unsigned long index;
504             _BitScanForward64 (&index, x);
505             return (int)index;
506             #else
507             if (x & 0xffffffff) return ecb_ctz32 (x); /* lowest set bit lies in the low half; the argument is truncated to 32 bits */
508             return ecb_ctz32 (x >> 32) + 32; /* low half is empty: scan the high half and add its offset */
509             #endif
510             }
511              
512             ecb_function_ ecb_const int ecb_popcount32 (uint32_t x); /* fallback ecb_popcount32: number of one bits in x */
513             ecb_function_ ecb_const int
514             ecb_popcount32 (uint32_t x)
515             {
516             x = x - ((x >> 1) & 0x55555555); /* every 2 bit field now holds the count of its own bits */
517             x = (x & 0x33333333) + ((x >> 2) & 0x33333333); /* counts per 4 bit field */
518             x = (x + (x >> 4)) & 0x0f0f0f0f; /* counts per byte */
519             x = x * 0x01010101; /* accumulates all byte counts in the topmost byte */
520              
521             return x >> 24;
522             }
523              
524             ecb_function_ ecb_const int ecb_ld32 (uint32_t x); /* fallback ecb_ld32: index of the highest set bit of x */
525             ecb_function_ ecb_const int ecb_ld32 (uint32_t x)
526             {
527             #if 1400 <= _MSC_VER && (_M_IX86 || _M_X64 || _M_IA64 || _M_ARM)
528             unsigned long index;
529             _BitScanReverse (&index, x);
530             return (int)index;
531             #else
532             int msb = 0;
533              
534             if (x & 0xffff0000) { msb |= 16; x >>= 16; } /* binary search: keep whichever half still holds the top bit */
535             if (x & 0x0000ff00) { msb |= 8; x >>= 8; }
536             if (x & 0x000000f0) { msb |= 4; x >>= 4; }
537             if (x & 0x0000000c) { msb |= 2; x >>= 2; }
538             if (x & 0x00000002) { msb |= 1; }
539              
540             return msb; /* 0 for x == 1 and also for x == 0 */
541             #endif
542             }
543              
544             ecb_function_ ecb_const int ecb_ld64 (uint64_t x); /* fallback ecb_ld64: index of the highest set bit of a 64 bit value */
545             ecb_function_ ecb_const int ecb_ld64 (uint64_t x)
546             {
547             #if 1400 <= _MSC_VER && (_M_X64 || _M_IA64 || _M_ARM)
548             unsigned long index;
549             _BitScanReverse64 (&index, x);
550             return (int)index;
551             #else
552             uint32_t upper = x >> 32; /* high half decides which 32 bit word holds the top bit */
553              
554             if (upper) return 32 + ecb_ld32 (upper);
555              
556             return ecb_ld32 (x); /* high half empty: the argument fits into 32 bits */
557             #endif
558             }
559             #endif
560              
561             ecb_function_ ecb_const ecb_bool ecb_is_pot32 (uint32_t x); /* true if at most one bit of x is set (a power of two, or 0) */
562             ecb_function_ ecb_const ecb_bool ecb_is_pot32 (uint32_t x) { return (x & (x - 1)) == 0; }
563             ecb_function_ ecb_const ecb_bool ecb_is_pot64 (uint64_t x); /* true if at most one bit of x is set (a power of two, or 0) */
564             ecb_function_ ecb_const ecb_bool ecb_is_pot64 (uint64_t x) { return (x & (x - 1)) == 0; }
565              
566             ecb_function_ ecb_const uint8_t ecb_bitrev8 (uint8_t x); /* ecb_bitrev8: 8 bit member of the ecb_bitrevN family (bit order reversal, going by the name) */
567             ecb_function_ ecb_const uint8_t ecb_bitrev8 (uint8_t x)
568             {
569             return ( (x * 0x0802U & 0x22110U) /* multiply/mask/multiply bit trick done in unsigned arithmetic; NOTE(review): constants not re-derived here */
570             | (x * 0x8020U & 0x88440U)) * 0x10101U >> 16; /* the uint8_t return type keeps only the low 8 bits of the shifted product */
571             }
572              
573             ecb_function_ ecb_const uint16_t ecb_bitrev16 (uint16_t x); /* reverses the order of the 16 bits in x */
574             ecb_function_ ecb_const uint16_t ecb_bitrev16 (uint16_t x)
575             {
576             x = ((x & 0xaaaa) >> 1) | ((x & 0x5555) << 1); /* swap neighbouring bits */
577             x = ((x & 0xcccc) >> 2) | ((x & 0x3333) << 2); /* swap bit pairs */
578             x = ((x & 0xf0f0) >> 4) | ((x & 0x0f0f) << 4); /* swap nibbles */
579             x = (x << 8) | (x >> 8); /* swap the two bytes; the assignment truncates back to 16 bits */
580              
581             return x;
582             }
583              
584             ecb_function_ ecb_const uint32_t ecb_bitrev32 (uint32_t x); /* reverses the order of the 32 bits in x */
585             ecb_function_ ecb_const uint32_t ecb_bitrev32 (uint32_t x)
586             {
587             x = ((x & 0xaaaaaaaa) >> 1) | ((x & 0x55555555) << 1); /* swap neighbouring bits */
588             x = ((x & 0xcccccccc) >> 2) | ((x & 0x33333333) << 2); /* swap bit pairs */
589             x = ((x & 0xf0f0f0f0) >> 4) | ((x & 0x0f0f0f0f) << 4); /* swap nibbles */
590             x = ((x & 0xff00ff00) >> 8) | ((x & 0x00ff00ff) << 8); /* swap bytes within each half */
591             x = (x << 16) | (x >> 16); /* swap the two halves */
592              
593             return x;
594             }
595              
596             /* popcount64 is only available on 64 bit cpus as gcc builtin */
597             /* so for this version we are lazy */
598             ecb_function_ ecb_const int ecb_popcount64 (uint64_t x); /* number of one bits in a 64 bit value, as the sum of both 32 bit halves */
599             ecb_function_ ecb_const int
600             ecb_popcount64 (uint64_t x)
601             {
602             return ecb_popcount32 ((uint32_t)(x >> 32)) + ecb_popcount32 ((uint32_t)x);
603             }
604              
605             ecb_inline ecb_const uint8_t ecb_rotl8 (uint8_t x, unsigned int count); /* ecb_rotlN / ecb_rotrN: rotate the N bit value x left / right by count bits; count is taken modulo N */
606             ecb_inline ecb_const uint8_t ecb_rotr8 (uint8_t x, unsigned int count);
607             ecb_inline ecb_const uint16_t ecb_rotl16 (uint16_t x, unsigned int count);
608             ecb_inline ecb_const uint16_t ecb_rotr16 (uint16_t x, unsigned int count);
609             ecb_inline ecb_const uint32_t ecb_rotl32 (uint32_t x, unsigned int count);
610             ecb_inline ecb_const uint32_t ecb_rotr32 (uint32_t x, unsigned int count);
611             ecb_inline ecb_const uint64_t ecb_rotl64 (uint64_t x, unsigned int count);
612             ecb_inline ecb_const uint64_t ecb_rotr64 (uint64_t x, unsigned int count);
613              
614             ecb_inline ecb_const uint8_t ecb_rotl8 (uint8_t x, unsigned int count) { return (x >> (-count & 7)) | (x << (count & 7)); } /* both shift amounts are masked to 0..N-1, so count == 0 or count >= N never shifts by the operand width or more (undefined behaviour) */
615             ecb_inline ecb_const uint8_t ecb_rotr8 (uint8_t x, unsigned int count) { return (x << (-count & 7)) | (x >> (count & 7)); } /* -count is unsigned arithmetic, so (-count & (N-1)) equals (N - count) modulo N */
616             ecb_inline ecb_const uint16_t ecb_rotl16 (uint16_t x, unsigned int count) { return (x >> (-count & 15)) | (x << (count & 15)); }
617             ecb_inline ecb_const uint16_t ecb_rotr16 (uint16_t x, unsigned int count) { return (x << (-count & 15)) | (x >> (count & 15)); }
618             ecb_inline ecb_const uint32_t ecb_rotl32 (uint32_t x, unsigned int count) { return (x >> (-count & 31)) | (x << (count & 31)); }
619             ecb_inline ecb_const uint32_t ecb_rotr32 (uint32_t x, unsigned int count) { return (x << (-count & 31)) | (x >> (count & 31)); }
620             ecb_inline ecb_const uint64_t ecb_rotl64 (uint64_t x, unsigned int count) { return (x >> (-count & 63)) | (x << (count & 63)); }
621             ecb_inline ecb_const uint64_t ecb_rotr64 (uint64_t x, unsigned int count) { return (x << (-count & 63)) | (x >> (count & 63)); }
622              
623             #if ECB_CPP
                /* C++ only: overloads that pick the fixed-width helper from the argument type. */
                /* 8, 16 and 32 bit arguments are forwarded to the 32 bit helper where no */
                /* narrower one is used; 64 bit arguments go to the 64 bit helper. */
624              
                /* note: the ecb_ctz overloads return the argument's type, not int */
625             inline uint8_t ecb_ctz (uint8_t v) { return ecb_ctz32 (v); }
626             inline uint16_t ecb_ctz (uint16_t v) { return ecb_ctz32 (v); }
627             inline uint32_t ecb_ctz (uint32_t v) { return ecb_ctz32 (v); }
628             inline uint64_t ecb_ctz (uint64_t v) { return ecb_ctz64 (v); }
629              
630             inline bool ecb_is_pot (uint8_t v) { return ecb_is_pot32 (v); }
631             inline bool ecb_is_pot (uint16_t v) { return ecb_is_pot32 (v); }
632             inline bool ecb_is_pot (uint32_t v) { return ecb_is_pot32 (v); }
633             inline bool ecb_is_pot (uint64_t v) { return ecb_is_pot64 (v); }
634              
635             inline int ecb_ld (uint8_t v) { return ecb_ld32 (v); }
636             inline int ecb_ld (uint16_t v) { return ecb_ld32 (v); }
637             inline int ecb_ld (uint32_t v) { return ecb_ld32 (v); }
638             inline int ecb_ld (uint64_t v) { return ecb_ld64 (v); }
639              
640             inline int ecb_popcount (uint8_t v) { return ecb_popcount32 (v); }
641             inline int ecb_popcount (uint16_t v) { return ecb_popcount32 (v); }
642             inline int ecb_popcount (uint32_t v) { return ecb_popcount32 (v); }
643             inline int ecb_popcount (uint64_t v) { return ecb_popcount64 (v); }
644              
                /* no 64 bit ecb_bitrev overload is provided here */
645             inline uint8_t ecb_bitrev (uint8_t v) { return ecb_bitrev8 (v); }
646             inline uint16_t ecb_bitrev (uint16_t v) { return ecb_bitrev16 (v); }
647             inline uint32_t ecb_bitrev (uint32_t v) { return ecb_bitrev32 (v); }
648              
649             inline uint8_t ecb_rotl (uint8_t v, unsigned int count) { return ecb_rotl8 (v, count); }
650             inline uint16_t ecb_rotl (uint16_t v, unsigned int count) { return ecb_rotl16 (v, count); }
651             inline uint32_t ecb_rotl (uint32_t v, unsigned int count) { return ecb_rotl32 (v, count); }
652             inline uint64_t ecb_rotl (uint64_t v, unsigned int count) { return ecb_rotl64 (v, count); }
653              
654             inline uint8_t ecb_rotr (uint8_t v, unsigned int count) { return ecb_rotr8 (v, count); }
655             inline uint16_t ecb_rotr (uint16_t v, unsigned int count) { return ecb_rotr16 (v, count); }
656             inline uint32_t ecb_rotr (uint32_t v, unsigned int count) { return ecb_rotr32 (v, count); }
657             inline uint64_t ecb_rotr (uint64_t v, unsigned int count) { return ecb_rotr64 (v, count); }
658              
659             #endif
660              
661             #if ECB_GCC_VERSION(4,3) || (ECB_CLANG_BUILTIN(__builtin_bswap32) && ECB_CLANG_BUILTIN(__builtin_bswap64))
                /* byte swapping: ecb_bswap16/32/64 reverse the byte order of their argument. */
                /* three implementations: compiler builtins, msvc intrinsics, portable fallback. */
662             #if ECB_GCC_VERSION(4,8) || ECB_CLANG_BUILTIN(__builtin_bswap16)
663             #define ecb_bswap16(x) __builtin_bswap16 (x)
664             #else
                /* no 16 bit builtin: swap as 32 bit, the swapped low half ends up in the top 16 bits */
665             #define ecb_bswap16(x) (__builtin_bswap32 (x) >> 16)
666             #endif
667             #define ecb_bswap32(x) __builtin_bswap32 (x)
668             #define ecb_bswap64(x) __builtin_bswap64 (x)
669             #elif _MSC_VER
                /* _byteswap_ushort/_byteswap_ulong/_byteswap_uint64 are declared in <stdlib.h> */
670             #include <stdlib.h>
671             #define ecb_bswap16(x) ((uint16_t)_byteswap_ushort ((uint16_t)(x)))
672             #define ecb_bswap32(x) ((uint32_t)_byteswap_ulong ((uint32_t)(x)))
673             #define ecb_bswap64(x) ((uint64_t)_byteswap_uint64 ((uint64_t)(x)))
674             #else
                /* portable fallback: each width is built from two swaps of the next smaller width */
675             ecb_function_ ecb_const uint16_t ecb_bswap16 (uint16_t x);
676             ecb_function_ ecb_const uint16_t
677             ecb_bswap16 (uint16_t x)
678             {
679             return ecb_rotl16 (x, 8); /* rotating a 16 bit value by 8 exchanges its two bytes */
680             }
681              
682             ecb_function_ ecb_const uint32_t ecb_bswap32 (uint32_t x);
683             ecb_function_ ecb_const uint32_t
684             ecb_bswap32 (uint32_t x)
685             {
686             return (((uint32_t)ecb_bswap16 (x)) << 16) | ecb_bswap16 (x >> 16);
687             }
688              
689             ecb_function_ ecb_const uint64_t ecb_bswap64 (uint64_t x);
690             ecb_function_ ecb_const uint64_t
691             ecb_bswap64 (uint64_t x)
692             {
693             return (((uint64_t)ecb_bswap32 (x)) << 32) | ecb_bswap32 (x >> 32);
694             }
695             #endif
696              
697             #if ECB_GCC_VERSION(4,5) || ECB_CLANG_BUILTIN(__builtin_unreachable)
                /* ecb_unreachable () marks a point that control flow never reaches */
698             #define ecb_unreachable() __builtin_unreachable ()
699             #else
700             /* this seems to work fine, but gcc always emits a warning for it :/ */
                /* fallback: an empty function declared ecb_noreturn that nevertheless falls off its end */
701             ecb_inline ecb_noreturn void ecb_unreachable (void);
702             ecb_inline ecb_noreturn void ecb_unreachable (void) { }
703             #endif
704              
705             /* try to tell the compiler that some condition is definitely true */
                /* expands to an if/else whose else branch is the expression 0, so the */
                /* caller's trailing semicolon completes the statement. cond is evaluated. */
706             #define ecb_assume(cond) if (!(cond)) ecb_unreachable (); else 0
707              
708             ecb_inline ecb_const uint32_t ecb_byteorder_helper (void);
                /* returns 0x44332211 on little endian and 0x11223344 on big endian hosts. */
                /* as a side effect of the preprocessor checks, ECB_LITTLE_ENDIAN or */
                /* ECB_BIG_ENDIAN is defined to 1 when the byte order is known at compile time. */
709             ecb_inline ecb_const uint32_t
710 23           ecb_byteorder_helper (void)
711             {
712             /* the union code still generates code under pressure in gcc, */
713             /* but less than using pointers, and always seems to */
714             /* successfully return a constant. */
715             /* the reason why we have this horrible preprocessor mess */
716             /* is to avoid it in all cases, at least on common architectures */
717             /* or when using a recent enough gcc version (>= 4.6) */
718             #if (defined __BYTE_ORDER__ && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) \
719             || ((__i386 || __i386__ || _M_IX86 || ECB_GCC_AMD64 || ECB_MSVC_AMD64) && !__VOS__)
720             #define ECB_LITTLE_ENDIAN 1
721 23           return 0x44332211;
722             #elif (defined __BYTE_ORDER__ && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) \
723             || ((__AARCH64EB__ || __MIPSEB__ || __ARMEB__) && !__VOS__)
724             #define ECB_BIG_ENDIAN 1
725             return 0x11223344;
726             #else
                /* unknown at preprocessing time: store the bytes 0x11 0x22 0x33 0x44 in */
                /* memory order and read them back as one 32 bit value */
727             union
728             {
729             uint8_t c[4];
730             uint32_t u;
731             } u = { 0x11, 0x22, 0x33, 0x44 };
732             return u.u;
733             #endif
734             }
735              
736             ecb_inline ecb_const ecb_bool ecb_big_endian (void);
                /* true when ecb_byteorder_helper () reports the big endian pattern */
737 46           ecb_inline ecb_const ecb_bool ecb_big_endian (void) { return ecb_byteorder_helper () == 0x11223344; }
738             ecb_inline ecb_const ecb_bool ecb_little_endian (void);
                /* true when ecb_byteorder_helper () reports the little endian pattern; */
                /* both functions return false for any other byte order */
739             ecb_inline ecb_const ecb_bool ecb_little_endian (void) { return ecb_byteorder_helper () == 0x44332211; }
740              
741             /*****************************************************************************/
742             /* unaligned load/store */
743              
                /* big endian value -> host order: byte swap only on little endian hosts */
744             ecb_inline uint_fast16_t ecb_be_u16_to_host (uint_fast16_t v) { return ecb_little_endian () ? ecb_bswap16 (v) : v; }
745             ecb_inline uint_fast32_t ecb_be_u32_to_host (uint_fast32_t v) { return ecb_little_endian () ? ecb_bswap32 (v) : v; }
746             ecb_inline uint_fast64_t ecb_be_u64_to_host (uint_fast64_t v) { return ecb_little_endian () ? ecb_bswap64 (v) : v; }
747              
                /* little endian value -> host order: byte swap only on big endian hosts */
748             ecb_inline uint_fast16_t ecb_le_u16_to_host (uint_fast16_t v) { return ecb_big_endian () ? ecb_bswap16 (v) : v; }
749             ecb_inline uint_fast32_t ecb_le_u32_to_host (uint_fast32_t v) { return ecb_big_endian () ? ecb_bswap32 (v) : v; }
750             ecb_inline uint_fast64_t ecb_le_u64_to_host (uint_fast64_t v) { return ecb_big_endian () ? ecb_bswap64 (v) : v; }
751              
                /* load a host order value from ptr; memcpy is used, so ptr need not be aligned */
752             ecb_inline uint_fast16_t ecb_peek_u16_u (const void *ptr) { uint16_t v; memcpy (&v, ptr, sizeof (v)); return v; }
753             ecb_inline uint_fast32_t ecb_peek_u32_u (const void *ptr) { uint32_t v; memcpy (&v, ptr, sizeof (v)); return v; }
754             ecb_inline uint_fast64_t ecb_peek_u64_u (const void *ptr) { uint64_t v; memcpy (&v, ptr, sizeof (v)); return v; }
755              
                /* load a big endian value from ptr and return it in host order */
756             ecb_inline uint_fast16_t ecb_peek_be_u16_u (const void *ptr) { return ecb_be_u16_to_host (ecb_peek_u16_u (ptr)); }
757             ecb_inline uint_fast32_t ecb_peek_be_u32_u (const void *ptr) { return ecb_be_u32_to_host (ecb_peek_u32_u (ptr)); }
758             ecb_inline uint_fast64_t ecb_peek_be_u64_u (const void *ptr) { return ecb_be_u64_to_host (ecb_peek_u64_u (ptr)); }
759              
                /* load a little endian value from ptr and return it in host order */
760             ecb_inline uint_fast16_t ecb_peek_le_u16_u (const void *ptr) { return ecb_le_u16_to_host (ecb_peek_u16_u (ptr)); }
761             ecb_inline uint_fast32_t ecb_peek_le_u32_u (const void *ptr) { return ecb_le_u32_to_host (ecb_peek_u32_u (ptr)); }
762             ecb_inline uint_fast64_t ecb_peek_le_u64_u (const void *ptr) { return ecb_le_u64_to_host (ecb_peek_u64_u (ptr)); }
763              
                /* host order -> big endian; same operation as the ecb_be_*_to_host functions */
764             ecb_inline uint_fast16_t ecb_host_to_be_u16 (uint_fast16_t v) { return ecb_little_endian () ? ecb_bswap16 (v) : v; }
765             ecb_inline uint_fast32_t ecb_host_to_be_u32 (uint_fast32_t v) { return ecb_little_endian () ? ecb_bswap32 (v) : v; }
766             ecb_inline uint_fast64_t ecb_host_to_be_u64 (uint_fast64_t v) { return ecb_little_endian () ? ecb_bswap64 (v) : v; }
767              
                /* host order -> little endian; same operation as the ecb_le_*_to_host functions */
768             ecb_inline uint_fast16_t ecb_host_to_le_u16 (uint_fast16_t v) { return ecb_big_endian () ? ecb_bswap16 (v) : v; }
769             ecb_inline uint_fast32_t ecb_host_to_le_u32 (uint_fast32_t v) { return ecb_big_endian () ? ecb_bswap32 (v) : v; }
770             ecb_inline uint_fast64_t ecb_host_to_le_u64 (uint_fast64_t v) { return ecb_big_endian () ? ecb_bswap64 (v) : v; }
771              
                /* store a host order value at ptr; memcpy is used, so ptr need not be aligned */
772             ecb_inline void ecb_poke_u16_u (void *ptr, uint16_t v) { memcpy (ptr, &v, sizeof (v)); }
773             ecb_inline void ecb_poke_u32_u (void *ptr, uint32_t v) { memcpy (ptr, &v, sizeof (v)); }
774             ecb_inline void ecb_poke_u64_u (void *ptr, uint64_t v) { memcpy (ptr, &v, sizeof (v)); }
775              
                /* store v at ptr in big endian byte order */
776             ecb_inline void ecb_poke_be_u16_u (void *ptr, uint_fast16_t v) { ecb_poke_u16_u (ptr, ecb_host_to_be_u16 (v)); }
777             ecb_inline void ecb_poke_be_u32_u (void *ptr, uint_fast32_t v) { ecb_poke_u32_u (ptr, ecb_host_to_be_u32 (v)); }
778             ecb_inline void ecb_poke_be_u64_u (void *ptr, uint_fast64_t v) { ecb_poke_u64_u (ptr, ecb_host_to_be_u64 (v)); }
779            
                /* store v at ptr in little endian byte order */
780             ecb_inline void ecb_poke_le_u16_u (void *ptr, uint_fast16_t v) { ecb_poke_u16_u (ptr, ecb_host_to_le_u16 (v)); }
781             ecb_inline void ecb_poke_le_u32_u (void *ptr, uint_fast32_t v) { ecb_poke_u32_u (ptr, ecb_host_to_le_u32 (v)); }
782             ecb_inline void ecb_poke_le_u64_u (void *ptr, uint_fast64_t v) { ecb_poke_u64_u (ptr, ecb_host_to_le_u64 (v)); }
783              
784             #if ECB_CPP
785              
786             inline uint8_t ecb_bswap (uint8_t v) { return v; }
787             inline uint16_t ecb_bswap (uint16_t v) { return ecb_bswap16 (v); }
788             inline uint32_t ecb_bswap (uint32_t v) { return ecb_bswap32 (v); }
789             inline uint64_t ecb_bswap (uint64_t v) { return ecb_bswap64 (v); }
790              
791             template inline T ecb_be_to_host (T v) { return ecb_little_endian () ? ecb_bswap (v) : v; }
792             template inline T ecb_le_to_host (T v) { return ecb_big_endian () ? ecb_bswap (v) : v; }
793             template inline T ecb_peek (const void *ptr) { return *(const T *)ptr; }
794             template inline T ecb_peek_be (const void *ptr) { return ecb_be_to_host (ecb_peek (ptr)); }
795             template inline T ecb_peek_le (const void *ptr) { return ecb_le_to_host (ecb_peek (ptr)); }
796             template inline T ecb_peek_u (const void *ptr) { T v; memcpy (&v, ptr, sizeof (v)); return v; }
797             template inline T ecb_peek_be_u (const void *ptr) { return ecb_be_to_host (ecb_peek_u (ptr)); }
798             template inline T ecb_peek_le_u (const void *ptr) { return ecb_le_to_host (ecb_peek_u (ptr)); }
799              
800             template inline T ecb_host_to_be (T v) { return ecb_little_endian () ? ecb_bswap (v) : v; }
801             template inline T ecb_host_to_le (T v) { return ecb_big_endian () ? ecb_bswap (v) : v; }
802             template inline void ecb_poke (void *ptr, T v) { *(T *)ptr = v; }
803             template inline void ecb_poke_be (void *ptr, T v) { return ecb_poke (ptr, ecb_host_to_be (v)); }
804             template inline void ecb_poke_le (void *ptr, T v) { return ecb_poke (ptr, ecb_host_to_le (v)); }
805             template inline void ecb_poke_u (void *ptr, T v) { memcpy (ptr, &v, sizeof (v)); }
806             template inline void ecb_poke_be_u (void *ptr, T v) { return ecb_poke_u (ptr, ecb_host_to_be (v)); }
807             template inline void ecb_poke_le_u (void *ptr, T v) { return ecb_poke_u (ptr, ecb_host_to_le (v)); }
808              
809             #endif
810              
811             /*****************************************************************************/
812             /* division */
813              
814             #if ECB_GCC_VERSION(3,0) || ECB_C99
                /* ecb_mod (m, n): remainder of m / n adjusted so that a negative remainder */
                /* has n added to it. both forms evaluate m and n more than once. */
815             /* C99 tightened the definition of %, so we can use a more efficient version */
816             #define ecb_mod(m,n) ((m) % (n) + ((m) % (n) < 0 ? (n) : 0))
817             #else
                /* older compilers: only apply % to non-negative operands when m is negative */
818             #define ecb_mod(m,n) ((m) < 0 ? ((n) - 1 - ((-1 - (m)) % (n))) : ((m) % (n)))
819             #endif
820              
821             #if ECB_CPP
822             template
823             static inline T ecb_div_rd (T val, T div)
824             {
825             return val < 0 ? - ((-val + div - 1) / div) : (val ) / div;
826             }
827             template
828             static inline T ecb_div_ru (T val, T div)
829             {
830             return val < 0 ? - ((-val ) / div) : (val + div - 1) / div;
831             }
832             #else
833             #define ecb_div_rd(val,div) ((val) < 0 ? - ((-(val) + (div) - 1) / (div)) : ((val) ) / (div))
834             #define ecb_div_ru(val,div) ((val) < 0 ? - ((-(val) ) / (div)) : ((val) + (div) - 1) / (div))
835             #endif
836              
837             /*****************************************************************************/
838             /* array length */
839              
840             #if ecb_cplusplus_does_not_suck
841             /* does not work for local types (http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2657.htm) */
842             template
843             static inline int ecb_array_length (const T (&arr)[N])
844             {
845             return N;
846             }
847             #else
848             #define ecb_array_length(name) (sizeof (name) / sizeof (name [0]))
849             #endif
850              
851             /*****************************************************************************/
852             /* IEEE 754-2008 half float conversions */
853              
854             ecb_function_ ecb_const uint32_t ecb_binary16_to_binary32 (uint32_t x);
                /* converts the binary16 bit pattern in the low 16 bits of x into the */
                /* corresponding binary32 bit pattern. works purely on integers. */
855             ecb_function_ ecb_const uint32_t
856 5           ecb_binary16_to_binary32 (uint32_t x)
857             {
858 5           unsigned int s = (x & 0x8000) << (31 - 15); /* sign bit moved from bit 15 to bit 31 */
859 5           int e = (x >> 10) & 0x001f; /* 5 bit biased exponent */
860 5           unsigned int m = x & 0x03ff; /* 10 bit mantissa */
861              
862 5 50         if (ecb_expect_false (e == 31))
863             /* infinity or NaN */
864 0           e = 255 - (127 - 15); /* becomes 255 after the bias adjustment below */
865 5 100         else if (ecb_expect_false (!e))
866             {
867 1 50         if (ecb_expect_true (!m))
868             /* zero, handled by code below by forcing e to 0 */
869 1           e = 0 - (127 - 15);
870             else
871             {
872             /* subnormal, renormalise */
                /* note: this s is a shift count and shadows the sign variable s above */
873 0           unsigned int s = 10 - ecb_ld32 (m);
874              
875 0           m = (m << s) & 0x3ff; /* mask implicit bit */
876 0           e -= s - 1;
877             }
878             }
879              
880             /* e and m now are normalised, or zero, (or inf or nan) */
881 5           e += 127 - 15; /* convert from binary16 bias (15) to binary32 bias (127) */
882              
883 5           return s | (e << 23) | (m << (23 - 10));
884             }
885              
886             ecb_function_ ecb_const uint16_t ecb_binary32_to_binary16 (uint32_t x);
                /* converts a binary32 bit pattern into a binary16 bit pattern. */
                /* cases are handled in this order: values in the binary16 normal range, */
                /* larger values and infinity, zero/subnormal/tiny values, and finally NaNs. */
887             ecb_function_ ecb_const uint16_t
888 0           ecb_binary32_to_binary16 (uint32_t x)
889             {
890 0           unsigned int s = (x >> 16) & 0x00008000; /* sign bit, the easy part */
891 0           int e = ((x >> 23) & 0x000000ff) - (127 - 15); /* the desired exponent */
892 0           unsigned int m = x & 0x007fffff;
893              
894 0           x &= 0x7fffffff; /* drop the sign so the range checks below compare magnitudes */
895              
896             /* if it's within range of binary16 normals, use fast path */
897 0 0         if (ecb_expect_true (0x38800000 <= x && x <= 0x477fefff))
    0          
898             {
899             /* mantissa round-to-even */
900 0           m += 0x00000fff + ((m >> (23 - 10)) & 1);
901              
902             /* handle overflow */
903 0 0         if (ecb_expect_false (m >= 0x00800000))
904             {
905 0           m >>= 1;
906 0           e += 1;
907             }
908              
909 0           return s | (e << 10) | (m >> (23 - 10));
910             }
911              
912             /* handle large numbers and infinity */
913 0 0         if (ecb_expect_true (0x477fefff < x && x <= 0x7f800000))
    0          
914 0           return s | 0x7c00; /* signed infinity */
915              
916             /* handle zero, subnormals and small numbers */
917 0 0         if (ecb_expect_true (x < 0x38800000))
918             {
919             /* zero */
920 0 0         if (ecb_expect_true (!x))
921 0           return s;
922              
923             /* handle subnormals */
924              
925             /* too small, will be zero */
926 0 0         if (e < (14 - 24)) /* might not be sharp, but is good enough */
927 0           return s;
928              
929 0           m |= 0x00800000; /* make implicit bit explicit */
930              
931             /* very tricky - we need to round to the nearest e (+10) bit value */
932             {
933 0           unsigned int bits = 14 - e; /* number of low mantissa bits to shift out */
934 0           unsigned int half = (1 << (bits - 1)) - 1;
935 0           unsigned int even = (m >> bits) & 1;
936              
937             /* if this overflows, we will end up with a normalised number */
938 0           m = (m + half + even) >> bits;
939             }
940              
941 0           return s | m;
942             }
943              
944             /* handle NaNs, preserve leftmost nan bits, but make sure we don't turn them into infinities */
945 0           m >>= 13;
946              
947 0           return s | 0x7c00 | m | !m; /* !m sets the lowest mantissa bit when the shifted mantissa is zero */
948             }
949              
950             /*******************************************************************************/
951             /* fast integer to ascii */
952              
953             /*
954             * This code is pretty complicated because it is general. The idea behind it,
955             * however, is pretty simple: first, the number is multiplied with a scaling
956             * factor (2**bits / 10**(digits-1)) to convert the integer into a fixed-point
957             * number with the first digit in the upper bits.
958             * Then this digit is converted to text and masked out. The resulting number
959             * is then multiplied by 10, by multiplying the fixed point representation
960             * by 5 and shifting the (binary) decimal point one to the right, so a 4.28
961             * format becomes 5.27, 6.26 and so on.
962             * The rest involves only advancing the pointer if we already generated a
963             * non-zero digit, so leading zeroes are overwritten.
964             */
965              
966             // simply return a mask of the given type with the low "bits" bits set
967             #define ecb_i2a_mask(type,bits) ((((type)1) << (bits)) - 1)
968              
969             // output a single digit. maskvalue is 10**digitidx
                // uses the variables x, ptr and nz of the function it is expanded in
970             #define ecb_i2a_digit(type,bits,digitmask,maskvalue,digitidx) \
971             if (digitmask >= maskvalue) /* constant, used to decide how many digits to generate */ \
972             { \
973             char digit = x >> (bits - digitidx); /* calculate the topmost digit */ \
974             *ptr = digit + '0'; /* output it */ \
975             nz = (digitmask == maskvalue) || nz || digit; /* first term == always output last digit */ \
976             ptr += nz; /* output digit only if non-zero digit seen */ \
977             x = (x & ecb_i2a_mask (type, bits - digitidx)) * 5; /* *10, but shift decimal point right */ \
978             }
979              
980             // convert integer to fixed point format and multiply out digits, highest first
981             // requires magic constants: max. digits and number of bits after the decimal point
                // defines a function ecb_i2a_<suffix> (char *ptr, uint32_t u) that writes the digits
                // at ptr and returns the pointer behind the last digit written; no terminator is written.
                // lz is the initial "non-zero digit seen" flag, so lz = 1 keeps leading zeroes.
                // note: the macro parameter v is not used in the expansion, the argument is always named u.
982             #define ecb_i2a_def(suffix,ptr,v,type,bits,digitmask,lz) \
983             ecb_inline char *ecb_i2a_ ## suffix (char *ptr, uint32_t u) \
984             { \
985             char nz = lz; /* non-zero digit seen? */ \
986             /* convert to x.bits fixed-point */ \
987             type x = u * ((ecb_i2a_mask (type, bits) + digitmask) / digitmask); \
988             /* output up to 10 digits */ \
989             ecb_i2a_digit (type,bits,digitmask, 1, 0); \
990             ecb_i2a_digit (type,bits,digitmask, 10, 1); \
991             ecb_i2a_digit (type,bits,digitmask, 100, 2); \
992             ecb_i2a_digit (type,bits,digitmask, 1000, 3); \
993             ecb_i2a_digit (type,bits,digitmask, 10000, 4); \
994             ecb_i2a_digit (type,bits,digitmask, 100000, 5); \
995             ecb_i2a_digit (type,bits,digitmask, 1000000, 6); \
996             ecb_i2a_digit (type,bits,digitmask, 10000000, 7); \
997             ecb_i2a_digit (type,bits,digitmask, 100000000, 8); \
998             ecb_i2a_digit (type,bits,digitmask, 1000000000, 9); \
999             return ptr; \
1000             }
1001              
1002             // predefined versions of the above, for various digits
1003             // ecb_i2a_xN = almost N digits, limit defined by macro
1004             // ecb_i2a_N = up to N digits, leading zeroes suppressed
1005             // ecb_i2a_0N = exactly N digits, including leading zeroes
1006              
1007             // non-leading-zero versions, limited range
                 // arguments larger than the ECB_I2A_MAX_* limit must not be passed to these
1008             #define ECB_I2A_MAX_X5 59074 // limit for ecb_i2a_x5
1009             #define ECB_I2A_MAX_X10 2932500665 // limit for ecb_i2a_x10
1010             ecb_i2a_def ( x5, ptr, v, uint32_t, 26, 10000, 0)
1011             ecb_i2a_def (x10, ptr, v, uint64_t, 60, 1000000000, 0)
1012              
1013             // non-leading zero versions, all digits, 4 and 9 are optimal for 32/64 bit
1014             ecb_i2a_def ( 2, ptr, v, uint32_t, 10, 10, 0)
1015             ecb_i2a_def ( 3, ptr, v, uint32_t, 12, 100, 0)
1016             ecb_i2a_def ( 4, ptr, v, uint32_t, 26, 1000, 0)
1017             ecb_i2a_def ( 5, ptr, v, uint64_t, 30, 10000, 0)
1018             ecb_i2a_def ( 6, ptr, v, uint64_t, 36, 100000, 0)
1019             ecb_i2a_def ( 7, ptr, v, uint64_t, 44, 1000000, 0)
1020             ecb_i2a_def ( 8, ptr, v, uint64_t, 50, 10000000, 0)
1021             ecb_i2a_def ( 9, ptr, v, uint64_t, 56, 100000000, 0)
1022              
1023             // leading-zero versions, all digits, 04 and 09 are optimal for 32/64 bit
1024             ecb_i2a_def (02, ptr, v, uint32_t, 10, 10, 1)
1025             ecb_i2a_def (03, ptr, v, uint32_t, 12, 100, 1)
1026             ecb_i2a_def (04, ptr, v, uint32_t, 26, 1000, 1)
1027             ecb_i2a_def (05, ptr, v, uint64_t, 30, 10000, 1)
1028             ecb_i2a_def (06, ptr, v, uint64_t, 36, 100000, 1)
1029             ecb_i2a_def (07, ptr, v, uint64_t, 44, 1000000, 1)
1030             ecb_i2a_def (08, ptr, v, uint64_t, 50, 10000000, 1)
1031             ecb_i2a_def (09, ptr, v, uint64_t, 56, 100000000, 1)
1032              
                 // presumably the buffer sizes (in characters, sign included) needed by the
                 // ecb_i2a_i32/u32/i64/u64 functions - TODO confirm against the documentation.
                 // NOTE(review): a uint64_t has at most 20 decimal digits, so 21 for U64 looks
                 // like deliberate slack or a swap with I64 - confirm before relying on it.
1033             #define ECB_I2A_I32_DIGITS 11
1034             #define ECB_I2A_U32_DIGITS 10
1035             #define ECB_I2A_I64_DIGITS 20
1036             #define ECB_I2A_U64_DIGITS 21
1037             #define ECB_I2A_MAX_DIGITS 21
1038              
1039             ecb_inline char *
                 /* writes the decimal digits of u at ptr, without leading zeroes and without */
                 /* a terminator, and returns the pointer behind the last digit written. */
1040             ecb_i2a_u32 (char *ptr, uint32_t u)
1041             {
1042             #if ECB_64BIT_NATIVE
1043             if (ecb_expect_true (u <= ECB_I2A_MAX_X10))
1044             ptr = ecb_i2a_x10 (ptr, u);
1045             else // x10 almost, but not fully, covers 32 bit
1046             {
1047             uint32_t u1 = u % 1000000000;
1048             uint32_t u2 = u / 1000000000;
1049              
1050             *ptr++ = u2 + '0'; /* u2 is a single digit here */
1051             ptr = ecb_i2a_09 (ptr, u1);
1052             }
1053             #else
1054             if (ecb_expect_true (u <= ECB_I2A_MAX_X5))
                 /* the result must be stored, otherwise the unadvanced pointer is returned */
1055             ptr = ecb_i2a_x5 (ptr, u);
1056             else if (ecb_expect_true (u <= ECB_I2A_MAX_X5 * 10000))
1057             {
1058             uint32_t u1 = u % 10000;
1059             uint32_t u2 = u / 10000;
1060              
1061             ptr = ecb_i2a_x5 (ptr, u2);
1062             ptr = ecb_i2a_04 (ptr, u1);
1063             }
1064             else
1065             {
                 /* split into three groups: up to 2 digits, then two groups of exactly 4 */
1066             uint32_t u1 = u % 10000;
1067             uint32_t ua = u / 10000;
1068             uint32_t u2 = ua % 10000;
1069             uint32_t u3 = ua / 10000;
1070              
1071             ptr = ecb_i2a_2 (ptr, u3);
1072             ptr = ecb_i2a_04 (ptr, u2);
1073             ptr = ecb_i2a_04 (ptr, u1);
1074             }
1075             #endif
1076              
1077             return ptr;
1078             }
1079              
1080             ecb_inline char *
                 /* writes the decimal representation of v at ptr, with a leading '-' for */
                 /* negative values, and returns the pointer behind the last character written. */
1081             ecb_i2a_i32 (char *ptr, int32_t v)
1082             {
1083             *ptr = '-'; ptr += v < 0; /* '-' is always stored, but only kept when v is negative */
1084             uint32_t u = v < 0 ? -(uint32_t)v : v; /* magnitude, negated in unsigned arithmetic */
1085              
1086             #if ECB_64BIT_NATIVE
1087             ptr = ecb_i2a_x10 (ptr, u); // x10 fully covers 31 bit
1088             #else
1089             ptr = ecb_i2a_u32 (ptr, u);
1090             #endif
1091              
1092             return ptr;
1093             }
1094              
1095             ecb_inline char *
                 /* writes the decimal digits of u at ptr, without leading zeroes and without */
                 /* a terminator, and returns the pointer behind the last digit written. */
1096             ecb_i2a_u64 (char *ptr, uint64_t u)
1097             {
1098             #if ECB_64BIT_NATIVE
1099             if (ecb_expect_true (u <= ECB_I2A_MAX_X10))
1100             ptr = ecb_i2a_x10 (ptr, u);
1101             else if (ecb_expect_false (u <= ECB_I2A_MAX_X10 * 1000000000))
1102             {
                 /* two groups: a leading x10 group and exactly 9 trailing digits */
1103             uint64_t u1 = u % 1000000000;
1104             uint64_t u2 = u / 1000000000;
1105              
1106             ptr = ecb_i2a_x10 (ptr, u2);
1107             ptr = ecb_i2a_09 (ptr, u1);
1108             }
1109             else
1110             {
                 /* three groups: up to 2 leading digits, then two groups of exactly 9 digits */
1111             uint64_t u1 = u % 1000000000;
1112             uint64_t ua = u / 1000000000;
1113             uint64_t u2 = ua % 1000000000;
1114             uint64_t u3 = ua / 1000000000;
1115              
1116             ptr = ecb_i2a_2 (ptr, u3);
1117             ptr = ecb_i2a_09 (ptr, u2);
1118             ptr = ecb_i2a_09 (ptr, u1);
1119             }
1120             #else
1121             if (ecb_expect_true (u <= ECB_I2A_MAX_X5))
1122             ptr = ecb_i2a_x5 (ptr, u);
1123             else
1124             {
                 /* recursive: convert everything but the last 4 digits first, then append those */
1125             uint64_t u1 = u % 10000;
1126             uint64_t u2 = u / 10000;
1127              
1128             ptr = ecb_i2a_u64 (ptr, u2);
1129             ptr = ecb_i2a_04 (ptr, u1);
1130             }
1131             #endif
1132              
1133             return ptr;
1134             }
1135              
1136             ecb_inline char *
                 /* writes the decimal representation of v at ptr, with a leading '-' for */
                 /* negative values, and returns the pointer behind the last character written. */
1137             ecb_i2a_i64 (char *ptr, int64_t v)
1138             {
1139             *ptr = '-'; ptr += v < 0; /* '-' is always stored, but only kept when v is negative */
1140             uint64_t u = v < 0 ? -(uint64_t)v : v; /* magnitude, negated in unsigned arithmetic */
1141              
1142             #if ECB_64BIT_NATIVE
1143             if (ecb_expect_true (u <= ECB_I2A_MAX_X10))
1144             ptr = ecb_i2a_x10 (ptr, u);
1145             else if (ecb_expect_false (u <= ECB_I2A_MAX_X10 * 1000000000))
1146             {
1147             uint64_t u1 = u % 1000000000;
1148             uint64_t u2 = u / 1000000000;
1149              
1150             ptr = ecb_i2a_x10 (ptr, u2);
1151             ptr = ecb_i2a_09 (ptr, u1);
1152             }
1153             else
1154             {
1155             uint64_t u1 = u % 1000000000;
1156             uint64_t ua = u / 1000000000;
1157             uint64_t u2 = ua % 1000000000;
1158             uint64_t u3 = ua / 1000000000;
1159              
1160             // 2**63 has 19 digits, so the top group is exactly one digit
1161             *ptr++ = u3 + '0';
1162             ptr = ecb_i2a_09 (ptr, u2);
1163             ptr = ecb_i2a_09 (ptr, u1);
1164             }
1165             #else
1166             ptr = ecb_i2a_u64 (ptr, u);
1167             #endif
1168              
1169             return ptr;
1170             }
1171              
1172             /*******************************************************************************/
1173             /* floating point stuff, can be disabled by defining ECB_NO_LIBM */
1174              
1175             /* basically, everything uses "ieee pure-endian" floating point numbers */
1176             /* the only noteworthy exception is ancient armle, which uses order 43218765 */
1177             #if 0 \
1178             || __i386 || __i386__ \
1179             || ECB_GCC_AMD64 \
1180             || __powerpc__ || __ppc__ || __powerpc64__ || __ppc64__ \
1181             || defined __s390__ || defined __s390x__ \
1182             || defined __mips__ \
1183             || defined __alpha__ \
1184             || defined __hppa__ \
1185             || defined __ia64__ \
1186             || defined __m68k__ \
1187             || defined __m88k__ \
1188             || defined __sh__ \
1189             || defined _M_IX86 || defined ECB_MSVC_AMD64 || defined _M_IA64 \
1190             || (defined __arm__ && (defined __ARM_EABI__ || defined __EABI__ || defined __VFP_FP__ || defined _WIN32_WCE || defined __ANDROID__)) \
1191             || defined __aarch64__
                 /* known platform: the float conversion functions copy the bits with memcpy */
1192             #define ECB_STDFP 1
1193             #else
                 /* unknown platform: the float conversion functions use the slow emulation */
1194             #define ECB_STDFP 0
1195             #endif
1196              
1197             #ifndef ECB_NO_LIBM
1198              
1199             #include <math.h> /* for frexp*, ldexp*, INFINITY, NAN */
1200              
1201             /* only the oldest of old doesn't have this one. solaris. */
1202             #ifdef INFINITY
1203             #define ECB_INFINITY INFINITY
1204             #else
1205             #define ECB_INFINITY HUGE_VAL
1206             #endif
1207              
                 /* without NAN, ECB_NAN falls back to infinity, which is not a NaN */
1208             #ifdef NAN
1209             #define ECB_NAN NAN
1210             #else
1211             #define ECB_NAN ECB_INFINITY
1212             #endif
1213              
                 /* float versions of ldexp/frexp, emulated through the double versions when */
                 /* the environment is not known to provide ldexpf/frexpf */
1214             #if ECB_C99 || _XOPEN_VERSION >= 600 || _POSIX_VERSION >= 200112L
1215             #define ecb_ldexpf(x,e) ldexpf ((x), (e))
1216             #define ecb_frexpf(x,e) frexpf ((x), (e))
1217             #else
1218             #define ecb_ldexpf(x,e) (float) ldexp ((double) (x), (e))
1219             #define ecb_frexpf(x,e) (float) frexp ((double) (x), (e))
1220             #endif
1221              
1222             /* convert a float to ieee single/binary32 */
                 /* returns the binary32 bit pattern as a uint32_t */
1223             ecb_function_ ecb_const uint32_t ecb_float_to_binary32 (float x);
1224             ecb_function_ ecb_const uint32_t
1225 3           ecb_float_to_binary32 (float x)
1226             {
1227             uint32_t r;
1228              
1229             #if ECB_STDFP
                 /* float is assumed to already be binary32 here: copy the 4 bytes verbatim */
1230 3           memcpy (&r, &x, 4);
1231             #else
1232             /* slow emulation, works for anything but -0 */
1233             uint32_t m;
1234             int e;
1235              
                 /* special cases first: zero, +-overflow to infinity, NaN */
1236             if (x == 0e0f ) return 0x00000000U;
1237             if (x > +3.40282346638528860e+38f) return 0x7f800000U;
1238             if (x < -3.40282346638528860e+38f) return 0xff800000U;
1239             if (x != x ) return 0x7fbfffffU;
1240              
                 /* scale the frexpf fraction to a 24 bit integer mantissa */
1241             m = ecb_frexpf (x, &e) * 0x1000000U;
1242              
1243             r = m & 0x80000000U; /* sign bit */
1244              
1245             if (r)
1246             m = -m;
1247              
                 /* subnormal result: shift the mantissa down and use the minimum exponent */
1248             if (e <= -126)
1249             {
1250             m &= 0xffffffU;
1251             m >>= (-125 - e);
1252             e = -126;
1253             }
1254              
1255             r |= (e + 126) << 23;
1256             r |= m & 0x7fffffU;
1257             #endif
1258              
1259 3           return r;
1260             }
1261              
1262             /* converts an ieee single/binary32 to a float */
                 /* with ECB_STDFP set, the 4 bytes of x are copied unchanged into the float
                  * via memcpy; otherwise the value is rebuilt from the sign, exponent and
                  * mantissa fields with ecb_ldexpf. */
1263             ecb_function_ ecb_const float ecb_binary32_to_float (uint32_t x);
1264             ecb_function_ ecb_const float
1265 9           ecb_binary32_to_float (uint32_t x)
1266             {
1267             float r;
1268              
1269             #if ECB_STDFP
1270 9           memcpy (&r, &x, 4);
1271             #else
1272             /* emulation, only works for normals and subnormals and +0 */
                 /* neg is bit 31 (sign), e is the 8-bit exponent field from bits 23..30 */
1273             int neg = x >> 31;
1274             int e = (x >> 23) & 0xffU;
1275              
                 /* keep only the 23 stored mantissa bits */
1276             x &= 0x7fffffU;
1277              
                 /* non-zero exponent field: add the implicit leading mantissa bit;
                  * zero exponent field: no implicit bit, and the exponent of field value 1 is used */
1278             if (e)
1279             x |= 0x800000U;
1280             else
1281             e = 1;
1282              
1283             /* we distrust ldexpf a bit and do the 2**-24 scaling by an extra multiply */
                 /* (0.5f / 0x800000U) is 2**-24, so the result is x * 2**-24 * 2**(e - 126) */
1284             r = ecb_ldexpf (x * (0.5f / 0x800000U), e - 126);
1285              
                 /* apply the sign last */
1286             r = neg ? -r : r;
1287             #endif
1288              
1289 9           return r;
1290             }
1291              
1292             /* convert a double to ieee double/binary64 */
                 /* returns the 64-bit pattern for x as a uint64_t.
                  * with ECB_STDFP set, the first 8 bytes of x are copied unchanged via memcpy;
                  * otherwise the pattern is assembled arithmetically from frexp. */
1293             ecb_function_ ecb_const uint64_t ecb_double_to_binary64 (double x);
1294             ecb_function_ ecb_const uint64_t
1295 8           ecb_double_to_binary64 (double x)
1296             {
1297             uint64_t r;
1298              
1299             #if ECB_STDFP
1300 8           memcpy (&r, &x, 8);
1301             #else
1302             /* slow emulation, works for anything but -0 */
1303             uint64_t m;
1304             int e;
1305              
                 /* special cases first: anything comparing equal to zero gives the all-zero
                  * pattern (hence the -0 caveat), values beyond +/-1.79769313486231470e+308
                  * give the +/-infinity patterns, and NaN (x != x) gives one fixed NaN pattern */
1306             if (x == 0e0 ) return 0x0000000000000000U;
1307             if (x > +1.79769313486231470e+308) return 0x7ff0000000000000U;
1308             if (x < -1.79769313486231470e+308) return 0xfff0000000000000U;
1309             if (x != x ) return 0X7ff7ffffffffffffU;
1310              
                 /* frexp returns a fraction with magnitude in [0.5, 1) and stores the binary
                  * exponent in e; scaling by 2**53 turns the fraction into a 53-bit mantissa.
                  * NOTE(review): for negative x the product is negative, and converting a
                  * negative floating value to uint64_t is undefined in ISO C; the sign test
                  * below relies on the result wrapping - confirm on targets using this path. */
1311             m = frexp (x, &e) * 0x20000000000000U;
1312              
                 /* bit 63 of m becomes the sign bit of the result
                  * (the second ';' on the next row is just an empty statement) */
1313             r = m & 0x8000000000000000;;
1314              
                 /* sign bit set: negate m to get the magnitude */
1315             if (r)
1316             m = -m;
1317              
                 /* subnormal range: shift the mantissa down and pin e to -1022 so that the
                  * exponent field (e + 1022) written below becomes 0 */
1318             if (e <= -1022)
1319             {
1320             m &= 0x1fffffffffffffU;
1321             m >>= (-1021 - e);
1322             e = -1022;
1323             }
1324              
                 /* exponent field goes into bits 52 and up, then the low 52 mantissa bits
                  * (bit 52 of m, the leading mantissa bit, is masked off) */
1325             r |= ((uint64_t)(e + 1022)) << 52;
1326             r |= m & 0xfffffffffffffU;
1327             #endif
1328              
1329 8           return r;
1330             }
1331              
1332             /* converts an ieee double/binary64 to a double */
                 /* with ECB_STDFP set, the 8 bytes of x are copied unchanged into the double
                  * via memcpy; otherwise the value is rebuilt from the sign, exponent and
                  * mantissa fields with ldexp. */
1333             ecb_function_ ecb_const double ecb_binary64_to_double (uint64_t x);
1334             ecb_function_ ecb_const double
1335 8           ecb_binary64_to_double (uint64_t x)
1336             {
1337             double r;
1338              
1339             #if ECB_STDFP
1340 8           memcpy (&r, &x, 8);
1341             #else
1342             /* emulation, only works for normals and subnormals and +0 */
                 /* neg is bit 63 (sign), e is the 11-bit exponent field from bits 52..62 */
1343             int neg = x >> 63;
1344             int e = (x >> 52) & 0x7ffU;
1345              
                 /* keep only the 52 stored mantissa bits */
1346             x &= 0xfffffffffffffU;
1347              
                 /* non-zero exponent field: add the implicit leading mantissa bit;
                  * zero exponent field: no implicit bit, and the exponent of field value 1 is used */
1348             if (e)
1349             x |= 0x10000000000000U;
1350             else
1351             e = 1;
1352              
1353             /* we distrust ldexp a bit and do the 2**-53 scaling by an extra multiply */
                 /* (0.5 / 0x10000000000000U) is 2**-53, so the result is x * 2**-53 * 2**(e - 1022) */
1354             r = ldexp (x * (0.5 / 0x10000000000000U), e - 1022);
1355              
                 /* apply the sign last */
1356             r = neg ? -r : r;
1357             #endif
1358              
1359 8           return r;
1360             }
1361              
1362             /* convert a float to ieee half/binary16 */
                 /* two-step conversion: x is first turned into its binary32 pattern with
                  * ecb_float_to_binary32, which is then passed to ecb_binary32_to_binary16.
                  * ecb_binary32_to_binary16 is not defined in this part of the file -
                  * presumably earlier in the header; its rounding behaviour is not visible here. */
1363             ecb_function_ ecb_const uint16_t ecb_float_to_binary16 (float x);
1364             ecb_function_ ecb_const uint16_t
1365 0           ecb_float_to_binary16 (float x)
1366             {
1367 0           return ecb_binary32_to_binary16 (ecb_float_to_binary32 (x));
1368             }
1369              
1370             /* convert an ieee half/binary16 to float */
                 /* two-step conversion: x is first widened to a binary32 pattern with
                  * ecb_binary16_to_binary32, which is then passed to ecb_binary32_to_float.
                  * ecb_binary16_to_binary32 is not defined in this part of the file -
                  * presumably earlier in the header. */
1371             ecb_function_ ecb_const float ecb_binary16_to_float (uint16_t x);
1372             ecb_function_ ecb_const float
1373 5           ecb_binary16_to_float (uint16_t x)
1374             {
1375 5           return ecb_binary32_to_float (ecb_binary16_to_binary32 (x));
1376             }
1377              
1378             #endif
1379              
1380             #endif
1381